diff --git a/.gitignore b/.gitignore index 0b9ee5d..c3d869c 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,7 @@ conversion_scripts/DepthAnythingV3/ conversion_scripts/Nitro-E/core/ conversion_scripts/Nitro-E/reference_dump/ conversion_scripts/Nitro-E/*.png +conversion_scripts/Pixelization/ +conversion_scripts/pixelization_sample_*.png sample_apps/NitroEDemo/NitroEDemo/Llama3Vocab.json sample_apps/NitroEDemo/NitroEDemo/Llama3Merges.txt diff --git a/README.md b/README.md index 5a4220b..6f96c82 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,7 @@ You are free to do or not. - [DCGAN](#dcgan) - [**Image2Image**](#image2image) + - [Pixelization](#pixelization) - [Anime2Sketch](#anime2sketch) - [AnimeGAN2Face_Paint_512_v2](#animegan2face_paint_512_v2) - [Photo2Cartoon](#photo2cartoon) @@ -782,6 +783,14 @@ Low Light Enhancement # Image2Image +### Pixelization + +Make Your Own Sprites: Aliasing-Aware and Cell-Controllable Pixelization (SIGGRAPH Asia 2022). Turns any photo into pixel art; the `cell_size` slider (2–8) controls pixel block size at post-processing time — the network runs once per photo. 
+ +| Google Drive / HF Link | Size | Input / Output | Original Project | License | Year | Sample Project | Conversion Script | +| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | +| [Pixelization_512.mlpackage.zip](https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/pixelization/Pixelization_512.mlpackage.zip) | 38MB (FP16) | Image (RGB 512×512) → Image (RGB 512×512) | [WuZongWei6/Pixelization](https://github.com/WuZongWei6/Pixelization) | [Non-commercial research](https://github.com/WuZongWei6/Pixelization/blob/main/LICENSE.md) | 2022 | [PixelizationDemo](./sample_apps/PixelizationDemo/) | [convert_pixelization.py](./conversion_scripts/convert_pixelization.py) | + ### [Anime2Sketch](https://drive.google.com/file/d/1-52NnZ1kajZI5Rk0tn3DegpU38la_jYk/view?usp=sharing) diff --git a/conversion_scripts/convert_pixelization.py b/conversion_scripts/convert_pixelization.py new file mode 100644 index 0000000..2663a05 --- /dev/null +++ b/conversion_scripts/convert_pixelization.py @@ -0,0 +1,340 @@ +""" +Convert Pixelization (SIGGRAPH Asia 2022) to CoreML. + +Repo: https://github.com/WuZongWei6/Pixelization +Weights mirror: https://huggingface.co/ashleykleynhans/pixelization + +Architecture: + G_A (C2PGen): RGBEnc -> RGBDec(modulated by cellcode) -> tanh image + where cellcode = MLP(fixed_256d_vector) is a precomputed [1, 2048] style code. + alias_net: AliasRGBEncoder -> AliasRGBDecoder -> tanh anti-aliased image. 
+ +Pipeline (baked into a single mlpackage): + input[0,1] RGB + -> x = 2x-1 (normalize to [-1,1]) + -> feature = RGBEnc(x) + -> y = RGBDec(feature, cellcode) + -> y = alias_net(y) + -> y = (y+1)*127.5 clamped (denorm to [0,255]) + -> output RGB image + +Post-processing (done in Swift, not in the model): + nearest-neighbor downscale by 4 -> logical pixel grid + nearest-neighbor upscale by cell_size -> display size + +Usage: + python convert_pixelization.py --size 512 +""" + +import argparse +import os +import sys + +import numpy as np +import torch +import torch.nn as nn +import torchvision.models as tvmodels + +import coremltools as ct + +REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Pixelization") +sys.path.insert(0, REPO_DIR) + + +# Precomputed MLP_code constant from upstream test_pro.py (reshape to [1, 256, 1, 1]). +MLP_CODE = [ + 233356.8125, -27387.5918, -32866.8008, 126575.0312, -181590.0156, + -31543.1289, 50374.1289, 99631.4062, -188897.3750, 138322.7031, + -107266.2266, 125778.5781, 42416.1836, 139710.8594, -39614.6250, + -69972.6875, -21886.4141, 86938.4766, 31457.6270, -98892.2344, + -1191.5887, -61662.1719, -180121.9062, -32931.0859, 43109.0391, + 21490.1328, -153485.3281, 94259.1797, 43103.1992, -231953.8125, + 52496.7422, 142697.4062, -34882.7852, -98740.0625, 34458.5078, + -135436.3438, 11420.5488, -18895.8984, -71195.4141, 176947.2344, + -52747.5742, 109054.6562, -28124.9473, -17736.6152, -41327.1562, + 69853.3906, 79046.2656, -3923.7344, -5644.5229, 96586.7578, + -89315.2656, -146578.0156, -61862.1484, -83956.4375, 87574.5703, + -75055.0469, 19571.8203, 79358.7891, -16501.5000, -147169.2188, + -97861.6797, 60442.1797, 40156.9023, 223136.3906, -81118.0547, + -221443.6406, 54911.6914, 54735.9258, -58805.7305, -168884.4844, + 40865.9609, -28627.9043, -18604.7227, 120274.6172, 49712.2383, + 164402.7031, -53165.0820, -60664.0469, -97956.1484, -121468.4062, + -69926.1484, -4889.0151, 127367.7344, 200241.0781, -85817.7578, + -143190.0625, 
-74049.5312, 137980.5781, -150788.7656, -115719.6719, + -189250.1250, -153069.7344, -127429.7891, -187588.2500, 125264.7422, + -79082.3438, -114144.5781, 36033.5039, -57502.2188, 80488.1562, + 36501.4570, -138817.5938, -22189.6523, -222146.9688, -73292.3984, + 127717.2422, -183836.3750, -105907.0859, 145422.8750, 66981.2031, + -9596.6699, 78099.4922, 70226.3359, 35841.8789, -116117.6016, + -150986.0156, 81622.4922, 113575.0625, 154419.4844, 53586.4141, + 118494.8750, 131625.4375, -19763.1094, 75581.1172, -42750.5039, + 97934.8281, 6706.7949, -101179.0078, 83519.6172, -83054.8359, + -56749.2578, -30683.6992, 54615.9492, 84061.1406, -229136.7188, + -60554.0000, 8120.2622, -106468.7891, -28316.3418, -166351.3125, + 47797.3984, 96013.4141, 71482.9453, -101429.9297, 209063.3594, + -3033.6882, -38952.5352, -84920.6719, -5895.1543, -18641.8105, + 47884.3633, -14620.0273, -132898.6719, -40903.5859, 197217.3750, + -128599.1328, -115397.8906, -22670.7676, -78569.9688, -54559.7070, + -106855.2031, 40703.1484, 55568.3164, 60202.9844, -64757.9375, + -32068.8652, 160663.3438, 72187.0703, -148519.5469, 162952.8906, + -128048.2031, -136153.8906, -15270.3730, -52766.3281, -52517.4531, + 18652.1992, 195354.2188, -136657.3750, -8034.2622, -92699.6016, + -129169.1406, 188479.9844, 46003.7500, -93383.0781, -67831.6484, + -66710.5469, 104338.5234, 85878.8438, -73165.2031, 95857.3203, + 71213.1250, 94603.1094, -30359.8125, -107989.2578, 99822.1719, + 184626.3594, 79238.4531, -272978.9375, -137948.5781, -145245.8125, + 75359.2031, 26652.7930, 50421.4141, 60784.4102, -18286.3398, + -182851.9531, -87178.7969, -13131.7539, 195674.8906, 59951.7852, + 124353.7422, -36709.1758, -54575.4766, 77822.6953, 43697.4102, + -64394.3438, 113281.1797, -93987.0703, 221989.7188, 132902.5000, + -9538.8574, -14594.1338, 65084.9453, -12501.7227, 130330.6875, + -115123.4766, 20823.0898, 75512.4922, -75255.7422, -41936.7656, + -186678.8281, -166799.9375, 138770.6250, -78969.9531, 124516.8047, + -85558.5781, 
-69272.4375, -115539.1094, 228774.4844, -76529.3281, + -107735.8906, -76798.8906, -194335.2812, 56530.5742, -9397.7529, + 132985.8281, 163929.8438, -188517.7969, -141155.6406, 45071.0391, + 207788.3125, -125826.1172, 8965.3320, -159584.8438, 95842.4609, + -76929.4688, +] + + +def _prepare_dummy_vgg_weights(): + """C2PGen.__init__ insists on loading ./pixelart_vgg19.pth (cwd-relative). + The VGG branch (PixelBlockEncoder) is only used during training and is + unreachable at inference, but we still need the file to exist so + construction succeeds. Write a dummy with matching structure; the real + weights get overwritten when we load 160_net_G_A.pth anyway.""" + path = "./pixelart_vgg19.pth" + if os.path.exists(path): + return + vgg = tvmodels.vgg.vgg19(weights=None) + vgg.classifier._modules["6"] = nn.Linear(4096, 7, bias=True) + torch.save(vgg.state_dict(), path) + + +def _swap_layernorm_with_groupnorm(module): + """Replace the upstream custom LayerNorm (global mean/std + per-channel + affine) with the mathematically equivalent nn.GroupNorm(1, C). The manual + expansion (`x.view(-1).std()` over ~8M elements) diverges badly in FP16 — + coremltools' native group_norm op handles it correctly.""" + from models.basic_layer import LayerNorm as UpstreamLN + for name, ch in list(module.named_children()): + if isinstance(ch, UpstreamLN): + gn = nn.GroupNorm(1, ch.num_features, eps=ch.eps) + with torch.no_grad(): + gn.weight.data.copy_(ch.gamma.data) + gn.bias.data.copy_(ch.beta.data) + setattr(module, name, gn) + else: + _swap_layernorm_with_groupnorm(ch) + + +def build_pytorch_model(): + # Run from REPO_DIR so relative paths in the vendored code resolve. 
+ os.chdir(REPO_DIR) + _prepare_dummy_vgg_weights() + from models.networks import define_G + + g_a = define_G(3, 3, 64, "c2pGen", "instance", False, "normal", 0.02, []) + alias = define_G(3, 3, 64, "antialias", "instance", False, "normal", 0.02, []) + + g_a_sd = torch.load( + "checkpoints/pixelize/160_net_G_A.pth", map_location="cpu" + ) + alias_sd = torch.load("alias_net.pth", map_location="cpu") + g_a.load_state_dict(g_a_sd) + alias.load_state_dict(alias_sd) + g_a.eval() + alias.eval() + + _swap_layernorm_with_groupnorm(g_a) + _swap_layernorm_with_groupnorm(alias) + + with torch.no_grad(): + code = torch.tensor(MLP_CODE).reshape(1, 256, 1, 1) + cellcode = g_a.MLP(code).detach() # [1, 2048] + return g_a, alias, cellcode + + +class BakedModConv(nn.Module): + """ModulationConvBlock with the (fixed) cellcode folded into the conv + weights. The original op computes (W*c)/norm(W*c) at every forward; since c + is constant we precompute that in FP32 and store it as a plain Conv2d + weight, which keeps FP16 inference safe (W*c alone overflows FP16 because + cellcode magnitudes reach 1e8).""" + + def __init__(self, orig, code_chunk): + super().__init__() + import torch.nn.functional as F + self.F = F + in_c = orig.in_c + out_c = orig.out_c + k = orig.ksize + with torch.no_grad(): + w = orig.weight * orig.wscale # (out_c, in_c, k, k) + # Match the original view/permute sequence exactly (no semantic + # transpose — this is the upstream convention). 
+ _w = w.view(1, k, k, in_c, out_c) + _w = _w * code_chunk.view(1, 1, 1, in_c, 1) + norm = torch.sqrt((_w ** 2).sum(dim=[1, 2, 3]) + orig.eps) + _w = _w / norm.view(1, 1, 1, 1, out_c) + w_perm = _w.permute(1, 2, 3, 0, 4).reshape(k, k, in_c, out_c) + w_final = w_perm.permute(3, 2, 0, 1).contiguous() # (out_c, in_c, k, k) + self.register_buffer("weight", w_final) + self.bias = nn.Parameter(orig.bias.detach().clone()) + self.padding = k // 2 + + def forward(self, x): + x = self.F.conv2d(x, self.weight, bias=None, padding=self.padding) + x = x + self.bias.view(1, -1, 1, 1) + x = self.F.leaky_relu(x, 0.2, inplace=False) * (2.0 ** 0.5) + return x + + +class BakedRGBDec(nn.Module): + """RGBDec with cellcode folded in, replacing the 8 modulation convs. + Upstream reuses `mod_conv_2` for 7 of the 8 calls (mod_conv_3..8 are + defined but unused); we preserve that behavior exactly.""" + + def __init__(self, orig, cellcode): + super().__init__() + baked = [] + for i in range(8): + src = orig.mod_conv_1 if i == 0 else orig.mod_conv_2 + chunk = cellcode[:, i * 256 : (i + 1) * 256] + baked.append(BakedModConv(src, chunk)) + self.baked = nn.ModuleList(baked) + self.upsample_block1 = orig.upsample_block1 + self.conv_1 = orig.conv_1 + self.upsample_block2 = orig.upsample_block2 + self.conv_2 = orig.conv_2 + self.conv_3 = orig.conv_3 + + def forward(self, x): + residual = x + x = self.baked[0](x); x = self.baked[1](x); x = x + residual + residual = x + x = self.baked[2](x); x = self.baked[3](x); x = x + residual + residual = x + x = self.baked[4](x); x = self.baked[5](x); x = x + residual + residual = x + x = self.baked[6](x); x = self.baked[7](x); x = x + residual + x = self.upsample_block1(x) + x = self.conv_1(x) + x = self.upsample_block2(x) + x = self.conv_2(x) + x = self.conv_3(x) + return x + + +class PixelizationWrapper(nn.Module): + """Input: RGB image in [0, 1], NCHW. 
Output: pixelized RGB in [0, 255].""" + + def __init__(self, g_a, alias, cellcode): + super().__init__() + self.rgb_enc = g_a.RGBEnc + self.rgb_dec = BakedRGBDec(g_a.RGBDec, cellcode) + self.alias = alias + + def forward(self, image): + # `image` is in [0, 1] because ImageType sets scale=1/255. + x = image * 2.0 - 1.0 + feature = self.rgb_enc(x) + y = self.rgb_dec(feature) + y = self.alias(y) + # Scale to [0, 255] for ImageType output. + y = (y + 1.0) * 127.5 + return torch.clamp(y, 0.0, 255.0) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--size", type=int, default=512, + help="Input H=W (must be multiple of 4).") + parser.add_argument("--precision", choices=["fp16", "fp32"], default="fp16") + parser.add_argument("--output-dir", type=str, + default=os.path.dirname(os.path.abspath(__file__))) + args = parser.parse_args() + assert args.size % 4 == 0, "size must be a multiple of 4" + args.output_dir = os.path.abspath(args.output_dir) + + print("Loading PyTorch weights...") + g_a, alias, cellcode = build_pytorch_model() + wrapper = PixelizationWrapper(g_a, alias, cellcode).eval() + + dummy = torch.rand(1, 3, args.size, args.size) + with torch.no_grad(): + torch_out = wrapper(dummy) + print(f"PyTorch output shape={tuple(torch_out.shape)}, " + f"min={torch_out.min():.3f}, max={torch_out.max():.3f} (range [0,255])") + + print("Tracing...") + with torch.no_grad(): + traced = torch.jit.trace(wrapper, dummy) + + precision = (ct.precision.FLOAT16 if args.precision == "fp16" + else ct.precision.FLOAT32) + print(f"Converting to CoreML {args.precision.upper()}...") + ml = ct.convert( + traced, + inputs=[ct.ImageType( + name="image", + shape=(1, 3, args.size, args.size), + scale=1.0 / 255.0, + color_layout=ct.colorlayout.RGB, + )], + outputs=[ct.ImageType( + name="pixelized", + color_layout=ct.colorlayout.RGB, + )], + minimum_deployment_target=ct.target.iOS17, + compute_precision=precision, + convert_to="mlprogram", + ) + ml.author = "WuZongWei6 
(paper) / CoreML-Models (conversion)" + ml.short_description = ( + f"Pixelization (SIGGRAPH Asia 2022). " + f"{args.size}x{args.size} RGB -> pixelized RGB (same size). " + "Non-commercial research use only." + ) + ml.license = "Non-commercial research (see upstream LICENSE.md)" + + suffix = "" if args.precision == "fp16" else "_FP32" + out_path = os.path.join(args.output_dir, + f"Pixelization_{args.size}{suffix}.mlpackage") + ml.save(out_path) + print(f"Saved: {out_path}") + + print("Parity check on example image...") + try: + import PIL.Image as Image + example = os.path.join(REPO_DIR, "examples", "2_1.png") + if os.path.exists(example): + pil = Image.open(example).convert("RGB").resize( + (args.size, args.size), Image.BICUBIC) + src = np.array(pil) + else: + src = (dummy[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8) + pil = Image.fromarray(src) + + ml_loaded = ct.models.MLModel( + out_path, compute_units=ct.ComputeUnit.CPU_ONLY) + ml_out = ml_loaded.predict({"image": pil})["pixelized"] + ml_out.convert("RGB").save( + os.path.join(args.output_dir, + f"pixelization_sample_{args.precision}.png")) + + t = torch.from_numpy(src).permute(2, 0, 1).float()[None] / 255.0 + with torch.no_grad(): + pt_img = wrapper(t)[0].permute(1, 2, 0).numpy() + ml_arr = np.array(ml_out.convert("RGB")).astype(np.float32) + diff = np.abs(ml_arr - pt_img) + print(f" max abs diff (0-255): {diff.max():.3f}") + print(f" mean abs diff (0-255): {diff.mean():.3f}") + print(f" sample saved: pixelization_sample_{args.precision}.png") + except Exception as e: + print(f" parity check skipped: {e}") + + +if __name__ == "__main__": + main() diff --git a/docs/coreml_conversion_notes.md b/docs/coreml_conversion_notes.md index 9f9a4e1..f35614e 100644 --- a/docs/coreml_conversion_notes.md +++ b/docs/coreml_conversion_notes.md @@ -555,3 +555,73 @@ Additional DC-AE decoder monkey-patches required for trace: drop `output_size=` transformers 5.x rewrote `create_causal_mask` to index into `q_length.shape` 
/ `q_length[0]` — shape assumptions that fail under `torch.jit.trace` (`IndexError: tuple index out of range`). Downgrade to transformers 4.49.0 (the version AMD Nitro-E pins) to convert Llama 3.2 1B cleanly. Keep the wrapper limited to `model.model` (drop the LM head) and return `last_hidden_state` for seq_len=128. --- + +## Weight modulation with huge style codes — bake the code into Conv2d weights + +**Pixelization (SIGGRAPH Asia 2022) `ModulationConvBlock` overflows FP16 even though intermediate activations stay < 50.** + +Symptom: the FP16 model outputs a nearly-constant image (every pixel lands at tanh(~3) ≈ 0.999, producing a flat 254.875 after `(y+1)*127.5`). FP32 is fine. + +Cause: the upstream inference path is +```python +code = torch.tensor(MLP_code).reshape(1, 256, 1, 1) # constants up to 2.3e5 +cellcode = G_A.MLP(code) # abs max ~8.4e8 (!!) +# Inside ModulationConvBlock.forward: +_weight = weight.view(1, k, k, in_c, out_c) * code.view(1,1,1,in_c,1) # OVERFLOWS FP16 +_weight_norm = torch.sqrt(_weight.pow(2).sum([1,2,3]) + eps) +_weight = _weight / _weight_norm.view(1,1,1,1,out_c) +``` +`cellcode` magnitudes easily clear FP16's 65504 ceiling, so `_weight * code` saturates to Inf and the subsequent division leaves NaNs. + +Fix: **cellcode is a constant** (the upstream authors already extracted its pre-MLP input as a 256-float magic table and `best_cell_size` is fixed at 4 — the `cell_size` slider only affects post-processing). 
Since `(W·c)/‖W·c‖` is fully determined at conversion time, precompute that effective weight in FP32, store it as a plain conv2d weight buffer, and drop the modulation op entirely: + +```python +class BakedModConv(nn.Module): + def __init__(self, orig, code_chunk): + super().__init__() + with torch.no_grad(): + w = orig.weight * orig.wscale # (out, in, k, k); k/in_c/out_c come from orig.ksize/in_c/out_c + _w = w.view(1, k, k, in_c, out_c) * code_chunk.view(1,1,1,in_c,1) + norm = torch.sqrt((_w**2).sum([1,2,3]) + orig.eps) + _w = _w / norm.view(1,1,1,1,out_c) + w_final = _w.permute(1,2,3,0,4).reshape(k,k,in_c,out_c).permute(3,2,0,1).contiguous() + self.register_buffer("weight", w_final) + self.bias = nn.Parameter(orig.bias.detach().clone()) + def forward(self, x): + x = F.conv2d(x, self.weight, bias=None, padding=self.weight.shape[-1]//2) + return F.leaky_relu(x + self.bias.view(1,-1,1,1), 0.2) * math.sqrt(2) +``` + +Bonus: also caught a bug in the upstream decoder — `mod_conv_3..8` are defined but never called; `mod_conv_2` is reused 7 times. Preserve that when baking (8 BakedModConv instances, weights from `mod_conv_1` for the first, `mod_conv_2` for the rest). + +--- + +## Upstream "LayerNorm" that flattens the whole tensor — use `nn.GroupNorm(1, C)` + +Pixelization ships a custom `LayerNorm`: +```python +def forward(self, x): + mean = x.view(-1).mean().view(-1, 1, 1, 1) # global over C*H*W + std = x.view(-1).std().view(-1, 1, 1, 1) + x = (x - mean) / (std + eps) + return x * gamma.view(1, C, 1, 1) + beta.view(1, C, 1, 1) +``` + +Equivalent in effect to `nn.GroupNorm(num_groups=1, num_channels=C)` with per-channel affine (GroupNorm divides by `sqrt(biased_var + eps)` instead of `unbiased_std + eps`; at these element counts the difference is negligible). But the manual `view(-1).std()` path converts to a primitive reduce chain that coremltools lowers badly in FP16: on a (1, 128, 256, 256) tensor (~8M elements) the resulting FP16 std comes out ~4× too large, so the output magnitudes collapse by 4× and the downstream tanh branch saturates. 
+ +Fix: swap the layers after loading weights, before tracing: +```python +def swap_ln(module): + for name, ch in list(module.named_children()): + if isinstance(ch, UpstreamLN): + gn = nn.GroupNorm(1, ch.num_features, eps=ch.eps) + gn.weight.data.copy_(ch.gamma.data) + gn.bias.data.copy_(ch.beta.data) + setattr(module, name, gn) + else: + swap_ln(ch) +``` + +Apple's `group_norm` op handles the reduction in FP32 even under `compute_precision=FLOAT16`, so post-swap FP16 parity against PyTorch is ~10/255 max / 0.2/255 mean — imperceptible. + +--- diff --git a/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift b/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift index 376519d..7fa7de2 100644 --- a/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift +++ b/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift @@ -18,9 +18,25 @@ struct ImageInOutDemoView: View { @State private var processingTime: Double? @State private var item: PhotosPickerItem? @State private var showOriginal = false + // pixel_art: cache the raw (pre cell_size) model output so the preset + // picker only re-runs the cheap NEAREST resample + palette mapping. + @State private var pixelArtRaw: CGImage? + @State private var pixelArtPresetId: String = PixelArtPreset.all[0].id + // nil = use the preset's default cell size; non-nil = user dragged the + // slider. Reset to nil whenever the preset changes. + @State private var pixelArtCellSizeOverride: Double? + // User-selected pre-blur target (px). 512 = no blur. Smaller = more + // abstracted network input. nil = derive from cellSize. + @State private var pixelArtBlurOverride: Int? @StateObject private var session = ModelSession() private var outputType: String { model.configString("output_type") ?? "image" } + private var pixelArtPreset: PixelArtPreset { + PixelArtPreset.all.first { $0.id == pixelArtPresetId } ?? 
PixelArtPreset.all[0] + } + private var pixelArtCellSize: Int { + Int(pixelArtCellSizeOverride ?? Double(pixelArtPreset.cellSize)) + } var body: some View { VStack(spacing: 0) { @@ -42,7 +58,11 @@ struct ImageInOutDemoView: View { } Image(uiImage: output).resizable().aspectRatio(contentMode: .fit) } else if let img = showOriginal ? inputImage : outputImage ?? inputImage { - Image(uiImage: img).resizable().aspectRatio(contentMode: .fit) + if outputType == "pixel_art" && !showOriginal { + Image(uiImage: img).resizable().interpolation(.none).aspectRatio(contentMode: .fit) + } else { + Image(uiImage: img).resizable().aspectRatio(contentMode: .fit) + } } else { VStack(spacing: 12) { Image(systemName: "photo.on.rectangle.angled").font(.system(size: 60)).foregroundStyle(.secondary) @@ -85,6 +105,86 @@ struct ImageInOutDemoView: View { if isProcessing { ProgressView().controlSize(.small); Text(status).font(.caption).foregroundStyle(.secondary) } } + if outputType == "pixel_art" && pixelArtRaw != nil { + ScrollView(.horizontal, showsIndicators: false) { + HStack(spacing: 8) { + ForEach(PixelArtPreset.all, id: \.id) { preset in + Button { + pixelArtPresetId = preset.id + } label: { + VStack(spacing: 2) { + Image(systemName: preset.systemImage).font(.body) + Text(preset.name).font(.caption2) + } + .padding(.vertical, 6).padding(.horizontal, 10) + .background( + pixelArtPresetId == preset.id + ? Color.accentColor.opacity(0.25) + : Color(.systemGray6) + ) + .cornerRadius(8) + } + .buttonStyle(.plain) + } + } + .padding(.horizontal, 4) + } + + HStack(spacing: 10) { + Image(systemName: "square.grid.3x3") + .font(.caption).foregroundStyle(.secondary) + Slider( + value: Binding( + get: { Double(pixelArtCellSize) }, + set: { pixelArtCellSizeOverride = $0 } + ), + in: 4...10, step: 1 + ) { Text("Cell size") } + Text("\(pixelArtCellSize)") + .font(.caption.monospacedDigit()) + .frame(width: 22, alignment: .trailing) + } + + // Pre-blur (photo shrink) picker. 
Off = full-res network + // input; smaller targets trade fine detail for cleaner, + // more iconic palette cells. + Picker("Abstraction", selection: Binding( + get: { pixelArtBlurOverride ?? 0 }, // 0 == auto (derive from cs) + set: { pixelArtBlurOverride = $0 == 0 ? nil : $0 } + )) { + Text("Auto").tag(0) + Text("Off").tag(512) + Text("256").tag(256) + Text("128").tag(128) + Text("64").tag(64) + Text("32").tag(32) + } + .pickerStyle(.segmented) + .onChange(of: pixelArtBlurOverride) { + if let img = inputImage { Task { await runInference(on: img) } } + } + + .onChange(of: pixelArtPresetId) { + // New preset → reset override & re-run inference so the + // pre-blur matches the preset's default cellSize. + pixelArtCellSizeOverride = nil + if let img = inputImage { + Task { await runInference(on: img) } + } else if let raw = pixelArtRaw { + outputImage = pixelArtPostProcess( + raw, cellSize: pixelArtCellSize, palette: pixelArtPreset.palette) + } + } + .onChange(of: pixelArtCellSizeOverride) { + // Cheap re-snap from the cached raw output only; no network re-run. + // NOTE(review): the Slider sets no onEditingChanged, so release never re-runs it. + if let raw = pixelArtRaw { + outputImage = pixelArtPostProcess( + raw, cellSize: pixelArtCellSize, palette: pixelArtPreset.palette) + } + } + } + HStack(spacing: 12) { PhotosPicker(selection: $item, matching: .images) { Label("Select Photo", systemImage: "photo.badge.plus") @@ -114,7 +214,11 @@ struct ImageInOutDemoView: View { // a photo. 
session.ensure { try await ModelLoader.loadPrimary(for: model) } } - .onChange(of: item) { _, _ in loadAndRun() } + .onChange(of: item) { _, _ in + pixelArtRaw = nil + pixelArtCellSizeOverride = nil + loadAndRun() + } } // MARK: - Load & Run @@ -165,7 +269,19 @@ struct ImageInOutDemoView: View { let inputDict: [String: Any] if let imageInput { - guard let pb = ImageUtils.pixelBuffer(from: cgImage, width: inputSize, height: inputSize) else { + // pixel_art: pre-downsample the source based on cell size so the + // fixed-512 network effectively sees a lower-resolution image + // and makes its semantic abstraction at the user's chosen + // chunkiness. Mimics upstream test_pro.py's input resize. + let sourceForBuffer: CGImage = { + guard outputType == "pixel_art" else { return cgImage } + let target = pixelArtBlurOverride + ?? pixelArtPreBlurTarget(cellSize: pixelArtCellSize, inputSize: inputSize) + return target < inputSize + ? (resizeCGImageBicubic(cgImage, to: target) ?? cgImage) + : cgImage + }() + guard let pb = ImageUtils.pixelBuffer(from: sourceForBuffer, width: inputSize, height: inputSize) else { await MainActor.run { isProcessing = false; status = "Prep failed" }; return } inputDict = [imageInput.key: pb] @@ -205,6 +321,13 @@ struct ImageInOutDemoView: View { result = processLABABOutput(output: output, originalImage: cgImage, origW: origW, origH: origH, modelSize: inputSize) case "segmap": result = processSegmapOutput(output: output, originalImage: cgImage, origW: origW, origH: origH, modelSize: inputSize) + case "pixel_art": + let raw = extractRawCGImage(output: output) + await MainActor.run { pixelArtRaw = raw } + result = raw.flatMap { + pixelArtPostProcess($0, cellSize: pixelArtCellSize, + palette: pixelArtPreset.palette) + } default: if let r = processImageOutput(output: output) { result = restoreAspect(r, origW: origW, origH: origH, inputSize: inputSize) @@ -284,6 +407,72 @@ struct ImageInOutDemoView: View { return nil } + // MARK: - Output: pixel_art 
(Pixelization) + + private func extractRawCGImage(output: MLFeatureProvider) -> CGImage? { + for name in output.featureNames { + if let pb = output.featureValue(for: name)?.imageBufferValue { + let ci = CIImage(cvPixelBuffer: pb) + if let cg = CIContext(options: [.useSoftwareRenderer: false]) + .createCGImage(ci, from: ci.extent) { return cg } + } + } + return nil + } + + /// Clean pixel-art rendering: + /// 1. Mean-sample one color per `cs`×`cs` cell. + /// 2. Palette-snap each cell (optional). + /// 3. NEAREST upscale by `cs` into the final image. + /// + /// We deliberately do NOT run a separate edge-detection overlay. Source- + /// resolution gradient detection picks up per-cell colour jitter and + /// texture noise, sprinkling stray dark lines across flat areas + /// (line-art specks popping up here and there) — the chunky cells + limited palette + /// already give enough silhouette definition on their own. + private func pixelArtPostProcess(_ cg: CGImage, cellSize: Int, palette: [UInt32]?) -> UIImage? { + let cs = max(1, cellSize) + let gridW = cg.width / cs + let gridH = cg.height / cs + guard gridW > 0 && gridH > 0 else { return nil } + let outW = gridW * cs + let outH = gridH * cs + let srcW = cg.width + let srcH = cg.height + + guard let srcData = cg.dataProvider?.data, + let srcPtr = CFDataGetBytePtr(srcData) else { return nil } + let srcBPR = cg.bytesPerRow + let srcBpp = cg.bitsPerPixel / 8 + + var grid = [UInt8](repeating: 0, count: gridW * gridH * 3) + grid.withUnsafeMutableBufferPointer { gbuf in + pixelArtMeanSample( + srcPtr: srcPtr, srcW: srcW, srcH: srcH, + srcBPR: srcBPR, srcBpp: srcBpp, + cs: cs, gridW: gridW, gridH: gridH, + gbuf: gbuf.baseAddress! 
+ ) + } + if let palette = palette, !palette.isEmpty { + applyPalette(&grid, palette: palette) + } + + let bytesPerRow = outW * 4 + var pixels = [UInt8](repeating: 0, count: bytesPerRow * outH) + pixels.withUnsafeMutableBufferPointer { dstBuf in + grid.withUnsafeBufferPointer { gbuf in + pixelArtReplicate( + dst: dstBuf.baseAddress!, + gptr: gbuf.baseAddress!, + gridW: gridW, gridH: gridH, + cs: cs, bytesPerRow: bytesPerRow + ) + } + } + return ImageUtils.makeRGBA(pixels: pixels, width: outW, height: outH) + } + // MARK: - Output: mask (RMBG) private func processMaskOutput(output: MLFeatureProvider, originalImage: CGImage, origW: Int, origH: Int, modelSize: Int) -> UIImage? { @@ -590,3 +779,201 @@ struct ImageInOutDemoView: View { return (max(0, min(1, gamma(rl))), max(0, min(1, gamma(gl))), max(0, min(1, gamma(bl)))) } } + +// MARK: - Pixel art presets + +/// A named pixel-art style. `cellSize` controls the grid resolution (larger = +/// chunkier). `palette` is an optional list of 0xRRGGBB colors to snap every +/// cell to — nil means "keep the generator's own colors". +struct PixelArtPreset { + let id: String + let name: String + let systemImage: String + let cellSize: Int + let palette: [UInt32]? + + // All presets default to the network's native cellSize (4) — the palette + // is what differentiates them. Users dial chunkiness via the slider; at + // cs=4 the pre-blur is skipped and the network's own pixelization shows + // through cleanest, which is what tends to read best across photos. 
+ static let all: [PixelArtPreset] = [ + PixelArtPreset(id: "off", name: "Off", systemImage: "circle", cellSize: 4, palette: nil), + PixelArtPreset(id: "gameboy", name: "Game Boy", systemImage: "gamecontroller", cellSize: 4, palette: PixelArtPalettes.gameBoy), + PixelArtPreset(id: "nes", name: "NES", systemImage: "gamecontroller.fill", cellSize: 4, palette: PixelArtPalettes.nes), + PixelArtPreset(id: "pico8", name: "Pico-8", systemImage: "square.stack.3d.up.fill", cellSize: 4, palette: PixelArtPalettes.pico8), + PixelArtPreset(id: "c64", name: "C64", systemImage: "desktopcomputer", cellSize: 4, palette: PixelArtPalettes.c64), + ] +} + +enum PixelArtPalettes { + // Game Boy DMG: 4 shades of olive-green. + static let gameBoy: [UInt32] = [ + 0x9BBC0F, 0x8BAC0F, 0x306230, 0x0F380F, + ] + + // Pico-8 fantasy console: 16 colors. + static let pico8: [UInt32] = [ + 0x000000, 0x1D2B53, 0x7E2553, 0x008751, + 0xAB5236, 0x5F574F, 0xC2C3C7, 0xFFF1E8, + 0xFF004D, 0xFFA300, 0xFFEC27, 0x00E436, + 0x29ADFF, 0x83769C, 0xFF77A8, 0xFFCCAA, + ] + + // Commodore 64: 16 colors (Pepto's well-known sRGB approximation). + static let c64: [UInt32] = [ + 0x000000, 0xFFFFFF, 0x68372B, 0x70A4B2, + 0x6F3D86, 0x588D43, 0x352879, 0xB8C76F, + 0x6F4F25, 0x433900, 0x9A6759, 0x444444, + 0x6C6C6C, 0x9AD284, 0x6C5EB5, 0x959595, + ] + + // NES 2C02 PPU (Nintendulator NTSC approximation), 54 usable colors. 
+ static let nes: [UInt32] = [ + 0x7C7C7C, 0x0000FC, 0x0000BC, 0x4428BC, + 0x940084, 0xA80020, 0xA81000, 0x881400, + 0x503000, 0x007800, 0x006800, 0x005800, + 0x004058, + 0xBCBCBC, 0x0078F8, 0x0058F8, 0x6844FC, + 0xD800CC, 0xE40058, 0xF83800, 0xE45C10, + 0xAC7C00, 0x00B800, 0x00A800, 0x00A844, + 0x008888, + 0xF8F8F8, 0x3CBCFC, 0x6888FC, 0x9878F8, + 0xF878F8, 0xF85898, 0xF87858, 0xFCA044, + 0xF8B800, 0xB8F818, 0x58D854, 0x58F898, + 0x00E8D8, 0x787878, + 0xFCFCFC, 0xA4E4FC, 0xB8B8F8, 0xD8B8F8, + 0xF8B8F8, 0xF8A4C0, 0xF0D0B0, 0xFCE0A8, + 0xF8D878, 0xD8F878, 0xB8F8B8, 0xB8F8D8, + 0x00FCFC, 0xF8D8F8, + ] +} + +// NOTE(review): misplaced comment — this describes `pixelArtReplicate`, not the +// function below: it replicates each 3-byte grid cell into a `cs`×`cs` block of the +// output RGBA buffer. The `applyEdges` / edge-mask override it mentioned looks stale. +/// Map the user's `cellSize` to the target resolution the photo should be +/// downsampled to before feeding a fixed-`inputSize` Pixelization network. +/// The paper's `test_pro.py` resizes the whole network input by cell_size so +/// the (fully-convolutional) generator makes its abstraction at that scale. +/// Our CoreML model is fixed-size, so we emulate the effect by shrinking the +/// source and letting CGContext resize it back up — the network sees a +/// lower-resolution image and produces cleaner coarse cells. +/// +/// Matches the upstream `test_pro.py` factor (`inputSize * 4 / cellSize`). +/// A previous 2× boost to this was too radical — at cs=16 it shrank the +/// input to 64 px, destroying readability. The useful range is cs=4-8 in +/// practice; in that window the upstream formula gives 256-512 target, +/// which blurs texture enough to clean up palette cells without wiping +/// the subject. cellSize <= 4 keeps native resolution; larger cell sizes are floored at 96 px. 
/// Map the user's `cellSize` to the side length the photo should be
/// downsampled to before it is fed to a fixed-`inputSize` Pixelization network.
///
/// Uses the upstream `test_pro.py` factor, `inputSize * 4 / cellSize`: the
/// larger the requested cell, the coarser the image the network gets to see.
///
/// - Parameters:
///   - cellSize: Requested pixel-block size. Values of 4 or less (including
///     zero and negative values) keep the native resolution.
///   - inputSize: Side length, in pixels, of the network input.
/// - Returns: The pre-blur side length. It is floored at 96 px and never
///   exceeds `inputSize`.
func pixelArtPreBlurTarget(cellSize: Int, inputSize: Int) -> Int {
    guard cellSize > 4 else { return inputSize }
    let scaled = inputSize * 4 / cellSize
    // Apply the 96 px floor first and the `inputSize` ceiling last. With the
    // opposite order an `inputSize` below 96 produced a target *larger* than
    // the network input, i.e. an upsample instead of a pre-blur.
    return min(inputSize, max(96, scaled))
}
+ + + + diff --git a/sample_apps/PixelizationDemo/PixelizationDemo/PixelizationDemoApp.swift b/sample_apps/PixelizationDemo/PixelizationDemo/PixelizationDemoApp.swift new file mode 100644 index 0000000..0ce1d18 --- /dev/null +++ b/sample_apps/PixelizationDemo/PixelizationDemo/PixelizationDemoApp.swift @@ -0,0 +1,10 @@ +import SwiftUI + +@main +struct PixelizationDemoApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} diff --git a/sample_apps/PixelizationDemo/PixelizationDemo/Pixelizer.swift b/sample_apps/PixelizationDemo/PixelizationDemo/Pixelizer.swift new file mode 100644 index 0000000..3374be6 --- /dev/null +++ b/sample_apps/PixelizationDemo/PixelizationDemo/Pixelizer.swift @@ -0,0 +1,337 @@ +import CoreML +import CoreImage +import UIKit + +enum PixelizerError: LocalizedError { + case modelNotFound + case invalidImage + case predictionFailed + + var errorDescription: String? { + switch self { + case .modelNotFound: return "Pixelization model not found" + case .invalidImage: return "Failed to process image" + case .predictionFailed: return "Prediction failed" + } + } +} + +// MARK: - Presets + +/// A named pixel-art style. `cellSize` = grid chunkiness. `palette` = optional +/// list of 0xRRGGBB colors to snap every cell to. +struct PixelArtPreset { + let id: String + let name: String + let systemImage: String + let cellSize: Int + let palette: [UInt32]? + + // All presets default to cellSize 4 — the palette is what distinguishes + // each mode, chunkiness is user-tuned via the slider. 
+ static let all: [PixelArtPreset] = [ + PixelArtPreset(id: "off", name: "Off", systemImage: "circle", cellSize: 4, palette: nil), + PixelArtPreset(id: "gameboy", name: "Game Boy", systemImage: "gamecontroller", cellSize: 4, palette: PixelArtPalettes.gameBoy), + PixelArtPreset(id: "nes", name: "NES", systemImage: "gamecontroller.fill", cellSize: 4, palette: PixelArtPalettes.nes), + PixelArtPreset(id: "pico8", name: "Pico-8", systemImage: "square.stack.3d.up.fill", cellSize: 4, palette: PixelArtPalettes.pico8), + PixelArtPreset(id: "c64", name: "C64", systemImage: "desktopcomputer", cellSize: 4, palette: PixelArtPalettes.c64), + ] +} + +enum PixelArtPalettes { + static let gameBoy: [UInt32] = [ + 0x9BBC0F, 0x8BAC0F, 0x306230, 0x0F380F, + ] + static let pico8: [UInt32] = [ + 0x000000, 0x1D2B53, 0x7E2553, 0x008751, + 0xAB5236, 0x5F574F, 0xC2C3C7, 0xFFF1E8, + 0xFF004D, 0xFFA300, 0xFFEC27, 0x00E436, + 0x29ADFF, 0x83769C, 0xFF77A8, 0xFFCCAA, + ] + static let c64: [UInt32] = [ + 0x000000, 0xFFFFFF, 0x68372B, 0x70A4B2, + 0x6F3D86, 0x588D43, 0x352879, 0xB8C76F, + 0x6F4F25, 0x433900, 0x9A6759, 0x444444, + 0x6C6C6C, 0x9AD284, 0x6C5EB5, 0x959595, + ] + static let nes: [UInt32] = [ + 0x7C7C7C, 0x0000FC, 0x0000BC, 0x4428BC, + 0x940084, 0xA80020, 0xA81000, 0x881400, + 0x503000, 0x007800, 0x006800, 0x005800, 0x004058, + 0xBCBCBC, 0x0078F8, 0x0058F8, 0x6844FC, + 0xD800CC, 0xE40058, 0xF83800, 0xE45C10, + 0xAC7C00, 0x00B800, 0x00A800, 0x00A844, 0x008888, + 0xF8F8F8, 0x3CBCFC, 0x6888FC, 0x9878F8, + 0xF878F8, 0xF85898, 0xF87858, 0xFCA044, + 0xF8B800, 0xB8F818, 0x58D854, 0x58F898, + 0x00E8D8, 0x787878, + 0xFCFCFC, 0xA4E4FC, 0xB8B8F8, 0xD8B8F8, + 0xF8B8F8, 0xF8A4C0, 0xF0D0B0, 0xFCE0A8, + 0xF8D878, 0xD8F878, 0xB8F8B8, 0xB8F8D8, + 0x00FCFC, 0xF8D8F8, + ] +} + +// MARK: - Pixelizer + +enum Pixelizer { + static let inputSize = 512 + + /// Matches the upstream `test_pro.py` factor (`inputSize * 4 / cellSize`). + /// cellSize <= 4 keeps native resolution. 
+ static func preBlurTargetSize(for cellSize: Int) -> Int { + if cellSize <= 4 { return inputSize } + return max(96, min(inputSize, inputSize * 4 / cellSize)) + } + + /// Run the network and return the raw 512×512 pixelized CGImage. + /// `preBlurTarget` should come from `preBlurTargetSize(for:)` — pass + /// `inputSize` (= 512) for no blur. + static func runModel(on image: UIImage, preBlurTarget: Int = inputSize) async throws -> CGImage { + let fixed = image.normalizedOrientation() + guard let cgImage = fixed.cgImage else { throw PixelizerError.invalidImage } + + let blurred: CGImage = preBlurTarget < inputSize + ? (resizeCGImageBicubic(cgImage, to: preBlurTarget) ?? cgImage) + : cgImage + + guard let inputBuffer = createPixelBuffer( + from: blurred, width: inputSize, height: inputSize + ) else { throw PixelizerError.invalidImage } + + let model = try loadModel() + let input = try MLDictionaryFeatureProvider(dictionary: ["image": inputBuffer]) + let output = try await model.prediction(from: input) + guard let buffer = output.featureValue(for: "pixelized")?.imageBufferValue else { + throw PixelizerError.predictionFailed + } + let ci = CIImage(cvPixelBuffer: buffer) + guard let cg = CIContext(options: [.useSoftwareRenderer: false]) + .createCGImage(ci, from: ci.extent) + else { throw PixelizerError.predictionFailed } + return cg + } + + static func pixelize(_ image: UIImage, preset: PixelArtPreset, cellSize: Int? = nil) async throws -> UIImage { + let cs = cellSize ?? preset.cellSize + let cg = try await runModel(on: image, preBlurTarget: preBlurTargetSize(for: cs)) + return postProcess(cg, cellSize: cs, palette: preset.palette) ?? UIImage(cgImage: cg) + } + + private static func resizeCGImageBicubic(_ cg: CGImage, to size: Int) -> CGImage? 
{ + guard let ctx = CGContext( + data: nil, width: size, height: size, + bitsPerComponent: 8, bytesPerRow: size * 4, + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue + ) else { return nil } + ctx.interpolationQuality = .high + ctx.draw(cg, in: CGRect(x: 0, y: 0, width: size, height: size)) + return ctx.makeImage() + } + + /// Mean-sample → optional palette snap → NEAREST upscale. No edge overlay + /// — source-resolution gradient detection adds stray lines in flat areas + /// (texture noise), so we rely on the cells + palette for definition. + static func postProcess(_ cg: CGImage, cellSize: Int, palette: [UInt32]?) -> UIImage? { + let cs = max(1, cellSize) + let gridW = cg.width / cs + let gridH = cg.height / cs + guard gridW > 0 && gridH > 0 else { return nil } + let outW = gridW * cs + let outH = gridH * cs + let srcW = cg.width + let srcH = cg.height + + guard let srcData = cg.dataProvider?.data, + let srcPtr = CFDataGetBytePtr(srcData) else { return nil } + let srcBPR = cg.bytesPerRow + let srcBpp = cg.bitsPerPixel / 8 + + var grid = [UInt8](repeating: 0, count: gridW * gridH * 3) + grid.withUnsafeMutableBufferPointer { gbuf in + pixelArtMeanSample( + srcPtr: srcPtr, srcW: srcW, srcH: srcH, + srcBPR: srcBPR, srcBpp: srcBpp, + cs: cs, gridW: gridW, gridH: gridH, + gbuf: gbuf.baseAddress! + ) + } + if let palette = palette, !palette.isEmpty { + applyPalette(&grid, palette: palette) + } + + let bytesPerRow = outW * 4 + var pixels = [UInt8](repeating: 0, count: bytesPerRow * outH) + pixels.withUnsafeMutableBufferPointer { dstBuf in + grid.withUnsafeBufferPointer { gbuf in + pixelArtReplicate( + dst: dstBuf.baseAddress!, + gptr: gbuf.baseAddress!, + gridW: gridW, gridH: gridH, + cs: cs, bytesPerRow: bytesPerRow + ) + } + } + + let provider = CGDataProvider(data: Data(pixels) as CFData)! 
+ let space = CGColorSpaceCreateDeviceRGB() + let bitmap = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue) + if let out = CGImage( + width: outW, height: outH, + bitsPerComponent: 8, bitsPerPixel: 32, + bytesPerRow: bytesPerRow, + space: space, bitmapInfo: bitmap, + provider: provider, decode: nil, + shouldInterpolate: false, intent: .defaultIntent + ) { + return UIImage(cgImage: out) + } + return nil + } + + // MARK: - Model loading + + private static func loadModel() throws -> MLModel { + guard let resourcePath = Bundle.main.resourcePath, + let items = try? FileManager.default.contentsOfDirectory(atPath: resourcePath) + else { throw PixelizerError.modelNotFound } + for item in items where item.hasSuffix(".mlmodelc") && item.contains("Pixelization") { + let url = URL(fileURLWithPath: (resourcePath as NSString).appendingPathComponent(item)) + let config = MLModelConfiguration() + config.computeUnits = .cpuAndNeuralEngine + return try MLModel(contentsOf: url, configuration: config) + } + throw PixelizerError.modelNotFound + } + + // MARK: - Pixel buffer + + private static func createPixelBuffer(from cgImage: CGImage, width: Int, height: Int) -> CVPixelBuffer? { + var pb: CVPixelBuffer? 
+ CVPixelBufferCreate( + kCFAllocatorDefault, width, height, kCVPixelFormatType_32BGRA, + [kCVPixelBufferCGImageCompatibilityKey: true, + kCVPixelBufferCGBitmapContextCompatibilityKey: true] as CFDictionary, + &pb + ) + guard let buffer = pb else { return nil } + CVPixelBufferLockBaseAddress(buffer, []) + defer { CVPixelBufferUnlockBaseAddress(buffer, []) } + guard let ctx = CGContext( + data: CVPixelBufferGetBaseAddress(buffer), + width: width, height: height, bitsPerComponent: 8, + bytesPerRow: CVPixelBufferGetBytesPerRow(buffer), + space: CGColorSpaceCreateDeviceRGB(), + bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue + | CGBitmapInfo.byteOrder32Little.rawValue + ) else { return nil } + ctx.interpolationQuality = .high + ctx.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height)) + return buffer + } +} + +// MARK: - Sampling / replicate helpers + +func pixelArtMeanSample( + srcPtr: UnsafePointer, + srcW: Int, srcH: Int, + srcBPR: Int, srcBpp: Int, + cs: Int, gridW: Int, gridH: Int, + gbuf: UnsafeMutablePointer +) { + let div: Int32 = Int32(cs * cs) + DispatchQueue.concurrentPerform(iterations: gridH) { gy in + for gx in 0.., + gptr: UnsafePointer, + gridW: Int, gridH: Int, + cs: Int, bytesPerRow: Int +) { + DispatchQueue.concurrentPerform(iterations: gridH) { gy in + for gx in 0..> 16) & 0xFF) } + let pg: [Int16] = palette.map { Int16(($0 >> 8) & 0xFF) } + let pb: [Int16] = palette.map { Int16($0 & 0xFF) } + let count = buf.count / 3 + buf.withUnsafeMutableBufferPointer { buf in + let ptr = buf.baseAddress! + pr.withUnsafeBufferPointer { prBuf in + pg.withUnsafeBufferPointer { pgBuf in + pb.withUnsafeBufferPointer { pbBuf in + let prp = prBuf.baseAddress!, pgp = pgBuf.baseAddress!, pbp = pbBuf.baseAddress! + DispatchQueue.concurrentPerform(iterations: count) { i in + let off = i * 3 + let r = Int16(ptr[off]), g = Int16(ptr[off + 1]), b = Int16(ptr[off + 2]) + var bestIdx = 0 + var bestDist: Int32 = .max + for j in 0.. 
UIImage { + guard imageOrientation != .up else { return self } + UIGraphicsBeginImageContextWithOptions(size, false, scale) + draw(in: CGRect(origin: .zero, size: size)) + let normalized = UIGraphicsGetImageFromCurrentImageContext() + UIGraphicsEndImageContext() + return normalized ?? self + } +}