diff --git a/.gitignore b/.gitignore
index 0b9ee5d..c3d869c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,5 +16,7 @@ conversion_scripts/DepthAnythingV3/
conversion_scripts/Nitro-E/core/
conversion_scripts/Nitro-E/reference_dump/
conversion_scripts/Nitro-E/*.png
+conversion_scripts/Pixelization/
+conversion_scripts/pixelization_sample_*.png
sample_apps/NitroEDemo/NitroEDemo/Llama3Vocab.json
sample_apps/NitroEDemo/NitroEDemo/Llama3Merges.txt
diff --git a/README.md b/README.md
index 5a4220b..6f96c82 100644
--- a/README.md
+++ b/README.md
@@ -99,6 +99,7 @@ You are free to do or not.
- [DCGAN](#dcgan)
- [**Image2Image**](#image2image)
+ - [Pixelization](#pixelization)
- [Anime2Sketch](#anime2sketch)
- [AnimeGAN2Face_Paint_512_v2](#animegan2face_paint_512_v2)
- [Photo2Cartoon](#photo2cartoon)
@@ -782,6 +783,14 @@ Low Light Enhancement
# Image2Image
+### Pixelization
+
+Make Your Own Sprites: Aliasing-Aware and Cell-Controllable Pixelization (SIGGRAPH Asia 2022). Turns any photo into pixel art; the `cell_size` slider (4–10) controls pixel block size at post-processing time — the network runs once per photo.
+
+| Google Drive / HF Link | Size | Input / Output | Original Project | License | Year | Sample Project | Conversion Script |
+| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
+| [Pixelization_512.mlpackage.zip](https://huggingface.co/mlboydaisuke/coreml-zoo/resolve/main/pixelization/Pixelization_512.mlpackage.zip) | 38MB (FP16) | Image (RGB 512×512) → Image (RGB 512×512) | [WuZongWei6/Pixelization](https://github.com/WuZongWei6/Pixelization) | [Non-commercial research](https://github.com/WuZongWei6/Pixelization/blob/main/LICENSE.md) | 2022 | [PixelizationDemo](./sample_apps/PixelizationDemo/) | [convert_pixelization.py](./conversion_scripts/convert_pixelization.py) |
+
### [Anime2Sketch](https://drive.google.com/file/d/1-52NnZ1kajZI5Rk0tn3DegpU38la_jYk/view?usp=sharing)
diff --git a/conversion_scripts/convert_pixelization.py b/conversion_scripts/convert_pixelization.py
new file mode 100644
index 0000000..2663a05
--- /dev/null
+++ b/conversion_scripts/convert_pixelization.py
@@ -0,0 +1,340 @@
+"""
+Convert Pixelization (SIGGRAPH Asia 2022) to CoreML.
+
+Repo: https://github.com/WuZongWei6/Pixelization
+Weights mirror: https://huggingface.co/ashleykleynhans/pixelization
+
+Architecture:
+ G_A (C2PGen): RGBEnc -> RGBDec(modulated by cellcode) -> tanh image
+ where cellcode = MLP(fixed_256d_vector) is a precomputed [1, 2048] style code.
+ alias_net: AliasRGBEncoder -> AliasRGBDecoder -> tanh anti-aliased image.
+
+Pipeline (baked into a single mlpackage):
+ input[0,1] RGB
+ -> x = 2x-1 (normalize to [-1,1])
+ -> feature = RGBEnc(x)
+ -> y = RGBDec(feature, cellcode)
+ -> y = alias_net(y)
+  -> y = (y+1)*127.5 clamped (denorm to [0,255])
+ -> output RGB image
+
+Post-processing (done in Swift, not in the model):
+ nearest-neighbor downscale by 4 -> logical pixel grid
+ nearest-neighbor upscale by cell_size -> display size
+
+Usage:
+ python convert_pixelization.py --size 512
+"""
+
+import argparse
+import os
+import sys
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torchvision.models as tvmodels
+
+import coremltools as ct
+
+REPO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Pixelization")
+sys.path.insert(0, REPO_DIR)
+
+
+# Precomputed MLP_code constant from upstream test_pro.py (reshape to [1, 256, 1, 1]).
+MLP_CODE = [
+ 233356.8125, -27387.5918, -32866.8008, 126575.0312, -181590.0156,
+ -31543.1289, 50374.1289, 99631.4062, -188897.3750, 138322.7031,
+ -107266.2266, 125778.5781, 42416.1836, 139710.8594, -39614.6250,
+ -69972.6875, -21886.4141, 86938.4766, 31457.6270, -98892.2344,
+ -1191.5887, -61662.1719, -180121.9062, -32931.0859, 43109.0391,
+ 21490.1328, -153485.3281, 94259.1797, 43103.1992, -231953.8125,
+ 52496.7422, 142697.4062, -34882.7852, -98740.0625, 34458.5078,
+ -135436.3438, 11420.5488, -18895.8984, -71195.4141, 176947.2344,
+ -52747.5742, 109054.6562, -28124.9473, -17736.6152, -41327.1562,
+ 69853.3906, 79046.2656, -3923.7344, -5644.5229, 96586.7578,
+ -89315.2656, -146578.0156, -61862.1484, -83956.4375, 87574.5703,
+ -75055.0469, 19571.8203, 79358.7891, -16501.5000, -147169.2188,
+ -97861.6797, 60442.1797, 40156.9023, 223136.3906, -81118.0547,
+ -221443.6406, 54911.6914, 54735.9258, -58805.7305, -168884.4844,
+ 40865.9609, -28627.9043, -18604.7227, 120274.6172, 49712.2383,
+ 164402.7031, -53165.0820, -60664.0469, -97956.1484, -121468.4062,
+ -69926.1484, -4889.0151, 127367.7344, 200241.0781, -85817.7578,
+ -143190.0625, -74049.5312, 137980.5781, -150788.7656, -115719.6719,
+ -189250.1250, -153069.7344, -127429.7891, -187588.2500, 125264.7422,
+ -79082.3438, -114144.5781, 36033.5039, -57502.2188, 80488.1562,
+ 36501.4570, -138817.5938, -22189.6523, -222146.9688, -73292.3984,
+ 127717.2422, -183836.3750, -105907.0859, 145422.8750, 66981.2031,
+ -9596.6699, 78099.4922, 70226.3359, 35841.8789, -116117.6016,
+ -150986.0156, 81622.4922, 113575.0625, 154419.4844, 53586.4141,
+ 118494.8750, 131625.4375, -19763.1094, 75581.1172, -42750.5039,
+ 97934.8281, 6706.7949, -101179.0078, 83519.6172, -83054.8359,
+ -56749.2578, -30683.6992, 54615.9492, 84061.1406, -229136.7188,
+ -60554.0000, 8120.2622, -106468.7891, -28316.3418, -166351.3125,
+ 47797.3984, 96013.4141, 71482.9453, -101429.9297, 209063.3594,
+ -3033.6882, -38952.5352, -84920.6719, -5895.1543, -18641.8105,
+ 47884.3633, -14620.0273, -132898.6719, -40903.5859, 197217.3750,
+ -128599.1328, -115397.8906, -22670.7676, -78569.9688, -54559.7070,
+ -106855.2031, 40703.1484, 55568.3164, 60202.9844, -64757.9375,
+ -32068.8652, 160663.3438, 72187.0703, -148519.5469, 162952.8906,
+ -128048.2031, -136153.8906, -15270.3730, -52766.3281, -52517.4531,
+ 18652.1992, 195354.2188, -136657.3750, -8034.2622, -92699.6016,
+ -129169.1406, 188479.9844, 46003.7500, -93383.0781, -67831.6484,
+ -66710.5469, 104338.5234, 85878.8438, -73165.2031, 95857.3203,
+ 71213.1250, 94603.1094, -30359.8125, -107989.2578, 99822.1719,
+ 184626.3594, 79238.4531, -272978.9375, -137948.5781, -145245.8125,
+ 75359.2031, 26652.7930, 50421.4141, 60784.4102, -18286.3398,
+ -182851.9531, -87178.7969, -13131.7539, 195674.8906, 59951.7852,
+ 124353.7422, -36709.1758, -54575.4766, 77822.6953, 43697.4102,
+ -64394.3438, 113281.1797, -93987.0703, 221989.7188, 132902.5000,
+ -9538.8574, -14594.1338, 65084.9453, -12501.7227, 130330.6875,
+ -115123.4766, 20823.0898, 75512.4922, -75255.7422, -41936.7656,
+ -186678.8281, -166799.9375, 138770.6250, -78969.9531, 124516.8047,
+ -85558.5781, -69272.4375, -115539.1094, 228774.4844, -76529.3281,
+ -107735.8906, -76798.8906, -194335.2812, 56530.5742, -9397.7529,
+ 132985.8281, 163929.8438, -188517.7969, -141155.6406, 45071.0391,
+ 207788.3125, -125826.1172, 8965.3320, -159584.8438, 95842.4609,
+ -76929.4688,
+]
+
+
+def _prepare_dummy_vgg_weights():
+ """C2PGen.__init__ insists on loading ./pixelart_vgg19.pth (cwd-relative).
+ The VGG branch (PixelBlockEncoder) is only used during training and is
+ unreachable at inference, but we still need the file to exist so
+ construction succeeds. Write a dummy with matching structure; the real
+ weights get overwritten when we load 160_net_G_A.pth anyway."""
+ path = "./pixelart_vgg19.pth"
+ if os.path.exists(path):
+ return
+ vgg = tvmodels.vgg.vgg19(weights=None)
+ vgg.classifier._modules["6"] = nn.Linear(4096, 7, bias=True)
+ torch.save(vgg.state_dict(), path)
+
+
+def _swap_layernorm_with_groupnorm(module):
+ """Replace the upstream custom LayerNorm (global mean/std + per-channel
+ affine) with the mathematically equivalent nn.GroupNorm(1, C). The manual
+ expansion (`x.view(-1).std()` over ~8M elements) diverges badly in FP16 —
+ coremltools' native group_norm op handles it correctly."""
+ from models.basic_layer import LayerNorm as UpstreamLN
+ for name, ch in list(module.named_children()):
+ if isinstance(ch, UpstreamLN):
+ gn = nn.GroupNorm(1, ch.num_features, eps=ch.eps)
+ with torch.no_grad():
+ gn.weight.data.copy_(ch.gamma.data)
+ gn.bias.data.copy_(ch.beta.data)
+ setattr(module, name, gn)
+ else:
+ _swap_layernorm_with_groupnorm(ch)
+
+
+def build_pytorch_model():
+ # Run from REPO_DIR so relative paths in the vendored code resolve.
+ os.chdir(REPO_DIR)
+ _prepare_dummy_vgg_weights()
+ from models.networks import define_G
+
+ g_a = define_G(3, 3, 64, "c2pGen", "instance", False, "normal", 0.02, [])
+ alias = define_G(3, 3, 64, "antialias", "instance", False, "normal", 0.02, [])
+
+ g_a_sd = torch.load(
+ "checkpoints/pixelize/160_net_G_A.pth", map_location="cpu"
+ )
+ alias_sd = torch.load("alias_net.pth", map_location="cpu")
+ g_a.load_state_dict(g_a_sd)
+ alias.load_state_dict(alias_sd)
+ g_a.eval()
+ alias.eval()
+
+ _swap_layernorm_with_groupnorm(g_a)
+ _swap_layernorm_with_groupnorm(alias)
+
+ with torch.no_grad():
+ code = torch.tensor(MLP_CODE).reshape(1, 256, 1, 1)
+ cellcode = g_a.MLP(code).detach() # [1, 2048]
+ return g_a, alias, cellcode
+
+
+class BakedModConv(nn.Module):
+ """ModulationConvBlock with the (fixed) cellcode folded into the conv
+ weights. The original op computes (W*c)/norm(W*c) at every forward; since c
+ is constant we precompute that in FP32 and store it as a plain Conv2d
+ weight, which keeps FP16 inference safe (W*c alone overflows FP16 because
+ cellcode magnitudes reach 1e8)."""
+
+ def __init__(self, orig, code_chunk):
+ super().__init__()
+ import torch.nn.functional as F
+ self.F = F
+ in_c = orig.in_c
+ out_c = orig.out_c
+ k = orig.ksize
+ with torch.no_grad():
+ w = orig.weight * orig.wscale # (out_c, in_c, k, k)
+ # Match the original view/permute sequence exactly (no semantic
+ # transpose — this is the upstream convention).
+ _w = w.view(1, k, k, in_c, out_c)
+ _w = _w * code_chunk.view(1, 1, 1, in_c, 1)
+ norm = torch.sqrt((_w ** 2).sum(dim=[1, 2, 3]) + orig.eps)
+ _w = _w / norm.view(1, 1, 1, 1, out_c)
+ w_perm = _w.permute(1, 2, 3, 0, 4).reshape(k, k, in_c, out_c)
+ w_final = w_perm.permute(3, 2, 0, 1).contiguous() # (out_c, in_c, k, k)
+ self.register_buffer("weight", w_final)
+ self.bias = nn.Parameter(orig.bias.detach().clone())
+ self.padding = k // 2
+
+ def forward(self, x):
+ x = self.F.conv2d(x, self.weight, bias=None, padding=self.padding)
+ x = x + self.bias.view(1, -1, 1, 1)
+ x = self.F.leaky_relu(x, 0.2, inplace=False) * (2.0 ** 0.5)
+ return x
+
+
+class BakedRGBDec(nn.Module):
+ """RGBDec with cellcode folded in, replacing the 8 modulation convs.
+ Upstream reuses `mod_conv_2` for 7 of the 8 calls (mod_conv_3..8 are
+ defined but unused); we preserve that behavior exactly."""
+
+ def __init__(self, orig, cellcode):
+ super().__init__()
+ baked = []
+ for i in range(8):
+ src = orig.mod_conv_1 if i == 0 else orig.mod_conv_2
+ chunk = cellcode[:, i * 256 : (i + 1) * 256]
+ baked.append(BakedModConv(src, chunk))
+ self.baked = nn.ModuleList(baked)
+ self.upsample_block1 = orig.upsample_block1
+ self.conv_1 = orig.conv_1
+ self.upsample_block2 = orig.upsample_block2
+ self.conv_2 = orig.conv_2
+ self.conv_3 = orig.conv_3
+
+ def forward(self, x):
+ residual = x
+ x = self.baked[0](x); x = self.baked[1](x); x = x + residual
+ residual = x
+ x = self.baked[2](x); x = self.baked[3](x); x = x + residual
+ residual = x
+ x = self.baked[4](x); x = self.baked[5](x); x = x + residual
+ residual = x
+ x = self.baked[6](x); x = self.baked[7](x); x = x + residual
+ x = self.upsample_block1(x)
+ x = self.conv_1(x)
+ x = self.upsample_block2(x)
+ x = self.conv_2(x)
+ x = self.conv_3(x)
+ return x
+
+
+class PixelizationWrapper(nn.Module):
+ """Input: RGB image in [0, 1], NCHW. Output: pixelized RGB in [0, 255]."""
+
+ def __init__(self, g_a, alias, cellcode):
+ super().__init__()
+ self.rgb_enc = g_a.RGBEnc
+ self.rgb_dec = BakedRGBDec(g_a.RGBDec, cellcode)
+ self.alias = alias
+
+ def forward(self, image):
+ # `image` is in [0, 1] because ImageType sets scale=1/255.
+ x = image * 2.0 - 1.0
+ feature = self.rgb_enc(x)
+ y = self.rgb_dec(feature)
+ y = self.alias(y)
+ # Scale to [0, 255] for ImageType output.
+ y = (y + 1.0) * 127.5
+ return torch.clamp(y, 0.0, 255.0)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--size", type=int, default=512,
+ help="Input H=W (must be multiple of 4).")
+ parser.add_argument("--precision", choices=["fp16", "fp32"], default="fp16")
+ parser.add_argument("--output-dir", type=str,
+ default=os.path.dirname(os.path.abspath(__file__)))
+ args = parser.parse_args()
+ assert args.size % 4 == 0, "size must be a multiple of 4"
+ args.output_dir = os.path.abspath(args.output_dir)
+
+ print("Loading PyTorch weights...")
+ g_a, alias, cellcode = build_pytorch_model()
+ wrapper = PixelizationWrapper(g_a, alias, cellcode).eval()
+
+ dummy = torch.rand(1, 3, args.size, args.size)
+ with torch.no_grad():
+ torch_out = wrapper(dummy)
+ print(f"PyTorch output shape={tuple(torch_out.shape)}, "
+ f"min={torch_out.min():.3f}, max={torch_out.max():.3f} (range [0,255])")
+
+ print("Tracing...")
+ with torch.no_grad():
+ traced = torch.jit.trace(wrapper, dummy)
+
+ precision = (ct.precision.FLOAT16 if args.precision == "fp16"
+ else ct.precision.FLOAT32)
+ print(f"Converting to CoreML {args.precision.upper()}...")
+ ml = ct.convert(
+ traced,
+ inputs=[ct.ImageType(
+ name="image",
+ shape=(1, 3, args.size, args.size),
+ scale=1.0 / 255.0,
+ color_layout=ct.colorlayout.RGB,
+ )],
+ outputs=[ct.ImageType(
+ name="pixelized",
+ color_layout=ct.colorlayout.RGB,
+ )],
+ minimum_deployment_target=ct.target.iOS17,
+ compute_precision=precision,
+ convert_to="mlprogram",
+ )
+ ml.author = "WuZongWei6 (paper) / CoreML-Models (conversion)"
+ ml.short_description = (
+ f"Pixelization (SIGGRAPH Asia 2022). "
+ f"{args.size}x{args.size} RGB -> pixelized RGB (same size). "
+ "Non-commercial research use only."
+ )
+ ml.license = "Non-commercial research (see upstream LICENSE.md)"
+
+ suffix = "" if args.precision == "fp16" else "_FP32"
+ out_path = os.path.join(args.output_dir,
+ f"Pixelization_{args.size}{suffix}.mlpackage")
+ ml.save(out_path)
+ print(f"Saved: {out_path}")
+
+ print("Parity check on example image...")
+ try:
+ import PIL.Image as Image
+ example = os.path.join(REPO_DIR, "examples", "2_1.png")
+ if os.path.exists(example):
+ pil = Image.open(example).convert("RGB").resize(
+ (args.size, args.size), Image.BICUBIC)
+ src = np.array(pil)
+ else:
+ src = (dummy[0].permute(1, 2, 0).numpy() * 255).astype(np.uint8)
+ pil = Image.fromarray(src)
+
+ ml_loaded = ct.models.MLModel(
+ out_path, compute_units=ct.ComputeUnit.CPU_ONLY)
+ ml_out = ml_loaded.predict({"image": pil})["pixelized"]
+ ml_out.convert("RGB").save(
+ os.path.join(args.output_dir,
+ f"pixelization_sample_{args.precision}.png"))
+
+ t = torch.from_numpy(src).permute(2, 0, 1).float()[None] / 255.0
+ with torch.no_grad():
+ pt_img = wrapper(t)[0].permute(1, 2, 0).numpy()
+ ml_arr = np.array(ml_out.convert("RGB")).astype(np.float32)
+ diff = np.abs(ml_arr - pt_img)
+ print(f" max abs diff (0-255): {diff.max():.3f}")
+ print(f" mean abs diff (0-255): {diff.mean():.3f}")
+ print(f" sample saved: pixelization_sample_{args.precision}.png")
+ except Exception as e:
+ print(f" parity check skipped: {e}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/docs/coreml_conversion_notes.md b/docs/coreml_conversion_notes.md
index 9f9a4e1..f35614e 100644
--- a/docs/coreml_conversion_notes.md
+++ b/docs/coreml_conversion_notes.md
@@ -555,3 +555,73 @@ Additional DC-AE decoder monkey-patches required for trace: drop `output_size=`
transformers 5.x rewrote `create_causal_mask` to index into `q_length.shape` / `q_length[0]` — shape assumptions that fail under `torch.jit.trace` (`IndexError: tuple index out of range`). Downgrade to transformers 4.49.0 (the version AMD Nitro-E pins) to convert Llama 3.2 1B cleanly. Keep the wrapper limited to `model.model` (drop the LM head) and return `last_hidden_state` for seq_len=128.
---
+
+## Weight modulation with huge style codes — bake the code into Conv2d weights
+
+**Pixelization (SIGGRAPH Asia 2022) `ModulationConvBlock` overflows FP16 even though intermediate activations stay < 50.**
+
+Symptom: the FP16 model outputs a nearly-constant image (every pixel lands at tanh(~3) ≈ 0.999, producing a flat 254.875 after `(y+1)*127.5`). FP32 is fine.
+
+Cause: the upstream inference path is
+```python
+code = torch.tensor(MLP_code).reshape(1, 256, 1, 1) # constants up to 2.3e5
+cellcode = G_A.MLP(code) # abs max ~8.4e8 (!!)
+# Inside ModulationConvBlock.forward:
+_weight = weight.view(1, k, k, in_c, out_c) * code.view(1,1,1,in_c,1) # OVERFLOWS FP16
+_weight_norm = torch.sqrt(_weight.pow(2).sum([1,2,3]) + eps)
+_weight = _weight / _weight_norm.view(1,1,1,1,out_c)
+```
+`cellcode` magnitudes easily clear FP16's 65504 ceiling, so `_weight * code` saturates to Inf and the subsequent division leaves NaNs.
+
+Fix: **cellcode is a constant** (the upstream authors already extracted its pre-MLP input as a 256-float magic table and `best_cell_size` is fixed at 4 — the `cell_size` slider only affects post-processing). Since `(W·c)/‖W·c‖` is fully determined at conversion time, precompute that effective weight in FP32, store it on a plain `nn.Conv2d`, and drop the modulation op entirely:
+
+```python
+class BakedModConv(nn.Module):
+ def __init__(self, orig, code_chunk):
+ super().__init__()
+ with torch.no_grad():
+ w = orig.weight * orig.wscale # (out, in, k, k)
+ _w = w.view(1, k, k, in_c, out_c) * code_chunk.view(1,1,1,in_c,1)
+ norm = torch.sqrt((_w**2).sum([1,2,3]) + orig.eps)
+ _w = _w / norm.view(1,1,1,1,out_c)
+ w_final = _w.permute(1,2,3,0,4).reshape(k,k,in_c,out_c).permute(3,2,0,1).contiguous()
+ self.register_buffer("weight", w_final)
+ self.bias = nn.Parameter(orig.bias.detach().clone())
+ def forward(self, x):
+ x = F.conv2d(x, self.weight, bias=None, padding=self.ksize//2)
+ return F.leaky_relu(x + self.bias.view(1,-1,1,1), 0.2) * math.sqrt(2)
+```
+
+Bonus: also caught a bug in the upstream decoder — `mod_conv_3..8` are defined but never called; `mod_conv_2` is reused 7 times. Preserve that when baking (8 BakedModConv instances, weights from `mod_conv_1` for the first, `mod_conv_2` for the rest).
+
+---
+
+## Upstream "LayerNorm" that flattens the whole tensor — use `nn.GroupNorm(1, C)`
+
+Pixelization ships a custom `LayerNorm`:
+```python
+def forward(self, x):
+ mean = x.view(-1).mean().view(-1, 1, 1, 1) # global over C*H*W
+ std = x.view(-1).std().view(-1, 1, 1, 1)
+ x = (x - mean) / (std + eps)
+ return x * gamma.view(1, C, 1, 1) + beta.view(1, C, 1, 1)
+```
+
+Semantically equivalent to `nn.GroupNorm(num_groups=1, num_channels=C)` with per-channel affine. But the manual `view(-1).std()` path converts to a primitive reduce chain that coremltools lowers badly in FP16: on a (1, 128, 256, 256) tensor (~8M elements) the resulting FP16 std comes out ~4× too large, so the output magnitudes collapse by 4× and the downstream tanh branch saturates.
+
+Fix: swap the layers after loading weights, before tracing:
+```python
+def swap_ln(module):
+ for name, ch in list(module.named_children()):
+ if isinstance(ch, UpstreamLN):
+ gn = nn.GroupNorm(1, ch.num_features, eps=ch.eps)
+ gn.weight.data.copy_(ch.gamma.data)
+ gn.bias.data.copy_(ch.beta.data)
+ setattr(module, name, gn)
+ else:
+ swap_ln(ch)
+```
+
+Apple's `group_norm` op handles the reduction in FP32 even under `compute_precision=FLOAT16`, so post-swap FP16 parity against PyTorch is ~10/255 max / 0.2/255 mean — imperceptible.
+
+---
diff --git a/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift b/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift
index 376519d..7fa7de2 100644
--- a/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift
+++ b/sample_apps/CoreMLModelsApp/CoreMLModelsApp/Templates/ImageInOutDemoView.swift
@@ -18,9 +18,25 @@ struct ImageInOutDemoView: View {
@State private var processingTime: Double?
@State private var item: PhotosPickerItem?
@State private var showOriginal = false
+ // pixel_art: cache the raw (pre cell_size) model output so the preset
+ // picker only re-runs the cheap NEAREST resample + palette mapping.
+ @State private var pixelArtRaw: CGImage?
+ @State private var pixelArtPresetId: String = PixelArtPreset.all[0].id
+ // nil = use the preset's default cell size; non-nil = user dragged the
+ // slider. Reset to nil whenever the preset changes.
+ @State private var pixelArtCellSizeOverride: Double?
+ // User-selected pre-blur target (px). 512 = no blur. Smaller = more
+ // abstracted network input. nil = derive from cellSize.
+ @State private var pixelArtBlurOverride: Int?
@StateObject private var session = ModelSession()
private var outputType: String { model.configString("output_type") ?? "image" }
+ private var pixelArtPreset: PixelArtPreset {
+ PixelArtPreset.all.first { $0.id == pixelArtPresetId } ?? PixelArtPreset.all[0]
+ }
+ private var pixelArtCellSize: Int {
+ Int(pixelArtCellSizeOverride ?? Double(pixelArtPreset.cellSize))
+ }
var body: some View {
VStack(spacing: 0) {
@@ -42,7 +58,11 @@ struct ImageInOutDemoView: View {
}
Image(uiImage: output).resizable().aspectRatio(contentMode: .fit)
} else if let img = showOriginal ? inputImage : outputImage ?? inputImage {
- Image(uiImage: img).resizable().aspectRatio(contentMode: .fit)
+ if outputType == "pixel_art" && !showOriginal {
+ Image(uiImage: img).resizable().interpolation(.none).aspectRatio(contentMode: .fit)
+ } else {
+ Image(uiImage: img).resizable().aspectRatio(contentMode: .fit)
+ }
} else {
VStack(spacing: 12) {
Image(systemName: "photo.on.rectangle.angled").font(.system(size: 60)).foregroundStyle(.secondary)
@@ -85,6 +105,86 @@ struct ImageInOutDemoView: View {
if isProcessing { ProgressView().controlSize(.small); Text(status).font(.caption).foregroundStyle(.secondary) }
}
+ if outputType == "pixel_art" && pixelArtRaw != nil {
+ ScrollView(.horizontal, showsIndicators: false) {
+ HStack(spacing: 8) {
+ ForEach(PixelArtPreset.all, id: \.id) { preset in
+ Button {
+ pixelArtPresetId = preset.id
+ } label: {
+ VStack(spacing: 2) {
+ Image(systemName: preset.systemImage).font(.body)
+ Text(preset.name).font(.caption2)
+ }
+ .padding(.vertical, 6).padding(.horizontal, 10)
+ .background(
+ pixelArtPresetId == preset.id
+ ? Color.accentColor.opacity(0.25)
+ : Color(.systemGray6)
+ )
+ .cornerRadius(8)
+ }
+ .buttonStyle(.plain)
+ }
+ }
+ .padding(.horizontal, 4)
+ }
+
+ HStack(spacing: 10) {
+ Image(systemName: "square.grid.3x3")
+ .font(.caption).foregroundStyle(.secondary)
+ Slider(
+ value: Binding(
+ get: { Double(pixelArtCellSize) },
+ set: { pixelArtCellSizeOverride = $0 }
+ ),
+ in: 4...10, step: 1
+ ) { Text("Cell size") }
+ Text("\(pixelArtCellSize)")
+ .font(.caption.monospacedDigit())
+ .frame(width: 22, alignment: .trailing)
+ }
+
+ // Pre-blur (photo shrink) picker. Off = full-res network
+ // input; smaller targets trade fine detail for cleaner,
+ // more iconic palette cells.
+ Picker("Abstraction", selection: Binding(
+ get: { pixelArtBlurOverride ?? 0 }, // 0 == auto (derive from cs)
+ set: { pixelArtBlurOverride = $0 == 0 ? nil : $0 }
+ )) {
+ Text("Auto").tag(0)
+ Text("Off").tag(512)
+ Text("256").tag(256)
+ Text("128").tag(128)
+ Text("64").tag(64)
+ Text("32").tag(32)
+ }
+ .pickerStyle(.segmented)
+ .onChange(of: pixelArtBlurOverride) {
+ if let img = inputImage { Task { await runInference(on: img) } }
+ }
+
+ .onChange(of: pixelArtPresetId) {
+ // New preset → reset override & re-run inference so the
+ // pre-blur matches the preset's default cellSize.
+ pixelArtCellSizeOverride = nil
+ if let img = inputImage {
+ Task { await runInference(on: img) }
+ } else if let raw = pixelArtRaw {
+ outputImage = pixelArtPostProcess(
+ raw, cellSize: pixelArtCellSize, palette: pixelArtPreset.palette)
+ }
+ }
+ .onChange(of: pixelArtCellSizeOverride) {
+ // During drag: cheap palette re-snap only. The network
+ // re-run happens on slider release (onEditingChanged).
+ if let raw = pixelArtRaw {
+ outputImage = pixelArtPostProcess(
+ raw, cellSize: pixelArtCellSize, palette: pixelArtPreset.palette)
+ }
+ }
+ }
+
HStack(spacing: 12) {
PhotosPicker(selection: $item, matching: .images) {
Label("Select Photo", systemImage: "photo.badge.plus")
@@ -114,7 +214,11 @@ struct ImageInOutDemoView: View {
// a photo.
session.ensure { try await ModelLoader.loadPrimary(for: model) }
}
- .onChange(of: item) { _, _ in loadAndRun() }
+ .onChange(of: item) { _, _ in
+ pixelArtRaw = nil
+ pixelArtCellSizeOverride = nil
+ loadAndRun()
+ }
}
// MARK: - Load & Run
@@ -165,7 +269,19 @@ struct ImageInOutDemoView: View {
let inputDict: [String: Any]
if let imageInput {
- guard let pb = ImageUtils.pixelBuffer(from: cgImage, width: inputSize, height: inputSize) else {
+ // pixel_art: pre-downsample the source based on cell size so the
+ // fixed-512 network effectively sees a lower-resolution image
+ // and makes its semantic abstraction at the user's chosen
+ // chunkiness. Mimics upstream test_pro.py's input resize.
+ let sourceForBuffer: CGImage = {
+ guard outputType == "pixel_art" else { return cgImage }
+ let target = pixelArtBlurOverride
+ ?? pixelArtPreBlurTarget(cellSize: pixelArtCellSize, inputSize: inputSize)
+ return target < inputSize
+ ? (resizeCGImageBicubic(cgImage, to: target) ?? cgImage)
+ : cgImage
+ }()
+ guard let pb = ImageUtils.pixelBuffer(from: sourceForBuffer, width: inputSize, height: inputSize) else {
await MainActor.run { isProcessing = false; status = "Prep failed" }; return
}
inputDict = [imageInput.key: pb]
@@ -205,6 +321,13 @@ struct ImageInOutDemoView: View {
result = processLABABOutput(output: output, originalImage: cgImage, origW: origW, origH: origH, modelSize: inputSize)
case "segmap":
result = processSegmapOutput(output: output, originalImage: cgImage, origW: origW, origH: origH, modelSize: inputSize)
+ case "pixel_art":
+ let raw = extractRawCGImage(output: output)
+ await MainActor.run { pixelArtRaw = raw }
+ result = raw.flatMap {
+ pixelArtPostProcess($0, cellSize: pixelArtCellSize,
+ palette: pixelArtPreset.palette)
+ }
default:
if let r = processImageOutput(output: output) {
result = restoreAspect(r, origW: origW, origH: origH, inputSize: inputSize)
@@ -284,6 +407,72 @@ struct ImageInOutDemoView: View {
return nil
}
+ // MARK: - Output: pixel_art (Pixelization)
+
+ private func extractRawCGImage(output: MLFeatureProvider) -> CGImage? {
+ for name in output.featureNames {
+ if let pb = output.featureValue(for: name)?.imageBufferValue {
+ let ci = CIImage(cvPixelBuffer: pb)
+ if let cg = CIContext(options: [.useSoftwareRenderer: false])
+ .createCGImage(ci, from: ci.extent) { return cg }
+ }
+ }
+ return nil
+ }
+
+ /// Clean pixel-art rendering:
+ /// 1. Mean-sample one color per `cs`×`cs` cell.
+ /// 2. Palette-snap each cell (optional).
+ /// 3. NEAREST upscale by `cs` into the final image.
+ ///
+ /// We deliberately do NOT run a separate edge-detection overlay. Source-
+ /// resolution gradient detection picks up per-cell colour jitter and
+ /// texture noise, sprinkling stray dark lines across flat areas
+    /// (stray lines "trickling out" across the picture) — the chunky cells + limited palette
+ /// already give enough silhouette definition on their own.
+ private func pixelArtPostProcess(_ cg: CGImage, cellSize: Int, palette: [UInt32]?) -> UIImage? {
+ let cs = max(1, cellSize)
+ let gridW = cg.width / cs
+ let gridH = cg.height / cs
+ guard gridW > 0 && gridH > 0 else { return nil }
+ let outW = gridW * cs
+ let outH = gridH * cs
+ let srcW = cg.width
+ let srcH = cg.height
+
+ guard let srcData = cg.dataProvider?.data,
+ let srcPtr = CFDataGetBytePtr(srcData) else { return nil }
+ let srcBPR = cg.bytesPerRow
+ let srcBpp = cg.bitsPerPixel / 8
+
+ var grid = [UInt8](repeating: 0, count: gridW * gridH * 3)
+ grid.withUnsafeMutableBufferPointer { gbuf in
+ pixelArtMeanSample(
+ srcPtr: srcPtr, srcW: srcW, srcH: srcH,
+ srcBPR: srcBPR, srcBpp: srcBpp,
+ cs: cs, gridW: gridW, gridH: gridH,
+ gbuf: gbuf.baseAddress!
+ )
+ }
+ if let palette = palette, !palette.isEmpty {
+ applyPalette(&grid, palette: palette)
+ }
+
+ let bytesPerRow = outW * 4
+ var pixels = [UInt8](repeating: 0, count: bytesPerRow * outH)
+ pixels.withUnsafeMutableBufferPointer { dstBuf in
+ grid.withUnsafeBufferPointer { gbuf in
+ pixelArtReplicate(
+ dst: dstBuf.baseAddress!,
+ gptr: gbuf.baseAddress!,
+ gridW: gridW, gridH: gridH,
+ cs: cs, bytesPerRow: bytesPerRow
+ )
+ }
+ }
+ return ImageUtils.makeRGBA(pixels: pixels, width: outW, height: outH)
+ }
+
// MARK: - Output: mask (RMBG)
private func processMaskOutput(output: MLFeatureProvider, originalImage: CGImage, origW: Int, origH: Int, modelSize: Int) -> UIImage? {
@@ -590,3 +779,201 @@ struct ImageInOutDemoView: View {
return (max(0, min(1, gamma(rl))), max(0, min(1, gamma(gl))), max(0, min(1, gamma(bl))))
}
}
+
+// MARK: - Pixel art presets
+
+/// A named pixel-art style. `cellSize` controls the grid resolution (larger =
+/// chunkier). `palette` is an optional list of 0xRRGGBB colors to snap every
+/// cell to — nil means "keep the generator's own colors".
+struct PixelArtPreset {
+ let id: String
+ let name: String
+ let systemImage: String
+ let cellSize: Int
+ let palette: [UInt32]?
+
+ // All presets default to the network's native cellSize (4) — the palette
+ // is what differentiates them. Users dial chunkiness via the slider; at
+ // cs=4 the pre-blur is skipped and the network's own pixelization shows
+ // through cleanest, which is what tends to read best across photos.
+ static let all: [PixelArtPreset] = [
+ PixelArtPreset(id: "off", name: "Off", systemImage: "circle", cellSize: 4, palette: nil),
+ PixelArtPreset(id: "gameboy", name: "Game Boy", systemImage: "gamecontroller", cellSize: 4, palette: PixelArtPalettes.gameBoy),
+ PixelArtPreset(id: "nes", name: "NES", systemImage: "gamecontroller.fill", cellSize: 4, palette: PixelArtPalettes.nes),
+ PixelArtPreset(id: "pico8", name: "Pico-8", systemImage: "square.stack.3d.up.fill", cellSize: 4, palette: PixelArtPalettes.pico8),
+ PixelArtPreset(id: "c64", name: "C64", systemImage: "desktopcomputer", cellSize: 4, palette: PixelArtPalettes.c64),
+ ]
+}
+
+enum PixelArtPalettes {
+ // Game Boy DMG: 4 shades of olive-green.
+ static let gameBoy: [UInt32] = [
+ 0x9BBC0F, 0x8BAC0F, 0x306230, 0x0F380F,
+ ]
+
+ // Pico-8 fantasy console: 16 colors.
+ static let pico8: [UInt32] = [
+ 0x000000, 0x1D2B53, 0x7E2553, 0x008751,
+ 0xAB5236, 0x5F574F, 0xC2C3C7, 0xFFF1E8,
+ 0xFF004D, 0xFFA300, 0xFFEC27, 0x00E436,
+ 0x29ADFF, 0x83769C, 0xFF77A8, 0xFFCCAA,
+ ]
+
+ // Commodore 64: 16 colors (Pepto's well-known sRGB approximation).
+ static let c64: [UInt32] = [
+ 0x000000, 0xFFFFFF, 0x68372B, 0x70A4B2,
+ 0x6F3D86, 0x588D43, 0x352879, 0xB8C76F,
+ 0x6F4F25, 0x433900, 0x9A6759, 0x444444,
+ 0x6C6C6C, 0x9AD284, 0x6C5EB5, 0x959595,
+ ]
+
+ // NES 2C02 PPU (Nintendulator NTSC approximation), 54 usable colors.
+ static let nes: [UInt32] = [
+ 0x7C7C7C, 0x0000FC, 0x0000BC, 0x4428BC,
+ 0x940084, 0xA80020, 0xA81000, 0x881400,
+ 0x503000, 0x007800, 0x006800, 0x005800,
+ 0x004058,
+ 0xBCBCBC, 0x0078F8, 0x0058F8, 0x6844FC,
+ 0xD800CC, 0xE40058, 0xF83800, 0xE45C10,
+ 0xAC7C00, 0x00B800, 0x00A800, 0x00A844,
+ 0x008888,
+ 0xF8F8F8, 0x3CBCFC, 0x6888FC, 0x9878F8,
+ 0xF878F8, 0xF85898, 0xF87858, 0xFCA044,
+ 0xF8B800, 0xB8F818, 0x58D854, 0x58F898,
+ 0x00E8D8, 0x787878,
+ 0xFCFCFC, 0xA4E4FC, 0xB8B8F8, 0xD8B8F8,
+ 0xF8B8F8, 0xF8A4C0, 0xF0D0B0, 0xFCE0A8,
+ 0xF8D878, 0xD8F878, 0xB8F8B8, 0xB8F8D8,
+ 0x00FCFC, 0xF8D8F8,
+ ]
+}
+
+/// Replicate each 3-byte grid cell into a `cs`×`cs` block of the output
+/// RGBA buffer — a nearest-neighbor upscale of the (optionally
+/// palette-snapped) grid produced by `pixelArtMeanSample`.
+/// Map the user's `cellSize` to the target resolution the photo should be
+/// downsampled to before feeding a fixed-`inputSize` Pixelization network.
+/// The paper's `test_pro.py` resizes the whole network input by cell_size so
+/// the (fully-convolutional) generator makes its abstraction at that scale.
+/// Our CoreML model is fixed-size, so we emulate the effect by shrinking the
+/// source and letting CGContext resize it back up — the network sees a
+/// lower-resolution image and produces cleaner coarse cells.
+///
+/// Matches the upstream `test_pro.py` factor (`inputSize * 4 / cellSize`).
+/// A previous 2× boost to this was too radical — at cs=16 it shrank the
+/// input to 64 px, destroying readability. The useful range is cs=4-8 in
+/// practice; in that window the upstream formula gives 256-512 target,
+/// which blurs texture enough to clean up palette cells without wiping
+/// the subject. cellSize <= 4 keeps native resolution.
+func pixelArtPreBlurTarget(cellSize: Int, inputSize: Int) -> Int {
+ if cellSize <= 4 { return inputSize }
+ return max(96, min(inputSize, inputSize * 4 / cellSize))
+}
+
+/// Redraw `cg` into a square `size`×`size` CGImage using .high interpolation.
+func resizeCGImageBicubic(_ cg: CGImage, to size: Int) -> CGImage? {
+ guard let ctx = CGContext(
+ data: nil, width: size, height: size,
+ bitsPerComponent: 8, bytesPerRow: size * 4,
+ space: CGColorSpaceCreateDeviceRGB(),
+ bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue
+ ) else { return nil }
+ ctx.interpolationQuality = .high
+ ctx.draw(cg, in: CGRect(x: 0, y: 0, width: size, height: size))
+ return ctx.makeImage()
+}
+
+/// Mean-sample a `gridW`×`gridH` buffer of RGB triplets from the source
+/// image by averaging each `cs`×`cs` region.
+func pixelArtMeanSample(
+ srcPtr: UnsafePointer,
+ srcW: Int, srcH: Int,
+ srcBPR: Int, srcBpp: Int,
+ cs: Int, gridW: Int, gridH: Int,
+ gbuf: UnsafeMutablePointer
+) {
+ let div: Int32 = Int32(cs * cs)
+ DispatchQueue.concurrentPerform(iterations: gridH) { gy in
+ for gx in 0..,
+ gptr: UnsafePointer,
+ gridW: Int, gridH: Int,
+ cs: Int, bytesPerRow: Int
+) {
+ DispatchQueue.concurrentPerform(iterations: gridH) { gy in
+ for gx in 0..> 16) & 0xFF) }
+ let pg: [Int16] = palette.map { Int16(($0 >> 8) & 0xFF) }
+ let pb: [Int16] = palette.map { Int16($0 & 0xFF) }
+ let count = buf.count / 3
+ buf.withUnsafeMutableBufferPointer { buf in
+ let ptr = buf.baseAddress!
+ pr.withUnsafeBufferPointer { prBuf in
+ pg.withUnsafeBufferPointer { pgBuf in
+ pb.withUnsafeBufferPointer { pbBuf in
+ let prp = prBuf.baseAddress!, pgp = pgBuf.baseAddress!, pbp = pbBuf.baseAddress!
+ DispatchQueue.concurrentPerform(iterations: count) { i in
+ let off = i * 3
+ let r = Int16(ptr[off]), g = Int16(ptr[off + 1]), b = Int16(ptr[off + 2])
+ var bestIdx = 0
+ var bestDist: Int32 = .max
+ for j in 0..
+
+
+
+
diff --git a/sample_apps/PixelizationDemo/PixelizationDemo/PixelizationDemoApp.swift b/sample_apps/PixelizationDemo/PixelizationDemo/PixelizationDemoApp.swift
new file mode 100644
index 0000000..0ce1d18
--- /dev/null
+++ b/sample_apps/PixelizationDemo/PixelizationDemo/PixelizationDemoApp.swift
@@ -0,0 +1,10 @@
+import SwiftUI
+
+/// App entry point: a single window scene hosting `ContentView`.
+@main
+struct PixelizationDemoApp: App {
+    var body: some Scene {
+        WindowGroup {
+            ContentView()
+        }
+    }
+}
diff --git a/sample_apps/PixelizationDemo/PixelizationDemo/Pixelizer.swift b/sample_apps/PixelizationDemo/PixelizationDemo/Pixelizer.swift
new file mode 100644
index 0000000..3374be6
--- /dev/null
+++ b/sample_apps/PixelizationDemo/PixelizationDemo/Pixelizer.swift
@@ -0,0 +1,337 @@
+import CoreML
+import CoreImage
+import UIKit
+
+/// Failures surfaced by the pixelization pipeline.
+enum PixelizerError: LocalizedError {
+    /// The compiled Core ML model could not be located in the bundle.
+    case modelNotFound
+    /// The source image could not be decoded or converted.
+    case invalidImage
+    /// The Core ML prediction did not yield a usable output.
+    case predictionFailed
+
+    /// Human-readable message for each failure case.
+    var errorDescription: String? {
+        switch self {
+        case .modelNotFound:
+            return "Pixelization model not found"
+        case .invalidImage:
+            return "Failed to process image"
+        case .predictionFailed:
+            return "Prediction failed"
+        }
+    }
+}
+
+// MARK: - Presets
+
+/// A named pixel-art style. `cellSize` = grid chunkiness. `palette` = optional
+/// list of 0xRRGGBB colors to snap every cell to.
+struct PixelArtPreset {
+    let id: String
+    let name: String
+    let systemImage: String
+    let cellSize: Int
+    let palette: [UInt32]?
+
+    /// Every preset ships with the same chunkiness — the palette is what
+    /// distinguishes each mode; cell size is user-tuned via the slider.
+    private static let defaultCellSize = 4
+
+    static let all: [PixelArtPreset] = [
+        .init(id: "off", name: "Off", systemImage: "circle", cellSize: defaultCellSize, palette: nil),
+        .init(id: "gameboy", name: "Game Boy", systemImage: "gamecontroller", cellSize: defaultCellSize, palette: PixelArtPalettes.gameBoy),
+        .init(id: "nes", name: "NES", systemImage: "gamecontroller.fill", cellSize: defaultCellSize, palette: PixelArtPalettes.nes),
+        .init(id: "pico8", name: "Pico-8", systemImage: "square.stack.3d.up.fill", cellSize: defaultCellSize, palette: PixelArtPalettes.pico8),
+        .init(id: "c64", name: "C64", systemImage: "desktopcomputer", cellSize: defaultCellSize, palette: PixelArtPalettes.c64),
+    ]
+}
+
+/// Fixed 0xRRGGBB palettes used by `PixelArtPreset` to snap sampled cells
+/// to a retro console look.
+enum PixelArtPalettes {
+    // 4-shade green ramp, light → dark — presumably the classic Game Boy
+    // (DMG) screen colors; verify against the intended reference palette.
+    static let gameBoy: [UInt32] = [
+        0x9BBC0F, 0x8BAC0F, 0x306230, 0x0F380F,
+    ]
+    // Pico-8 fantasy console's 16-color palette.
+    static let pico8: [UInt32] = [
+        0x000000, 0x1D2B53, 0x7E2553, 0x008751,
+        0xAB5236, 0x5F574F, 0xC2C3C7, 0xFFF1E8,
+        0xFF004D, 0xFFA300, 0xFFEC27, 0x00E436,
+        0x29ADFF, 0x83769C, 0xFF77A8, 0xFFCCAA,
+    ]
+    // Commodore 64 16-color palette.
+    static let c64: [UInt32] = [
+        0x000000, 0xFFFFFF, 0x68372B, 0x70A4B2,
+        0x6F3D86, 0x588D43, 0x352879, 0xB8C76F,
+        0x6F4F25, 0x433900, 0x9A6759, 0x444444,
+        0x6C6C6C, 0x9AD284, 0x6C5EB5, 0x959595,
+    ]
+    // NES palette subset (54 entries). NOTE(review): the canonical NES
+    // master palette has 64 slots including duplicates/blacks — confirm
+    // the omissions here are intentional.
+    static let nes: [UInt32] = [
+        0x7C7C7C, 0x0000FC, 0x0000BC, 0x4428BC,
+        0x940084, 0xA80020, 0xA81000, 0x881400,
+        0x503000, 0x007800, 0x006800, 0x005800, 0x004058,
+        0xBCBCBC, 0x0078F8, 0x0058F8, 0x6844FC,
+        0xD800CC, 0xE40058, 0xF83800, 0xE45C10,
+        0xAC7C00, 0x00B800, 0x00A800, 0x00A844, 0x008888,
+        0xF8F8F8, 0x3CBCFC, 0x6888FC, 0x9878F8,
+        0xF878F8, 0xF85898, 0xF87858, 0xFCA044,
+        0xF8B800, 0xB8F818, 0x58D854, 0x58F898,
+        0x00E8D8, 0x787878,
+        0xFCFCFC, 0xA4E4FC, 0xB8B8F8, 0xD8B8F8,
+        0xF8B8F8, 0xF8A4C0, 0xF0D0B0, 0xFCE0A8,
+        0xF8D878, 0xD8F878, 0xB8F8B8, 0xB8F8D8,
+        0x00FCFC, 0xF8D8F8,
+    ]
+}
+
+// MARK: - Pixelizer
+
+/// Stateless namespace wrapping the Core ML pixelization model plus the
+/// CPU post-processing pipeline (mean-sample → optional palette snap →
+/// nearest-neighbor replicate).
+enum Pixelizer {
+    // Side length (px) of the model's input and output images.
+    static let inputSize = 512
+
+    /// Matches the upstream `test_pro.py` factor (`inputSize * 4 / cellSize`).
+    /// cellSize <= 4 keeps native resolution.
+    static func preBlurTargetSize(for cellSize: Int) -> Int {
+        if cellSize <= 4 { return inputSize }
+        // Clamp so large cell sizes never shrink the pre-pass below 96 px.
+        return max(96, min(inputSize, inputSize * 4 / cellSize))
+    }
+
+    /// Run the network and return the raw 512×512 pixelized CGImage.
+    /// `preBlurTarget` should come from `preBlurTargetSize(for:)` — pass
+    /// `inputSize` (= 512) for no blur.
+    static func runModel(on image: UIImage, preBlurTarget: Int = inputSize) async throws -> CGImage {
+        // Bake EXIF orientation into the bitmap before touching raw pixels.
+        let fixed = image.normalizedOrientation()
+        guard let cgImage = fixed.cgImage else { throw PixelizerError.invalidImage }
+
+        // Downscale first; re-drawing this smaller image back up to
+        // 512×512 inside createPixelBuffer acts as the pre-blur.
+        let blurred: CGImage = preBlurTarget < inputSize
+            ? (resizeCGImageBicubic(cgImage, to: preBlurTarget) ?? cgImage)
+            : cgImage
+
+        guard let inputBuffer = createPixelBuffer(
+            from: blurred, width: inputSize, height: inputSize
+        ) else { throw PixelizerError.invalidImage }
+
+        // The model's input feature is "image"; its output feature is
+        // "pixelized" (an image buffer).
+        let model = try loadModel()
+        let input = try MLDictionaryFeatureProvider(dictionary: ["image": inputBuffer])
+        let output = try await model.prediction(from: input)
+        guard let buffer = output.featureValue(for: "pixelized")?.imageBufferValue else {
+            throw PixelizerError.predictionFailed
+        }
+        // Convert the output CVPixelBuffer to CGImage via Core Image (GPU
+        // renderer requested; falls back per CIContext behavior).
+        let ci = CIImage(cvPixelBuffer: buffer)
+        guard let cg = CIContext(options: [.useSoftwareRenderer: false])
+            .createCGImage(ci, from: ci.extent)
+        else { throw PixelizerError.predictionFailed }
+        return cg
+    }
+
+    /// Full pipeline: run the model for `image`, then post-process with the
+    /// preset's palette. `cellSize` overrides the preset's when provided.
+    /// Falls back to the unprocessed model output if post-processing fails.
+    static func pixelize(_ image: UIImage, preset: PixelArtPreset, cellSize: Int? = nil) async throws -> UIImage {
+        let cs = cellSize ?? preset.cellSize
+        let cg = try await runModel(on: image, preBlurTarget: preBlurTargetSize(for: cs))
+        return postProcess(cg, cellSize: cs, palette: preset.palette) ?? UIImage(cgImage: cg)
+    }
+
+    // Square resize into an RGBA8 context with .high interpolation.
+    // Returns nil when the context cannot be created.
+    private static func resizeCGImageBicubic(_ cg: CGImage, to size: Int) -> CGImage? {
+        guard let ctx = CGContext(
+            data: nil, width: size, height: size,
+            bitsPerComponent: 8, bytesPerRow: size * 4,
+            space: CGColorSpaceCreateDeviceRGB(),
+            bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue
+        ) else { return nil }
+        ctx.interpolationQuality = .high
+        ctx.draw(cg, in: CGRect(x: 0, y: 0, width: size, height: size))
+        return ctx.makeImage()
+    }
+
+    /// Mean-sample → optional palette snap → NEAREST upscale. No edge overlay
+    /// — source-resolution gradient detection adds stray lines in flat areas
+    /// (texture noise), so we rely on the cells + palette for definition.
+    static func postProcess(_ cg: CGImage, cellSize: Int, palette: [UInt32]?) -> UIImage? {
+        let cs = max(1, cellSize)
+        // Grid of cs×cs cells; any remainder rows/columns are dropped.
+        let gridW = cg.width / cs
+        let gridH = cg.height / cs
+        guard gridW > 0 && gridH > 0 else { return nil }
+        let outW = gridW * cs
+        let outH = gridH * cs
+        let srcW = cg.width
+        let srcH = cg.height
+
+        // Read the source bitmap bytes directly. NOTE(review): assumes the
+        // first three components of each pixel are R,G,B — confirm against
+        // pixelArtMeanSample and the formats produced upstream.
+        guard let srcData = cg.dataProvider?.data,
+              let srcPtr = CFDataGetBytePtr(srcData) else { return nil }
+        let srcBPR = cg.bytesPerRow
+        let srcBpp = cg.bitsPerPixel / 8
+
+        // gridW×gridH buffer of RGB triplets, one mean color per cell.
+        var grid = [UInt8](repeating: 0, count: gridW * gridH * 3)
+        grid.withUnsafeMutableBufferPointer { gbuf in
+            pixelArtMeanSample(
+                srcPtr: srcPtr, srcW: srcW, srcH: srcH,
+                srcBPR: srcBPR, srcBpp: srcBpp,
+                cs: cs, gridW: gridW, gridH: gridH,
+                gbuf: gbuf.baseAddress!
+            )
+        }
+        // Snap each cell to the nearest preset color, if a palette is set.
+        if let palette = palette, !palette.isEmpty {
+            applyPalette(&grid, palette: palette)
+        }
+
+        // Replicate each cell back up to cs×cs blocks in an RGBA8 buffer.
+        let bytesPerRow = outW * 4
+        var pixels = [UInt8](repeating: 0, count: bytesPerRow * outH)
+        pixels.withUnsafeMutableBufferPointer { dstBuf in
+            grid.withUnsafeBufferPointer { gbuf in
+                pixelArtReplicate(
+                    dst: dstBuf.baseAddress!,
+                    gptr: gbuf.baseAddress!,
+                    gridW: gridW, gridH: gridH,
+                    cs: cs, bytesPerRow: bytesPerRow
+                )
+            }
+        }
+
+        let provider = CGDataProvider(data: Data(pixels) as CFData)!
+        let space = CGColorSpaceCreateDeviceRGB()
+        let bitmap = CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedLast.rawValue)
+        if let out = CGImage(
+            width: outW, height: outH,
+            bitsPerComponent: 8, bitsPerPixel: 32,
+            bytesPerRow: bytesPerRow,
+            space: space, bitmapInfo: bitmap,
+            provider: provider, decode: nil,
+            shouldInterpolate: false, intent: .defaultIntent
+        ) {
+            return UIImage(cgImage: out)
+        }
+        return nil
+    }
+
+    // MARK: - Model loading
+
+    // Scans the main bundle for a compiled model directory whose name
+    // contains "Pixelization" (so the exact .mlmodelc filename is not
+    // hard-coded) and loads it on CPU + Neural Engine.
+    private static func loadModel() throws -> MLModel {
+        guard let resourcePath = Bundle.main.resourcePath,
+              let items = try? FileManager.default.contentsOfDirectory(atPath: resourcePath)
+        else { throw PixelizerError.modelNotFound }
+        for item in items where item.hasSuffix(".mlmodelc") && item.contains("Pixelization") {
+            let url = URL(fileURLWithPath: (resourcePath as NSString).appendingPathComponent(item))
+            let config = MLModelConfiguration()
+            config.computeUnits = .cpuAndNeuralEngine
+            return try MLModel(contentsOf: url, configuration: config)
+        }
+        throw PixelizerError.modelNotFound
+    }
+
+    // MARK: - Pixel buffer
+
+    // Renders cgImage, scaled to width×height, into a new 32BGRA
+    // CVPixelBuffer (noneSkipFirst + byteOrder32Little = BGRA byte order,
+    // matching kCVPixelFormatType_32BGRA). Returns nil on any CG failure.
+    private static func createPixelBuffer(from cgImage: CGImage, width: Int, height: Int) -> CVPixelBuffer? {
+        var pb: CVPixelBuffer?
+        CVPixelBufferCreate(
+            kCFAllocatorDefault, width, height, kCVPixelFormatType_32BGRA,
+            [kCVPixelBufferCGImageCompatibilityKey: true,
+             kCVPixelBufferCGBitmapContextCompatibilityKey: true] as CFDictionary,
+            &pb
+        )
+        guard let buffer = pb else { return nil }
+        CVPixelBufferLockBaseAddress(buffer, [])
+        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
+        guard let ctx = CGContext(
+            data: CVPixelBufferGetBaseAddress(buffer),
+            width: width, height: height, bitsPerComponent: 8,
+            bytesPerRow: CVPixelBufferGetBytesPerRow(buffer),
+            space: CGColorSpaceCreateDeviceRGB(),
+            bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
+                | CGBitmapInfo.byteOrder32Little.rawValue
+        ) else { return nil }
+        ctx.interpolationQuality = .high
+        ctx.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height))
+        return buffer
+    }
+}
+
+// MARK: - Sampling / replicate helpers
+
+func pixelArtMeanSample(
+ srcPtr: UnsafePointer,
+ srcW: Int, srcH: Int,
+ srcBPR: Int, srcBpp: Int,
+ cs: Int, gridW: Int, gridH: Int,
+ gbuf: UnsafeMutablePointer
+) {
+ let div: Int32 = Int32(cs * cs)
+ DispatchQueue.concurrentPerform(iterations: gridH) { gy in
+ for gx in 0..,
+ gptr: UnsafePointer,
+ gridW: Int, gridH: Int,
+ cs: Int, bytesPerRow: Int
+) {
+ DispatchQueue.concurrentPerform(iterations: gridH) { gy in
+ for gx in 0..> 16) & 0xFF) }
+ let pg: [Int16] = palette.map { Int16(($0 >> 8) & 0xFF) }
+ let pb: [Int16] = palette.map { Int16($0 & 0xFF) }
+ let count = buf.count / 3
+ buf.withUnsafeMutableBufferPointer { buf in
+ let ptr = buf.baseAddress!
+ pr.withUnsafeBufferPointer { prBuf in
+ pg.withUnsafeBufferPointer { pgBuf in
+ pb.withUnsafeBufferPointer { pbBuf in
+ let prp = prBuf.baseAddress!, pgp = pgBuf.baseAddress!, pbp = pbBuf.baseAddress!
+ DispatchQueue.concurrentPerform(iterations: count) { i in
+ let off = i * 3
+ let r = Int16(ptr[off]), g = Int16(ptr[off + 1]), b = Int16(ptr[off + 2])
+ var bestIdx = 0
+ var bestDist: Int32 = .max
+ for j in 0.. UIImage {
+ guard imageOrientation != .up else { return self }
+ UIGraphicsBeginImageContextWithOptions(size, false, scale)
+ draw(in: CGRect(origin: .zero, size: size))
+ let normalized = UIGraphicsGetImageFromCurrentImageContext()
+ UIGraphicsEndImageContext()
+ return normalized ?? self
+ }
+}