From e52a90c17304183cf5332702ce5c65a0c6b35a82 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Thu, 2 Apr 2026 00:57:41 +0800
Subject: [PATCH 1/2] feat: add webp support

---
 .gitmodules                     |   3 +
 CMakeLists.txt                  |   5 +
 docs/build.md                   | 358 ++++++-------
 examples/cli/CMakeLists.txt     |   5 +
 examples/cli/README.md          |   4 +-
 examples/cli/avi_writer.h       | 217 --------
 examples/cli/image_metadata.cpp |  95 +++-
 examples/cli/main.cpp           |  44 +-
 examples/common/common.hpp      | 272 +---------
 examples/common/log.cpp         | 118 +++++
 examples/common/log.h           |  32 ++
 examples/common/media_io.cpp    | 879 ++++++++++++++++++++++++++++++++
 examples/common/media_io.h      |  76 +++
 examples/server/CMakeLists.txt  |  11 +-
 examples/server/main.cpp        | 123 ++---
 thirdparty/CMakeLists.txt       |  19 +-
 thirdparty/libwebp              |   1 +
 17 files changed, 1492 insertions(+), 770 deletions(-)
 delete mode 100644 examples/cli/avi_writer.h
 create mode 100644 examples/common/log.cpp
 create mode 100644 examples/common/log.h
 create mode 100644 examples/common/media_io.cpp
 create mode 100644 examples/common/media_io.h
 create mode 160000 thirdparty/libwebp

diff --git a/.gitmodules b/.gitmodules
index 5d66c8795..91cde1f28 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "examples/server/frontend"]
 	path = examples/server/frontend
 	url = https://github.com/leejet/stable-ui.git
+[submodule "thirdparty/libwebp"]
+	path = thirdparty/libwebp
+	url = https://github.com/webmproject/libwebp.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bad1ba4c2..9098f827b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ endif()
 # general
 #option(SD_BUILD_TESTS                "sd: build tests"    ${SD_STANDALONE})
 option(SD_BUILD_EXAMPLES             "sd: build examples" ${SD_STANDALONE})
+option(SD_WEBP                       "sd: enable WebP image I/O support" ON)
 option(SD_CUDA                       "sd: cuda backend" OFF)
 option(SD_HIPBLAS                    "sd: rocm backend" OFF)
 option(SD_METAL                      "sd: metal backend" OFF)
@@ -77,6 +78,10 @@ if(SD_MUSA)
     add_definitions(-DSD_USE_CUDA)
 endif()
 
+if(SD_WEBP)
+    add_compile_definitions(SD_USE_WEBP)
+endif()
+
 set(SD_LIB stable-diffusion)
 
 file(GLOB SD_LIB_SOURCES
diff --git a/docs/build.md b/docs/build.md
index 1ba582d9f..0bff5df00 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -1,173 +1,185 @@
-# Build from scratch
-
-## Get the Code
-
-```
-git clone --recursive https://github.com/leejet/stable-diffusion.cpp
-cd stable-diffusion.cpp
-```
-
-- If you have already cloned the repository, you can use the following command to update the repository to the latest code.
-
-```
-cd stable-diffusion.cpp
-git pull origin master
-git submodule init
-git submodule update
-```
-
-## Build (CPU only)
-
-If you don't have a GPU or CUDA installed, you can build a CPU-only version.
-
-```shell
-mkdir build && cd build
-cmake ..
-cmake --build . --config Release
-```
-
-## Build with OpenBLAS
-
-```shell
-mkdir build && cd build
-cmake .. -DGGML_OPENBLAS=ON
-cmake --build . --config Release
-```
-
-## Build with CUDA
-
-This provides GPU acceleration using NVIDIA GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_CUDA=ON
-cmake --build . --config Release
-```
-
-## Build with HipBLAS
-
-This provides GPU acceleration using AMD GPU. Make sure to have the ROCm toolkit installed.
-To build for another GPU architecture than installed in your system, set `$GFX_NAME` manually to the desired architecture (replace first command). This is also necessary if your GPU is not officially supported by ROCm, for example you have to set `$GFX_NAME` manually to `gfx1030` for consumer RDNA2 cards.
-
-Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
-
-```shell
-mkdir build && cd build
-if command -v rocminfo; then export GFX_NAME=$(rocminfo | awk '/ *Name: +gfx[1-9]/ {print $2; exit}'); else echo "rocminfo missing!"; fi
-if [ -z "${GFX_NAME}" ]; then echo "Error: Couldn't detect GPU!"; else echo "Building for GPU: ${GFX_NAME}"; fi
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DAMDGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-cmake --build . --config Release
-```
-
-## Build with MUSA
-
-This provides GPU acceleration using Moore Threads GPU. Make sure to have the MUSA toolkit installed.
-
-```shell
-mkdir build && cd build
-cmake .. -DCMAKE_C_COMPILER=/usr/local/musa/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/musa/bin/clang++ -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release
-cmake --build . --config Release
-```
-
-## Build with Metal
-
-Using Metal makes the computation run on the GPU. Currently, there are some issues with Metal when performing operations on very large matrices, making it highly inefficient at the moment. Performance improvements are expected in the near future.
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_METAL=ON
-cmake --build . --config Release
-```
-
-## Build with Vulkan
-
-Install Vulkan SDK from https://www.lunarg.com/vulkan-sdk/.
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_VULKAN=ON
-cmake --build . --config Release
-```
-
-## Build with OpenCL (for Adreno GPU)
-
-Currently, it supports only Adreno GPUs and is primarily optimized for Q4_0 type
-
-To build for Windows ARM please refers to [Windows 11 Arm64](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64)
-
-Building for Android:
-
-  Android NDK:
-       Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
-
-Setup OpenCL Dependencies for NDK:
-
-You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
-
-*   OpenCL Headers:
-    ```bash
-    # In a temporary working directory
-    git clone https://github.com/KhronosGroup/OpenCL-Headers
-    cd OpenCL-Headers
-    # Replace <YOUR_NDK_PATH> with your actual NDK installation path
-    # e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
-    sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
-    cd ..
-    ```
-
-*   OpenCL ICD Loader:
-    ```shell
-    # In the same temporary working directory
-    git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
-    cd OpenCL-ICD-Loader
-    mkdir build_ndk && cd build_ndk
-
-    # Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
-    cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
-      -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
-      -DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
-      -DANDROID_ABI=arm64-v8a \
-      -DANDROID_PLATFORM=24 \
-      -DANDROID_STL=c++_shared
-
-    ninja
-    # Replace <YOUR_NDK_PATH>
-    # e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
-    sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
-    cd ../..
-    ```
-
-Build `stable-diffusion.cpp` for Android with OpenCL:
-
-```shell
-mkdir build-android && cd build-android
-
-# Replace <YOUR_NDK_PATH> with your actual NDK installation path
-# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
-cmake .. -G Ninja \
-  -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
-  -DANDROID_ABI=arm64-v8a \
-  -DANDROID_PLATFORM=android-28 \
-  -DGGML_OPENMP=OFF \
-  -DSD_OPENCL=ON
-
-ninja
-```
-*(Note: Don't forget to include `LD_LIBRARY_PATH=/vendor/lib64` in your command line before running the binary)*
-
-## Build with SYCL
-
-Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before start. More details and steps can refer to [llama.cpp SYCL backend](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/SYCL.md#linux).
-
-```shell
-# Export relevant ENV variables
-source /opt/intel/oneapi/setvars.sh
-
-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-# Option 2: Use FP16
-cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
-
-cmake --build . --config Release
-```
+# Build from scratch
+
+## Get the Code
+
+```
+git clone --recursive https://github.com/leejet/stable-diffusion.cpp
+cd stable-diffusion.cpp
+```
+
+- If you have already cloned the repository, you can use the following command to update the repository to the latest code.
+
+```
+cd stable-diffusion.cpp
+git pull origin master
+git submodule init
+git submodule update
+```
+
+## WebP Support in Examples
+
+The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
+
+If you do not want WebP support, you can disable it at configure time:
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_WEBP=OFF
+cmake --build . --config Release
+```
+
+## Build (CPU only)
+
+If you don't have a GPU or CUDA installed, you can build a CPU-only version.
+
+```shell
+mkdir build && cd build
+cmake ..
+cmake --build . --config Release
+```
+
+## Build with OpenBLAS
+
+```shell
+mkdir build && cd build
+cmake .. -DGGML_OPENBLAS=ON
+cmake --build . --config Release
+```
+
+## Build with CUDA
+
+This provides GPU acceleration using NVIDIA GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_CUDA=ON
+cmake --build . --config Release
+```
+
+## Build with HipBLAS
+
+This provides GPU acceleration using AMD GPU. Make sure to have the ROCm toolkit installed.
+To build for a GPU architecture other than the one installed in your system, set `$GFX_NAME` manually to the desired architecture (replacing the first command). This is also necessary if your GPU is not officially supported by ROCm; for example, you have to set `$GFX_NAME` manually to `gfx1030` for consumer RDNA2 cards.
+
+Windows users: refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
+
+```shell
+mkdir build && cd build
+if command -v rocminfo; then export GFX_NAME=$(rocminfo | awk '/ *Name: +gfx[1-9]/ {print $2; exit}'); else echo "rocminfo missing!"; fi
+if [ -z "${GFX_NAME}" ]; then echo "Error: Couldn't detect GPU!"; else echo "Building for GPU: ${GFX_NAME}"; fi
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DAMDGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+cmake --build . --config Release
+```
+
+## Build with MUSA
+
+This provides GPU acceleration using Moore Threads GPU. Make sure to have the MUSA toolkit installed.
+
+```shell
+mkdir build && cd build
+cmake .. -DCMAKE_C_COMPILER=/usr/local/musa/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/musa/bin/clang++ -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release
+cmake --build . --config Release
+```
+
+## Build with Metal
+
+Using Metal makes the computation run on the GPU. Currently, there are some issues with Metal when performing operations on very large matrices, making it highly inefficient at the moment. Performance improvements are expected in the near future.
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_METAL=ON
+cmake --build . --config Release
+```
+
+## Build with Vulkan
+
+Install Vulkan SDK from https://www.lunarg.com/vulkan-sdk/.
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_VULKAN=ON
+cmake --build . --config Release
+```
+
+## Build with OpenCL (for Adreno GPU)
+
+Currently, it supports only Adreno GPUs and is primarily optimized for Q4_0 type
+
+To build for Windows ARM, please refer to [Windows 11 Arm64](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64)
+
+Building for Android:
+
+  Android NDK:
+       Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
+
+Setup OpenCL Dependencies for NDK:
+
+You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
+
+*   OpenCL Headers:
+    ```bash
+    # In a temporary working directory
+    git clone https://github.com/KhronosGroup/OpenCL-Headers
+    cd OpenCL-Headers
+    # Replace <YOUR_NDK_PATH> with your actual NDK installation path
+    # e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+    sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+    cd ..
+    ```
+
+*   OpenCL ICD Loader:
+    ```shell
+    # In the same temporary working directory
+    git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+    cd OpenCL-ICD-Loader
+    mkdir build_ndk && cd build_ndk
+
+    # Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
+    cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
+      -DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
+      -DANDROID_ABI=arm64-v8a \
+      -DANDROID_PLATFORM=24 \
+      -DANDROID_STL=c++_shared
+
+    ninja
+    # Replace <YOUR_NDK_PATH>
+    # e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+    sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+    cd ../..
+    ```
+
+Build `stable-diffusion.cpp` for Android with OpenCL:
+
+```shell
+mkdir build-android && cd build-android
+
+# Replace <YOUR_NDK_PATH> with your actual NDK installation path
+# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
+cmake .. -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=android-28 \
+  -DGGML_OPENMP=OFF \
+  -DSD_OPENCL=ON
+
+ninja
+```
+*(Note: Don't forget to include `LD_LIBRARY_PATH=/vendor/lib64` in your command line before running the binary)*
+
+## Build with SYCL
+
+Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before starting. For more details and steps, refer to [llama.cpp SYCL backend](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/SYCL.md#linux).
+
+```shell
+# Export relevant ENV variables
+source /opt/intel/oneapi/setvars.sh
+
+# Option 1: Use FP32 (recommended for better performance in most cases)
+cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+# Option 2: Use FP16
+cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
+
+cmake --build . --config Release
+```
diff --git a/examples/cli/CMakeLists.txt b/examples/cli/CMakeLists.txt
index 1727268bc..e4acaac87 100644
--- a/examples/cli/CMakeLists.txt
+++ b/examples/cli/CMakeLists.txt
@@ -1,9 +1,14 @@
 set(TARGET sd-cli)
 
 add_executable(${TARGET}
+    ../common/log.cpp
+    ../common/media_io.cpp
     image_metadata.cpp
     main.cpp
 )
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE stable-diffusion zip ${CMAKE_THREAD_LIBS_INIT})
+if(SD_WEBP)
+    target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
+endif()
 target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
diff --git a/examples/cli/README.md b/examples/cli/README.md
index 9b273a705..13cefa63f 100644
--- a/examples/cli/README.md
+++ b/examples/cli/README.md
@@ -5,8 +5,8 @@ usage: ./bin/sd-cli  [options]
 
 CLI Options:
   -o, --output <string>       path to write result image to. you can use printf-style %d format specifiers for image sequences (default:
-                              ./output.png) (eg. output_%03d.png)
-  --preview-path <string>     path to write preview image to (default: ./preview.png)
+                              ./output.png) (eg. output_%03d.png). For video generation, single-file outputs support .avi and animated .webp
+  --preview-path <string>     path to write preview image to (default: ./preview.png). Multi-frame previews support .avi and animated .webp
   --preview-interval <int>    interval in denoising steps between consecutive updates of the image preview file (default is 1, meaning updating at
                               every step)
   --output-begin-idx <int>    starting index for output image sequence, must be non-negative (default 0 if specified %d in output path, 1 otherwise)
diff --git a/examples/cli/avi_writer.h b/examples/cli/avi_writer.h
deleted file mode 100644
index 53b4749cf..000000000
--- a/examples/cli/avi_writer.h
+++ /dev/null
@@ -1,217 +0,0 @@
-#ifndef __AVI_WRITER_H__
-#define __AVI_WRITER_H__
-
-#include <cstdint>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-
-#include "stable-diffusion.h"
-
-#ifndef INCLUDE_STB_IMAGE_WRITE_H
-#include "stb_image_write.h"
-#endif
-
-typedef struct {
-    uint32_t offset;
-    uint32_t size;
-} avi_index_entry;
-
-// Write 32-bit little-endian integer
-void write_u32_le(FILE* f, uint32_t val) {
-    fwrite(&val, 4, 1, f);
-}
-
-// Write 16-bit little-endian integer
-void write_u16_le(FILE* f, uint16_t val) {
-    fwrite(&val, 2, 1, f);
-}
-
-/**
- * Create an MJPG AVI file from an array of sd_image_t images.
- * Images are encoded to JPEG using stb_image_write.
- *
- * @param filename Output AVI file name.
- * @param images Array of input images.
- * @param num_images Number of images in the array.
- * @param fps Frames per second for the video.
- * @param quality JPEG quality (0-100).
- * @return 0 on success, -1 on failure.
- */
-int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality = 90) {
-    if (num_images == 0) {
-        fprintf(stderr, "Error: Image array is empty.\n");
-        return -1;
-    }
-
-    FILE* f = fopen(filename, "wb");
-    if (!f) {
-        perror("Error opening file for writing");
-        return -1;
-    }
-
-    uint32_t width    = images[0].width;
-    uint32_t height   = images[0].height;
-    uint32_t channels = images[0].channel;
-    if (channels != 3 && channels != 4) {
-        fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
-        fclose(f);
-        return -1;
-    }
-
-    // --- RIFF AVI Header ---
-    fwrite("RIFF", 4, 1, f);
-    long riff_size_pos = ftell(f);
-    write_u32_le(f, 0);  // Placeholder for file size
-    fwrite("AVI ", 4, 1, f);
-
-    // 'hdrl' LIST (header list)
-    fwrite("LIST", 4, 1, f);
-    write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
-    fwrite("hdrl", 4, 1, f);
-
-    // 'avih' chunk (AVI main header)
-    fwrite("avih", 4, 1, f);
-    write_u32_le(f, 56);
-    write_u32_le(f, 1000000 / fps);       // Microseconds per frame
-    write_u32_le(f, 0);                   // Max bytes per second
-    write_u32_le(f, 0);                   // Padding granularity
-    write_u32_le(f, 0x110);               // Flags (HASINDEX | ISINTERLEAVED)
-    write_u32_le(f, num_images);          // Total frames
-    write_u32_le(f, 0);                   // Initial frames
-    write_u32_le(f, 1);                   // Number of streams
-    write_u32_le(f, width * height * 3);  // Suggested buffer size
-    write_u32_le(f, width);
-    write_u32_le(f, height);
-    write_u32_le(f, 0);  // Reserved
-    write_u32_le(f, 0);  // Reserved
-    write_u32_le(f, 0);  // Reserved
-    write_u32_le(f, 0);  // Reserved
-
-    // 'strl' LIST (stream list)
-    fwrite("LIST", 4, 1, f);
-    write_u32_le(f, 4 + 8 + 56 + 8 + 40);
-    fwrite("strl", 4, 1, f);
-
-    // 'strh' chunk (stream header)
-    fwrite("strh", 4, 1, f);
-    write_u32_le(f, 56);
-    fwrite("vids", 4, 1, f);              // Stream type: video
-    fwrite("MJPG", 4, 1, f);              // Codec: Motion JPEG
-    write_u32_le(f, 0);                   // Flags
-    write_u16_le(f, 0);                   // Priority
-    write_u16_le(f, 0);                   // Language
-    write_u32_le(f, 0);                   // Initial frames
-    write_u32_le(f, 1);                   // Scale
-    write_u32_le(f, fps);                 // Rate
-    write_u32_le(f, 0);                   // Start
-    write_u32_le(f, num_images);          // Length
-    write_u32_le(f, width * height * 3);  // Suggested buffer size
-    write_u32_le(f, (uint32_t)-1);        // Quality
-    write_u32_le(f, 0);                   // Sample size
-    write_u16_le(f, 0);                   // rcFrame.left
-    write_u16_le(f, 0);                   // rcFrame.top
-    write_u16_le(f, 0);                   // rcFrame.right
-    write_u16_le(f, 0);                   // rcFrame.bottom
-
-    // 'strf' chunk (stream format: BITMAPINFOHEADER)
-    fwrite("strf", 4, 1, f);
-    write_u32_le(f, 40);
-    write_u32_le(f, 40);  // biSize
-    write_u32_le(f, width);
-    write_u32_le(f, height);
-    write_u16_le(f, 1);                   // biPlanes
-    write_u16_le(f, 24);                  // biBitCount
-    fwrite("MJPG", 4, 1, f);              // biCompression (FOURCC)
-    write_u32_le(f, width * height * 3);  // biSizeImage
-    write_u32_le(f, 0);                   // XPelsPerMeter
-    write_u32_le(f, 0);                   // YPelsPerMeter
-    write_u32_le(f, 0);                   // Colors used
-    write_u32_le(f, 0);                   // Colors important
-
-    // 'movi' LIST (video frames)
-    // long movi_list_pos = ftell(f);
-    fwrite("LIST", 4, 1, f);
-    long movi_size_pos = ftell(f);
-    write_u32_le(f, 0);  // Placeholder for movi size
-    fwrite("movi", 4, 1, f);
-
-    avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images);
-    if (!index) {
-        fclose(f);
-        return -1;
-    }
-
-    // Encode and write each frame as JPEG
-    struct {
-        uint8_t* buf;
-        size_t size;
-    } jpeg_data;
-
-    for (int i = 0; i < num_images; i++) {
-        jpeg_data.buf  = nullptr;
-        jpeg_data.size = 0;
-
-        // Callback function to collect JPEG data into memory
-        auto write_to_buf = [](void* context, void* data, int size) {
-            auto jd = (decltype(jpeg_data)*)context;
-            jd->buf = (uint8_t*)realloc(jd->buf, jd->size + size);
-            memcpy(jd->buf + jd->size, data, size);
-            jd->size += size;
-        };
-
-        // Encode to JPEG in memory
-        stbi_write_jpg_to_func(
-            write_to_buf,
-            &jpeg_data,
-            images[i].width,
-            images[i].height,
-            channels,
-            images[i].data,
-            quality);
-
-        // Write '00dc' chunk (video frame)
-        fwrite("00dc", 4, 1, f);
-        write_u32_le(f, (uint32_t)jpeg_data.size);
-        index[i].offset = ftell(f) - 8;
-        index[i].size   = (uint32_t)jpeg_data.size;
-        fwrite(jpeg_data.buf, 1, jpeg_data.size, f);
-
-        // Align to even byte size
-        if (jpeg_data.size % 2)
-            fputc(0, f);
-
-        free(jpeg_data.buf);
-    }
-
-    // Finalize 'movi' size
-    long cur_pos   = ftell(f);
-    long movi_size = cur_pos - movi_size_pos - 4;
-    fseek(f, movi_size_pos, SEEK_SET);
-    write_u32_le(f, movi_size);
-    fseek(f, cur_pos, SEEK_SET);
-
-    // Write 'idx1' index
-    fwrite("idx1", 4, 1, f);
-    write_u32_le(f, num_images * 16);
-    for (int i = 0; i < num_images; i++) {
-        fwrite("00dc", 4, 1, f);
-        write_u32_le(f, 0x10);
-        write_u32_le(f, index[i].offset);
-        write_u32_le(f, index[i].size);
-    }
-
-    // Finalize RIFF size
-    cur_pos        = ftell(f);
-    long file_size = cur_pos - riff_size_pos - 4;
-    fseek(f, riff_size_pos, SEEK_SET);
-    write_u32_le(f, file_size);
-    fseek(f, cur_pos, SEEK_SET);
-
-    fclose(f);
-    free(index);
-
-    return 0;
-}
-
-#endif  // __AVI_WRITER_H__
\ No newline at end of file
diff --git a/examples/cli/image_metadata.cpp b/examples/cli/image_metadata.cpp
index 015054fa4..8dd339c88 100644
--- a/examples/cli/image_metadata.cpp
+++ b/examples/cli/image_metadata.cpp
@@ -40,6 +40,13 @@ namespace {
                static_cast<uint32_t>(data[offset + 3]);
     }
 
+    // Reads four bytes starting at data[offset] as one little-endian uint32_t.
+    // The caller must ensure offset + 3 is a valid index; nothing is checked here.
+    uint32_t read_u32_le(const std::vector<uint8_t>& data, size_t offset) {
+        const uint32_t b0 = data[offset], b1 = data[offset + 1], b2 = data[offset + 2], b3 = data[offset + 3];
+        return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
+    }
+
     uint16_t read_u16_tiff(const std::vector<uint8_t>& data, size_t offset, bool little_endian) {
         if (little_endian) {
             return static_cast<uint16_t>(data[offset]) |
@@ -357,6 +364,11 @@ namespace {
                     json& result,
                     std::string& error);
 
+    bool parse_webp(const std::vector<uint8_t>& data,
+                    bool include_raw,
+                    json& result,
+                    std::string& error);
+
     std::string abbreviate(const std::string& value, bool brief);
 
     void print_json_value(std::ostream& out,
@@ -1008,6 +1020,83 @@ namespace {
         return true;
     }
 
+    // Walks the RIFF chunk list of a WebP file and appends one JSON entry per chunk to result["entries"].
+    // Returns false with `error` set when the RIFF/WEBP signature is missing or a chunk overruns the data.
+    bool parse_webp(const std::vector<uint8_t>& data,
+                    bool include_raw,
+                    json& result,
+                    std::string& error) {
+        if (data.size() < 12 ||
+            memcmp(data.data(), "RIFF", 4) != 0 ||
+            memcmp(data.data() + 8, "WEBP", 4) != 0) {
+            error = "not a WebP file";
+            return false;
+        }
+
+        result["format"]  = "WEBP";
+        result["entries"] = json::array();
+
+        size_t offset = 12;  // first chunk header follows the 12 signature bytes checked above
+        while (offset + 8 <= data.size()) {
+            const std::string raw_type = bytes_to_string(data.data() + offset, data.data() + offset + 4);
+            const uint32_t length      = read_u32_le(data, offset + 4);
+            offset += 8;
+
+            // offset <= data.size() here, so subtracting cannot wrap, whereas `offset + length` could on 32-bit size_t.
+            if (static_cast<size_t>(length) > data.size() - offset) {
+                error = "WebP chunk exceeds file size";
+                return false;
+            }
+
+            const uint8_t* payload = data.data() + offset;
+            // Chunk ids may carry a trailing pad space (e.g. "XMP "); the reported name drops one such space.
+            const bool padded      = !raw_type.empty() && raw_type.back() == ' ';
+            const std::string type = padded ? raw_type.substr(0, raw_type.size() - 1) : raw_type;
+
+            json entry;
+            entry["entry_type"]    = "chunk";
+            entry["name"]          = type;
+            entry["length"]        = length;
+            entry["metadata_like"] = (raw_type == "ICCP" || raw_type == "EXIF" || raw_type == "XMP ");
+
+            if (raw_type == "VP8X" && length >= 10) {  // flag bits in byte 0; 24-bit LE (dimension - 1) at bytes 4 and 7
+                entry["data"] = json{
+                    {"icc_profile", (payload[0] & 0x20) != 0},
+                    {"alpha", (payload[0] & 0x10) != 0},
+                    {"exif", (payload[0] & 0x08) != 0},
+                    {"xmp", (payload[0] & 0x04) != 0},
+                    {"animation", (payload[0] & 0x02) != 0},
+                    {"canvas_width", 1 + static_cast<uint32_t>(payload[4]) + (static_cast<uint32_t>(payload[5]) << 8) + (static_cast<uint32_t>(payload[6]) << 16)},
+                    {"canvas_height", 1 + static_cast<uint32_t>(payload[7]) + (static_cast<uint32_t>(payload[8]) << 8) + (static_cast<uint32_t>(payload[9]) << 16)},
+                };
+            } else if (raw_type == "EXIF") {
+                std::string exif_error;
+                json meta = parse_exif_tiff(payload, length, include_raw, exif_error);
+                if (!meta.empty()) {
+                    entry["data"] = std::move(meta);
+                }
+                if (!exif_error.empty()) {
+                    entry["error"] = exif_error;
+                }
+            } else if (raw_type == "XMP ") {
+                entry["data"] = json{
+                    {"type", "XMP"},
+                    {"xml", trim_trailing_nuls(bytes_to_string(payload, payload + length))},
+                };
+            } else if (raw_type == "ICCP") {
+                entry["data"] = json{{"profile_size", length}};
+                append_raw_preview(entry["data"], payload, length, include_raw);
+            } else {
+                append_raw_preview(entry, payload, length, include_raw);
+            }
+
+            result["entries"].push_back(entry);
+            offset += static_cast<size_t>(length) + (length & 1u);  // odd-sized payloads are followed by one pad byte
+        }
+
+        return true;
+    }
+
     std::string abbreviate(const std::string& value, bool brief) {
         if (!brief || value.size() <= 240) {
             return value;
@@ -1116,8 +1205,12 @@ namespace {
         if (data.size() >= 2 && data[0] == 0xFF && data[1] == 0xD8) {
             return parse_jpeg(data, include_raw, report, error);
         }
+        if (data.size() >= 12 && memcmp(data.data(), "RIFF", 4) == 0 &&
+            memcmp(data.data() + 8, "WEBP", 4) == 0) {
+            return parse_webp(data, include_raw, report, error);
+        }
 
-        error = "unsupported image format; only PNG and JPEG are supported";
+        error = "unsupported image format; only PNG, JPEG, and WebP are supported";
         return false;
     }
 
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index 55538768b..b4a3c343e 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -16,8 +16,7 @@
 #include "stable-diffusion.h"
 
 #include "common/common.hpp"
-
-#include "avi_writer.h"
+#include "common/media_io.h"
 #include "image_metadata.h"
 
 const char* previews_str[] = {
@@ -303,7 +302,7 @@ bool load_images_from_dir(const std::string dir,
         std::string ext  = entry.path().extension().string();
         std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
 
-        if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp") {
+        if (ext == ".jpg" || ext == ".jpeg" || ext == ".png" || ext == ".bmp" || ext == ".webp") {
             LOG_DEBUG("load image %zu from '%s'", images.size(), path.c_str());
             int width             = 0;
             int height            = 0;
@@ -333,9 +332,17 @@ void step_callback(int step, int frame_count, sd_image_t* image, bool is_noisy,
     // is_noisy is set to true if the preview corresponds to noisy latents, false if it's denoised latents
     // unused in this app, it will either be always noisy or always denoised here
     if (frame_count == 1) {
-        stbi_write_png(cli_params->preview_path.c_str(), image->width, image->height, image->channel, image->data, 0);
+        if (!write_image_to_file(cli_params->preview_path,
+                                 image->data,
+                                 image->width,
+                                 image->height,
+                                 image->channel)) {
+            LOG_ERROR("save preview image to '%s' failed", cli_params->preview_path.c_str());
+        }
     } else {
-        create_mjpg_avi_from_sd_images(cli_params->preview_path.c_str(), image, frame_count, cli_params->preview_fps);
+        if (create_video_from_sd_images(cli_params->preview_path.c_str(), image, frame_count, cli_params->preview_fps) != 0) {
+            LOG_ERROR("save preview video to '%s' failed", cli_params->preview_path.c_str());
+        }
     }
 }
 
@@ -385,9 +392,11 @@ bool save_results(const SDCliParams& cli_params,
 
     std::string ext_lower = ext.string();
     std::transform(ext_lower.begin(), ext_lower.end(), ext_lower.begin(), ::tolower);
-    bool is_jpg = (ext_lower == ".jpg" || ext_lower == ".jpeg" || ext_lower == ".jpe");
+    const EncodedImageFormat output_format = encoded_image_format_from_path(out_path.string());
     if (!ext.empty()) {
-        if (is_jpg || ext_lower == ".png") {
+        if (output_format == EncodedImageFormat::JPEG ||
+            output_format == EncodedImageFormat::PNG ||
+            output_format == EncodedImageFormat::WEBP) {
             base_path.replace_extension();
         }
     }
@@ -405,20 +414,15 @@ bool save_results(const SDCliParams& cli_params,
         std::string params = gen_params.embed_image_metadata
                                  ? get_image_params(ctx_params, gen_params, gen_params.seed + idx)
                                  : "";
-        int ok             = 0;
-        if (is_jpg) {
-            ok = stbi_write_jpg(path.string().c_str(), img.width, img.height, img.channel, img.data, 90, params.size() > 0 ? params.c_str() : nullptr);
-        } else {
-            ok = stbi_write_png(path.string().c_str(), img.width, img.height, img.channel, img.data, 0, params.size() > 0 ? params.c_str() : nullptr);
-        }
+        const bool ok      = write_image_to_file(path.string(), img.data, img.width, img.height, img.channel, params, 90);
         LOG_INFO("save result image %d to '%s' (%s)", idx, path.string().c_str(), ok ? "success" : "failure");
-        return ok != 0;
+        return ok;
     };
 
     int sucessful_reults = 0;
 
     if (std::regex_search(cli_params.output_path, format_specifier_regex)) {
-        if (!is_jpg && ext_lower != ".png")
+        if (output_format == EncodedImageFormat::UNKNOWN)
             ext = ".png";
         fs::path pattern = base_path;
         pattern += ext;
@@ -434,20 +438,20 @@ bool save_results(const SDCliParams& cli_params,
     }
 
     if (cli_params.mode == VID_GEN && num_results > 1) {
-        if (ext_lower != ".avi")
+        if (ext_lower != ".avi" && ext_lower != ".webp")
             ext = ".avi";
         fs::path video_path = base_path;
         video_path += ext;
-        if (create_mjpg_avi_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
-            LOG_INFO("save result MJPG AVI video to '%s'", video_path.string().c_str());
+        if (create_video_from_sd_images(video_path.string().c_str(), results, num_results, gen_params.fps) == 0) {
+            LOG_INFO("save result video to '%s'", video_path.string().c_str());
             return true;
         } else {
-            LOG_ERROR("Failed to save result MPG AVI video to '%s'", video_path.string().c_str());
+            LOG_ERROR("Failed to save result video to '%s'", video_path.string().c_str());
             return false;
         }
     }
 
-    if (!is_jpg && ext_lower != ".png")
+    if (output_format == EncodedImageFormat::UNKNOWN)
         ext = ".png";
 
     for (int i = 0; i < num_results; ++i) {
diff --git a/examples/common/common.hpp b/examples/common/common.hpp
index 4735cb0e2..51912d32d 100644
--- a/examples/common/common.hpp
+++ b/examples/common/common.hpp
@@ -1,4 +1,6 @@
 
+#include <algorithm>
+#include <cctype>
 #include <filesystem>
 #include <iostream>
 #include <map>
@@ -17,20 +19,9 @@ namespace fs = std::filesystem;
 #include <windows.h>
 #endif  // _WIN32
 
+#include "log.h"
 #include "stable-diffusion.h"
 
-#define STB_IMAGE_IMPLEMENTATION
-#define STB_IMAGE_STATIC
-#include "stb_image.h"
-
-#define STB_IMAGE_WRITE_IMPLEMENTATION
-#define STB_IMAGE_WRITE_STATIC
-#include "stb_image_write.h"
-
-#define STB_IMAGE_RESIZE_IMPLEMENTATION
-#define STB_IMAGE_RESIZE_STATIC
-#include "stb_image_resize.h"
-
 #define SAFE_STR(s) ((s) ? (s) : "")
 #define BOOL_STR(b) ((b) ? "true" : "false")
 
@@ -88,125 +79,6 @@ static std::string argv_to_utf8(int index, const char** argv) {
 
 #endif
 
-static void print_utf8(FILE* stream, const char* utf8) {
-    if (!utf8)
-        return;
-
-#ifdef _WIN32
-    HANDLE h = (stream == stderr)
-                   ? GetStdHandle(STD_ERROR_HANDLE)
-                   : GetStdHandle(STD_OUTPUT_HANDLE);
-
-    DWORD mode;
-    BOOL is_console = GetConsoleMode(h, &mode);
-
-    if (is_console) {
-        int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
-        if (wlen <= 0)
-            return;
-
-        wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t));
-        if (!wbuf)
-            return;
-
-        MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen);
-
-        DWORD written;
-        WriteConsoleW(h, wbuf, wlen - 1, &written, NULL);
-
-        free(wbuf);
-    } else {
-        DWORD written;
-        WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL);
-    }
-#else
-    fputs(utf8, stream);
-#endif
-}
-
-static std::string sd_basename(const std::string& path) {
-    size_t pos = path.find_last_of('/');
-    if (pos != std::string::npos) {
-        return path.substr(pos + 1);
-    }
-    pos = path.find_last_of('\\');
-    if (pos != std::string::npos) {
-        return path.substr(pos + 1);
-    }
-    return path;
-}
-
-static void log_print(enum sd_log_level_t level, const char* log, bool verbose, bool color) {
-    int tag_color;
-    const char* level_str;
-    FILE* out_stream = (level == SD_LOG_ERROR) ? stderr : stdout;
-
-    if (!log || (!verbose && level <= SD_LOG_DEBUG)) {
-        return;
-    }
-
-    switch (level) {
-        case SD_LOG_DEBUG:
-            tag_color = 37;
-            level_str = "DEBUG";
-            break;
-        case SD_LOG_INFO:
-            tag_color = 34;
-            level_str = "INFO";
-            break;
-        case SD_LOG_WARN:
-            tag_color = 35;
-            level_str = "WARN";
-            break;
-        case SD_LOG_ERROR:
-            tag_color = 31;
-            level_str = "ERROR";
-            break;
-        default: /* Potential future-proofing */
-            tag_color = 33;
-            level_str = "?????";
-            break;
-    }
-
-    if (color) {
-        fprintf(out_stream, "\033[%d;1m[%-5s]\033[0m ", tag_color, level_str);
-    } else {
-        fprintf(out_stream, "[%-5s] ", level_str);
-    }
-    print_utf8(out_stream, log);
-    fflush(out_stream);
-}
-
-#define LOG_BUFFER_SIZE 4096
-
-static bool log_verbose = false;
-static bool log_color   = false;
-
-static void log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
-    va_list args;
-    va_start(args, format);
-
-    static char log_buffer[LOG_BUFFER_SIZE + 1];
-    int written = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);
-
-    if (written >= 0 && written < LOG_BUFFER_SIZE) {
-        vsnprintf(log_buffer + written, LOG_BUFFER_SIZE - written, format, args);
-    }
-    size_t len = strlen(log_buffer);
-    if (log_buffer[len - 1] != '\n') {
-        strncat(log_buffer, "\n", LOG_BUFFER_SIZE - len);
-    }
-
-    log_print(level, log_buffer, log_verbose, log_color);
-
-    va_end(args);
-}
-
-#define LOG_DEBUG(format, ...) log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
-#define LOG_INFO(format, ...) log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
-#define LOG_WARN(format, ...) log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)
-#define LOG_ERROR(format, ...) log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__)
-
 struct StringOption {
     std::string short_name;
     std::string long_name;
@@ -1967,144 +1839,6 @@ static std::string version_string() {
     return std::string("stable-diffusion.cpp version ") + sd_version() + ", commit " + sd_commit();
 }
 
-uint8_t* load_image_common(bool from_memory,
-                           const char* image_path_or_bytes,
-                           int len,
-                           int& width,
-                           int& height,
-                           int expected_width   = 0,
-                           int expected_height  = 0,
-                           int expected_channel = 3) {
-    int c = 0;
-    const char* image_path;
-    uint8_t* image_buffer = nullptr;
-    if (from_memory) {
-        image_path   = "memory";
-        image_buffer = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel);
-    } else {
-        image_path   = image_path_or_bytes;
-        image_buffer = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel);
-    }
-    if (image_buffer == nullptr) {
-        LOG_ERROR("load image from '%s' failed", image_path);
-        return nullptr;
-    }
-    if (c < expected_channel) {
-        fprintf(stderr,
-                "the number of channels for the input image must be >= %d,"
-                "but got %d channels, image_path = %s",
-                expected_channel,
-                c,
-                image_path);
-        free(image_buffer);
-        return nullptr;
-    }
-    if (width <= 0) {
-        LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path);
-        free(image_buffer);
-        return nullptr;
-    }
-    if (height <= 0) {
-        LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path);
-        free(image_buffer);
-        return nullptr;
-    }
-
-    // Resize input image ...
-    if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) {
-        float dst_aspect = (float)expected_width / (float)expected_height;
-        float src_aspect = (float)width / (float)height;
-
-        int crop_x = 0, crop_y = 0;
-        int crop_w = width, crop_h = height;
-
-        if (src_aspect > dst_aspect) {
-            crop_w = (int)(height * dst_aspect);
-            crop_x = (width - crop_w) / 2;
-        } else if (src_aspect < dst_aspect) {
-            crop_h = (int)(width / dst_aspect);
-            crop_y = (height - crop_h) / 2;
-        }
-
-        if (crop_x != 0 || crop_y != 0) {
-            LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path);
-            uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel);
-            if (cropped_image_buffer == nullptr) {
-                LOG_ERROR("error: allocate memory for crop\n");
-                free(image_buffer);
-                return nullptr;
-            }
-            for (int row = 0; row < crop_h; row++) {
-                uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel;
-                uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel;
-                memcpy(dst, src, crop_w * expected_channel);
-            }
-
-            width  = crop_w;
-            height = crop_h;
-            free(image_buffer);
-            image_buffer = cropped_image_buffer;
-        }
-
-        LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height);
-        int resized_height = expected_height;
-        int resized_width  = expected_width;
-
-        uint8_t* resized_image_buffer = (uint8_t*)malloc(resized_height * resized_width * expected_channel);
-        if (resized_image_buffer == nullptr) {
-            LOG_ERROR("error: allocate memory for resize input image\n");
-            free(image_buffer);
-            return nullptr;
-        }
-        stbir_resize(image_buffer, width, height, 0,
-                     resized_image_buffer, resized_width, resized_height, 0, STBIR_TYPE_UINT8,
-                     expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0,
-                     STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
-                     STBIR_FILTER_BOX, STBIR_FILTER_BOX,
-                     STBIR_COLORSPACE_SRGB, nullptr);
-        width  = resized_width;
-        height = resized_height;
-        free(image_buffer);
-        image_buffer = resized_image_buffer;
-    }
-    return image_buffer;
-}
-
-uint8_t* load_image_from_file(const char* image_path,
-                              int& width,
-                              int& height,
-                              int expected_width   = 0,
-                              int expected_height  = 0,
-                              int expected_channel = 3) {
-    return load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
-}
-
-bool load_sd_image_from_file(sd_image_t* image,
-                             const char* image_path,
-                             int expected_width   = 0,
-                             int expected_height  = 0,
-                             int expected_channel = 3) {
-    int width;
-    int height;
-    image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
-    if (image->data == nullptr) {
-        return false;
-    }
-    image->width  = width;
-    image->height = height;
-    return true;
-}
-
-uint8_t* load_image_from_memory(const char* image_bytes,
-                                int len,
-                                int& width,
-                                int& height,
-                                int expected_width   = 0,
-                                int expected_height  = 0,
-                                int expected_channel = 3) {
-    return load_image_common(true, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
-}
-
 std::string get_image_params(const SDContextParams& ctx_params, const SDGenerationParams& gen_params, int64_t seed) {
     std::string parameter_string;
     if (gen_params.prompt_with_lora.size() != 0) {
diff --git a/examples/common/log.cpp b/examples/common/log.cpp
new file mode 100644
index 000000000..44fcd1e43
--- /dev/null
+++ b/examples/common/log.cpp
@@ -0,0 +1,118 @@
+#include "log.h"
+
+bool log_verbose = false;
+bool log_color   = false;
+
+std::string sd_basename(const std::string& path) {
+    // Returns the component after the last path separator. '/' and '\\' are
+    // searched together, so a mixed-separator path such as "a/b\\c.cpp"
+    // yields "c.cpp" instead of stopping at the last '/'.
+    // A path without any separator is returned unchanged.
+    const size_t pos = path.find_last_of("/\\");
+    if (pos == std::string::npos) {
+        return path;
+    }
+    return path.substr(pos + 1);
+}
+
+void print_utf8(FILE* stream, const char* utf8) {  // Writes a UTF-8 string to `stream`; no-op for null.
+    if (!utf8) {
+        return;
+    }
+    // Windows: write through the Win32 handle; any stream other than stderr maps to stdout.
+#ifdef _WIN32
+    HANDLE h = (stream == stderr)
+                   ? GetStdHandle(STD_ERROR_HANDLE)
+                   : GetStdHandle(STD_OUTPUT_HANDLE);
+    // GetConsoleMode succeeds only for console handles; redirected output takes the else branch.
+    DWORD mode;
+    BOOL is_console = GetConsoleMode(h, &mode);
+    // Console: convert UTF-8 to UTF-16 and emit it with WriteConsoleW.
+    if (is_console) {
+        int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
+        if (wlen <= 0) {
+            return;
+        }
+        // wlen includes the terminating NUL because the input length was passed as -1.
+        wchar_t* wbuf = (wchar_t*)malloc(wlen * sizeof(wchar_t));
+        if (!wbuf) {
+            return;
+        }
+
+        MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wbuf, wlen);
+        // Write wlen - 1 units so the terminating NUL is not sent to the console.
+        DWORD written;
+        WriteConsoleW(h, wbuf, wlen - 1, &written, NULL);
+
+        free(wbuf);
+    } else {
+        DWORD written;
+        WriteFile(h, utf8, (DWORD)strlen(utf8), &written, NULL);
+    }
+#else
+    fputs(utf8, stream);
+#endif
+}
+
+void log_print(enum sd_log_level_t level, const char* log, bool verbose, bool color) {
+    // Emits one pre-formatted log line behind a "[LEVEL]" tag: SD_LOG_ERROR to stderr, others
+    // to stdout. Null text is ignored; levels <= SD_LOG_DEBUG are dropped unless `verbose`.
+    const bool suppressed = !verbose && level <= SD_LOG_DEBUG;
+    if (log == nullptr || suppressed) {
+        return;
+    }
+    // Defaults double as the tag for levels the switch does not recognise.
+    int ansi_code   = 33;
+    const char* tag = "?????";
+    switch (level) {
+        case SD_LOG_DEBUG:
+            ansi_code = 37;
+            tag       = "DEBUG";
+            break;
+        case SD_LOG_INFO:
+            ansi_code = 34;
+            tag       = "INFO";
+            break;
+        case SD_LOG_WARN:
+            ansi_code = 35;
+            tag       = "WARN";
+            break;
+        case SD_LOG_ERROR:
+            ansi_code = 31;
+            tag       = "ERROR";
+            break;
+        default:
+            break;
+    }
+
+    FILE* sink = (level == SD_LOG_ERROR) ? stderr : stdout;
+    if (!color) {
+        fprintf(sink, "[%-5s] ", tag);
+    } else {
+        fprintf(sink, "\033[%d;1m[%-5s]\033[0m ", ansi_code, tag);
+    }
+    print_utf8(sink, log);
+    fflush(sink);
+}
+
+void example_log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...) {
+    // Builds "<file basename>:<line> - <formatted message>" in a static buffer, appends a
+    // newline if missing, and forwards it to log_print() with log_verbose / log_color.
+    constexpr size_t LOG_BUFFER_SIZE = 4096;
+    static char log_buffer[LOG_BUFFER_SIZE + 1];
+    const int prefix_len = snprintf(log_buffer, LOG_BUFFER_SIZE, "%s:%-4d - ", sd_basename(file).c_str(), line);
+    const bool has_room  = prefix_len >= 0 && prefix_len < static_cast<int>(LOG_BUFFER_SIZE);
+
+    va_list args;
+    va_start(args, format);
+    if (has_room) {
+        vsnprintf(log_buffer + prefix_len, LOG_BUFFER_SIZE - prefix_len, format, args);
+    }
+    va_end(args);
+
+    const size_t text_len = strlen(log_buffer);
+    if (text_len == 0 || log_buffer[text_len - 1] != '\n') {
+        strncat(log_buffer, "\n", LOG_BUFFER_SIZE - text_len);
+    }
+    log_print(level, log_buffer, log_verbose, log_color);
+}
diff --git a/examples/common/log.h b/examples/common/log.h
new file mode 100644
index 000000000..f28b4b4ea
--- /dev/null
+++ b/examples/common/log.h
@@ -0,0 +1,32 @@
+#ifndef __EXAMPLE_LOG_H__
+#define __EXAMPLE_LOG_H__
+
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#if defined(_WIN32)
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#endif  // _WIN32
+
+#include "stable-diffusion.h"
+
+extern bool log_verbose;  // when false, messages at SD_LOG_DEBUG or below are dropped by log_print()
+extern bool log_color;    // when true, log_print() wraps the level tag in ANSI colour escapes
+// Logging helpers; their definitions live in log.cpp.
+std::string sd_basename(const std::string& path);
+void print_utf8(FILE* stream, const char* utf8);
+void log_print(sd_log_level_t level, const char* log, bool verbose, bool color);
+void example_log_printf(sd_log_level_t level, const char* file, int line, const char* format, ...);
+// Logging macros: forward the call site's __FILE__ and __LINE__ to example_log_printf().
+#define LOG_DEBUG(format, ...) example_log_printf(SD_LOG_DEBUG, __FILE__, __LINE__, format, ##__VA_ARGS__)
+#define LOG_INFO(format, ...) example_log_printf(SD_LOG_INFO, __FILE__, __LINE__, format, ##__VA_ARGS__)
+#define LOG_WARN(format, ...) example_log_printf(SD_LOG_WARN, __FILE__, __LINE__, format, ##__VA_ARGS__)
+#define LOG_ERROR(format, ...) example_log_printf(SD_LOG_ERROR, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#endif  // __EXAMPLE_LOG_H__
diff --git a/examples/common/media_io.cpp b/examples/common/media_io.cpp
new file mode 100644
index 000000000..a38513b9d
--- /dev/null
+++ b/examples/common/media_io.cpp
@@ -0,0 +1,879 @@
+#include "log.h"
+#include "media_io.h"
+
+#include <algorithm>
+#include <cctype>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#define STB_IMAGE_IMPLEMENTATION
+#define STB_IMAGE_STATIC
+#include "stb_image.h"
+
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#define STB_IMAGE_WRITE_STATIC
+#include "stb_image_write.h"
+
+#define STB_IMAGE_RESIZE_IMPLEMENTATION
+#define STB_IMAGE_RESIZE_STATIC
+#include "stb_image_resize.h"
+
+#ifdef SD_USE_WEBP
+#include "webp/decode.h"
+#include "webp/encode.h"
+#include "webp/mux.h"
+#endif
+
+namespace fs = std::filesystem;
+
+namespace {
+bool read_binary_file_bytes(const char* path, std::vector<uint8_t>& data) {
+    // Reads the whole file at `path` into `data`; returns false when the file
+    // cannot be opened, its size cannot be determined, or the read fails.
+    std::ifstream input(fs::path(path), std::ios::binary);
+    if (!input) {
+        return false;
+    }
+
+    input.seekg(0, std::ios::end);
+    const std::streampos end_pos = input.tellg();
+    if (end_pos < 0) {
+        return false;
+    }
+    input.seekg(0, std::ios::beg);
+
+    data.resize(static_cast<size_t>(end_pos));
+    if (data.empty()) {
+        return true;
+    }
+    input.read(reinterpret_cast<char*>(data.data()), end_pos);
+    return static_cast<bool>(input);
+}
+
+bool write_binary_file_bytes(const std::string& path, const std::vector<uint8_t>& data) {
+    // Writes `data` to `path` in binary mode; returns true only if the stream is still good.
+    std::ofstream fout(fs::path(path), std::ios::binary);
+    if (!fout) {
+        return false;
+    }
+    if (!data.empty()) {
+        fout.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(data.size()));
+    }
+    // ofstream buffers output: flush before checking the state so a late failure
+    // (e.g. disk full) is reported here instead of being lost in the destructor.
+    fout.flush();
+    return static_cast<bool>(fout);
+}
+
+int stbi_ext_write_png_to_func(stbi_write_func* func,
+                               void* context,
+                               int x,
+                               int y,
+                               int comp,
+                               const void* data,
+                               int stride_bytes,
+                               const char* parameters) {
+    // Encode to memory, pass the bytes to `func`, free them; 1 = ok, 0 = encode failed.
+    int png_size = 0;
+    if (unsigned char* png_bytes = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &png_size, parameters)) {
+        func(context, png_bytes, png_size);
+        STBIW_FREE(png_bytes);
+        return 1;
+    }
+    return 0;
+}
+
+bool is_webp_signature(const uint8_t* data, size_t size) {
+    // RIFF container check: "RIFF" at offset 0 and "WEBP" at offset 8 (12 bytes minimum).
+    const bool has_header = size >= 12;
+    return has_header && memcmp(data, "RIFF", 4) == 0 && memcmp(data + 8, "WEBP", 4) == 0;
+}
+
+std::string xml_escape(const std::string& value) {
+    // Replaces the five XML special characters (& < > " ') with their
+    // predefined entity references; every other byte is copied through.
+    std::string escaped;
+    escaped.reserve(value.size());
+
+    for (const char ch : value) {
+        const char* entity = nullptr;
+        if (ch == '&') {
+            entity = "&amp;";
+        } else if (ch == '<') {
+            entity = "&lt;";
+        } else if (ch == '>') {
+            entity = "&gt;";
+        } else if (ch == '"') {
+            entity = "&quot;";
+        } else if (ch == '\'') {
+            entity = "&apos;";
+        }
+
+        if (entity != nullptr) {
+            escaped += entity;
+        } else {
+            escaped += ch;
+        }
+    }
+
+    return escaped;
+}
+
+#ifdef SD_USE_WEBP
+uint8_t* decode_webp_image_to_buffer(const uint8_t* data,
+                                     size_t size,
+                                     int& width,
+                                     int& height,
+                                     int expected_channel,
+                                     int& source_channel_count) {  // Returns a malloc()'d pixel buffer, or nullptr on failure.
+    WebPBitstreamFeatures features;
+    if (WebPGetFeatures(data, size, &features) != VP8_STATUS_OK) {
+        return nullptr;
+    }
+    // Publish the dimensions and native channel count (4 with alpha, else 3) from the header.
+    width                = features.width;
+    height               = features.height;
+    source_channel_count = features.has_alpha ? 4 : 3;
+
+    const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
+    // One output channel: decode to RGB(A), then collapse each pixel to a weighted gray value.
+    if (expected_channel == 1) {
+        int decoded_width  = width;
+        int decoded_height = height;
+        uint8_t* decoded   = features.has_alpha
+                                 ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height)
+                                 : WebPDecodeRGB(data, size, &decoded_width, &decoded_height);
+        if (decoded == nullptr) {
+            return nullptr;
+        }
+
+        uint8_t* grayscale = (uint8_t*)malloc(pixel_count);
+        if (grayscale == nullptr) {
+            WebPFree(decoded);
+            return nullptr;
+        }
+        // Integer weights 77/150/29 sum to 256; +128 rounds before the >> 8. Alpha is ignored.
+        const int decoded_channels = features.has_alpha ? 4 : 3;
+        for (size_t i = 0; i < pixel_count; ++i) {
+            const uint8_t* src = decoded + i * decoded_channels;
+            grayscale[i]       = static_cast<uint8_t>((77 * src[0] + 150 * src[1] + 29 * src[2] + 128) >> 8);
+        }
+
+        WebPFree(decoded);
+        return grayscale;
+    }
+    // Otherwise only 3 (RGB) or 4 (RGBA) output channels are supported.
+    if (expected_channel != 3 && expected_channel != 4) {
+        return nullptr;
+    }
+
+    int decoded_width  = width;
+    int decoded_height = height;
+    uint8_t* decoded   = (expected_channel == 4)
+                             ? WebPDecodeRGBA(data, size, &decoded_width, &decoded_height)
+                             : WebPDecodeRGB(data, size, &decoded_width, &decoded_height);
+    if (decoded == nullptr) {
+        return nullptr;
+    }
+    // Copy the pixels into a malloc() buffer and hand the libwebp allocation back via WebPFree().
+    const size_t out_size = pixel_count * static_cast<size_t>(expected_channel);
+    uint8_t* output       = (uint8_t*)malloc(out_size);
+    if (output == nullptr) {
+        WebPFree(decoded);
+        return nullptr;
+    }
+
+    memcpy(output, decoded, out_size);
+    WebPFree(decoded);
+    return output;
+}
+
+std::string build_webp_xmp_packet(const std::string& parameters) {
+    // Wraps the XML-escaped parameters in a minimal XMP packet; empty input gives "".
+    if (parameters.empty()) {
+        return "";
+    }
+    std::string packet =
+        "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n"
+        "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n"
+        "  <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
+        "    <rdf:Description xmlns:sdcpp=\"https://github.com/leejet/stable-diffusion.cpp/ns/1.0/\">\n"
+        "      <sdcpp:parameters>";
+    packet += xml_escape(parameters);
+    return packet + "</sdcpp:parameters>\n"
+                    "    </rdf:Description>\n"
+                    "  </rdf:RDF>\n"
+                    "</x:xmpmeta>\n"
+                    "<?xpacket end=\"w\"?>";
+}
+
+bool encode_webp_image_to_vector(const uint8_t* image,
+                                 int width,
+                                 int height,
+                                 int channels,
+                                 const std::string& parameters,
+                                 int quality,
+                                 std::vector<uint8_t>& out) {
+    // Encodes a 1-, 3- or 4-channel 8-bit image as WebP into `out`, embedding `parameters`
+    // as an XMP chunk when non-empty. Returns false on invalid input or encode/mux failure.
+    if (image == nullptr || width <= 0 || height <= 0) {
+        return false;
+    }
+
+    std::vector<uint8_t> rgb_image;
+    const uint8_t* input_image = image;
+    int input_channels         = channels;
+
+    if (channels == 1) {
+        // Expand grayscale to RGB. Index with size_t: `width * height` and
+        // `i * 3` computed in int would overflow for very large images.
+        const size_t pixel_count = static_cast<size_t>(width) * static_cast<size_t>(height);
+        rgb_image.resize(pixel_count * 3);
+        for (size_t i = 0; i < pixel_count; ++i) {
+            rgb_image[i * 3 + 0] = image[i];
+            rgb_image[i * 3 + 1] = image[i];
+            rgb_image[i * 3 + 2] = image[i];
+        }
+        input_image    = rgb_image.data();
+        input_channels = 3;
+    }
+    if (input_channels != 3 && input_channels != 4) {
+        return false;
+    }
+
+    uint8_t* encoded    = nullptr;
+    size_t encoded_size = (input_channels == 4)
+                              ? WebPEncodeRGBA(input_image, width, height, width * input_channels, static_cast<float>(quality), &encoded)
+                              : WebPEncodeRGB(input_image, width, height, width * input_channels, static_cast<float>(quality), &encoded);
+    if (encoded == nullptr || encoded_size == 0) {
+        return false;
+    }
+
+    out.assign(encoded, encoded + encoded_size);
+    WebPFree(encoded);
+
+    if (parameters.empty()) {
+        return true;
+    }
+    // Metadata requested: re-wrap the encoded image together with an XMP chunk.
+    WebPData image_data;
+    WebPData assembled_data;
+    WebPDataInit(&image_data);
+    WebPDataInit(&assembled_data);
+    image_data.bytes = out.data();
+    image_data.size  = out.size();
+    WebPMux* mux = WebPMuxNew();
+    if (mux == nullptr) {
+        return false;
+    }
+
+    const std::string xmp_packet = build_webp_xmp_packet(parameters);
+    WebPData xmp_data;
+    WebPDataInit(&xmp_data);
+    xmp_data.bytes = reinterpret_cast<const uint8_t*>(xmp_packet.data());
+    xmp_data.size  = xmp_packet.size();
+    // copy_data = 1: the mux keeps its own copies, so `out` can be overwritten below.
+    const bool ok = WebPMuxSetImage(mux, &image_data, 1) == WEBP_MUX_OK &&
+                    WebPMuxSetChunk(mux, "XMP ", &xmp_data, 1) == WEBP_MUX_OK &&
+                    WebPMuxAssemble(mux, &assembled_data) == WEBP_MUX_OK;
+    if (ok) {
+        out.assign(assembled_data.bytes, assembled_data.bytes + assembled_data.size);
+    }
+    WebPDataClear(&assembled_data);
+    WebPMuxDelete(mux);
+    return ok;
+}
+#endif
+
+uint8_t* load_image_common(bool from_memory,
+                           const char* image_path_or_bytes,
+                           int len,
+                           int& width,
+                           int& height,
+                           int expected_width,
+                           int expected_height,
+                           int expected_channel) {
+    const char* image_path;
+    uint8_t* image_buffer    = nullptr;
+    int source_channel_count = 0;
+
+#ifdef SD_USE_WEBP
+    if (from_memory) {
+        image_path = "memory";
+        if (len > 0 && is_webp_signature(reinterpret_cast<const uint8_t*>(image_path_or_bytes), static_cast<size_t>(len))) {
+            image_buffer = decode_webp_image_to_buffer(reinterpret_cast<const uint8_t*>(image_path_or_bytes),
+                                                       static_cast<size_t>(len),
+                                                       width,
+                                                       height,
+                                                       expected_channel,
+                                                       source_channel_count);
+        }
+    } else {
+        image_path = image_path_or_bytes;
+        if (encoded_image_format_from_path(image_path_or_bytes) == EncodedImageFormat::WEBP) {
+            std::vector<uint8_t> file_bytes;
+            if (!read_binary_file_bytes(image_path_or_bytes, file_bytes)) {
+                LOG_ERROR("load image from '%s' failed", image_path_or_bytes);
+                return nullptr;
+            }
+            if (!is_webp_signature(file_bytes.data(), file_bytes.size())) {
+                LOG_ERROR("load image from '%s' failed", image_path_or_bytes);
+                return nullptr;
+            }
+            image_buffer = decode_webp_image_to_buffer(file_bytes.data(),
+                                                       file_bytes.size(),
+                                                       width,
+                                                       height,
+                                                       expected_channel,
+                                                       source_channel_count);
+        }
+    }
+#endif
+
+    if (from_memory) {
+        image_path = "memory";
+        if (image_buffer == nullptr) {
+            int c                = 0;
+            image_buffer         = (uint8_t*)stbi_load_from_memory((const stbi_uc*)image_path_or_bytes, len, &width, &height, &c, expected_channel);
+            source_channel_count = c;
+        }
+    } else {
+        image_path = image_path_or_bytes;
+        if (image_buffer == nullptr) {
+            int c                = 0;
+            image_buffer         = (uint8_t*)stbi_load(image_path_or_bytes, &width, &height, &c, expected_channel);
+            source_channel_count = c;
+        }
+    }
+    if (image_buffer == nullptr) {
+        LOG_ERROR("load image from '%s' failed", image_path);
+        return nullptr;
+    }
+    if (source_channel_count < expected_channel) {
+        fprintf(stderr,
+                "the number of channels for the input image must be >= %d,"
+                "but got %d channels, image_path = %s",
+                expected_channel,
+                source_channel_count,
+                image_path);
+        free(image_buffer);
+        return nullptr;
+    }
+    if (width <= 0) {
+        LOG_ERROR("error: the width of image must be greater than 0, image_path = %s", image_path);
+        free(image_buffer);
+        return nullptr;
+    }
+    if (height <= 0) {
+        LOG_ERROR("error: the height of image must be greater than 0, image_path = %s", image_path);
+        free(image_buffer);
+        return nullptr;
+    }
+
+    if ((expected_width > 0 && expected_height > 0) && (height != expected_height || width != expected_width)) {
+        float dst_aspect = (float)expected_width / (float)expected_height;
+        float src_aspect = (float)width / (float)height;
+
+        int crop_x = 0, crop_y = 0;
+        int crop_w = width, crop_h = height;
+
+        if (src_aspect > dst_aspect) {
+            crop_w = (int)(height * dst_aspect);
+            crop_x = (width - crop_w) / 2;
+        } else if (src_aspect < dst_aspect) {
+            crop_h = (int)(width / dst_aspect);
+            crop_y = (height - crop_h) / 2;
+        }
+
+        if (crop_x != 0 || crop_y != 0) {
+            LOG_INFO("crop input image from %dx%d to %dx%d, image_path = %s", width, height, crop_w, crop_h, image_path);
+            uint8_t* cropped_image_buffer = (uint8_t*)malloc(crop_w * crop_h * expected_channel);
+            if (cropped_image_buffer == nullptr) {
+                LOG_ERROR("error: allocate memory for crop\n");
+                free(image_buffer);
+                return nullptr;
+            }
+            for (int row = 0; row < crop_h; row++) {
+                uint8_t* src = image_buffer + ((crop_y + row) * width + crop_x) * expected_channel;
+                uint8_t* dst = cropped_image_buffer + (row * crop_w) * expected_channel;
+                memcpy(dst, src, crop_w * expected_channel);
+            }
+
+            width  = crop_w;
+            height = crop_h;
+            free(image_buffer);
+            image_buffer = cropped_image_buffer;
+        }
+
+        LOG_INFO("resize input image from %dx%d to %dx%d", width, height, expected_width, expected_height);
+        uint8_t* resized_image_buffer = (uint8_t*)malloc(expected_height * expected_width * expected_channel);
+        if (resized_image_buffer == nullptr) {
+            LOG_ERROR("error: allocate memory for resize input image\n");
+            free(image_buffer);
+            return nullptr;
+        }
+        stbir_resize(image_buffer, width, height, 0,
+                     resized_image_buffer, expected_width, expected_height, 0, STBIR_TYPE_UINT8,
+                     expected_channel, STBIR_ALPHA_CHANNEL_NONE, 0,
+                     STBIR_EDGE_CLAMP, STBIR_EDGE_CLAMP,
+                     STBIR_FILTER_BOX, STBIR_FILTER_BOX,
+                     STBIR_COLORSPACE_SRGB, nullptr);
+        width  = expected_width;
+        height = expected_height;
+        free(image_buffer);
+        image_buffer = resized_image_buffer;
+    }
+    return image_buffer;
+}
+
+struct avi_index_entry {  // bookkeeping for one idx1 record of the AVI writer
+    uint32_t offset;      // file position (from ftell) of the frame chunk's "00dc" tag
+    uint32_t size;        // byte length of the frame's JPEG payload
+};
+
+void write_u32_le(FILE* f, uint32_t val) {  // emits 4 bytes, least-significant first, whatever the host byte order
+    for (int shift = 0; shift < 32; shift += 8) { fputc((int)((val >> shift) & 0xFF), f); }
+}
+
+void write_u16_le(FILE* f, uint16_t val) {  // emits 2 bytes, least-significant first, whatever the host byte order
+    for (int shift = 0; shift < 16; shift += 8) { fputc((val >> shift) & 0xFF, f); }
+}
+}  // namespace
+
+// Maps the extension of `path` (compared case-insensitively) to an EncodedImageFormat;
+// anything other than .jpg/.jpeg/.jpe, .png or .webp yields EncodedImageFormat::UNKNOWN.
+EncodedImageFormat encoded_image_format_from_path(const std::string& path) {
+    std::string ext = fs::path(path).extension().string();
+    // tolower() must not be handed a negative char (undefined behaviour), so go through unsigned char.
+    std::transform(ext.begin(), ext.end(), ext.begin(),
+                   [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
+    if (ext == ".jpg" || ext == ".jpeg" || ext == ".jpe") {
+        return EncodedImageFormat::JPEG;
+    }
+    if (ext == ".png") {
+        return EncodedImageFormat::PNG;
+    }
+    return ext == ".webp" ? EncodedImageFormat::WEBP : EncodedImageFormat::UNKNOWN;
+}
+
+// Encodes a raw 8-bit image into an in-memory JPEG, PNG or WebP file.
+//
+//   format     - target encoding; any value other than JPEG, PNG or WEBP fails
+//   image      - source pixels, handed to the encoder unchanged
+//   width      - image width in pixels
+//   height     - image height in pixels
+//   channels   - channels per pixel (the PNG row stride is width * channels bytes)
+//   parameters - text forwarded to the PNG and WebP encoders; PNG receives nullptr
+//                when the string is empty
+//   quality    - forwarded to the JPEG and WebP encoders, unused for PNG
+//
+// Returns the encoded bytes, or an empty vector when encoding fails, the format is not
+// recognised, or WEBP is requested in a build without SD_USE_WEBP.
+std::vector<uint8_t> encode_image_to_vector(EncodedImageFormat format,
+                                            const uint8_t* image,
+                                            int width,
+                                            int height,
+                                            int channels,
+                                            const std::string& parameters,
+                                            int quality) {
+    std::vector<uint8_t> encoded;
+
+    // The stb writers deliver their output through a plain C callback. The destination
+    // vector travels as the opaque context pointer, so a capture-less lambda suffices.
+    auto append_chunk = [](void* context, void* data, int size) {
+        auto* sink           = static_cast<std::vector<uint8_t>*>(context);
+        const uint8_t* chunk = static_cast<const uint8_t*>(data);
+        sink->insert(sink->end(), chunk, chunk + size);
+    };
+
+    // Dispatch on the requested format.
+    bool ok = false;
+    if (format == EncodedImageFormat::JPEG) {
+        ok = stbi_write_jpg_to_func(append_chunk, &encoded, width, height, channels, image, quality) != 0;
+    } else if (format == EncodedImageFormat::PNG) {
+        const char* png_text = parameters.empty() ? nullptr : parameters.c_str();
+        ok = stbi_ext_write_png_to_func(append_chunk, &encoded, width, height, channels, image,
+                                        width * channels, png_text) != 0;
+    } else if (format == EncodedImageFormat::WEBP) {
+#ifdef SD_USE_WEBP
+        // The WebP encoder fills the vector itself; an empty result counts as failure.
+        if (encode_webp_image_to_vector(image, width, height, channels, parameters, quality, encoded)) {
+            ok = !encoded.empty();
+        }
+#endif
+    }
+
+    // Never hand back partially written data.
+    if (!ok) {
+        encoded.clear();
+    }
+    return encoded;
+}
+
+// Writes `image` to `path` in the format implied by its extension; false on unknown extension or failure.
+bool write_image_to_file(const std::string& path,
+                         const uint8_t* image,
+                         int width,
+                         int height,
+                         int channels,
+                         const std::string& parameters,
+                         int quality) {
+    const EncodedImageFormat format = encoded_image_format_from_path(path);
+    const char* text_or_null        = parameters.empty() ? nullptr : parameters.c_str();
+    if (format == EncodedImageFormat::JPEG) {
+        return stbi_write_jpg(path.c_str(), width, height, channels, image, quality, text_or_null) != 0;
+    }
+    if (format == EncodedImageFormat::PNG) {
+        return stbi_write_png(path.c_str(), width, height, channels, image, 0, text_or_null) != 0;
+    }
+    if (format != EncodedImageFormat::WEBP) {
+        return false;
+    }
+    const std::vector<uint8_t> encoded = encode_image_to_vector(format, image, width, height, channels, parameters, quality);
+    return !encoded.empty() && write_binary_file_bytes(path, encoded);
+}
+
+// Decodes the image file at `image_path` through load_image_common().
+// `width`/`height` receive the size of the returned buffer; nullptr is returned on failure.
+uint8_t* load_image_from_file(const char* image_path, int& width, int& height,
+                              int expected_width, int expected_height, int expected_channel) {
+    const bool from_memory = false;  // image_path names a file rather than an encoded buffer
+    const int no_length    = 0;      // no byte length is supplied for file input
+    return load_image_common(from_memory, image_path, no_length, width, height, expected_width, expected_height, expected_channel);
+}
+
+// Loads `image_path` into `*image` (data, width, height, channel); returns false with data == nullptr on failure.
+bool load_sd_image_from_file(sd_image_t* image,
+                             const char* image_path,
+                             int expected_width,
+                             int expected_height,
+                             int expected_channel) {
+    int width = 0, height = 0;
+    image->data = load_image_common(false, image_path, 0, width, height, expected_width, expected_height, expected_channel);
+    if (image->data != nullptr) {
+        image->width   = width;
+        image->height  = height;
+        image->channel = expected_channel;  // the decoded buffer holds expected_channel bytes per pixel
+    }
+    return image->data != nullptr;
+}
+
+// Decodes an encoded image held in memory (`len` bytes at `image_bytes`) through
+// load_image_common(). `width`/`height` receive the size of the returned buffer;
+// nullptr is returned when decoding fails.
+uint8_t* load_image_from_memory(const char* image_bytes, int len, int& width, int& height,
+                                int expected_width, int expected_height, int expected_channel) {
+    const bool from_memory = true;  // image_bytes is the encoded payload, not a path
+    uint8_t* pixels        = load_image_common(from_memory, image_bytes, len, width, height, expected_width, expected_height, expected_channel);
+    return pixels;
+}
+
+// Writes `images` to `filename` as a Motion-JPEG AVI: each frame is JPEG-encoded and stored as a
+// "00dc" chunk, followed by an idx1 index. The header describes every frame with the size and
+// channel count of images[0]. Returns 0 on success and -1 on failure.
+int create_mjpg_avi_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
+    if (num_images <= 0) {
+        fprintf(stderr, "Error: Image array is empty.\n");
+        return -1;
+    }
+    if (fps <= 0) {  // fps divides the per-frame duration below, so zero must never get that far
+        fprintf(stderr, "Error: FPS must be positive.\n");
+        return -1;
+    }
+
+    FILE* f = fopen(filename, "wb");
+    if (!f) {
+        perror("Error opening file for writing");
+        return -1;
+    }
+
+    uint32_t width    = images[0].width;
+    uint32_t height   = images[0].height;
+    uint32_t channels = images[0].channel;
+    if (channels != 3 && channels != 4) {
+        fprintf(stderr, "Error: Unsupported channel count: %u\n", channels);
+        fclose(f);
+        return -1;
+    }
+
+    fwrite("RIFF", 4, 1, f);
+    long riff_size_pos = ftell(f);
+    write_u32_le(f, 0);  // RIFF size placeholder, patched once everything is written
+    fwrite("AVI ", 4, 1, f);
+
+    // hdrl list: "hdrl" tag + avih chunk + nested strl list (strh and strf chunks)
+    fwrite("LIST", 4, 1, f);
+    write_u32_le(f, 4 + 8 + 56 + 8 + 4 + 8 + 56 + 8 + 40);
+    fwrite("hdrl", 4, 1, f);
+
+    fwrite("avih", 4, 1, f);
+    write_u32_le(f, 56);
+    write_u32_le(f, 1000000 / fps);       // microseconds per frame
+    write_u32_le(f, 0);                   // max bytes per second
+    write_u32_le(f, 0);                   // padding granularity
+    write_u32_le(f, 0x110);               // flags: AVIF_HASINDEX | AVIF_ISINTERLEAVED
+    write_u32_le(f, num_images);          // total frames
+    write_u32_le(f, 0);                   // initial frames
+    write_u32_le(f, 1);                   // number of streams
+    write_u32_le(f, width * height * 3);  // suggested buffer size
+    write_u32_le(f, width);
+    write_u32_le(f, height);
+    write_u32_le(f, 0);  // four reserved words
+    write_u32_le(f, 0);
+    write_u32_le(f, 0);
+    write_u32_le(f, 0);
+
+    fwrite("LIST", 4, 1, f);
+    write_u32_le(f, 4 + 8 + 56 + 8 + 40);
+    fwrite("strl", 4, 1, f);
+
+    fwrite("strh", 4, 1, f);
+    write_u32_le(f, 56);
+    fwrite("vids", 4, 1, f);              // stream type: video
+    fwrite("MJPG", 4, 1, f);              // codec handler
+    write_u32_le(f, 0);                   // flags
+    write_u16_le(f, 0);                   // priority
+    write_u16_le(f, 0);                   // language
+    write_u32_le(f, 0);                   // initial frames
+    write_u32_le(f, 1);                   // scale: rate / scale = frames per second
+    write_u32_le(f, fps);                 // rate
+    write_u32_le(f, 0);                   // start
+    write_u32_le(f, num_images);          // length in frames
+    write_u32_le(f, width * height * 3);  // suggested buffer size
+    write_u32_le(f, (uint32_t)-1);        // quality: -1 selects the default
+    write_u32_le(f, 0);                   // sample size
+    write_u16_le(f, 0);                   // frame rectangle, left unset
+    write_u16_le(f, 0);
+    write_u16_le(f, 0);
+    write_u16_le(f, 0);
+
+    fwrite("strf", 4, 1, f);
+    write_u32_le(f, 40);
+    write_u32_le(f, 40);                  // BITMAPINFOHEADER size
+    write_u32_le(f, width);
+    write_u32_le(f, height);
+    write_u16_le(f, 1);                   // planes
+    write_u16_le(f, 24);                  // bits per pixel
+    fwrite("MJPG", 4, 1, f);              // compression
+    write_u32_le(f, width * height * 3);  // image size
+    write_u32_le(f, 0);
+    write_u32_le(f, 0);
+    write_u32_le(f, 0);
+    write_u32_le(f, 0);
+
+    fwrite("LIST", 4, 1, f);
+    long movi_size_pos = ftell(f);
+    write_u32_le(f, 0);  // movi size placeholder, patched after the frame loop
+    fwrite("movi", 4, 1, f);
+
+    avi_index_entry* index = (avi_index_entry*)malloc(sizeof(avi_index_entry) * num_images);
+    if (!index) {
+        fclose(f);
+        return -1;
+    }
+
+    for (int i = 0; i < num_images; i++) {
+        // Reuse the shared in-memory JPEG encoder; it returns an empty vector when encoding fails.
+        const std::vector<uint8_t> jpeg = encode_image_to_vector(EncodedImageFormat::JPEG, images[i].data, images[i].width, images[i].height, channels, "", quality);
+        if (jpeg.empty()) {
+            fprintf(stderr, "Error: Failed to encode frame %d as JPEG.\n", i);
+            free(index);
+            fclose(f);
+            return -1;
+        }
+
+        fwrite("00dc", 4, 1, f);
+        write_u32_le(f, (uint32_t)jpeg.size());
+        index[i].offset = ftell(f) - 8;  // step back over the 8-byte chunk header just written
+        index[i].size   = (uint32_t)jpeg.size();
+        fwrite(jpeg.data(), 1, jpeg.size(), f);
+        if (jpeg.size() % 2) {
+            fputc(0, f);  // pad odd-sized chunks to an even length
+        }
+    }
+
+    // Go back and fill in the movi list size now that all frames are written.
+    long cur_pos   = ftell(f);
+    long movi_size = cur_pos - movi_size_pos - 4;
+    fseek(f, movi_size_pos, SEEK_SET);
+    write_u32_le(f, movi_size);
+    fseek(f, cur_pos, SEEK_SET);
+
+    // idx1: one 16-byte entry per frame (chunk id, flags, offset, size)
+    fwrite("idx1", 4, 1, f);
+    write_u32_le(f, num_images * 16);
+    for (int i = 0; i < num_images; i++) {
+        fwrite("00dc", 4, 1, f);
+        write_u32_le(f, 0x10);  // AVIIF_KEYFRAME
+        write_u32_le(f, index[i].offset);
+        write_u32_le(f, index[i].size);
+    }
+
+    // Finally fill in the RIFF size at the start of the file.
+    cur_pos        = ftell(f);
+    long file_size = cur_pos - riff_size_pos - 4;
+    fseek(f, riff_size_pos, SEEK_SET);
+    write_u32_le(f, file_size);
+    fseek(f, cur_pos, SEEK_SET);
+
+    fclose(f);
+    free(index);
+
+    return 0;
+}
+
+#ifdef SD_USE_WEBP
+// Encodes `images` as an animated WebP file. Every frame must match the width, height and channel
+// count (1, 3 or 4) of images[0]; frames are round(1000 / fps) ms apart (at least 1). Returns 0 or -1.
+int create_animated_webp_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
+    if (num_images <= 0) {
+        fprintf(stderr, "Error: Image array is empty.\n");
+        return -1;
+    }
+    if (fps <= 0) {
+        fprintf(stderr, "Error: FPS must be positive.\n");
+        return -1;
+    }
+
+    const int width    = static_cast<int>(images[0].width);
+    const int height   = static_cast<int>(images[0].height);
+    const int channels = static_cast<int>(images[0].channel);
+    if (channels != 1 && channels != 3 && channels != 4) {
+        fprintf(stderr, "Error: Unsupported channel count: %d\n", channels);
+        return -1;
+    }
+
+    WebPAnimEncoderOptions anim_options;
+    WebPConfig config;
+    if (!WebPAnimEncoderOptionsInit(&anim_options) || !WebPConfigInit(&config)) {
+        fprintf(stderr, "Error: Failed to initialize WebP animation encoder.\n");
+        return -1;
+    }
+
+    config.quality      = static_cast<float>(quality);
+    config.method       = 4;
+    config.thread_level = 1;
+    if (channels == 4) {
+        config.exact = 1;  // keep the RGB values stored under transparent pixels
+    }
+    if (!WebPValidateConfig(&config)) {
+        fprintf(stderr, "Error: Invalid WebP encoder configuration.\n");
+        return -1;
+    }
+
+    WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &anim_options);
+    if (enc == nullptr) {
+        fprintf(stderr, "Error: Could not create WebPAnimEncoder object.\n");
+        return -1;
+    }
+
+    const int frame_duration_ms = std::max(1, static_cast<int>(std::lround(1000.0 / static_cast<double>(fps))));
+    int timestamp_ms            = 0;
+    int ret                     = -1;
+
+    for (int i = 0; i < num_images; ++i) {
+        const sd_image_t& image = images[i];
+        if (static_cast<int>(image.width) != width || static_cast<int>(image.height) != height || static_cast<int>(image.channel) != channels) {
+            fprintf(stderr, "Error: Frame dimensions or channel count do not match.\n");
+            goto cleanup;  // importing with images[0]'s layout could read past a deviating frame
+        }
+
+        WebPPicture picture;
+        if (!WebPPictureInit(&picture)) {
+            fprintf(stderr, "Error: Failed to initialize WebPPicture.\n");
+            goto cleanup;
+        }
+        picture.use_argb = 1;
+        picture.width    = width;
+        picture.height   = height;
+
+        bool picture_ok = false;
+        std::vector<uint8_t> rgb_buffer;
+        if (channels == 1) {  // grayscale frames are expanded to RGB before the import
+            rgb_buffer.resize(static_cast<size_t>(width) * static_cast<size_t>(height) * 3);
+            for (size_t p = 0, pixel_count = rgb_buffer.size() / 3; p < pixel_count; ++p) {
+                rgb_buffer[p * 3 + 0] = rgb_buffer[p * 3 + 1] = rgb_buffer[p * 3 + 2] = image.data[p];
+            }
+            picture_ok = WebPPictureImportRGB(&picture, rgb_buffer.data(), width * 3) != 0;
+        } else if (channels == 4) {
+            picture_ok = WebPPictureImportRGBA(&picture, image.data, width * 4) != 0;
+        } else {
+            picture_ok = WebPPictureImportRGB(&picture, image.data, width * 3) != 0;
+        }
+
+        if (!picture_ok) {
+            fprintf(stderr, "Error: Failed to import frame into WebPPicture.\n");
+            WebPPictureFree(&picture);
+            goto cleanup;
+        }
+
+        if (!WebPAnimEncoderAdd(enc, &picture, timestamp_ms, &config)) {
+            fprintf(stderr, "Error: Failed to add frame to animated WebP: %s\n", WebPAnimEncoderGetError(enc));
+            WebPPictureFree(&picture);
+            goto cleanup;
+        }
+
+        WebPPictureFree(&picture);
+        timestamp_ms += frame_duration_ms;
+    }
+    // A null frame tells the encoder that no more frames follow; its timestamp ends the last frame.
+    if (!WebPAnimEncoderAdd(enc, nullptr, timestamp_ms, nullptr)) {
+        fprintf(stderr, "Error: Failed to finalize animated WebP frames: %s\n", WebPAnimEncoderGetError(enc));
+        goto cleanup;
+    }
+
+    {
+        WebPData webp_data;
+        WebPDataInit(&webp_data);
+        if (!WebPAnimEncoderAssemble(enc, &webp_data)) {
+            fprintf(stderr, "Error: Failed to assemble animated WebP: %s\n", WebPAnimEncoderGetError(enc));
+            WebPDataClear(&webp_data);
+            goto cleanup;
+        }
+
+        FILE* f = fopen(filename, "wb");
+        if (!f) {
+            perror("Error opening file for writing");
+            WebPDataClear(&webp_data);
+            goto cleanup;
+        }
+        if (webp_data.size > 0 && fwrite(webp_data.bytes, 1, webp_data.size, f) != webp_data.size) {
+            fprintf(stderr, "Error: Failed to write animated WebP file.\n");
+            fclose(f);
+            WebPDataClear(&webp_data);
+            goto cleanup;
+        }
+        fclose(f);
+        WebPDataClear(&webp_data);
+    }
+
+    ret = 0;
+
+cleanup:
+    WebPAnimEncoderDelete(enc);
+    return ret;
+}
+#endif
+
+// Picks the container from the file name: a ".webp" suffix (any letter case, WebP builds only)
+// produces an animated WebP, every other name a Motion-JPEG AVI.
+int create_video_from_sd_images(const char* filename, sd_image_t* images, int num_images, int fps, int quality) {
+    const std::string path = filename ? filename : "";
+    const auto dot         = path.find_last_of('.');
+    std::string ext        = dot == std::string::npos ? "" : path.substr(dot);
+    for (size_t k = 0; k < ext.size(); ++k) {
+        ext[k] = static_cast<char>(tolower(static_cast<unsigned char>(ext[k])));
+    }
+#ifdef SD_USE_WEBP
+    if (ext == ".webp") {
+        return create_animated_webp_from_sd_images(filename, images, num_images, fps, quality);
+    }
+#endif
+    return create_mjpg_avi_from_sd_images(filename, images, num_images, fps, quality);
+}
diff --git a/examples/common/media_io.h b/examples/common/media_io.h
new file mode 100644
index 000000000..cb8302906
--- /dev/null
+++ b/examples/common/media_io.h
@@ -0,0 +1,76 @@
+#ifndef __MEDIA_IO_H__
+#define __MEDIA_IO_H__
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "stable-diffusion.h"
+
+enum class EncodedImageFormat {
+    JPEG,
+    PNG,
+    WEBP,
+    UNKNOWN,  // returned for any file extension that is not one of the formats above
+};
+// Maps a path's file extension (case-insensitive) to a format; UNKNOWN when it is not recognised.
+EncodedImageFormat encoded_image_format_from_path(const std::string& path);
+// Encodes raw pixels into `format` in memory; the returned vector is empty when encoding fails.
+std::vector<uint8_t> encode_image_to_vector(EncodedImageFormat format,
+                                            const uint8_t* image,
+                                            int width,
+                                            int height,
+                                            int channels,
+                                            const std::string& parameters = "",
+                                            int quality                   = 90);
+// Encodes `image` in the format implied by the extension of `path` and writes it there; false on failure.
+bool write_image_to_file(const std::string& path,
+                         const uint8_t* image,
+                         int width,
+                         int height,
+                         int channels,
+                         const std::string& parameters = "",
+                         int quality                   = 90);
+// Decodes an image file; width/height receive the size of the returned buffer (nullptr on failure).
+uint8_t* load_image_from_file(const char* image_path,
+                              int& width,
+                              int& height,
+                              int expected_width   = 0,
+                              int expected_height  = 0,
+                              int expected_channel = 3);
+// Same decoding as load_image_from_file(), but stores the result in *image and reports success as a bool.
+bool load_sd_image_from_file(sd_image_t* image,
+                             const char* image_path,
+                             int expected_width   = 0,
+                             int expected_height  = 0,
+                             int expected_channel = 3);
+// Same decoding as load_image_from_file(), but reads `len` bytes of encoded image data from memory.
+uint8_t* load_image_from_memory(const char* image_bytes,
+                                int len,
+                                int& width,
+                                int& height,
+                                int expected_width   = 0,
+                                int expected_height  = 0,
+                                int expected_channel = 3);
+// Writes the frames as a Motion-JPEG AVI file; returns 0 on success and -1 on failure.
+int create_mjpg_avi_from_sd_images(const char* filename,
+                                   sd_image_t* images,
+                                   int num_images,
+                                   int fps,
+                                   int quality = 90);
+// Writes the frames as an animated WebP file (WebP builds only); returns 0 on success and -1 on failure.
+#ifdef SD_USE_WEBP
+int create_animated_webp_from_sd_images(const char* filename,
+                                        sd_image_t* images,
+                                        int num_images,
+                                        int fps,
+                                        int quality = 90);
+#endif
+// Writes animated WebP when `filename` ends in .webp (WebP builds only), otherwise a Motion-JPEG AVI.
+int create_video_from_sd_images(const char* filename,
+                                sd_image_t* images,
+                                int num_images,
+                                int fps,
+                                int quality = 90);
+
+#endif  // __MEDIA_IO_H__
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index f2568d72e..bf2b252bb 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -56,7 +56,11 @@ else()
     message(STATUS "Frontend disabled or directory not found: ${FRONTEND_DIR}")
 endif()
 
-add_executable(${TARGET} main.cpp)
+add_executable(${TARGET}  # the shared sources from ../common are compiled directly into this target
+    ../common/log.cpp
+    ../common/media_io.cpp
+    main.cpp
+)
 
 if(HAVE_FRONTEND_BUILD)
     add_dependencies(${TARGET} ${TARGET}_frontend)
@@ -70,10 +74,13 @@ endif()
 
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
+if(SD_WEBP)  # media_io.cpp needs libwebp for still images and libwebpmux for the WebPAnimEncoder API
+    target_link_libraries(${TARGET} PRIVATE webp libwebpmux)
+endif()
 
 # due to httplib; it contains a pragma for MSVC, but other things need explicit flags
 if(WIN32 AND NOT MSVC)
     target_link_libraries(${TARGET} PRIVATE ws2_32)
 endif()
 
-target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
\ No newline at end of file
+target_compile_features(${TARGET} PUBLIC c_std_11 cxx_std_17)
diff --git a/examples/server/main.cpp b/examples/server/main.cpp
index 6a5036975..8d4e644b5 100644
--- a/examples/server/main.cpp
+++ b/examples/server/main.cpp
@@ -12,6 +12,7 @@
 #include "stable-diffusion.h"
 
 #include "common/common.hpp"
+#include "common/media_io.h"
 
 #ifdef HAVE_INDEX_HTML
 #include "frontend/dist/gen_index_html.h"
@@ -217,62 +218,6 @@ std::string extract_and_remove_sd_cpp_extra_args(std::string& text) {
     return extracted;
 }
 
-enum class ImageFormat { JPEG,
-                         PNG };
-
-static int stbi_ext_write_png_to_func(stbi_write_func* func, void* context, int x, int y, int comp, const void* data, int stride_bytes, const char* parameters) {
-    int len;
-    unsigned char* png = stbi_write_png_to_mem((const unsigned char*)data, stride_bytes, x, y, comp, &len, parameters);
-    if (png == NULL)
-        return 0;
-    func(context, png, len);
-    STBIW_FREE(png);
-    return 1;
-}
-
-std::vector<uint8_t> write_image_to_vector(
-    ImageFormat format,
-    const uint8_t* image,
-    int width,
-    int height,
-    int channels,
-    std::string params = "",
-    int quality        = 90) {
-    std::vector<uint8_t> buffer;
-
-    auto write_func = [&buffer](void* context, void* data, int size) {
-        uint8_t* src = reinterpret_cast<uint8_t*>(data);
-        buffer.insert(buffer.end(), src, src + size);
-    };
-
-    struct ContextWrapper {
-        decltype(write_func)& func;
-    } ctx{write_func};
-
-    auto c_func = [](void* context, void* data, int size) {
-        auto* wrapper = reinterpret_cast<ContextWrapper*>(context);
-        wrapper->func(context, data, size);
-    };
-
-    int result = 0;
-    switch (format) {
-        case ImageFormat::JPEG:
-            result = stbi_write_jpg_to_func(c_func, &ctx, width, height, channels, image, quality);
-            break;
-        case ImageFormat::PNG:
-            result = stbi_ext_write_png_to_func(c_func, &ctx, width, height, channels, image, width * channels, params.size() > 0 ? params.c_str() : nullptr);
-            break;
-        default:
-            throw std::runtime_error("invalid image format");
-    }
-
-    if (!result) {
-        throw std::runtime_error("write imgage to mem failed");
-    }
-
-    return buffer;
-}
-
 void sd_log_cb(enum sd_log_level_t level, const char* log, void* data) {
     SDSvrParams* svr_params = (SDSvrParams*)data;
     log_print(level, log, svr_params->verbose, svr_params->color);
@@ -345,7 +290,7 @@ void free_results(sd_image_t* result_images, int num_results) {
     if (result_images) {
         for (int i = 0; i < num_results; ++i) {
             if (result_images[i].data) {
-                stbi_image_free(result_images[i].data);
+                free(result_images[i].data);
                 result_images[i].data = nullptr;
             }
         }
@@ -416,9 +361,9 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
 
             std::string sd_cpp_extra_args_str = extract_and_remove_sd_cpp_extra_args(prompt);
 
-            if (output_format != "png" && output_format != "jpeg") {
+            if (output_format != "png" && output_format != "jpeg" && output_format != "webp") {
                 res.status = 400;
-                res.set_content(R"({"error":"invalid output_format, must be one of [png, jpeg]"})", "application/json");
+                res.set_content(R"({"error":"invalid output_format, must be one of [png, jpeg, webp]"})", "application/json");
                 return;
             }
             if (n <= 0)
@@ -511,13 +456,17 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
                 std::string params = gen_params.embed_image_metadata
                                          ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
                                          : "";
-                auto image_bytes   = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
-                                                         results[i].data,
-                                                         results[i].width,
-                                                         results[i].height,
-                                                         results[i].channel,
-                                                         params,
-                                                         output_compression);
+                auto image_bytes   = encode_image_to_vector(output_format == "jpeg"
+                                                                ? EncodedImageFormat::JPEG
+                                                            : output_format == "webp"
+                                                                ? EncodedImageFormat::WEBP
+                                                                : EncodedImageFormat::PNG,
+                                                          results[i].data,
+                                                          results[i].width,
+                                                          results[i].height,
+                                                          results[i].channel,
+                                                          params,
+                                                          output_compression);
                 if (image_bytes.empty()) {
                     LOG_ERROR("write image to mem failed");
                     continue;
@@ -765,13 +714,17 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
                 std::string params = gen_params.embed_image_metadata
                                          ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
                                          : "";
-                auto image_bytes   = write_image_to_vector(output_format == "jpeg" ? ImageFormat::JPEG : ImageFormat::PNG,
-                                                         results[i].data,
-                                                         results[i].width,
-                                                         results[i].height,
-                                                         results[i].channel,
-                                                         params,
-                                                         output_compression);
+                auto image_bytes   = encode_image_to_vector(output_format == "jpeg"
+                                                                ? EncodedImageFormat::JPEG
+                                                            : output_format == "webp"
+                                                                ? EncodedImageFormat::WEBP
+                                                                : EncodedImageFormat::PNG,
+                                                          results[i].data,
+                                                          results[i].width,
+                                                          results[i].height,
+                                                          results[i].channel,
+                                                          params,
+                                                          output_compression);
                 std::string b64 = base64_encode(image_bytes);
                 json item;
                 item["b64_json"] = b64;
@@ -783,13 +736,13 @@ void register_openai_api_endpoints(httplib::Server& svr, ServerRuntime& rt) {
             res.status = 200;
 
             if (init_image.data) {
-                stbi_image_free(init_image.data);
+                free(init_image.data);
             }
             if (mask_image.data) {
-                stbi_image_free(mask_image.data);
+                free(mask_image.data);
             }
             for (auto ref_image : ref_images) {
-                stbi_image_free(ref_image.data);
+                free(ref_image.data);
             }
         } catch (const std::exception& e) {
             res.status = 500;
@@ -1084,12 +1037,12 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
                 std::string params = gen_params.embed_image_metadata
                                          ? get_image_params(*runtime->ctx_params, gen_params, gen_params.seed + i)
                                          : "";
-                auto image_bytes   = write_image_to_vector(ImageFormat::PNG,
-                                                           results[i].data,
-                                                           results[i].width,
-                                                           results[i].height,
-                                                           results[i].channel,
-                                                           params);
+                auto image_bytes   = encode_image_to_vector(EncodedImageFormat::PNG,
+                                                            results[i].data,
+                                                            results[i].width,
+                                                            results[i].height,
+                                                            results[i].channel,
+                                                            params);
 
                 if (image_bytes.empty()) {
                     LOG_ERROR("write image to mem failed");
@@ -1105,13 +1058,13 @@ void register_sdapi_endpoints(httplib::Server& svr, ServerRuntime& rt) {
             res.status = 200;
 
             if (init_image.data) {
-                stbi_image_free(init_image.data);
+                free(init_image.data);
             }
             if (mask_image.data && mask_data.empty()) {
-                stbi_image_free(mask_image.data);
+                free(mask_image.data);
             }
             for (auto ref_image : ref_images) {
-                stbi_image_free(ref_image.data);
+                free(ref_image.data);
             }
 
         } catch (const std::exception& e) {
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index 77274c336..a17178507 100644
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -1,3 +1,20 @@
 set(Z_TARGET zip)
 add_library(${Z_TARGET} OBJECT zip.c zip.h miniz.h)
-target_include_directories(${Z_TARGET} PUBLIC .)
\ No newline at end of file
+target_include_directories(${Z_TARGET} PUBLIC .)
+
+if(SD_WEBP)  # SD_WEBP is the option() declared in the top-level CMakeLists.txt ("sd: enable WebP image I/O support")
+    set(WEBP_BUILD_ANIM_UTILS OFF)  # NOTE(review): the WEBP_BUILD_* names are presumably libwebp's own build switches - confirm against thirdparty/libwebp/CMakeLists.txt
+    set(WEBP_BUILD_CWEBP OFF)
+    set(WEBP_BUILD_DWEBP OFF)
+    set(WEBP_BUILD_GIF2WEBP OFF)
+    set(WEBP_BUILD_IMG2WEBP OFF)
+    set(WEBP_BUILD_VWEBP OFF)
+    set(WEBP_BUILD_WEBPINFO OFF)
+    set(WEBP_BUILD_WEBPMUX OFF)
+    set(WEBP_BUILD_EXTRAS OFF)
+    set(WEBP_BUILD_WEBP_JS OFF)
+    set(WEBP_BUILD_FUZZTEST OFF)  # NOTE(review): a plain set() overrides a subproject option() only under policy CMP0077 NEW - confirm libwebp requires CMake >= 3.13
+    set(WEBP_BUILD_LIBWEBPMUX ON)  # the only switch turned ON here; presumably needed by the examples' WebP code - TODO confirm
+
+    add_subdirectory(libwebp EXCLUDE_FROM_ALL)  # thirdparty/libwebp git submodule; EXCLUDE_FROM_ALL keeps its targets out of the default build unless another target depends on them
+endif()
diff --git a/thirdparty/libwebp b/thirdparty/libwebp
new file mode 160000
index 000000000..0c9546f7e
--- /dev/null
+++ b/thirdparty/libwebp
@@ -0,0 +1 @@
+Subproject commit 0c9546f7efc61eac7f79ae115c3f99c91c21c443

From b9eb7f8ce0449695bbc83dbc2975b05bdbab6899 Mon Sep 17 00:00:00 2001
From: leejet <leejet714@gmail.com>
Date: Thu, 2 Apr 2026 01:09:22 +0800
Subject: [PATCH 2/2] docs/build.md: change crlf to lf

---
 docs/build.md | 370 +++++++++++++++++++++++++-------------------------
 1 file changed, 185 insertions(+), 185 deletions(-)

diff --git a/docs/build.md b/docs/build.md
index 0bff5df00..eabb51ac3 100644
--- a/docs/build.md
+++ b/docs/build.md
@@ -1,185 +1,185 @@
-# Build from scratch
-
-## Get the Code
-
-```
-git clone --recursive https://github.com/leejet/stable-diffusion.cpp
-cd stable-diffusion.cpp
-```
-
-- If you have already cloned the repository, you can use the following command to update the repository to the latest code.
-
-```
-cd stable-diffusion.cpp
-git pull origin master
-git submodule init
-git submodule update
-```
-
-## WebP Support in Examples
-
-The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
-
-If you do not want WebP support, you can disable it at configure time:
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_WEBP=OFF
-cmake --build . --config Release
-```
-
-## Build (CPU only)
-
-If you don't have a GPU or CUDA installed, you can build a CPU-only version.
-
-```shell
-mkdir build && cd build
-cmake ..
-cmake --build . --config Release
-```
-
-## Build with OpenBLAS
-
-```shell
-mkdir build && cd build
-cmake .. -DGGML_OPENBLAS=ON
-cmake --build . --config Release
-```
-
-## Build with CUDA
-
-This provides GPU acceleration using NVIDIA GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_CUDA=ON
-cmake --build . --config Release
-```
-
-## Build with HipBLAS
-
-This provides GPU acceleration using AMD GPU. Make sure to have the ROCm toolkit installed.
-To build for another GPU architecture than installed in your system, set `$GFX_NAME` manually to the desired architecture (replace first command). This is also necessary if your GPU is not officially supported by ROCm, for example you have to set `$GFX_NAME` manually to `gfx1030` for consumer RDNA2 cards.
-
-Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
-
-```shell
-mkdir build && cd build
-if command -v rocminfo; then export GFX_NAME=$(rocminfo | awk '/ *Name: +gfx[1-9]/ {print $2; exit}'); else echo "rocminfo missing!"; fi
-if [ -z "${GFX_NAME}" ]; then echo "Error: Couldn't detect GPU!"; else echo "Building for GPU: ${GFX_NAME}"; fi
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DAMDGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-cmake --build . --config Release
-```
-
-## Build with MUSA
-
-This provides GPU acceleration using Moore Threads GPU. Make sure to have the MUSA toolkit installed.
-
-```shell
-mkdir build && cd build
-cmake .. -DCMAKE_C_COMPILER=/usr/local/musa/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/musa/bin/clang++ -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release
-cmake --build . --config Release
-```
-
-## Build with Metal
-
-Using Metal makes the computation run on the GPU. Currently, there are some issues with Metal when performing operations on very large matrices, making it highly inefficient at the moment. Performance improvements are expected in the near future.
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_METAL=ON
-cmake --build . --config Release
-```
-
-## Build with Vulkan
-
-Install Vulkan SDK from https://www.lunarg.com/vulkan-sdk/.
-
-```shell
-mkdir build && cd build
-cmake .. -DSD_VULKAN=ON
-cmake --build . --config Release
-```
-
-## Build with OpenCL (for Adreno GPU)
-
-Currently, it supports only Adreno GPUs and is primarily optimized for Q4_0 type
-
-To build for Windows ARM please refers to [Windows 11 Arm64](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64)
-
-Building for Android:
-
-  Android NDK:
-       Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
-
-Setup OpenCL Dependencies for NDK:
-
-You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
-
-*   OpenCL Headers:
-    ```bash
-    # In a temporary working directory
-    git clone https://github.com/KhronosGroup/OpenCL-Headers
-    cd OpenCL-Headers
-    # Replace <YOUR_NDK_PATH> with your actual NDK installation path
-    # e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
-    sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
-    cd ..
-    ```
-
-*   OpenCL ICD Loader:
-    ```shell
-    # In the same temporary working directory
-    git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
-    cd OpenCL-ICD-Loader
-    mkdir build_ndk && cd build_ndk
-
-    # Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
-    cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
-      -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
-      -DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
-      -DANDROID_ABI=arm64-v8a \
-      -DANDROID_PLATFORM=24 \
-      -DANDROID_STL=c++_shared
-
-    ninja
-    # Replace <YOUR_NDK_PATH>
-    # e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
-    sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
-    cd ../..
-    ```
-
-Build `stable-diffusion.cpp` for Android with OpenCL:
-
-```shell
-mkdir build-android && cd build-android
-
-# Replace <YOUR_NDK_PATH> with your actual NDK installation path
-# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
-cmake .. -G Ninja \
-  -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
-  -DANDROID_ABI=arm64-v8a \
-  -DANDROID_PLATFORM=android-28 \
-  -DGGML_OPENMP=OFF \
-  -DSD_OPENCL=ON
-
-ninja
-```
-*(Note: Don't forget to include `LD_LIBRARY_PATH=/vendor/lib64` in your command line before running the binary)*
-
-## Build with SYCL
-
-Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before start. More details and steps can refer to [llama.cpp SYCL backend](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/SYCL.md#linux).
-
-```shell
-# Export relevant ENV variables
-source /opt/intel/oneapi/setvars.sh
-
-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-# Option 2: Use FP16
-cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
-
-cmake --build . --config Release
-```
+# Build from scratch
+
+## Get the Code
+
+```
+git clone --recursive https://github.com/leejet/stable-diffusion.cpp
+cd stable-diffusion.cpp
+```
+
+- If you have already cloned the repository, you can use the following command to update the repository to the latest code.
+
+```
+cd stable-diffusion.cpp
+git pull origin master
+git submodule init
+git submodule update
+```
+
+## WebP Support in Examples
+
+The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O. This is enabled by default.
+
+If you do not want WebP support, you can disable it at configure time:
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_WEBP=OFF
+cmake --build . --config Release
+```
+
+## Build (CPU only)
+
+If you don't have a GPU or CUDA installed, you can build a CPU-only version.
+
+```shell
+mkdir build && cd build
+cmake ..
+cmake --build . --config Release
+```
+
+## Build with OpenBLAS
+
+```shell
+mkdir build && cd build
+cmake .. -DGGML_OPENBLAS=ON
+cmake --build . --config Release
+```
+
+## Build with CUDA
+
+This provides GPU acceleration using NVIDIA GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_CUDA=ON
+cmake --build . --config Release
+```
+
+## Build with HipBLAS
+
+This provides GPU acceleration using AMD GPU. Make sure to have the ROCm toolkit installed.
+To build for a GPU architecture other than the one installed in your system, set `$GFX_NAME` manually to the desired architecture (replace the first command). This is also necessary if your GPU is not officially supported by ROCm; for example, you have to set `$GFX_NAME` manually to `gfx1030` for consumer RDNA2 cards.
+
+Windows users: refer to [docs/hipBLAS_on_Windows.md](./hipBLAS_on_Windows.md) for a comprehensive guide.
+
+```shell
+mkdir build && cd build
+if command -v rocminfo; then export GFX_NAME=$(rocminfo | awk '/ *Name: +gfx[1-9]/ {print $2; exit}'); else echo "rocminfo missing!"; fi
+if [ -z "${GFX_NAME}" ]; then echo "Error: Couldn't detect GPU!"; else echo "Building for GPU: ${GFX_NAME}"; fi
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DAMDGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+cmake --build . --config Release
+```
+
+## Build with MUSA
+
+This provides GPU acceleration using Moore Threads GPU. Make sure to have the MUSA toolkit installed.
+
+```shell
+mkdir build && cd build
+cmake .. -DCMAKE_C_COMPILER=/usr/local/musa/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/musa/bin/clang++ -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release
+cmake --build . --config Release
+```
+
+## Build with Metal
+
+Using Metal makes the computation run on the GPU. Currently, there are some issues with Metal when performing operations on very large matrices, making it highly inefficient at the moment. Performance improvements are expected in the near future.
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_METAL=ON
+cmake --build . --config Release
+```
+
+## Build with Vulkan
+
+Install Vulkan SDK from https://www.lunarg.com/vulkan-sdk/.
+
+```shell
+mkdir build && cd build
+cmake .. -DSD_VULKAN=ON
+cmake --build . --config Release
+```
+
+## Build with OpenCL (for Adreno GPU)
+
+Currently, it supports only Adreno GPUs and is primarily optimized for the Q4_0 type.
+
+To build for Windows ARM, please refer to [Windows 11 Arm64](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/OPENCL.md#windows-11-arm64).
+
+Building for Android:
+
+  Android NDK:
+       Download and install the Android NDK from the [official Android developer site](https://developer.android.com/ndk/downloads).
+
+Setup OpenCL Dependencies for NDK:
+
+You need to provide OpenCL headers and the ICD loader library to your NDK sysroot.
+
+*   OpenCL Headers:
+    ```bash
+    # In a temporary working directory
+    git clone https://github.com/KhronosGroup/OpenCL-Headers
+    cd OpenCL-Headers
+    # Replace <YOUR_NDK_PATH> with your actual NDK installation path
+    # e.g., cp -r CL /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+    sudo cp -r CL <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include
+    cd ..
+    ```
+
+*   OpenCL ICD Loader:
+    ```shell
+    # In the same temporary working directory
+    git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
+    cd OpenCL-ICD-Loader
+    mkdir build_ndk && cd build_ndk
+
+    # Replace <YOUR_NDK_PATH> in the CMAKE_TOOLCHAIN_FILE and OPENCL_ICD_LOADER_HEADERS_DIR
+    cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
+      -DOPENCL_ICD_LOADER_HEADERS_DIR=<YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/include \
+      -DANDROID_ABI=arm64-v8a \
+      -DANDROID_PLATFORM=24 \
+      -DANDROID_STL=c++_shared
+
+    ninja
+    # Replace <YOUR_NDK_PATH>
+    # e.g., cp libOpenCL.so /path/to/android-ndk-r26c/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+    sudo cp libOpenCL.so <YOUR_NDK_PATH>/toolchains/llvm/prebuilt/linux-x86_64/sysroot/usr/lib/aarch64-linux-android
+    cd ../..
+    ```
+
+Build `stable-diffusion.cpp` for Android with OpenCL:
+
+```shell
+mkdir build-android && cd build-android
+
+# Replace <YOUR_NDK_PATH> with your actual NDK installation path
+# e.g., -DCMAKE_TOOLCHAIN_FILE=/path/to/android-ndk-r26c/build/cmake/android.toolchain.cmake
+cmake .. -G Ninja \
+  -DCMAKE_TOOLCHAIN_FILE=<YOUR_NDK_PATH>/build/cmake/android.toolchain.cmake \
+  -DANDROID_ABI=arm64-v8a \
+  -DANDROID_PLATFORM=android-28 \
+  -DGGML_OPENMP=OFF \
+  -DSD_OPENCL=ON
+
+ninja
+```
+*(Note: Don't forget to include `LD_LIBRARY_PATH=/vendor/lib64` in your command line before running the binary)*
+
+## Build with SYCL
+
+Using SYCL makes the computation run on the Intel GPU. Please make sure you have installed the related driver and the [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before starting. For more details and steps, refer to the [llama.cpp SYCL backend](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/SYCL.md#linux) documentation.
+
+```shell
+# Export relevant ENV variables
+source /opt/intel/oneapi/setvars.sh
+
+# Option 1: Use FP32 (recommended for better performance in most cases)
+cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+# Option 2: Use FP16
+cmake .. -DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
+
+cmake --build . --config Release
+```