image processing for llama3.2 (#6963)

Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com>
2025-12-11 08:17:03 +00:00 · 2024-10-18 16:12:35 -07:00
parent bf4018b9ec
commit c7cb0f0602
35 changed files with 3851 additions and 203 deletions
--- a/llama/ggml.h
+++ b/llama/ggml.h
@@ -532,6 +532,7 @@ extern "C" {
        GGML_OP_POOL_2D_BACK,
        GGML_OP_UPSCALE, // nearest interpolate
        GGML_OP_PAD,
+        GGML_OP_UNPAD,
        GGML_OP_ARANGE,
        GGML_OP_TIMESTEP_EMBEDDING,
        GGML_OP_ARGSORT,
@@ -1790,6 +1791,15 @@ extern "C" {
            int                  p2,
            int                  p3);

+    // unpad each dimension: [x, ..., x, y, ..., y] -> [x, ..., x]
+    GGML_API struct ggml_tensor * ggml_unpad(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                  p0,
+            int                  p1,
+            int                  p2,
+            int                  p3);
+
    // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
    // timesteps: [N,]
    // return: [N, dim]