Add support for new models and fix GitHub issues

- Add Gemma3n model support with text generation capabilities - Add new CUDA mean operations for improved performance - Add macOS documentation and performance tests - Update LLAMA patches for ROCm/CUDA compatibility - Fix various model conversion and processing issues - Update CI workflows and build configurations - Add library model tests and Shakespeare test data 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-10 15:57:04 +00:00 · 2025-07-20 00:12:36 +08:00
parent 1fa71c2670
commit cbcbc9ae07
81 changed files with 132316 additions and 747 deletions
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -23,7 +23,7 @@ jobs:
          echo GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=${GITHUB_REF_NAME#v}\" \"-X=github.com/ollama/ollama/server.mode=release\"'" >>$GITHUB_OUTPUT

  darwin-build:
-    runs-on: macos-13
+    runs-on: macos-13-xlarge
    environment: release
    needs: setup-environment
    strategy:
@@ -54,48 +54,6 @@ jobs:
          name: build-${{ matrix.os }}-${{ matrix.arch }}
          path: dist/*

-  darwin-sign:
-    runs-on: macos-13
-    environment: release
-    needs: darwin-build
-    steps:
-      - uses: actions/checkout@v4
-      - run: |
-          echo $MACOS_SIGNING_KEY | base64 --decode > certificate.p12
-          security create-keychain -p password build.keychain
-          security default-keychain -s build.keychain
-          security unlock-keychain -p password build.keychain
-          security import certificate.p12 -k build.keychain -P $MACOS_SIGNING_KEY_PASSWORD -T /usr/bin/codesign
-          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
-          security set-keychain-settings -lut 3600 build.keychain
-        env:
-          MACOS_SIGNING_KEY: ${{ secrets.MACOS_SIGNING_KEY }}
-          MACOS_SIGNING_KEY_PASSWORD: ${{ secrets.MACOS_SIGNING_KEY_PASSWORD }}
-      - uses: actions/download-artifact@v4
-        with:
-          name: build-darwin-amd64
-          path: dist/darwin-amd64
-      - uses: actions/download-artifact@v4
-        with:
-          name: build-darwin-arm64
-          path: dist/darwin-arm64
-      - run: |
-          export VERSION=${GITHUB_REF_NAME#v}
-          ./scripts/build_darwin.sh sign macapp
-        env:
-          APPLE_IDENTITY: ${{ secrets.APPLE_IDENTITY }}
-          APPLE_PASSWORD: ${{ secrets.APPLE_PASSWORD }}
-          APPLE_TEAM_ID: ${{ vars.APPLE_TEAM_ID }}
-          APPLE_ID: ${{ vars.APPLE_ID }}
-          SDKROOT: /Applications/Xcode_14.1.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
-          DEVELOPER_DIR: /Applications/Xcode_14.1.0.app/Contents/Developer
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-darwin
-          path: |
-            dist/Ollama-darwin.zip
-            dist/ollama-darwin.tgz
-
  windows-depends:
    strategy:
      matrix:
@@ -103,21 +61,18 @@ jobs:
        arch: [amd64]
        preset: ['CPU']
        include:
-          - os: windows
-            arch: amd64
-            preset: 'CUDA 11'
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
-            cuda-version: '11.3'
          - os: windows
            arch: amd64
            preset: 'CUDA 12'
            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
            cuda-version: '12.8'
+            flags: ''
          - os: windows
            arch: amd64
            preset: 'ROCm 6'
            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
            rocm-version: '6.2'
+            flags: '-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
    runs-on: ${{ matrix.arch == 'arm64' && format('{0}-{1}', matrix.os, matrix.arch) || matrix.os }}
    environment: release
    env:
@@ -160,6 +115,9 @@ jobs:
          echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
      - if: matrix.preset == 'CPU'
        run: |
          echo "CC=clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
@@ -178,9 +136,9 @@ jobs:
          key: ccache-${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.preset }}
      - name: Build target "${{ matrix.preset }}"
        run: |
-          Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation  -DevCmdArguments '-arch=x64 -no_logo'
-          cmake --preset "${{ matrix.preset }}"
+          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation  -DevCmdArguments '-arch=x64 -no_logo'
+          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
          cmake --build --parallel --preset "${{ matrix.preset }}"
          cmake --install build --component "${{ startsWith(matrix.preset, 'CUDA ') && 'CUDA' || startsWith(matrix.preset, 'ROCm ') && 'HIP' || 'CPU' }}" --strip --parallel 8
        env:
@@ -230,61 +188,11 @@ jobs:
          go-version-file: go.mod
      - run: |
          go build -o dist/${{ matrix.os }}-${{ matrix.arch }}/ .
-      - if: matrix.arch == 'arm64'
-        run: |
-          Invoke-WebRequest -Uri "https://aka.ms/vs/17/release/vc_redist.arm64.exe" -OutFile "dist\windows-arm64\vc_redist.arm64.exe"
-      - run: |
-          $env:VERSION='${{ github.ref_name }}' -Replace "v(.*)", '$1'
-          & .\scripts\build_windows.ps1 buildApp
-        env:
-          VCToolsRedistDir: stub
      - uses: actions/upload-artifact@v4
        with:
          name: build-${{ matrix.os }}-${{ matrix.arch }}
          path: |
            dist\${{ matrix.os }}-${{ matrix.arch }}\*.exe
-            dist\${{ matrix.os }}-${{ matrix.arch }}-app.exe
-
-  windows-sign:
-    runs-on: windows-2022
-    environment: release
-    needs: [windows-depends, windows-build]
-    steps:
-      - uses: actions/checkout@v4
-      - uses: google-github-actions/auth@v2
-        with:
-          project_id: ollama
-          credentials_json: ${{ secrets.GOOGLE_SIGNING_CREDENTIALS }}
-      - run: |
-          $ErrorActionPreference = "Stop"
-          Invoke-WebRequest -Uri "https://go.microsoft.com/fwlink/p/?LinkId=323507" -OutFile "${{ runner.temp }}\sdksetup.exe"
-          Start-Process "${{ runner.temp }}\sdksetup.exe" -ArgumentList @("/q") -NoNewWindow -Wait
-
-          Invoke-WebRequest -Uri "https://github.com/GoogleCloudPlatform/kms-integrations/releases/download/cng-v1.0/kmscng-1.0-windows-amd64.zip" -OutFile "${{ runner.temp }}\plugin.zip"
-          Expand-Archive -Path "${{ runner.temp }}\plugin.zip" -DestinationPath "${{ runner.temp }}\plugin\"
-          & "${{ runner.temp }}\plugin\*\kmscng.msi" /quiet
-
-          echo "${{ vars.OLLAMA_CERT }}" >ollama_inc.crt
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: build-windows-*
-          path: dist\
-          merge-multiple: true
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: depends-windows-amd64-*
-          path: dist\windows-amd64\
-          merge-multiple: true
-      - run: |
-          & .\scripts\build_windows.ps1 gatherDependencies sign buildInstaller distZip
-        env:
-          KEY_CONTAINER: ${{ vars.KEY_CONTAINER }}
-      - uses: actions/upload-artifact@v4
-        with:
-          name: dist-windows
-          path: |
-            dist\OllamaSetup.exe
-            dist\ollama-windows-*.zip

  linux-build:
    strategy:
@@ -317,21 +225,26 @@ jobs:
            CGO_CFLAGS=${{ env.CGO_CFLAGS }}
            CGO_CXXFLAGS=${{ env.CGO_CXXFLAGS }}
          outputs: type=local,dest=dist/${{ matrix.os }}-${{ matrix.arch }}
-          cache-from: type=registry,ref=ollama/ollama:latest
+          cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
          cache-to: type=inline
      - run: |
          for COMPONENT in bin/* lib/ollama/*; do
            case "$COMPONENT" in
-              bin/ollama)               echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/*.so)          echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/cuda_v11)      echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/cuda_v12)      echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
-              lib/ollama/cuda_jetpack5) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
-              lib/ollama/cuda_jetpack6) echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
-              lib/ollama/rocm)          echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
+              bin/ollama)                echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/*.so*)          echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/cuda_sbsa)      echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}.tar.in ;;
+              lib/ollama/cuda_jetpack5)  echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack5.tar.in ;;
+              lib/ollama/cuda_jetpack6)  echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-jetpack6.tar.in ;;
+              lib/ollama/rocm)           echo $COMPONENT >>ollama-${{ matrix.os }}-${{ matrix.arch }}-rocm.tar.in ;;
            esac
          done
        working-directory: dist/${{ matrix.os }}-${{ matrix.arch }}
+      - run: |
+          echo "Manifests"
+          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in ; do
+            echo $ARCHIVE
+            cat $ARCHIVE
+          done
      - run: |
          for ARCHIVE in dist/${{ matrix.os }}-${{ matrix.arch }}/*.tar.in; do
            tar c -C dist/${{ matrix.os }}-${{ matrix.arch }} -T $ARCHIVE --owner 0 --group 0 | pigz -9vc >$(basename ${ARCHIVE//.*/}.tgz);
@@ -385,8 +298,8 @@ jobs:
          context: .
          platforms: ${{ matrix.os }}/${{ matrix.arch }}
          build-args: ${{ matrix.build-args }}
-          outputs: type=image,name=ollama/ollama,push-by-digest=true,name-canonical=true,push=true
-          cache-from: type=registry,ref=ollama/ollama:latest
+          outputs: type=image,name=${{ vars.DOCKER_REPO }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=registry,ref=${{ vars.DOCKER_REPO }}:latest
          cache-to: type=inline
      - run: |
          mkdir -p ${{ matrix.os }}-${{ matrix.arch }}
@@ -418,7 +331,7 @@ jobs:
            latest=false
            suffix=${{ matrix.suffix }}
          images: |
-            ollama/ollama
+            ${{ vars.DOCKER_REPO }}
          tags: |
            type=ref,enable=true,priority=600,prefix=pr-,event=pr
            type=semver,pattern={{version}}
@@ -428,56 +341,24 @@ jobs:
          path: ${{ runner.temp }}
          merge-multiple: true
      - run: |
-          docker buildx imagetools create $(echo '${{ steps.metadata.outputs.json }}' | jq -cr '.tags | map("-t", .) | join(" ")') $(cat *-${{ matrix.suffix }}.txt | xargs printf 'ollama/ollama@%s ')
-          docker buildx imagetools inspect ollama/ollama:${{ steps.metadata.outputs.version }}
+          docker buildx imagetools create $(echo '${{ steps.metadata.outputs.json }}' | jq -cr '.tags | map("-t", .) | join(" ")') $(cat *-${{ matrix.suffix }}.txt | xargs printf '${{ vars.DOCKER_REPO }}@%s ')
+          docker buildx imagetools inspect ${{ vars.DOCKER_REPO }}:${{ steps.metadata.outputs.version }}
        working-directory: ${{ runner.temp }}

  # Trigger downstream release process
  trigger:
    runs-on: ubuntu-latest
    environment: release
-    needs: [darwin-build, windows-build, windows-depends]
-    steps:
-      - name: Trigger downstream release process
-        run: |
-          curl -L \
-            -X POST \
-            -H "Accept: application/vnd.github+json" \
-            -H "Authorization: Bearer ${{ secrets.RELEASE_TOKEN }}" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            https://api.github.com/repos/ollama/${{ vars.RELEASE_REPO }}/dispatches \
-            -d "{\"event_type\": \"trigger-workflow\", \"client_payload\": {\"run_id\": \"${GITHUB_RUN_ID}\", \"version\": \"${GITHUB_REF_NAME#v}\"}}"
-
-  # Aggregate all the assets and ship a release
-  release:
-    needs: [darwin-sign, windows-sign, linux-build]
-    runs-on: linux
-    environment: release
+    needs: [darwin-build, windows-build, windows-depends, linux-build]
    permissions:
      contents: write
    env:
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v4
-        with:
-          name: dist-darwin
-          path: dist
-      - uses: actions/download-artifact@v4
-        with:
-          name: dist-windows
-          path: dist
-      - uses: actions/download-artifact@v4
-        with:
-          pattern: dist-linux-*
-          path: dist
-          merge-multiple: true
-      - run: find . -type f -not -name 'sha256sum.txt' | xargs sha256sum | tee sha256sum.txt
-        working-directory: dist
-      - name: Create or update Release
+      - name: Create or update Release for tag
        run: |
          RELEASE_VERSION="$(echo ${GITHUB_REF_NAME} | cut -f1 -d-)"
-
          echo "Looking for existing release for ${RELEASE_VERSION}"
          OLD_TAG=$(gh release ls --json name,tagName | jq -r ".[] | select(.name == \"${RELEASE_VERSION}\") | .tagName")
          if [ -n "$OLD_TAG" ]; then
@@ -491,5 +372,12 @@ jobs:
              --generate-notes \
              --prerelease
          fi
-          echo "Uploading artifacts for tag ${GITHUB_REF_NAME}"
-          gh release upload ${GITHUB_REF_NAME} dist/* --clobber
+      - name: Trigger downstream release process
+        run: |
+          curl -L \
+            -X POST \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.RELEASE_TOKEN }}" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            https://api.github.com/repos/ollama/${{ vars.RELEASE_REPO }}/dispatches \
+            -d "{\"event_type\": \"trigger-workflow\", \"client_payload\": {\"run_id\": \"${GITHUB_RUN_ID}\", \"version\": \"${GITHUB_REF_NAME#v}\", \"origin\": \"${GITHUB_REPOSITORY}\", \"publish\": \"1\"}}"
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -36,7 +36,7 @@ jobs:
              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
          }

-          echo changed=$(changed 'llama/llama.cpp/**' 'ml/backend/ggml/ggml/**') | tee -a $GITHUB_OUTPUT
+          echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT

  linux:
    needs: [changes]
@@ -46,7 +46,7 @@ jobs:
        include:
          - preset: CPU
          - preset: CUDA
-            container: nvidia/cuda:11.8.0-devel-ubuntu22.04
+            container: nvidia/cuda:12.8.1-devel-ubuntu22.04
            flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
          - preset: ROCm
            container: rocm/dev-ubuntu-22.04:6.1.2
@@ -78,11 +78,11 @@ jobs:
        include:
          - preset: CPU
          - preset: CUDA
-            install: https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.89_win10.exe
+            install: https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_571.96_windows.exe
            flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
          - preset: ROCm
            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
-            flags: '-DAMDGPU_TARGETS=gfx1010'
+            flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
    runs-on: windows
    steps:
      - run: |
@@ -102,7 +102,7 @@ jobs:
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
-            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_11.3", "nvcc_11.3", "cublas_11.3", "cublas_dev_11.3")) -NoNewWindow -Wait
+            Start-Process -FilePath .\install.exe -ArgumentList (@("-s", "cudart_12.8", "nvcc_12.8", "cublas_12.8", "cublas_dev_12.8")) -NoNewWindow -Wait
          }

          $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
@@ -120,6 +120,9 @@ jobs:
          echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
+          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
      - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
        uses: actions/cache/save@v4
        with:
@@ -133,8 +136,8 @@ jobs:
          path: ${{ github.workspace }}\.ccache
          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
      - run: |
-          Import-Module 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
-          Enter-VsDevShell -VsInstallPath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise' -SkipAutomaticLocation  -DevCmdArguments '-arch=x64 -no_logo'
+          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation  -DevCmdArguments '-arch=x64 -no_logo'
          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
          cmake --build --parallel --preset "${{ matrix.preset }}"
        env: