Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-10 07:46:59 +00:00)
This commit represents a complete rework after pulling the latest changes from the official ollama/ollama repository and re-applying the Tesla K80 compatibility patches.

## Key Changes

### CUDA Compute Capability 3.7 Support (Tesla K80)
- Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt
- Updated CMakePresets.json to include compute 3.7 in the "CUDA 11" preset
- Using 37-virtual (PTX with JIT compilation) for maximum compatibility

### Legacy Toolchain Compatibility
- **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80)
- **CUDA Version**: 11.4.4 (last CUDA 11.x release supporting compute 3.7)
- **GCC Version**: 10.5.0 (required by CUDA 11.4's host_config.h)

### CPU Architecture Trade-offs
Due to the GCC 10.5 limitation, newer CPU optimizations are sacrificed:
- Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+)
- Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA
- Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility)

### Build System Updates
- Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7
- Added the -Wno-deprecated-gpu-targets flag to suppress warnings
- Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI

### Upstream Sync
Merged the latest llama.cpp changes, including:
- Enhanced KV cache management with ISWA and hybrid memory support
- Improved multi-modal support (mtmd framework)
- New model architectures (Gemma3, Llama4, Qwen3, etc.)
- GPU backend improvements for CUDA, Metal, and ROCm
- Updated quantization support and GGUF format handling

### Documentation
- Updated CLAUDE.md with comprehensive build instructions
- Documented toolchain constraints and CPU architecture trade-offs
- Removed outdated CI/CD workflows (tesla-k80-*.yml)
- Cleaned up temporary development artifacts

## Rationale

This fork maintains Tesla K80 GPU support (compute 3.7), which was dropped in official Ollama because of its legacy driver and CUDA requirements. The toolchain constraint forms a rigid dependency chain:
- K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI

We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12 GB VRAM per GPU).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
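To make the CUDA-side change more concrete, below is a minimal sketch of what adding compute 3.7 to a ggml-cuda architecture list could look like. It is an assumption-laden illustration, not the actual patch: the real ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt sets its architecture list differently, and the non-3.7 entries shown here are placeholders for whatever newer architectures the build already targets.

```cmake
# Sketch only: shows the intent of the Tesla K80 patch, not its exact text.
# "37-virtual" embeds PTX for compute 3.7, which driver 470 can JIT-compile on a K80.
# The other entries are placeholders for the newer architectures the preset already builds.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "37-virtual;50;61;70;75;80")
endif()

# CUDA 11.4's nvcc warns that compute 3.7 is deprecated; suppress the warning
# so build logs stay readable (the -Wno-deprecated-gpu-targets flag from the commit).
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:-Wno-deprecated-gpu-targets>")

# CPU side (ml/backend/ggml/ggml/src/CMakeLists.txt): per the commit, the Alderlake
# variant is built without AVX_VNNI because GCC 10.5 cannot emit it; conceptually,
# the feature is simply dropped from that variant's feature list.
```

For the preset-based build, the commit states the same compute 3.7 entry is mirrored into the "CUDA 11" preset of CMakePresets.json, so a configure step that selects a preset (as the workflow below does with `cmake --preset ${{ matrix.preset }}`) picks it up without extra flags.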
287 lines · 13 KiB · YAML
name: test

concurrency:
  # For PRs, later CI runs preempt previous ones. e.g. a force push on a PR
  # cancels running CI jobs and starts all new ones.
  #
  # For non-PR pushes, concurrency.group needs to be unique for every distinct
  # CI run we want to have happen. Use run_id, which in practice means all
  # non-PR CI runs will be allowed to run without preempting each other.
  group: ${{ github.workflow }}-$${{ github.pull_request.number || github.run_id }}
  cancel-in-progress: true

on:
  pull_request:
    paths:
      - '**/*'
      - '!docs/**'
      - '!README.md'

jobs:
  changes:
    runs-on: ubuntu-latest
    outputs:
      changed: ${{ steps.changes.outputs.changed }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - id: changes
        run: |
          changed() {
            local BASE=${{ github.event.pull_request.base.sha }}
            local HEAD=${{ github.event.pull_request.head.sha }}
            local MERGE_BASE=$(git merge-base $BASE $HEAD)
            git diff-tree -r --no-commit-id --name-only "$MERGE_BASE" "$HEAD" \
              | xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
          }

          echo changed=$(changed 'llama/llama.cpp/**/*' 'ml/backend/ggml/ggml/**/*') | tee -a $GITHUB_OUTPUT

  linux:
    needs: [changes]
    if: needs.changes.outputs.changed == 'True'
    strategy:
      matrix:
        include:
          - preset: CPU
          - preset: CUDA
            container: nvidia/cuda:13.0.0-devel-ubuntu22.04
            flags: '-DCMAKE_CUDA_ARCHITECTURES=87'
          - preset: ROCm
            container: rocm/dev-ubuntu-22.04:6.1.2
            extra-packages: rocm-libs
            flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_PREFIX_PATH=/opt/rocm'
          - preset: Vulkan
            container: ubuntu:22.04
            extra-packages: >
              mesa-vulkan-drivers vulkan-tools
              libvulkan1 libvulkan-dev
              vulkan-sdk cmake ccache g++ make
    runs-on: linux
    container: ${{ matrix.container }}
    steps:
      - uses: actions/checkout@v4
      - run: |
          [ -n "${{ matrix.container }}" ] || sudo=sudo
          $sudo apt-get update
          # Add LunarG Vulkan SDK apt repo for Ubuntu 22.04
          if [ "${{ matrix.preset }}" = "Vulkan" ]; then
            $sudo apt-get install -y --no-install-recommends wget gnupg ca-certificates software-properties-common
            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | $sudo gpg --dearmor -o /usr/share/keyrings/lunarg-archive-keyring.gpg
            # Use signed-by to bind the repo to the installed keyring to avoid NO_PUBKEY
            echo "deb [signed-by=/usr/share/keyrings/lunarg-archive-keyring.gpg] https://packages.lunarg.com/vulkan/1.4.313 jammy main" | $sudo tee /etc/apt/sources.list.d/lunarg-vulkan-1.4.313-jammy.list > /dev/null
            $sudo apt-get update
          fi
          $sudo apt-get install -y cmake ccache ${{ matrix.extra-packages }}
          # Export VULKAN_SDK if provided by LunarG package (defensive)
          if [ -d "/usr/lib/x86_64-linux-gnu/vulkan" ] && [ "${{ matrix.preset }}" = "Vulkan" ]; then
            echo "VULKAN_SDK=/usr" >> $GITHUB_ENV
          fi
        env:
          DEBIAN_FRONTEND: noninteractive
      - uses: actions/cache@v4
        with:
          path: /github/home/.cache/ccache
          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
      - run: |
          cmake --preset ${{ matrix.preset }} ${{ matrix.flags }}
          cmake --build --preset ${{ matrix.preset }} --parallel

  windows:
    needs: [changes]
    if: needs.changes.outputs.changed == 'True'
    strategy:
      matrix:
        include:
          - preset: CPU
          - preset: CUDA
            install: https://developer.download.nvidia.com/compute/cuda/13.0.0/local_installers/cuda_13.0.0_windows.exe
            flags: '-DCMAKE_CUDA_ARCHITECTURES=80'
            cuda-components:
              - '"cudart"'
              - '"nvcc"'
              - '"cublas"'
              - '"cublas_dev"'
              - '"crt"'
              - '"nvvm"'
              - '"nvptxcompiler"'
            cuda-version: '13.0'
          - preset: ROCm
            install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
            flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
          - preset: Vulkan
            install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
    runs-on: windows
    steps:
      - run: |
          choco install -y --no-progress ccache ninja
          ccache -o cache_dir=${{ github.workspace }}\.ccache
      - if: matrix.preset == 'CUDA' || matrix.preset == 'ROCm' || matrix.preset == 'Vulkan'
        id: cache-install
        uses: actions/cache/restore@v4
        with:
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
            C:\VulkanSDK
          key: ${{ matrix.install }}
      - if: matrix.preset == 'CUDA'
        name: Install CUDA ${{ matrix.cuda-version }}
        run: |
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
            $subpackages = @(${{ join(matrix.cuda-components, ', ') }}) | Foreach-Object {"${_}_${{ matrix.cuda-version }}"}
            Start-Process -FilePath .\install.exe -ArgumentList (@("-s") + $subpackages) -NoNewWindow -Wait
          }

          $cudaPath = (Resolve-Path "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\*").path
          echo "$cudaPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
      - if: matrix.preset == 'ROCm'
        name: Install ROCm ${{ matrix.rocm-version }}
        run: |
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
            Start-Process -FilePath .\install.exe -ArgumentList '-install' -NoNewWindow -Wait
          }

          $hipPath = (Resolve-Path "C:\Program Files\AMD\ROCm\*").path
          echo "$hipPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CC=$hipPath\bin\clang.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
          echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
      - if: matrix.preset == 'Vulkan'
        name: Install Vulkan
        run: |
          $ErrorActionPreference = "Stop"
          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
            Start-Process -FilePath .\install.exe -ArgumentList "-c","--am","--al","in" -NoNewWindow -Wait
          }

          $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
          echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "VULKAN_SDK=$vulkanPath" >> $env:GITHUB_ENV
      - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
        uses: actions/cache/save@v4
        with:
          path: |
            C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
            C:\Program Files\AMD\ROCm
          key: ${{ matrix.install }}
      - uses: actions/checkout@v4
      - uses: actions/cache@v4
        with:
          path: ${{ github.workspace }}\.ccache
          key: ccache-${{ runner.os }}-${{ runner.arch }}-${{ matrix.preset }}
      - run: |
          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -SkipAutomaticLocation -DevCmdArguments '-arch=x64 -no_logo'
          cmake --preset "${{ matrix.preset }}" ${{ matrix.flags }}
          cmake --build --parallel --preset "${{ matrix.preset }}"
        env:
          CMAKE_GENERATOR: Ninja

  go_mod_tidy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: check that 'go mod tidy' is clean
        run: go mod tidy --diff || (echo "Please run 'go mod tidy'." && exit 1)

  test:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    env:
      CGO_ENABLED: '1'
      GOEXPERIMENT: 'synctest'
    steps:
      - name: checkout
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2

      - name: cache restore
        uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
        with:
          # Note: unlike the other setups, this is only grabbing the mod download
          # cache, rather than the whole mod directory, as the download cache
          # contains zips that can be unpacked in parallel faster than they can be
          # fetched and extracted by tar
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod/cache
            ~\AppData\Local\go-build
          # NOTE: The -3- here should be incremented when the scheme of data to be
          # cached changes (e.g. path above changes).
          key: ${{ github.job }}-${{ runner.os }}-${{ matrix.goarch }}-${{ matrix.buildflags }}-go-3-${{ hashFiles('**/go.sum') }}-${{ github.run_id }}
          restore-keys: |
            ${{ github.job }}-${{ runner.os }}-${{ matrix.goarch }}-${{ matrix.buildflags }}-go-3-${{ hashFiles('**/go.sum') }}
            ${{ github.job }}-${{ runner.os }}-${{ matrix.goarch }}-${{ matrix.buildflags }}-go-3-

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          # The caching strategy of setup-go is less than ideal, and wastes
          # time by not saving artifacts due to small failures like the linter
          # complaining, etc. This means subsequent runs have to rebuild their
          # world again until all checks pass. For instance, if you misspell a
          # word, you're punished until you fix it. This is more hostile than
          # helpful.
          cache: false

          go-version-file: go.mod

      # It is tempting to run this in a platform independent way, but the past
      # shows this codebase will see introductions of platform specific code
      # generation, and so we need to check this per platform to ensure we
      # don't abuse go generate on specific platforms.
      - name: check that 'go generate' is clean
        if: always()
        run: |
          go generate ./...
          git diff --name-only --exit-code || (echo "Please run 'go generate ./...'." && exit 1)

      - name: go test
        if: always()
        run: go test -count=1 -benchtime=1x ./...

      # TODO(bmizerany): replace this heavy tool with just the
      # tools/checks/binaries we want and then make them all run in parallel
      # across jobs, not on a single tiny vm on Github Actions.
      - uses: golangci/golangci-lint-action@v6
        with:
          args: --timeout 10m0s -v

      - name: cache save
        # Always save the cache, even if the job fails. The artifacts produced
        # during the building of test binaries are not all for naught. They can
        # be used to speed up subsequent runs.
        if: always()
        uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
        with:
          # Note: unlike the other setups, this is only grabbing the mod download
          # cache, rather than the whole mod directory, as the download cache
          # contains zips that can be unpacked in parallel faster than they can be
          # fetched and extracted by tar
          path: |
            ~/.cache/go-build
            ~/go/pkg/mod/cache
            ~\AppData\Local\go-build
          # NOTE: The -3- here should be incremented when the scheme of data to be
          # cached changes (e.g. path above changes).
          key: ${{ github.job }}-${{ runner.os }}-${{ matrix.goarch }}-${{ matrix.buildflags }}-go-3-${{ hashFiles('**/go.sum') }}-${{ github.run_id }}

  patches:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Verify patches apply cleanly and do not change files
        run: |
          make -f Makefile.sync clean checkout apply-patches sync
          git diff --compact-summary --exit-code