mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-17 19:27:00 +00:00
Sync with upstream ollama/ollama and restore Tesla K80 (compute 3.7) support
This commit represents a complete rework after pulling the latest changes from official ollama/ollama repository and re-applying Tesla K80 compatibility patches. ## Key Changes ### CUDA Compute Capability 3.7 Support (Tesla K80) - Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt - Updated CMakePresets.json to include compute 3.7 in "CUDA 11" preset - Using 37-virtual (PTX with JIT compilation) for maximum compatibility ### Legacy Toolchain Compatibility - **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80) - **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7) - **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h) ### CPU Architecture Trade-offs Due to GCC 10.5 limitation, sacrificed newer CPU optimizations: - Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+) - Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA - Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility) ### Build System Updates - Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7 - Added -Wno-deprecated-gpu-targets flag to suppress warnings - Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI ### Upstream Sync Merged latest llama.cpp changes including: - Enhanced KV cache management with ISWA and hybrid memory support - Improved multi-modal support (mtmd framework) - New model architectures (Gemma3, Llama4, Qwen3, etc.) - GPU backend improvements for CUDA, Metal, and ROCm - Updated quantization support and GGUF format handling ### Documentation - Updated CLAUDE.md with comprehensive build instructions - Documented toolchain constraints and CPU architecture trade-offs - Removed outdated CI/CD workflows (tesla-k80-*.yml) - Cleaned up temporary development artifacts ## Rationale This fork maintains Tesla K80 GPU support (compute 3.7) which was dropped in official Ollama due to legacy driver/CUDA requirements. The toolchain constraint creates a deadlock: - K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -78,32 +78,84 @@ function checkEnv() {
|
||||
}
|
||||
|
||||
|
||||
function buildOllama() {
|
||||
function buildCPU() {
|
||||
mkdir -Force -path "${script:DIST_DIR}\"
|
||||
if ($script:ARCH -ne "arm64") {
|
||||
Remove-Item -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}"
|
||||
New-Item "${script:SRC_DIR}\dist\windows-${script:ARCH}\lib\ollama\" -ItemType Directory -ea 0
|
||||
|
||||
& cmake --fresh --preset CPU --install-prefix $script:DIST_DIR
|
||||
& cmake -B build\cpu --preset CPU --install-prefix $script:DIST_DIR
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --build --preset CPU --config Release --parallel $script:JOBS
|
||||
& cmake --build build\cpu --target ggml-cpu --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build --component CPU --strip
|
||||
& cmake --install build\cpu --component CPU --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
}
|
||||
|
||||
function buildCUDA11() {
|
||||
# CUDA v11 claims to be compatible with MSVC 2022, but the latest updates are no longer compatible
|
||||
# 19.40 is the last compiler version that works, but recent udpates are 19.43
|
||||
# So this pins to MSVC 2019 for best compatibility
|
||||
mkdir -Force -path "${script:DIST_DIR}\"
|
||||
if ($script:ARCH -ne "arm64") {
|
||||
$hashEnv = @{}
|
||||
Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
|
||||
if ("$script:CUDA_DIRS".Contains("v12")) {
|
||||
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12")) { $v12="$_" }}
|
||||
$env:CUDAToolkit_ROOT=$hashEnv[$v12]
|
||||
write-host "Building CUDA v12 backend libraries"
|
||||
& cmake --fresh --preset "CUDA 12" --install-prefix $script:DIST_DIR
|
||||
if ("$script:CUDA_DIRS".Contains("v11")) {
|
||||
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V11")) { $x=$hashEnv[$_]; if (test-path -literalpath "$x\bin\nvcc.exe" ) { $cuda=$x} }}
|
||||
write-host "Building CUDA v11 backend libraries $cuda"
|
||||
$env:CUDAToolkit_ROOT=$cuda
|
||||
& cmake -B build\cuda_v11 --preset "CUDA 11" -T cuda="$cuda" -DCMAKE_CUDA_COMPILER="$cuda\bin\nvcc.exe" -G "Visual Studio 16 2019" --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="cuda_v11"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --build --preset "CUDA 12" --config Release --parallel $script:JOBS
|
||||
& cmake --build build\cuda_v11 --target ggml-cuda --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build --component "CUDA" --strip
|
||||
& cmake --install build\cuda_v11 --component "CUDA" --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function buildCUDA12() {
|
||||
mkdir -Force -path "${script:DIST_DIR}\"
|
||||
if ($script:ARCH -ne "arm64") {
|
||||
$hashEnv = @{}
|
||||
Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
|
||||
if ("$script:CUDA_DIRS".Contains("v12.8")) {
|
||||
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V12_8")) { $x=$hashEnv[$_]; if (test-path -literalpath "$x\bin\nvcc.exe" ) { $cuda=$x} }}
|
||||
write-host "Building CUDA v12 backend libraries $cuda"
|
||||
$env:CUDAToolkit_ROOT=$cuda
|
||||
& cmake -B build\cuda_v12 --preset "CUDA 12" -T cuda="$cuda" --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="cuda_v12"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --build build\cuda_v12 --target ggml-cuda --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build\cuda_v12 --component "CUDA" --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function buildCUDA13() {
|
||||
mkdir -Force -path "${script:DIST_DIR}\"
|
||||
if ($script:ARCH -ne "arm64") {
|
||||
$hashEnv = @{}
|
||||
Get-ChildItem env: | foreach { $hashEnv[$_.Name] = $_.Value }
|
||||
if ("$script:CUDA_DIRS".Contains("v13")) {
|
||||
$hashEnv.Keys | foreach { if ($_.Contains("CUDA_PATH_V13")) { $x=$hashEnv[$_]; if (test-path -literalpath "$x\bin\nvcc.exe" ) { $cuda=$x} }}
|
||||
$env:CUDAToolkit_ROOT=$cuda
|
||||
write-host "Building CUDA v13 backend libraries $cuda"
|
||||
& cmake -B build\cuda_v13 --preset "CUDA 13" -T cuda="$cuda" --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="cuda_v13"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --build build\cuda_v13 --target ggml-cuda --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build\cuda_v13 --component "CUDA" --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function buildROCm() {
|
||||
mkdir -Force -path "${script:DIST_DIR}\"
|
||||
if ($script:ARCH -ne "arm64") {
|
||||
if ($env:HIP_PATH) {
|
||||
write-host "Building ROCm backend libraries"
|
||||
if (-Not (get-command -ErrorAction silent ninja)) {
|
||||
@@ -113,7 +165,7 @@ function buildOllama() {
|
||||
$env:HIPCXX="${env:HIP_PATH}\bin\clang++.exe"
|
||||
$env:HIP_PLATFORM="amd"
|
||||
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
||||
& cmake --fresh --preset "ROCm 6" -G Ninja `
|
||||
& cmake --fresh -B build\rocm --preset "ROCm 6" -G Ninja -DOLLAMA_RUNNER_DIR="rocm" `
|
||||
-DCMAKE_C_COMPILER=clang `
|
||||
-DCMAKE_CXX_COMPILER=clang++ `
|
||||
-DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" `
|
||||
@@ -123,12 +175,29 @@ function buildOllama() {
|
||||
$env:HIPCXX=""
|
||||
$env:HIP_PLATFORM=""
|
||||
$env:CMAKE_PREFIX_PATH=""
|
||||
& cmake --build --preset "ROCm 6" --config Release --parallel $script:JOBS
|
||||
& cmake --build build\rocm --target ggml-hip --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build --component "HIP" --strip
|
||||
& cmake --install build\rocm --component "HIP" --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
Remove-Item -Path $script:DIST_DIR\lib\ollama\rocm\rocblas\library\*gfx906* -ErrorAction SilentlyContinue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function buildVulkan(){
|
||||
if ($env:VULKAN_SDK) {
|
||||
write-host "Building Vulkan backend libraries"
|
||||
& cmake --fresh --preset Vulkan --install-prefix $script:DIST_DIR -DOLLAMA_RUNNER_DIR="vulkan"
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --build --preset Vulkan --config Release --parallel $script:JOBS
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
& cmake --install build --component Vulkan --strip
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
}
|
||||
}
|
||||
|
||||
function buildOllama() {
|
||||
mkdir -Force -path "${script:DIST_DIR}\"
|
||||
write-host "Building ollama CLI"
|
||||
& go build -trimpath -ldflags "-s -w -X=github.com/ollama/ollama/version.Version=$script:VERSION -X=github.com/ollama/ollama/server.mode=release" .
|
||||
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
|
||||
@@ -223,7 +292,7 @@ function distZip() {
|
||||
write-host "Generating stand-alone distribution zip file ${script:SRC_DIR}\dist\ollama-windows-amd64.zip"
|
||||
Compress-Archive -CompressionLevel Optimal -Path "${script:SRC_DIR}\dist\windows-amd64\*" -DestinationPath "${script:SRC_DIR}\dist\ollama-windows-amd64.zip" -Force
|
||||
if (Test-Path -Path "${script:SRC_DIR}\dist\windows-amd64-rocm") {
|
||||
Move-Item -destination "${script:SRC_DIR}\dist\windows-amd64\lib\ollama\rocm" -path "${script:SRC_DIR}\dist\windows-amd64-rocm\lib\ollama"
|
||||
Move-Item -destination "${script:SRC_DIR}\dist\windows-amd64\lib\ollama\rocm" -path "${script:SRC_DIR}\dist\windows-amd64-rocm\lib\ollama\rocm"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -236,6 +305,11 @@ function distZip() {
|
||||
checkEnv
|
||||
try {
|
||||
if ($($args.count) -eq 0) {
|
||||
buildCPU
|
||||
buildCUDA12
|
||||
buildCUDA13
|
||||
buildROCm
|
||||
buildVulkan
|
||||
buildOllama
|
||||
buildApp
|
||||
gatherDependencies
|
||||
@@ -254,4 +328,4 @@ try {
|
||||
} finally {
|
||||
set-location $script:SRC_DIR
|
||||
$env:PKG_VERSION=""
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user