Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-10 07:46:59 +00:00)
This commit represents a complete rework after pulling the latest changes from the official ollama/ollama repository and re-applying the Tesla K80 compatibility patches.

## Key Changes

### CUDA Compute Capability 3.7 Support (Tesla K80)

- Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt
- Updated CMakePresets.json to include compute 3.7 in the "CUDA 11" preset
- Used 37-virtual (PTX with JIT compilation) for maximum compatibility

### Legacy Toolchain Compatibility

- **NVIDIA Driver**: 470.256.02 (the last driver series supporting Kepler/K80)
- **CUDA Version**: 11.4.4 (the newest CUDA toolkit supported by the 470 driver)
- **GCC Version**: 10.5.0 (required by CUDA 11.4's host_config.h)

### CPU Architecture Trade-offs

The GCC 10.5 limitation forces us to give up some newer CPU optimizations:

- The Alderlake CPU variant is enabled WITHOUT AVX_VNNI (which requires GCC 11+)
- It still supports SSE4.2, AVX, F16C, AVX2, BMI2, and FMA
- Performance impact: roughly 3-7% on newer CPUs (acceptable in exchange for K80 compatibility)

### Build System Updates

- Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7
- Added the -Wno-deprecated-gpu-targets flag to suppress deprecation warnings
- Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI (hedged sketches of both CMake changes follow this message)

### Upstream Sync

Merged the latest llama.cpp changes, including:

- Enhanced KV cache management with ISWA and hybrid memory support
- Improved multimodal support (the mtmd framework)
- New model architectures (Gemma3, Llama4, Qwen3, etc.)
- GPU backend improvements for CUDA, Metal, and ROCm
- Updated quantization support and GGUF format handling

### Documentation

- Updated CLAUDE.md with comprehensive build instructions
- Documented the toolchain constraints and CPU architecture trade-offs
- Removed outdated CI/CD workflows (tesla-k80-*.yml)
- Cleaned up temporary development artifacts

## Rationale

This fork maintains Tesla K80 GPU support (compute 3.7), which official Ollama dropped because of the legacy driver and CUDA requirements it imposes. The toolchain constraint forms a rigid dependency chain:

- K80 → Driver 470 → CUDA 11.4 → GCC 10 → no AVX_VNNI

We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12 GB of VRAM per GPU).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
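For readers who want to see what those build-system edits amount to, here are two minimal sketches in CMake. They are assumptions reconstructed from the commit message, not quotes of the fork's files: the architecture list beyond 37-virtual and the exact placement of each command are illustrative.

```cmake
# Hypothetical sketch of the compute 3.7 change in
# ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt (not the exact file).
# "37-virtual" embeds PTX only, so the 470 driver JIT-compiles kernels for
# the K80 on first load (and caches the result); the newer real
# architectures keep their precompiled SASS.
set(CMAKE_CUDA_ARCHITECTURES "37-virtual;50;61;70;75;80;86")

# Kepler targets are deprecated in CUDA 11.x, so nvcc warns on every
# translation unit; silence that noise.
string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")
```

The Alderlake change plausibly amounts to dropping one feature flag from ggml's CPU-variant definition in ml/backend/ggml/ggml/src/CMakeLists.txt; the exact call below is an assumption modeled on upstream ggml's variant mechanism:

```cmake
# Hypothetical sketch: the alderlake variant with AVX_VNNI removed, since
# GCC 10.5 (pinned by CUDA 11.4) cannot compile VNNI intrinsics.
# Upstream defines the variant roughly as:
#   ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA)
```

Choosing PTX over sm_37 SASS trades a one-time JIT cost at first model load for independence from how long nvcc keeps accepting the deprecated real target.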
163 lines · 3.6 KiB · JSON
{
  "$schema": "https://mintlify.com/docs.json",
  "name": "Ollama",
  "colors": {
    "primary": "#000",
    "light": "#b5b5b5",
    "dark": "#000"
  },
  "favicon": "/images/favicon.png",
  "logo": {
    "light": "/images/logo.png",
    "dark": "/images/logo-dark.png",
    "href": "https://ollama.com"
  },
  "theme": "maple",
  "background": {
    "color": {
      "light": "#ffffff",
      "dark": "#000000"
    }
  },
  "fonts": {
    "family": "system-ui",
    "heading": {
      "family": "system-ui"
    },
    "body": {
      "family": "system-ui"
    }
  },
  "styling": {
    "codeblocks": "system"
  },
  "contextual": {
    "options": ["copy"]
  },
  "navbar": {
    "links": [
      {
        "label": "Sign in",
        "href": "https://ollama.com/signin"
      }
    ],
    "primary": {
      "type": "button",
      "label": "Download",
      "href": "https://ollama.com/download"
    }
  },
  "api": {
    "playground": {
      "display": "simple"
    },
    "examples": {
      "languages": ["curl"]
    }
  },
  "redirects": [
    {
      "source": "/openai",
      "destination": "/api/openai-compatibility"
    },
    {
      "source": "/api/openai",
      "destination": "/api/openai-compatibility"
    }
  ],
  "navigation": {
    "tabs": [
      {
        "tab": "Documentation",
        "groups": [
          {
            "group": "Get started",
            "pages": [
              "index",
              "quickstart",
              "/cloud"
            ]
          },
          {
            "group": "Capabilities",
            "pages": [
              "/capabilities/streaming",
              "/capabilities/thinking",
              "/capabilities/structured-outputs",
              "/capabilities/vision",
              "/capabilities/embeddings",
              "/capabilities/tool-calling",
              "/capabilities/web-search"
            ]
          },
          {
            "group": "Integrations",
            "pages": [
              "/integrations/vscode",
              "/integrations/jetbrains",
              "/integrations/codex",
              "/integrations/cline",
              "/integrations/droid",
              "/integrations/goose",
              "/integrations/zed",
              "/integrations/roo-code",
              "/integrations/n8n",
              "/integrations/xcode"
            ]
          },
          {
            "group": "More information",
            "pages": [
              "/cli",
              "/modelfile",
              "/context-length",
              "/linux",
              "/macos",
              "/windows",
              "/docker",
              "/import",
              "/faq",
              "/gpu",
              "/troubleshooting"
            ]
          }
        ]
      },
      {
        "tab": "API Reference",
        "openapi": "/openapi.yaml",
        "groups": [
          {
            "group": "API Reference",
            "pages": [
              "/api/index",
              "/api/authentication",
              "/api/streaming",
              "/api/usage",
              "/api/errors",
              "/api/openai-compatibility"
            ]
          },
          {
            "group": "Endpoints",
            "pages": [
              "POST /api/generate",
              "POST /api/chat",
              "POST /api/embed",
              "GET /api/tags",
              "GET /api/ps",
              "POST /api/show",
              "POST /api/create",
              "POST /api/copy",
              "POST /api/pull",
              "POST /api/push",
              "DELETE /api/delete",
              "GET /api/version"
            ]
          }
        ]
      }
    ]
  }
}