Replaced the complex two-stage build (builder → runtime) with a single-stage Dockerfile that builds and runs Ollama in one image. This fixes model loading failures caused by missing CUDA libraries and LD_LIBRARY_PATH mismatches in the previous multi-stage design.

Changes:
- Add docker/Dockerfile: single-stage build with GCC 10, CMake 4, Go 1.25.3, and CUDA 11.4 (sketched below)
- Clone source from https://github.com/dogkeeper886/ollama37
- Compile Ollama with the "CUDA 11" preset for the Tesla K80 (compute capability 3.7)
- Keep the complete CUDA toolkit and all libraries in the final image (~20GB)
- Update docker-compose.yml: simplified config, uses the ollama37:latest image
- Update docker/README.md: new build instructions and architecture docs

Trade-off: a larger image (~20GB vs ~3GB) in exchange for guaranteed compatibility and reliable GPU backend operation. Every library remains at the path the build used, so models load properly on the Tesla K80.

Tested: successfully runs gemma3:1b on a Tesla K80

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
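The commit describes docker/Dockerfile but does not inline it. A minimal sketch of the single-stage layout it describes is below. Only the tool versions, the clone URL, the preset name, and the ollama binary path (taken from the healthcheck) come from this page; the base image tag, download URLs, and build/install steps are assumptions, not the fork's actual file.

```dockerfile
# Hypothetical sketch of the single-stage build described in the commit
# message above -- not the actual docker/Dockerfile.
FROM nvidia/cuda:11.4.3-devel-ubuntu20.04

ENV DEBIAN_FRONTEND=noninteractive

# GCC 10 is the newest host compiler that nvcc 11.4 accepts.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc-10 g++-10 make git curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
ENV CC=gcc-10 CXX=g++-10

# CMake 4 from the upstream binary release (exact point release assumed).
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v4.0.0/cmake-4.0.0-linux-x86_64.tar.gz \
        | tar xz -C /opt \
    && ln -s /opt/cmake-4.0.0-linux-x86_64/bin/cmake /usr/local/bin/cmake

# Go 1.25.3, as pinned in the commit message.
RUN curl -fsSL https://go.dev/dl/go1.25.3.linux-amd64.tar.gz | tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH

# Build the fork with the "CUDA 11" preset (compute capability 3.7, i.e.
# Tesla K80). Preset names and install layout follow upstream Ollama's
# CMakePresets.json; treat these commands as an approximation.
RUN git clone https://github.com/dogkeeper886/ollama37 /ollama
WORKDIR /ollama
RUN cmake --preset "CUDA 11" \
    && cmake --build --preset "CUDA 11" \
    && cmake --install build \
    && go build -o /usr/local/bin/ollama .

# No second stage: the full CUDA toolkit and every library the build linked
# against stay in the image, so no LD_LIBRARY_PATH fixups are needed at runtime.
EXPOSE 11434
ENV OLLAMA_HOST=0.0.0.0:11434
ENTRYPOINT ["/usr/local/bin/ollama"]
CMD ["serve"]
```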
docker-compose.yml (34 lines, 711 B, YAML)
version: "3.8"

services:
  ollama:
    image: ollama37:latest
    container_name: ollama37
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "/usr/local/bin/ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 5s

volumes:
  ollama-data:
    name: ollama-data
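For reference, a hedged usage sketch; the image tag, container name, and model name come from this page, while the build context and Dockerfile path are assumptions about the repo layout:

```sh
# Build the image locally, then bring the stack up (paths are assumptions).
docker build -t ollama37:latest -f docker/Dockerfile .
docker compose up -d

# Verify GPU inference with the model the commit reports testing.
docker exec -it ollama37 ollama run gemma3:1b
```

Note that the compose file requests the GPU two ways: `runtime: nvidia` targets the legacy nvidia-docker2 runtime, while the `deploy.resources.reservations.devices` block is the Compose v2 device-request mechanism. Keeping both lets the same file work under either host setup.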