Add Docker-based build system with GPU-enabled builder and runtime containers

Shang Chieh Tseng
2025-11-07 12:48:05 +08:00
parent 5744fb792a
commit 94bbfbb2e7
8 changed files with 750 additions and 2 deletions

.gitignore

@@ -15,3 +15,4 @@ __debug_bin*
 llama/build
 llama/vendor
 /ollama
+docker/output/

CLAUDE.md

@@ -74,11 +74,12 @@ This document tracks development goals and notes for this Ollama repository fork
 rm -rf build
 go clean -cache
-# Configure the build (specify GCC 10.5 explicitly)
+# Configure the build (use "CUDA 11" for all CUDA 11.4 GPUs, or "CUDA 11 K80" for the K80)
 CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --preset "CUDA 11"
+CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --preset "CUDA 11 K80"
 # Build the C/C++/CUDA libraries
-CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --build build -j 48
+CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --build build -j$(nproc)
 # Build the Go binary
 go build -o ollama .

docker/Makefile

@@ -0,0 +1,288 @@
# Makefile for building Ollama with GPU-enabled builder container
#
# This Makefile uses a pre-built builder container with CUDA support and GPU access
# to compile Ollama with compute capability 3.7 support (Tesla K80).
#
# Usage:
# make build - Build ollama binary and libraries
# make clean - Remove build artifacts from host
# make clean-all - Remove build artifacts and stop/remove containers
# make shell - Open a shell in the builder container
# make test - Test the built binary

# Configuration
# NOTE: SOURCE_DIR must be assigned before the := variables that reference it.
SOURCE_DIR := $(shell cd .. && pwd)
BUILD_DIR := $(SOURCE_DIR)/build
DIST_DIR := $(SOURCE_DIR)/dist
OUTPUT_DIR := $(SOURCE_DIR)/docker/output

BUILDER_IMAGE := ollama37-builder
BUILDER_TAG := latest
BUILDER_DOCKERFILE := $(SOURCE_DIR)/docker/builder/Dockerfile
CONTAINER_NAME := ollama37-builder

RUNTIME_IMAGE := ollama37-runtime
RUNTIME_TAG := latest
RUNTIME_DOCKERFILE := $(SOURCE_DIR)/docker/runtime/Dockerfile

# CMake preset to use
CMAKE_PRESET := CUDA 11

# Detect number of CPU cores for parallel compilation
NPROC := $(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)

.PHONY: all build clean clean-all shell test help build-builder clean-builder ensure-builder start-builder stop-builder copy-source run-cmake run-build run-go-build copy-artifacts build-runtime run-runtime stop-runtime clean-runtime

# Default target
all: build

# ===== Builder Image Targets =====

# Build the builder Docker image from builder/Dockerfile
build-builder:
	@echo "→ Building builder Docker image..."
	@echo "  Building Docker image $(BUILDER_IMAGE):$(BUILDER_TAG)..."
	@cd $(SOURCE_DIR)/docker/builder && docker build \
		-t $(BUILDER_IMAGE):$(BUILDER_TAG) \
		.
	@echo ""
	@echo "✓ Builder image built successfully!"
	@echo "  Image: $(BUILDER_IMAGE):$(BUILDER_TAG)"
	@echo ""
	@echo "To use this custom builder:"
	@echo "  make build BUILDER_IMAGE=$(BUILDER_IMAGE):$(BUILDER_TAG)"

# Clean builder image
clean-builder:
	@echo "→ Cleaning builder image..."
	@docker rmi $(BUILDER_IMAGE):$(BUILDER_TAG) 2>/dev/null || echo "  No builder image to remove"
	@echo "  Builder image cleaned"

# ===== Build Targets =====

# Main build target - orchestrates the entire build process
build: ensure-builder start-builder copy-source run-cmake run-build run-go-build copy-artifacts
	@echo ""
	@echo "✓ Build completed successfully!"
	@echo "  Binary: $(OUTPUT_DIR)/ollama"
	@echo "  Libraries: $(OUTPUT_DIR)/lib/"
	@echo ""
	@echo "To test the binary:"
	@echo "  cd $(OUTPUT_DIR) && ./ollama --version"

# Ensure builder image exists (build if not present)
ensure-builder:
	@if ! docker images --format '{{.Repository}}:{{.Tag}}' | grep -q "^$(BUILDER_IMAGE):$(BUILDER_TAG)$$"; then \
		echo "→ Builder image not found. Building $(BUILDER_IMAGE):$(BUILDER_TAG)..."; \
		$(MAKE) build-builder; \
	else \
		echo "→ Builder image $(BUILDER_IMAGE):$(BUILDER_TAG) already exists"; \
	fi

# Start the builder container with GPU access
start-builder:
	@echo "→ Starting builder container with GPU access..."
	@if docker ps -a --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \
		echo "  Container $(CONTAINER_NAME) already exists, checking status..."; \
		if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \
			echo "  Container is already running"; \
		else \
			echo "  Starting existing container..."; \
			docker start $(CONTAINER_NAME); \
		fi; \
	else \
		echo "  Creating new container..."; \
		docker run --rm -d \
			--name $(CONTAINER_NAME) \
			--runtime=nvidia \
			--gpus all \
			$(BUILDER_IMAGE):$(BUILDER_TAG) \
			sleep infinity; \
		sleep 2; \
		docker exec $(CONTAINER_NAME) nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader; \
	fi

# Stop and remove the builder container
stop-builder:
	@echo "→ Stopping builder container..."
	@if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \
		docker stop $(CONTAINER_NAME); \
		echo "  Container stopped"; \
	else \
		echo "  Container not running"; \
	fi

# Copy source code to the container
copy-source: start-builder
	@echo "→ Copying source code to container..."
	@docker cp $(SOURCE_DIR)/. $(CONTAINER_NAME):/usr/local/src/ollama37/
	@echo "  Source code copied"

# Run CMake configuration
run-cmake: copy-source
	@echo "→ Running CMake configuration (preset: $(CMAKE_PRESET))..."
	@docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash -c 'cmake --preset "$(CMAKE_PRESET)"'

# Run CMake build (C/C++/CUDA compilation)
run-build: run-cmake
	@echo "→ Building C/C++/CUDA libraries (using $(NPROC) cores)..."
	@docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash -c 'cmake --build build -j$(NPROC)'

# Run Go build
run-go-build: run-build
	@echo "→ Building Go binary..."
	@docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash -c 'go build -o ollama .'

# Copy build artifacts from container to host
copy-artifacts: run-go-build
	@echo "→ Copying build artifacts to host..."
	@mkdir -p $(OUTPUT_DIR)/lib
	@docker cp $(CONTAINER_NAME):/usr/local/src/ollama37/ollama $(OUTPUT_DIR)/
	@docker cp $(CONTAINER_NAME):/usr/local/src/ollama37/build/lib/ollama/. $(OUTPUT_DIR)/lib/
	@echo "  Artifacts copied to $(OUTPUT_DIR)"
	@echo ""
	@echo "  Binary: $(OUTPUT_DIR)/ollama"
	@ls -lh $(OUTPUT_DIR)/ollama
	@echo ""
	@echo "  Libraries:"
	@ls -lh $(OUTPUT_DIR)/lib/

# Open an interactive shell in the builder container
shell: start-builder
	@echo "→ Opening shell in builder container..."
	@docker exec -it -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash

# Test the built binary
test: build
	@echo "→ Testing ollama binary..."
	@cd $(OUTPUT_DIR) && LD_LIBRARY_PATH=$$PWD/lib:$$LD_LIBRARY_PATH ./ollama --version

# Clean build artifacts from host
clean:
	@echo "→ Cleaning build artifacts from host..."
	@rm -rf $(OUTPUT_DIR)
	@echo "  Cleaned $(OUTPUT_DIR)"

# Clean everything including container
clean-all: clean stop-builder
	@echo "→ Cleaning build directory in source..."
	@rm -rf $(BUILD_DIR)
	@rm -rf $(DIST_DIR)
	@echo "  All cleaned"

# ===== Runtime Image Targets =====

# Build the runtime Docker image from artifacts
build-runtime:
	@echo "→ Building runtime Docker image..."
	@if [ ! -f "$(OUTPUT_DIR)/ollama" ]; then \
		echo "Error: ollama binary not found in $(OUTPUT_DIR)"; \
		echo "Run 'make build' first to create the artifacts"; \
		exit 1; \
	fi
	@if [ ! -d "$(OUTPUT_DIR)/lib" ]; then \
		echo "Error: lib directory not found in $(OUTPUT_DIR)"; \
		echo "Run 'make build' first to create the artifacts"; \
		exit 1; \
	fi
	@echo "  Building Docker image $(RUNTIME_IMAGE):$(RUNTIME_TAG)..."
	@docker build \
		-f $(RUNTIME_DOCKERFILE) \
		-t $(RUNTIME_IMAGE):$(RUNTIME_TAG) \
		$(SOURCE_DIR)
	@echo ""
	@echo "✓ Runtime image built successfully!"
	@echo "  Image: $(RUNTIME_IMAGE):$(RUNTIME_TAG)"
	@echo ""
	@$(MAKE) stop-builder
	@echo ""
	@echo "To run the image:"
	@echo "  make run-runtime"
	@echo ""
	@echo "Or manually:"
	@echo "  docker run --rm -it --runtime=nvidia --gpus all -p 11434:11434 $(RUNTIME_IMAGE):$(RUNTIME_TAG)"

# Run the runtime container
run-runtime:
	@echo "→ Starting runtime container..."
	@if docker ps -a --format '{{.Names}}' | grep -q "^ollama37-runtime$$"; then \
		echo "  Stopping existing container..."; \
		docker stop ollama37-runtime 2>/dev/null || true; \
		docker rm ollama37-runtime 2>/dev/null || true; \
	fi
	@echo "  Starting new container..."
	@docker run -d \
		--name ollama37-runtime \
		--runtime=nvidia \
		--gpus all \
		-p 11434:11434 \
		-v ollama-data:/root/.ollama \
		$(RUNTIME_IMAGE):$(RUNTIME_TAG)
	@sleep 2
	@echo ""
	@echo "✓ Runtime container started!"
	@echo "  Container: ollama37-runtime"
	@echo "  API: http://localhost:11434"
	@echo ""
	@echo "Check logs:"
	@echo "  docker logs -f ollama37-runtime"
	@echo ""
	@echo "Test the API:"
	@echo "  curl http://localhost:11434/api/tags"
	@echo ""
	@echo "Stop the container:"
	@echo "  make stop-runtime"

# Stop the runtime container
stop-runtime:
	@echo "→ Stopping runtime container..."
	@if docker ps --format '{{.Names}}' | grep -q "^ollama37-runtime$$"; then \
		docker stop ollama37-runtime; \
		docker rm ollama37-runtime; \
		echo "  Container stopped and removed"; \
	else \
		echo "  Container not running"; \
	fi

# Clean runtime image
clean-runtime:
	@echo "→ Cleaning runtime image..."
	@docker rmi $(RUNTIME_IMAGE):$(RUNTIME_TAG) 2>/dev/null || echo "  No runtime image to remove"
	@docker volume rm ollama-data 2>/dev/null || echo "  No volume to remove"
	@echo "  Runtime image cleaned"

# Help target
help:
	@echo "Ollama Build System (with GPU-enabled builder)"
	@echo ""
	@echo "Builder Image Targets:"
	@echo "  make build-builder  - Build custom builder Docker image"
	@echo "  make clean-builder  - Remove builder image"
	@echo ""
	@echo "Build Targets:"
	@echo "  make build          - Build ollama binary and libraries (default)"
	@echo "  make clean          - Remove build artifacts from host"
	@echo "  make clean-all      - Remove all build artifacts and stop container"
	@echo "  make shell          - Open a shell in the builder container"
	@echo "  make test           - Test the built binary"
	@echo ""
	@echo "Runtime Image Targets:"
	@echo "  make build-runtime  - Build Docker runtime image from artifacts"
	@echo "  make run-runtime    - Start the runtime container"
	@echo "  make stop-runtime   - Stop the runtime container"
	@echo "  make clean-runtime  - Remove runtime image and volumes"
	@echo ""
	@echo "  make help           - Show this help message"
	@echo ""
	@echo "Configuration:"
	@echo "  BUILDER_IMAGE:  $(BUILDER_IMAGE):$(BUILDER_TAG)"
	@echo "  RUNTIME_IMAGE:  $(RUNTIME_IMAGE):$(RUNTIME_TAG)"
	@echo "  CONTAINER_NAME: $(CONTAINER_NAME)"
	@echo "  CMAKE_PRESET:   $(CMAKE_PRESET)"
	@echo "  PARALLEL_JOBS:  $(NPROC)"
	@echo ""
	@echo "Environment:"
	@echo "  SOURCE_DIR: $(SOURCE_DIR)"
	@echo "  OUTPUT_DIR: $(OUTPUT_DIR)"

docker/README.md

@@ -0,0 +1,257 @@
# Ollama37 Docker Build System
**Makefile-based build system for Ollama with CUDA 11.4 and Compute Capability 3.7 support (Tesla K80)**
## Overview
This fork maintains support for legacy NVIDIA GPUs (Tesla K80, Compute Capability 3.7) using CUDA 11.4 and GCC 10. The upstream Ollama project dropped CC 3.7 support when transitioning to CUDA 12.
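In practice this means compute capability 3.7 must be in the CUDA architecture list at configure time; the `CUDA 11 K80` CMake preset handles that. A hand-rolled equivalent might look like the sketch below (the exact flags are an assumption — `CMakePresets.json` is the source of truth):
```bash
# Roughly what the K80 preset pins down; architecture value 37 = CC 3.7
cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES=37
```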
### Key Features
- GPU-enabled build container with automatic architecture detection
- Makefile orchestration for entire workflow
- Production-ready runtime image (3.1GB)
- Docker Compose support
## Prerequisites
- Docker with NVIDIA Container Runtime
- NVIDIA driver from the 470 branch (the last driver series with Tesla K80/Kepler support)
- Verify GPU access:
```bash
docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.4.3-base-rockylinux8 nvidia-smi
```
## Quick Start
### 1. Build the Builder Image (First Time Only)
```bash
cd /path/to/ollama37/docker
make build-builder
```
This builds the `ollama37-builder:latest` image containing CUDA 11.4, GCC 10, CMake, and Go. Takes ~5 minutes first time.
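Before the first full build, you can spot-check the toolchain inside the image (a quick sanity pass; version output will vary with the Rocky Linux 8 packages):
```bash
# Verify the compilers and tools the builder provides
docker run --rm ollama37-builder:latest \
  scl enable gcc-toolset-10 -- bash -c 'gcc --version | head -1; cmake --version | head -1; go version; nvcc --version | tail -1'
```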
### 2. Build Ollama
```bash
# Build binary and libraries (~7 minutes)
make build
# Create runtime Docker image (~2 minutes)
make build-runtime
```
### 3. Run
```bash
# Option A: Using docker-compose (recommended)
docker-compose up -d
curl http://localhost:11434/api/tags
# Option B: Using Makefile
make run-runtime
curl http://localhost:11434/api/tags
```
## Directory Structure
```
docker/
├── Makefile # Build orchestration
├── docker-compose.yml # Deployment configuration
├── builder/
│ └── Dockerfile # Builder image definition
├── runtime/
│ └── Dockerfile # Runtime image definition
└── output/ # Build artifacts (created by make build)
├── ollama # Binary (61MB)
└── lib/ # Libraries (109MB)
```
## Make Targets
### Builder Image
| Command | Description |
|---------|-------------|
| `make build-builder` | Build builder Docker image |
| `make clean-builder` | Remove builder image |
### Build
| Command | Description |
|---------|-------------|
| `make build` | Build binary and libraries |
| `make test` | Test the built binary |
| `make shell` | Open shell in builder container |
| `make clean` | Remove output artifacts |
| `make clean-all` | Clean everything + stop containers |
### Runtime
| Command | Description |
|---------|-------------|
| `make build-runtime` | Build Docker runtime image |
| `make run-runtime` | Start runtime container |
| `make stop-runtime` | Stop runtime container |
| `make clean-runtime` | Remove image and volumes |
### Help
```bash
make help # Show all available targets
```
## Usage Examples
### Development Workflow
```bash
# First time setup
make build-builder
make build
make test
# After code changes
make build
make build-runtime
make run-runtime
```
### Production Deployment
```bash
make build-builder
make build build-runtime
docker-compose up -d
```
### Using the API
```bash
# List models
curl http://localhost:11434/api/tags
# Pull a model
curl http://localhost:11434/api/pull -d '{"name": "gemma3:4b"}'
# Run inference
curl http://localhost:11434/api/generate -d '{
"model": "gemma3:4b",
"prompt": "Why is the sky blue?",
"stream": false
}'
```
### Using the CLI
```bash
# List/pull/run models
docker exec ollama37-runtime ollama list
docker exec ollama37-runtime ollama pull gemma3:4b
docker exec ollama37-runtime ollama run gemma3:4b "Hello!"
```
## GPU Support
### Supported Compute Capabilities
- **3.7** - Tesla K80 (primary target)
- **5.0-8.6** - Maxwell, Pascal, Volta, Turing, Ampere
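To confirm what a container actually sees, the same query the Makefile's `start-builder` target runs works against any GPU-enabled container:
```bash
docker exec ollama37-runtime nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader
```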
### Tesla K80 Recommendations
**VRAM:** 12GB per GPU (24GB for dual-GPU K80)
**Model sizes:**
- Small (3-4B): Full precision
- Medium (7-8B): Q4_K_M quantization
- Large (13B+): Q4_0 quantization or multi-GPU
**Multi-GPU:**
```bash
docker run --gpus all ... # Use all GPUs
docker run --gpus '"device=0"' ... # Use specific GPU
```
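A fuller sketch, pinning the runtime container to the first GPU (mirrors the `run-runtime` target; the device index is an example):
```bash
docker run -d --name ollama37-runtime \
  --runtime=nvidia --gpus '"device=0"' \
  -p 11434:11434 -v ollama-data:/root/.ollama \
  ollama37-runtime:latest
```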
## Configuration
### Environment Variables (docker-compose.yml)
| Variable | Default | Description |
|----------|---------|-------------|
| `OLLAMA_HOST` | `0.0.0.0:11434` | Server listen address |
| `OLLAMA_MODELS` | `/root/.ollama/models` | Model storage path |
| `NVIDIA_VISIBLE_DEVICES` | `all` | Which GPUs to use |
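To check the values a running container actually picked up (optional; just a quick inspection):
```bash
docker exec ollama37-runtime env | grep -E 'OLLAMA|NVIDIA'
```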
### Makefile Variables
```bash
make build CMAKE_PRESET="CUDA 11 K80" # Use different preset
make build NPROC=4 # Control parallel jobs
make build-runtime RUNTIME_IMAGE=my-ollama # Custom image name
```
## Troubleshooting
### GPU not detected during build
```bash
make shell
nvidia-smi # Should show your GPU
```
### Out of memory during build
```bash
make build NPROC=2 # Reduce parallel jobs
```
### Container won't start
```bash
docker logs ollama37-runtime
# or
docker-compose logs
```
### GPU not accessible in runtime
```bash
docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
```
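If that base check passes but Ollama still reports no GPU, look inside the runtime container itself (the grep pattern is just a starting point):
```bash
docker exec ollama37-runtime nvidia-smi
docker logs ollama37-runtime 2>&1 | grep -i cuda
```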
### Port already in use
```bash
# Edit docker-compose.yml
ports:
- "11435:11434" # Change host port
```
## Advanced
### Custom Builder Image
The builder is automatically built from `builder/Dockerfile` when running `make build` for the first time.
To customize (e.g., change CUDA version, add dependencies):
```bash
vim docker/builder/Dockerfile
make clean-builder build-builder
make build
```
See `builder/README.md` for details.
### Clean Docker Build Cache
```bash
# Remove all build cache
docker builder prune -af
# Nuclear option (cleans everything)
docker system prune -af
```
## Documentation
- **[../CLAUDE.md](../CLAUDE.md)** - Project goals and implementation notes
- **[builder/README.md](builder/README.md)** - Builder image documentation
## License
MIT (same as upstream Ollama)

docker/builder/Dockerfile

@@ -0,0 +1,55 @@
FROM nvidia/cuda:11.4.3-devel-rockylinux8

# Update OS and install CUDA toolkit 11.4 and NVIDIA driver 470
#RUN dnf -y update\
# && dnf -y install epel-release\
# && dnf -y config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo\
# && dnf -y module install nvidia-driver:470-dkms\
# && dnf -y install cuda-11-4
# Post install, setup path
#COPY cuda-11.4.sh /etc/profile.d/cuda-11.4.sh

# Install gcc 10
#RUN dnf -y install wget unzip lbzip2\
# && dnf -y groupinstall "Development Tools"\
# && cd /usr/local/src\
# && wget https://github.com/gcc-mirror/gcc/archive/refs/heads/releases/gcc-10.zip\
# && unzip gcc-10.zip\
# && cd gcc-releases-gcc-10\
# && contrib/download_prerequisites\
# && mkdir /usr/local/gcc-10\
# && cd /usr/local/gcc-10\
# && /usr/local/src/gcc-releases-gcc-10/configure --disable-multilib\
# && make -j ${nproc}\
# && make install
RUN dnf install -y gcc-toolset-10-gcc gcc-toolset-10-gcc-c++ gcc-toolset-10-runtime

# Post install, setup path
#COPY gcc-10.sh /etc/profile.d/gcc-10.sh
#COPY gcc-10.sh /etc/ld.so.conf.d/gcc-10.conf
#RUN scl enable gcc-toolset-10 bash

# Install cmake
#ENV LD_LIBRARY_PATH="/usr/local/lib64:/usr/local/cuda-11.4/lib64"
#ENV PATH="/usr/local/cuda-11.4/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
#RUN dnf -y install openssl-devel\
# && cd /usr/local/src\
# && wget https://github.com/Kitware/CMake/releases/download/v4.0.0/cmake-4.0.0.tar.gz\
# && tar xvf cmake-4.0.0.tar.gz\
# && mkdir /usr/local/cmake-4\
# && cd /usr/local/cmake-4\
# && /usr/local/src/cmake-4.0.0/configure\
# && make -j ${nproc}\
# && make install
RUN dnf -y install cmake

# Install go
#RUN cd /usr/local\
# && wget https://go.dev/dl/go1.24.2.linux-amd64.tar.gz\
# && tar xvf go1.24.2.linux-amd64.tar.gz
RUN dnf -y install golang

# Post install, setup path
#COPY go-1.24.2.sh /etc/profile.d/go-1.24.2.sh
#ENV PATH="$PATH:/usr/local/go/bin"

docker/builder/README.md

@@ -0,0 +1,58 @@
# Ollama37 Builder Image
This directory contains the Dockerfile for building the `ollama37-builder:latest` image.
## What's Inside
The builder image includes:
- **Base**: `nvidia/cuda:11.4.3-devel-rockylinux8`
- **GCC 10**: `gcc-toolset-10` (the newest GCC supported by CUDA 11.4's nvcc)
- **CMake**: System package
- **Go**: System package
## Building the Builder Image
The builder image is **automatically built** by the Makefile when you run `make build` for the first time.
To manually build the builder image:
```bash
cd /path/to/ollama37/docker
make build-builder
```
Or using Docker directly:
```bash
cd /path/to/ollama37/docker/builder
docker build -t ollama37-builder:latest .
```
## Using the Builder Image
The Makefile handles this automatically, but for reference:
```bash
# Start builder container with GPU access
docker run --rm -d \
--name ollama37-builder \
--runtime=nvidia \
--gpus all \
ollama37-builder:latest \
sleep infinity
# Use the container
docker exec -it ollama37-builder bash
```
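From there, the manual equivalent of `make build` is the Makefile's exec steps run by hand (container paths match the Makefile; substitute your own source checkout for `/path/to/ollama37`):
```bash
# Mirrors the run-cmake, run-build, and run-go-build targets
docker cp /path/to/ollama37/. ollama37-builder:/usr/local/src/ollama37/
docker exec -w /usr/local/src/ollama37 ollama37-builder \
  scl enable gcc-toolset-10 -- bash -c 'cmake --preset "CUDA 11" && cmake --build build -j$(nproc) && go build -o ollama .'
```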
## Customization
If you need to modify the builder (e.g., change CUDA version, add packages):
1. Edit `Dockerfile` in this directory
2. Rebuild: `make clean-builder build-builder`
3. Build your project: `make build`
## Archived Builder
The `archived/` subdirectory contains an older Dockerfile that built GCC and CMake from source (~80 minutes). The current version uses Rocky Linux system packages for much faster builds (~5 minutes).

docker/docker-compose.yml

@@ -0,0 +1,32 @@
version: "3.8"

services:
  ollama:
    image: ollama37-runtime:latest
    container_name: ollama37-runtime
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "/usr/local/bin/ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 5s

volumes:
  ollama-data:
    name: ollama-data

docker/runtime/Dockerfile

@@ -0,0 +1,56 @@
# Dockerfile for Ollama37 Runtime Image
#
# This creates a minimal runtime-only image from pre-built artifacts.
# The artifacts should be built first using the Makefile in ../
#
# Build context should be the project root (../..) so we can access docker/output/
#
# Usage:
#   cd /path/to/ollama37
#   make -C docker build          # Build the binary first
#   make -C docker build-runtime  # Create the runtime image

FROM nvidia/cuda:11.4.3-runtime-rockylinux8

# Install minimal runtime dependencies
# Note: Not running 'dnf update' to keep base image stable and build faster
RUN dnf -y install \
        libgomp \
        libstdc++ \
    && dnf clean all \
    && rm -rf /var/cache/dnf

# Create directory structure
RUN mkdir -p /usr/local/bin /usr/local/lib/ollama

# Copy the ollama binary from build output
COPY docker/output/ollama /usr/local/bin/ollama

# Copy all shared libraries from build output
COPY docker/output/lib/ /usr/local/lib/ollama/

# Prepend our library path to the existing LD_LIBRARY_PATH
# Base image already has: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV LD_LIBRARY_PATH="/usr/local/lib/ollama:${LD_LIBRARY_PATH}"

# Base image already sets these, but we can override if needed:
#   NVIDIA_DRIVER_CAPABILITIES=compute,utility
#   NVIDIA_VISIBLE_DEVICES=all

# Ollama server configuration
ENV OLLAMA_HOST=0.0.0.0:11434

# Expose the Ollama API port
EXPOSE 11434

# Create a data directory for models
RUN mkdir -p /root/.ollama
VOLUME ["/root/.ollama"]

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD /usr/local/bin/ollama list || exit 1

# Set entrypoint and default command
ENTRYPOINT ["/usr/local/bin/ollama"]
CMD ["serve"]