Add Docker-based build system with GPU-enabled builder and runtime containers

Shang Chieh Tseng
2025-11-07 12:48:05 +08:00
parent 5744fb792a
commit 94bbfbb2e7
8 changed files with 750 additions and 2 deletions

.gitignore

@@ -15,3 +15,4 @@ __debug_bin*
 llama/build
 llama/vendor
 /ollama
+docker/output/

CLAUDE.md

@@ -74,11 +74,12 @@ This document tracks development goals and notes for this Ollama repository fork
 rm -rf build
 go clean -cache
-# Configure the build (specify GCC 10.5 explicitly)
+# Configure the build (use "CUDA 11" for all CUDA 11.4 GPUs, or "CUDA 11 K80" for the K80)
 CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --preset "CUDA 11"
+CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --preset "CUDA 11 K80"
 # Build the C/C++/CUDA libraries
-CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --build build -j 48
+CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --build build -j$(nproc)
 # Build the Go binary
 go build -o ollama .

docker/Makefile

@@ -0,0 +1,288 @@
# Makefile for building Ollama with GPU-enabled builder container
#
# This Makefile uses a pre-built builder container with CUDA support and GPU access
# to compile Ollama with compute capability 3.7 support (Tesla K80).
#
# Usage:
# make build - Build ollama binary and libraries
# make clean - Remove build artifacts from host
# make clean-all - Remove build artifacts and stop/remove containers
# make shell - Open a shell in the builder container
# make test - Test the built binary

# Configuration
# NOTE: SOURCE_DIR must be assigned before the := variables that reference it.
SOURCE_DIR := $(shell cd .. && pwd)
BUILD_DIR := $(SOURCE_DIR)/build
DIST_DIR := $(SOURCE_DIR)/dist
OUTPUT_DIR := $(SOURCE_DIR)/docker/output

BUILDER_IMAGE := ollama37-builder
BUILDER_TAG := latest
BUILDER_DOCKERFILE := $(SOURCE_DIR)/docker/builder/Dockerfile
CONTAINER_NAME := ollama37-builder

RUNTIME_IMAGE := ollama37-runtime
RUNTIME_TAG := latest
RUNTIME_DOCKERFILE := $(SOURCE_DIR)/docker/runtime/Dockerfile

# CMake preset to use
CMAKE_PRESET := CUDA 11

# Detect number of CPU cores for parallel compilation
NPROC := $(shell nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)

.PHONY: all build clean clean-all shell test help build-builder clean-builder ensure-builder start-builder stop-builder copy-source run-cmake run-build run-go-build copy-artifacts build-runtime run-runtime stop-runtime clean-runtime

# Default target
all: build

# ===== Builder Image Targets =====

# Build the builder Docker image from builder/Dockerfile
build-builder:
	@echo "→ Building builder Docker image..."
	@echo "  Building Docker image $(BUILDER_IMAGE):$(BUILDER_TAG)..."
	@cd $(SOURCE_DIR)/docker/builder && docker build \
		-t $(BUILDER_IMAGE):$(BUILDER_TAG) \
		.
	@echo ""
	@echo "✓ Builder image built successfully!"
	@echo "  Image: $(BUILDER_IMAGE):$(BUILDER_TAG)"
	@echo ""
	@echo "To use this custom builder:"
	@echo "  make build BUILDER_IMAGE=$(BUILDER_IMAGE):$(BUILDER_TAG)"

# Clean builder image
clean-builder:
	@echo "→ Cleaning builder image..."
	@docker rmi $(BUILDER_IMAGE):$(BUILDER_TAG) 2>/dev/null || echo "  No builder image to remove"
	@echo "  Builder image cleaned"

# ===== Build Targets =====

# Main build target - orchestrates the entire build process
build: ensure-builder start-builder copy-source run-cmake run-build run-go-build copy-artifacts
	@echo ""
	@echo "✓ Build completed successfully!"
	@echo "  Binary: $(OUTPUT_DIR)/ollama"
	@echo "  Libraries: $(OUTPUT_DIR)/lib/"
	@echo ""
	@echo "To test the binary:"
	@echo "  cd $(OUTPUT_DIR) && ./ollama --version"

# Ensure builder image exists (build if not present)
ensure-builder:
	@if ! docker images --format '{{.Repository}}:{{.Tag}}' | grep -q "^$(BUILDER_IMAGE):$(BUILDER_TAG)$$"; then \
		echo "→ Builder image not found. Building $(BUILDER_IMAGE):$(BUILDER_TAG)..."; \
		$(MAKE) build-builder; \
	else \
		echo "→ Builder image $(BUILDER_IMAGE):$(BUILDER_TAG) already exists"; \
	fi

# Start the builder container with GPU access
start-builder:
	@echo "→ Starting builder container with GPU access..."
	@if docker ps -a --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \
		echo "  Container $(CONTAINER_NAME) already exists, checking status..."; \
		if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \
			echo "  Container is already running"; \
		else \
			echo "  Starting existing container..."; \
			docker start $(CONTAINER_NAME); \
		fi; \
	else \
		echo "  Creating new container..."; \
		docker run --rm -d \
			--name $(CONTAINER_NAME) \
			--runtime=nvidia \
			--gpus all \
			$(BUILDER_IMAGE):$(BUILDER_TAG) \
			sleep infinity; \
		sleep 2; \
		docker exec $(CONTAINER_NAME) nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader; \
	fi

# Stop and remove the builder container
stop-builder:
	@echo "→ Stopping builder container..."
	@if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \
		docker stop $(CONTAINER_NAME); \
		echo "  Container stopped"; \
	else \
		echo "  Container not running"; \
	fi

# Copy source code to the container
copy-source: start-builder
	@echo "→ Copying source code to container..."
	@docker cp $(SOURCE_DIR)/. $(CONTAINER_NAME):/usr/local/src/ollama37/
	@echo "  Source code copied"

# Run CMake configuration
run-cmake: copy-source
	@echo "→ Running CMake configuration (preset: $(CMAKE_PRESET))..."
	@docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash -c 'cmake --preset "$(CMAKE_PRESET)"'

# Run CMake build (C/C++/CUDA compilation)
run-build: run-cmake
	@echo "→ Building C/C++/CUDA libraries (using $(NPROC) cores)..."
	@docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash -c 'cmake --build build -j$(NPROC)'

# Run Go build
run-go-build: run-build
	@echo "→ Building Go binary..."
	@docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash -c 'go build -o ollama .'

# Copy build artifacts from container to host
copy-artifacts: run-go-build
	@echo "→ Copying build artifacts to host..."
	@mkdir -p $(OUTPUT_DIR)/lib
	@docker cp $(CONTAINER_NAME):/usr/local/src/ollama37/ollama $(OUTPUT_DIR)/
	@docker cp $(CONTAINER_NAME):/usr/local/src/ollama37/build/lib/ollama/. $(OUTPUT_DIR)/lib/
	@echo "  Artifacts copied to $(OUTPUT_DIR)"
	@echo ""
	@echo "  Binary: $(OUTPUT_DIR)/ollama"
	@ls -lh $(OUTPUT_DIR)/ollama
	@echo ""
	@echo "  Libraries:"
	@ls -lh $(OUTPUT_DIR)/lib/

# Open an interactive shell in the builder container
shell: start-builder
	@echo "→ Opening shell in builder container..."
	@docker exec -it -w /usr/local/src/ollama37 $(CONTAINER_NAME) \
		scl enable gcc-toolset-10 -- bash

# Test the built binary
test: build
	@echo "→ Testing ollama binary..."
	@cd $(OUTPUT_DIR) && LD_LIBRARY_PATH=$$PWD/lib:$$LD_LIBRARY_PATH ./ollama --version

# Clean build artifacts from host
clean:
	@echo "→ Cleaning build artifacts from host..."
	@rm -rf $(OUTPUT_DIR)
	@echo "  Cleaned $(OUTPUT_DIR)"

# Clean everything including container
clean-all: clean stop-builder
	@echo "→ Cleaning build directory in source..."
	@rm -rf $(BUILD_DIR)
	@rm -rf $(DIST_DIR)
	@echo "  All cleaned"

# ===== Runtime Image Targets =====

# Build the runtime Docker image from artifacts
build-runtime:
	@echo "→ Building runtime Docker image..."
	@if [ ! -f "$(OUTPUT_DIR)/ollama" ]; then \
		echo "Error: ollama binary not found in $(OUTPUT_DIR)"; \
		echo "Run 'make build' first to create the artifacts"; \
		exit 1; \
	fi
	@if [ ! -d "$(OUTPUT_DIR)/lib" ]; then \
		echo "Error: lib directory not found in $(OUTPUT_DIR)"; \
		echo "Run 'make build' first to create the artifacts"; \
		exit 1; \
	fi
	@echo "  Building Docker image $(RUNTIME_IMAGE):$(RUNTIME_TAG)..."
	@docker build \
		-f $(RUNTIME_DOCKERFILE) \
		-t $(RUNTIME_IMAGE):$(RUNTIME_TAG) \
		$(SOURCE_DIR)
	@echo ""
	@echo "✓ Runtime image built successfully!"
	@echo "  Image: $(RUNTIME_IMAGE):$(RUNTIME_TAG)"
	@echo ""
	@$(MAKE) stop-builder
	@echo ""
	@echo "To run the image:"
	@echo "  make run-runtime"
	@echo ""
	@echo "Or manually:"
	@echo "  docker run --rm -it --runtime=nvidia --gpus all -p 11434:11434 $(RUNTIME_IMAGE):$(RUNTIME_TAG)"

# Run the runtime container
run-runtime:
	@echo "→ Starting runtime container..."
	@if docker ps -a --format '{{.Names}}' | grep -q "^ollama37-runtime$$"; then \
		echo "  Stopping existing container..."; \
		docker stop ollama37-runtime 2>/dev/null || true; \
		docker rm ollama37-runtime 2>/dev/null || true; \
	fi
	@echo "  Starting new container..."
	@docker run -d \
		--name ollama37-runtime \
		--runtime=nvidia \
		--gpus all \
		-p 11434:11434 \
		-v ollama-data:/root/.ollama \
		$(RUNTIME_IMAGE):$(RUNTIME_TAG)
	@sleep 2
	@echo ""
	@echo "✓ Runtime container started!"
	@echo "  Container: ollama37-runtime"
	@echo "  API: http://localhost:11434"
	@echo ""
	@echo "Check logs:"
	@echo "  docker logs -f ollama37-runtime"
	@echo ""
	@echo "Test the API:"
	@echo "  curl http://localhost:11434/api/tags"
	@echo ""
	@echo "Stop the container:"
	@echo "  make stop-runtime"

# Stop the runtime container
stop-runtime:
	@echo "→ Stopping runtime container..."
	@if docker ps --format '{{.Names}}' | grep -q "^ollama37-runtime$$"; then \
		docker stop ollama37-runtime; \
		docker rm ollama37-runtime; \
		echo "  Container stopped and removed"; \
	else \
		echo "  Container not running"; \
	fi

# Clean runtime image
clean-runtime:
	@echo "→ Cleaning runtime image..."
	@docker rmi $(RUNTIME_IMAGE):$(RUNTIME_TAG) 2>/dev/null || echo "  No runtime image to remove"
	@docker volume rm ollama-data 2>/dev/null || echo "  No volume to remove"
	@echo "  Runtime image cleaned"

# Help target
help:
	@echo "Ollama Build System (with GPU-enabled builder)"
	@echo ""
	@echo "Builder Image Targets:"
	@echo "  make build-builder  - Build custom builder Docker image"
	@echo "  make clean-builder  - Remove builder image"
	@echo ""
	@echo "Build Targets:"
	@echo "  make build          - Build ollama binary and libraries (default)"
	@echo "  make clean          - Remove build artifacts from host"
	@echo "  make clean-all      - Remove all build artifacts and stop container"
	@echo "  make shell          - Open a shell in the builder container"
	@echo "  make test           - Test the built binary"
	@echo ""
	@echo "Runtime Image Targets:"
	@echo "  make build-runtime  - Build Docker runtime image from artifacts"
	@echo "  make run-runtime    - Start the runtime container"
	@echo "  make stop-runtime   - Stop the runtime container"
	@echo "  make clean-runtime  - Remove runtime image and volumes"
	@echo ""
	@echo "  make help           - Show this help message"
	@echo ""
	@echo "Configuration:"
	@echo "  BUILDER_IMAGE:  $(BUILDER_IMAGE):$(BUILDER_TAG)"
	@echo "  RUNTIME_IMAGE:  $(RUNTIME_IMAGE):$(RUNTIME_TAG)"
	@echo "  CONTAINER_NAME: $(CONTAINER_NAME)"
	@echo "  CMAKE_PRESET:   $(CMAKE_PRESET)"
	@echo "  PARALLEL_JOBS:  $(NPROC)"
	@echo ""
	@echo "Environment:"
	@echo "  SOURCE_DIR: $(SOURCE_DIR)"
	@echo "  OUTPUT_DIR: $(OUTPUT_DIR)"

docker/README.md

@@ -0,0 +1,257 @@
# Ollama37 Docker Build System
**Makefile-based build system for Ollama with CUDA 11.4 and Compute Capability 3.7 support (Tesla K80)**
## Overview
This fork maintains support for legacy NVIDIA GPUs (Tesla K80, Compute Capability 3.7) using CUDA 11.4 and GCC 10. The upstream Ollama project dropped CC 3.7 support when transitioning to CUDA 12.
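In practice this means compute capability 3.7 must be in the CUDA architecture list at configure time; the `CUDA 11 K80` CMake preset handles that. A hand-rolled equivalent might look like the sketch below (the exact flags are an assumption — `CMakePresets.json` is the source of truth):
```bash
# Roughly what the K80 preset pins down; architecture value 37 = CC 3.7
cmake -S . -B build -DCMAKE_CUDA_ARCHITECTURES=37
```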
### Key Features
- GPU-enabled build container with automatic architecture detection
- Makefile orchestration for entire workflow
- Production-ready runtime image (3.1GB)
- Docker Compose support
## Prerequisites
- Docker with NVIDIA Container Runtime
- NVIDIA driver from the 470 branch (the last driver series with Tesla K80/Kepler support)
- Verify GPU access:
```bash
docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.4.3-base-rockylinux8 nvidia-smi
```
## Quick Start
### 1. Build the Builder Image (First Time Only)
```bash
cd /path/to/ollama37/docker
make build-builder
```
This builds the `ollama37-builder:latest` image containing CUDA 11.4, GCC 10, CMake, and Go. Takes ~5 minutes first time.
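Before the first full build, you can spot-check the toolchain inside the image (a quick sanity pass; version output will vary with the Rocky Linux 8 packages):
```bash
# Verify the compilers and tools the builder provides
docker run --rm ollama37-builder:latest \
  scl enable gcc-toolset-10 -- bash -c 'gcc --version | head -1; cmake --version | head -1; go version; nvcc --version | tail -1'
```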
### 2. Build Ollama
```bash
# Build binary and libraries (~7 minutes)
make build
# Create runtime Docker image (~2 minutes)
make build-runtime
```
### 3. Run
```bash
# Option A: Using docker-compose (recommended)
docker-compose up -d
curl http://localhost:11434/api/tags
# Option B: Using Makefile
make run-runtime
curl http://localhost:11434/api/tags
```
## Directory Structure
```
docker/
├── Makefile # Build orchestration
├── docker-compose.yml # Deployment configuration
├── builder/
│ └── Dockerfile # Builder image definition
├── runtime/
│ └── Dockerfile # Runtime image definition
└── output/ # Build artifacts (created by make build)
├── ollama # Binary (61MB)
└── lib/ # Libraries (109MB)
```
## Make Targets
### Builder Image
| Command | Description |
|---------|-------------|
| `make build-builder` | Build builder Docker image |
| `make clean-builder` | Remove builder image |
### Build
| Command | Description |
|---------|-------------|
| `make build` | Build binary and libraries |
| `make test` | Test the built binary |
| `make shell` | Open shell in builder container |
| `make clean` | Remove output artifacts |
| `make clean-all` | Clean everything + stop containers |
### Runtime
| Command | Description |
|---------|-------------|
| `make build-runtime` | Build Docker runtime image |
| `make run-runtime` | Start runtime container |
| `make stop-runtime` | Stop runtime container |
| `make clean-runtime` | Remove image and volumes |
### Help
```bash
make help # Show all available targets
```
## Usage Examples
### Development Workflow
```bash
# First time setup
make build-builder
make build
make test
# After code changes
make build
make build-runtime
make run-runtime
```
### Production Deployment
```bash
make build-builder
make build build-runtime
docker-compose up -d
```
### Using the API
```bash
# List models
curl http://localhost:11434/api/tags
# Pull a model
curl http://localhost:11434/api/pull -d '{"name": "gemma3:4b"}'
# Run inference
curl http://localhost:11434/api/generate -d '{
"model": "gemma3:4b",
"prompt": "Why is the sky blue?",
"stream": false
}'
```
### Using the CLI
```bash
# List/pull/run models
docker exec ollama37-runtime ollama list
docker exec ollama37-runtime ollama pull gemma3:4b
docker exec ollama37-runtime ollama run gemma3:4b "Hello!"
```
## GPU Support
### Supported Compute Capabilities
- **3.7** - Tesla K80 (primary target)
- **5.0-8.6** - Maxwell, Pascal, Volta, Turing, Ampere
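To confirm what a container actually sees, the same query the Makefile's `start-builder` target runs works against any GPU-enabled container:
```bash
docker exec ollama37-runtime nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader
```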
### Tesla K80 Recommendations
**VRAM:** 12GB per GPU (24GB for dual-GPU K80)
**Model sizes:**
- Small (3-4B): Full precision
- Medium (7-8B): Q4_K_M quantization
- Large (13B+): Q4_0 quantization or multi-GPU
**Multi-GPU:**
```bash
docker run --gpus all ... # Use all GPUs
docker run --gpus '"device=0"' ... # Use specific GPU
```
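A fuller sketch, pinning the runtime container to the first GPU (mirrors the `run-runtime` target; the device index is an example):
```bash
docker run -d --name ollama37-runtime \
  --runtime=nvidia --gpus '"device=0"' \
  -p 11434:11434 -v ollama-data:/root/.ollama \
  ollama37-runtime:latest
```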
## Configuration
### Environment Variables (docker-compose.yml)
| Variable | Default | Description |
|----------|---------|-------------|
| `OLLAMA_HOST` | `0.0.0.0:11434` | Server listen address |
| `OLLAMA_MODELS` | `/root/.ollama/models` | Model storage path |
| `NVIDIA_VISIBLE_DEVICES` | `all` | Which GPUs to use |
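To check the values a running container actually picked up (optional; just a quick inspection):
```bash
docker exec ollama37-runtime env | grep -E 'OLLAMA|NVIDIA'
```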
### Makefile Variables
```bash
make build CMAKE_PRESET="CUDA 11 K80" # Use different preset
make build NPROC=4 # Control parallel jobs
make build-runtime RUNTIME_IMAGE=my-ollama # Custom image name
```
## Troubleshooting
### GPU not detected during build
```bash
make shell
nvidia-smi # Should show your GPU
```
### Out of memory during build
```bash
make build NPROC=2 # Reduce parallel jobs
```
### Container won't start
```bash
docker logs ollama37-runtime
# or
docker-compose logs
```
### GPU not accessible in runtime
```bash
docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
```
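If that base check passes but Ollama still reports no GPU, look inside the runtime container itself (the grep pattern is just a starting point):
```bash
docker exec ollama37-runtime nvidia-smi
docker logs ollama37-runtime 2>&1 | grep -i cuda
```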
### Port already in use
```bash
# Edit docker-compose.yml
ports:
- "11435:11434" # Change host port
```
## Advanced
### Custom Builder Image
The builder is automatically built from `builder/Dockerfile` when running `make build` for the first time.
To customize (e.g., change CUDA version, add dependencies):
```bash
vim docker/builder/Dockerfile
make clean-builder build-builder
make build
```
See `builder/README.md` for details.
### Clean Docker Build Cache
```bash
# Remove all build cache
docker builder prune -af
# Nuclear option (cleans everything)
docker system prune -af
```
## Documentation
- **[../CLAUDE.md](../CLAUDE.md)** - Project goals and implementation notes
- **[builder/README.md](builder/README.md)** - Builder image documentation
## License
MIT (same as upstream Ollama)

docker/builder/Dockerfile

@@ -0,0 +1,55 @@
FROM nvidia/cuda:11.4.3-devel-rockylinux8

# Update OS and install CUDA toolkit 11.4 and NVIDIA driver 470
#RUN dnf -y update\
# && dnf -y install epel-release\
# && dnf -y config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo\
# && dnf -y module install nvidia-driver:470-dkms\
# && dnf -y install cuda-11-4
# Post install, setup path
#COPY cuda-11.4.sh /etc/profile.d/cuda-11.4.sh

# Install gcc 10
#RUN dnf -y install wget unzip lbzip2\
# && dnf -y groupinstall "Development Tools"\
# && cd /usr/local/src\
# && wget https://github.com/gcc-mirror/gcc/archive/refs/heads/releases/gcc-10.zip\
# && unzip gcc-10.zip\
# && cd gcc-releases-gcc-10\
# && contrib/download_prerequisites\
# && mkdir /usr/local/gcc-10\
# && cd /usr/local/gcc-10\
# && /usr/local/src/gcc-releases-gcc-10/configure --disable-multilib\
# && make -j ${nproc}\
# && make install
RUN dnf install -y gcc-toolset-10-gcc gcc-toolset-10-gcc-c++ gcc-toolset-10-runtime

# Post install, setup path
#COPY gcc-10.sh /etc/profile.d/gcc-10.sh
#COPY gcc-10.sh /etc/ld.so.conf.d/gcc-10.conf
#RUN scl enable gcc-toolset-10 bash

# Install cmake
#ENV LD_LIBRARY_PATH="/usr/local/lib64:/usr/local/cuda-11.4/lib64"
#ENV PATH="/usr/local/cuda-11.4/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
#RUN dnf -y install openssl-devel\
# && cd /usr/local/src\
# && wget https://github.com/Kitware/CMake/releases/download/v4.0.0/cmake-4.0.0.tar.gz\
# && tar xvf cmake-4.0.0.tar.gz\
# && mkdir /usr/local/cmake-4\
# && cd /usr/local/cmake-4\
# && /usr/local/src/cmake-4.0.0/configure\
# && make -j ${nproc}\
# && make install
RUN dnf -y install cmake

# Install go
#RUN cd /usr/local\
# && wget https://go.dev/dl/go1.24.2.linux-amd64.tar.gz\
# && tar xvf go1.24.2.linux-amd64.tar.gz
RUN dnf -y install golang

# Post install, setup path
#COPY go-1.24.2.sh /etc/profile.d/go-1.24.2.sh
#ENV PATH="$PATH:/usr/local/go/bin"

docker/builder/README.md

@@ -0,0 +1,58 @@
# Ollama37 Builder Image
This directory contains the Dockerfile for building the `ollama37-builder:latest` image.
## What's Inside
The builder image includes:
- **Base**: `nvidia/cuda:11.4.3-devel-rockylinux8`
- **GCC 10**: `gcc-toolset-10` (the newest GCC supported by CUDA 11.4's nvcc)
- **CMake**: System package
- **Go**: System package
## Building the Builder Image
The builder image is **automatically built** by the Makefile when you run `make build` for the first time.
To manually build the builder image:
```bash
cd /path/to/ollama37/docker
make build-builder
```
Or using Docker directly:
```bash
cd /path/to/ollama37/docker/builder
docker build -t ollama37-builder:latest .
```
## Using the Builder Image
The Makefile handles this automatically, but for reference:
```bash
# Start builder container with GPU access
docker run --rm -d \
--name ollama37-builder \
--runtime=nvidia \
--gpus all \
ollama37-builder:latest \
sleep infinity
# Use the container
docker exec -it ollama37-builder bash
```
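From there, the manual equivalent of `make build` is the Makefile's exec steps run by hand (container paths match the Makefile; substitute your own source checkout for `/path/to/ollama37`):
```bash
# Mirrors the run-cmake, run-build, and run-go-build targets
docker cp /path/to/ollama37/. ollama37-builder:/usr/local/src/ollama37/
docker exec -w /usr/local/src/ollama37 ollama37-builder \
  scl enable gcc-toolset-10 -- bash -c 'cmake --preset "CUDA 11" && cmake --build build -j$(nproc) && go build -o ollama .'
```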
## Customization
If you need to modify the builder (e.g., change CUDA version, add packages):
1. Edit `Dockerfile` in this directory
2. Rebuild: `make clean-builder build-builder`
3. Build your project: `make build`
## Archived Builder
The `archived/` subdirectory contains an older Dockerfile that built GCC and CMake from source (~80 minutes). The current version uses Rocky Linux system packages for much faster builds (~5 minutes).

docker/docker-compose.yml

@@ -0,0 +1,32 @@
version: "3.8"

services:
  ollama:
    image: ollama37-runtime:latest
    container_name: ollama37-runtime
    runtime: nvidia
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "/usr/local/bin/ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 5s

volumes:
  ollama-data:
    name: ollama-data

docker/runtime/Dockerfile

@@ -0,0 +1,56 @@
# Dockerfile for Ollama37 Runtime Image
#
# This creates a minimal runtime-only image from pre-built artifacts.
# The artifacts should be built first using the Makefile in ../
#
# Build context should be the project root (../..) so we can access docker/output/
#
# Usage:
#   cd /path/to/ollama37
#   make -C docker build          # Build the binary first
#   make -C docker build-runtime  # Create the runtime image

FROM nvidia/cuda:11.4.3-runtime-rockylinux8

# Install minimal runtime dependencies
# Note: Not running 'dnf update' to keep base image stable and build faster
RUN dnf -y install \
        libgomp \
        libstdc++ \
    && dnf clean all \
    && rm -rf /var/cache/dnf

# Create directory structure
RUN mkdir -p /usr/local/bin /usr/local/lib/ollama

# Copy the ollama binary from build output
COPY docker/output/ollama /usr/local/bin/ollama

# Copy all shared libraries from build output
COPY docker/output/lib/ /usr/local/lib/ollama/

# Prepend our library path to the existing LD_LIBRARY_PATH
# Base image already has: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
ENV LD_LIBRARY_PATH="/usr/local/lib/ollama:${LD_LIBRARY_PATH}"

# Base image already sets these, but we can override if needed:
#   NVIDIA_DRIVER_CAPABILITIES=compute,utility
#   NVIDIA_VISIBLE_DEVICES=all

# Ollama server configuration
ENV OLLAMA_HOST=0.0.0.0:11434

# Expose the Ollama API port
EXPOSE 11434

# Create a data directory for models
RUN mkdir -p /root/.ollama
VOLUME ["/root/.ollama"]

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD /usr/local/bin/ollama list || exit 1

# Set entrypoint and default command
ENTRYPOINT ["/usr/local/bin/ollama"]
CMD ["serve"]