From 8380ca93f887ff55559af87083fde7809aa62481 Mon Sep 17 00:00:00 2001 From: Shang Chieh Tseng Date: Sun, 9 Nov 2025 00:05:12 +0800 Subject: [PATCH] Fix Docker build system: add library paths, GCC 10 runtime libs, and Go build flags - Add LD_LIBRARY_PATH to CMake and build steps for GCC 10 libraries - Copy GCC 10 runtime libraries (libstdc++.so.6, libgcc_s.so.1) to output - Update runtime Dockerfile to use minimal CUDA runtime packages - Add -buildvcs=false flag to Go build to avoid Git VCS errors - Simplify runtime container to only include necessary CUDA libraries - Fix library path configuration for proper runtime library loading --- docker/Makefile | 31 +++++++++++++++---------------- docker/builder/Dockerfile | 11 ++++++----- docker/runtime/Dockerfile | 38 ++++++++++++++------------------------ 3 files changed, 35 insertions(+), 45 deletions(-) diff --git a/docker/Makefile b/docker/Makefile index 55156bee..935e7747 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -80,21 +80,15 @@ ensure-builder: # Start the builder container with GPU access start-builder: @echo "→ Starting builder container with GPU access..." - @if docker ps -a --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \ - echo " Container $(CONTAINER_NAME) already exists, checking status..."; \ - if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \ - echo " Container is already running"; \ - else \ - echo " Starting existing container..."; \ - docker start $(CONTAINER_NAME); \ - fi \ + @if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \ + echo " Container $(CONTAINER_NAME) is already running"; \ else \ - echo " Creating new container..."; \ + echo " Creating new builder container..."; \ docker run --rm -d \ --name $(CONTAINER_NAME) \ --runtime=nvidia \ --gpus all \ - $(BUILDER_IMAGE) \ + $(BUILDER_IMAGE):$(BUILDER_TAG) \ sleep infinity; \ sleep 2; \ docker exec $(CONTAINER_NAME) nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader; \ @@ -105,7 +99,7 @@ stop-builder: @echo "→ Stopping builder container..." @if docker ps --format '{{.Names}}' | grep -q "^$(CONTAINER_NAME)$$"; then \ docker stop $(CONTAINER_NAME); \ - echo " Container stopped"; \ + echo " Container stopped and removed (--rm flag)"; \ else \ echo " Container not running"; \ fi @@ -122,19 +116,19 @@ copy-source: start-builder run-cmake: copy-source @echo "→ Running CMake configuration (preset: $(CMAKE_PRESET))..." @docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \ - bash -l -c 'CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --preset "$(CMAKE_PRESET)"' + bash -l -c 'LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:/usr/lib64:$$LD_LIBRARY_PATH CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --preset "$(CMAKE_PRESET)"' # Run CMake build (C/C++/CUDA compilation) run-build: run-cmake @echo "→ Building C/C++/CUDA libraries (using $(NPROC) cores)..." @docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \ - bash -l -c 'CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --build build -j$(NPROC)' + bash -l -c 'LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:/usr/lib64:$$LD_LIBRARY_PATH CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ cmake --build build -j$(NPROC)' # Run Go build run-go-build: run-build @echo "→ Building Go binary..." @docker exec -w /usr/local/src/ollama37 $(CONTAINER_NAME) \ - bash -l -c 'go build -o ollama .' + bash -l -c 'go build -buildvcs=false -o ollama .' # Copy build artifacts from container to host copy-artifacts: run-go-build @@ -142,6 +136,10 @@ copy-artifacts: run-go-build @mkdir -p $(OUTPUT_DIR)/lib @docker cp $(CONTAINER_NAME):/usr/local/src/ollama37/ollama $(OUTPUT_DIR)/ @docker cp $(CONTAINER_NAME):/usr/local/src/ollama37/build/lib/ollama/. $(OUTPUT_DIR)/lib/ + @echo "→ Copying GCC 10 runtime libraries..." + @docker cp $(CONTAINER_NAME):/usr/local/lib64/libstdc++.so.6 $(OUTPUT_DIR)/lib/ + @docker cp $(CONTAINER_NAME):/usr/local/lib64/libstdc++.so.6.0.28 $(OUTPUT_DIR)/lib/ + @docker cp $(CONTAINER_NAME):/usr/local/lib64/libgcc_s.so.1 $(OUTPUT_DIR)/lib/ @echo " Artifacts copied to $(OUTPUT_DIR)" @echo "" @echo " Binary: $(OUTPUT_DIR)/ollama" @@ -197,13 +195,14 @@ build-runtime: @echo "✓ Runtime image built successfully!" @echo " Image: $(RUNTIME_IMAGE):$(RUNTIME_TAG)" @echo "" - @$(MAKE) stop-builder - @echo "" @echo "To run the image:" @echo " make run-runtime" @echo "" @echo "Or manually:" @echo " docker run --rm -it --runtime=nvidia --gpus all -p 11434:11434 $(RUNTIME_IMAGE):$(RUNTIME_TAG)" + @echo "" + @echo "To stop the builder container:" + @echo " make stop-builder" # Run the runtime container run-runtime: diff --git a/docker/builder/Dockerfile b/docker/builder/Dockerfile index ac406f6f..9447093e 100644 --- a/docker/builder/Dockerfile +++ b/docker/builder/Dockerfile @@ -1,12 +1,13 @@ FROM rockylinux/rockylinux:8 -# Update OS and install cuda toolkit 11.4 and nvdia driver 470 -#RUN dnf -y update\ -# && dnf -y module install nvidia-driver:470-dkms\ +# Install CUDA toolkit 11.4 +# Note: NVIDIA driver is NOT needed in builder container - only CUDA toolkit for compilation +# The host system provides the driver at runtime via --gpus flag +# dnf-plugins-core is required for the config-manager command -RUN dnf -y install epel-release\ +RUN dnf -y install dnf-plugins-core\ && dnf -y config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo\ - && dnf -y install cuda-11-4 + && dnf -y install cuda-toolkit-11-4 # Post install, setup path COPY cuda-11.4.sh /etc/profile.d/cuda-11.4.sh diff --git a/docker/runtime/Dockerfile b/docker/runtime/Dockerfile index 006061db..b0a99384 100644 --- a/docker/runtime/Dockerfile +++ b/docker/runtime/Dockerfile @@ -1,24 +1,11 @@ -# Dockerfile for Ollama37 Runtime Image -# -# This creates a minimal runtime-only image from pre-built artifacts. -# The artifacts should be built first using the Makefile in ../ -# -# Build context should be the project root (../..) so we can access docker/output/ -# -# Usage: -# cd /path/to/ollama37 -# make -C docker build # Build the binary first -# make -C docker build-runtime # Create the runtime image +FROM rockylinux/rockylinux:8 -FROM nvidia/cuda:11.4.3-runtime-rockylinux8 - -# Install minimal runtime dependencies -# Note: Not running 'dnf update' to keep base image stable and build faster -RUN dnf -y install \ - libgomp \ - libstdc++ \ - && dnf clean all \ - && rm -rf /var/cache/dnf +# Install only CUDA runtime libraries (not the full toolkit) +# The host system provides the NVIDIA driver at runtime via --gpus flag +RUN dnf -y install dnf-plugins-core\ + && dnf -y config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo\ + && dnf -y install cuda-cudart-11-4 libcublas-11-4 \ + && dnf clean all # Create directory structure RUN mkdir -p /usr/local/bin /usr/local/lib/ollama @@ -26,12 +13,15 @@ RUN mkdir -p /usr/local/bin /usr/local/lib/ollama # Copy the ollama binary from build output COPY docker/output/ollama /usr/local/bin/ollama -# Copy all shared libraries from build output +# Copy all shared libraries from build output (includes ollama libs + GCC 10 runtime libs) COPY docker/output/lib/ /usr/local/lib/ollama/ -# Prepend our library path to the existing LD_LIBRARY_PATH -# Base image already has: /usr/local/nvidia/lib:/usr/local/nvidia/lib64 -ENV LD_LIBRARY_PATH="/usr/local/lib/ollama:${LD_LIBRARY_PATH}" +# Set library path to include our ollama libraries first +# This includes: +# - Ollama CUDA/GGML libraries +# - GCC 10 runtime libraries (libstdc++.so.6, libgcc_s.so.1) +# - System CUDA libraries +ENV LD_LIBRARY_PATH=/usr/local/lib/ollama:/usr/local/cuda-11.4/lib64:/usr/lib64 # Base image already sets these, but we can override if needed: # NVIDIA_DRIVER_CAPABILITIES=compute,utility