Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-10 07:46:59 +00:00.
- Add LD_LIBRARY_PATH to CMake and build steps for GCC 10 libraries
- Copy GCC 10 runtime libraries (libstdc++.so.6, libgcc_s.so.1) to output
- Update runtime Dockerfile to use minimal CUDA runtime packages
- Add -buildvcs=false flag to Go build to avoid Git VCS errors
- Simplify runtime container to only include necessary CUDA libraries
- Fix library path configuration for proper runtime library loading
47 lines
1.5 KiB
Docker
# syntax=docker/dockerfile:1

# Runtime image for the ollama binary built in a separate build stage/script.
FROM rockylinux/rockylinux:8

# Install only the CUDA runtime libraries (not the full toolkit).
# The host system provides the NVIDIA driver at runtime via the --gpus flag.
# `dnf clean all` runs in the same layer so the metadata cache never lands in the image.
RUN dnf -y install dnf-plugins-core \
    && dnf -y config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
    && dnf -y install cuda-cudart-11-4 libcublas-11-4 \
    && dnf clean all

# Create the directory structure for the binary and its bundled libraries.
RUN mkdir -p /usr/local/bin /usr/local/lib/ollama

# Copy the ollama binary from build output.
COPY docker/output/ollama /usr/local/bin/ollama

# Copy all shared libraries from build output
# (includes ollama libs + GCC 10 runtime libs: libstdc++.so.6, libgcc_s.so.1).
COPY docker/output/lib/ /usr/local/lib/ollama/

# Library search order — our ollama libraries first:
#   1. Ollama CUDA/GGML libraries + GCC 10 runtime libraries
#   2. System CUDA libraries
#   3. Default system library path
ENV LD_LIBRARY_PATH=/usr/local/lib/ollama:/usr/local/cuda-11.4/lib64:/usr/lib64

# Base image already sets these; override here only if needed:
#   NVIDIA_DRIVER_CAPABILITIES=compute,utility
#   NVIDIA_VISIBLE_DEVICES=all

# Ollama server configuration: listen on all interfaces inside the container.
ENV OLLAMA_HOST=0.0.0.0:11434

# Expose the Ollama API port (documentation only; publish with -p at run time).
EXPOSE 11434

# Create the model data directory BEFORE declaring it a volume, so the
# directory exists when an anonymous volume is mounted over it.
RUN mkdir -p /root/.ollama
VOLUME ["/root/.ollama"]

# Health check: `ollama list` only succeeds when the server is answering.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD /usr/local/bin/ollama list || exit 1

# Exec-form entrypoint so ollama runs as PID 1 and receives SIGTERM directly;
# CMD supplies the default subcommand and can be overridden at `docker run`.
ENTRYPOINT ["/usr/local/bin/ollama"]
CMD ["serve"]