From 288077c3a3b69d717e7bf1af576d28885657826a Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Fri, 6 Mar 2026 16:36:22 -0800 Subject: [PATCH] build: smarter docker parallelism (#14653) Our Dockerfile leverages parallel stages for more efficient builds. However, our old parallel settings were naive and led to under- or over-utilization depending on the capabilities of your build system. This change switches to using Ninja for all our docker cmake builds to leverage its smarter parallel logic. We tell Ninja to target a load of nproc so each of the build stages will share the load on the system aiming for full CPU use without oversaturation. The GPU parallelism settings are also adjusted to 4 to avoid a long-tail for the last few GPU targets as they work through the long list of GPU architectures. This also fixes the Dockerfile to move Vulkan install to just the stage that needs it instead of blocking most other GPU installs. This should speed up CI which always has a clean build cache. --- Dockerfile | 91 +++++++++++++++++++++++++++----------------------- scripts/env.sh | 1 - 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/Dockerfile b/Dockerfile index 43f511465..1057dd0f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,28 +1,18 @@ # vim: filetype=dockerfile ARG FLAVOR=${TARGETARCH} -ARG PARALLEL=8 ARG ROCMVERSION=6.3.3 ARG JETPACK5VERSION=r35.4.1 ARG JETPACK6VERSION=r36.4.0 ARG CMAKEVERSION=3.31.2 +ARG NINJAVERSION=1.12.1 ARG VULKANVERSION=1.4.321.1 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64 RUN dnf install -y yum-utils ccache gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ gcc-toolset-11-binutils \ && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH -ARG VULKANVERSION -RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O 
/tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \ - && tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \ - && dnf -y install ninja-build \ - && ln -s /usr/bin/python3 /usr/bin/python \ - && /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \ - && /${VULKANVERSION}/vulkansdk -j 8 shaderc -RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \ - && cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib -ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH FROM --platform=linux/arm64 almalinux:8 AS base-arm64 # install epel-release for ccache @@ -33,100 +23,119 @@ ENV CC=clang CXX=clang++ FROM base-${TARGETARCH} AS base ARG CMAKEVERSION +ARG NINJAVERSION RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +RUN dnf install -y unzip \ + && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux$([ "$(uname -m)" = "aarch64" ] && echo "-aarch64").zip \ + && unzip /tmp/ninja.zip -d /usr/local/bin \ + && rm /tmp/ninja.zip +ENV CMAKE_GENERATOR=Ninja ENV LDFLAGS=-s FROM base AS cpu RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH -ARG PARALLEL COPY CMakeLists.txt CMakePresets.json . COPY ml/backend/ggml/ggml ml/backend/ggml/ggml RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'CPU' \ - && cmake --build --parallel ${PARALLEL} --preset 'CPU' \ - && cmake --install build --component CPU --strip --parallel ${PARALLEL} + && cmake --build --preset 'CPU' -- -l $(nproc) \ + && cmake --install build --component CPU --strip FROM base AS cuda-11 ARG CUDA11VERSION=11.8 RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-} ENV PATH=/usr/local/cuda-11/bin:$PATH -ARG PARALLEL COPY CMakeLists.txt CMakePresets.json . 
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'CUDA 11' \ - && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \ - && cmake --install build --component CUDA --strip --parallel ${PARALLEL} + && cmake --build --preset 'CUDA 11' -- -l $(nproc) \ + && cmake --install build --component CUDA --strip FROM base AS cuda-12 ARG CUDA12VERSION=12.8 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-} ENV PATH=/usr/local/cuda-12/bin:$PATH -ARG PARALLEL COPY CMakeLists.txt CMakePresets.json . COPY ml/backend/ggml/ggml ml/backend/ggml/ggml RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'CUDA 12' \ - && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \ - && cmake --install build --component CUDA --strip --parallel ${PARALLEL} + && cmake --build --preset 'CUDA 12' -- -l $(nproc) \ + && cmake --install build --component CUDA --strip FROM base AS cuda-13 ARG CUDA13VERSION=13.0 RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-} ENV PATH=/usr/local/cuda-13/bin:$PATH -ARG PARALLEL COPY CMakeLists.txt CMakePresets.json . COPY ml/backend/ggml/ggml ml/backend/ggml/ggml RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'CUDA 13' \ - && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \ - && cmake --install build --component CUDA --strip --parallel ${PARALLEL} + && cmake --build --preset 'CUDA 13' -- -l $(nproc) \ + && cmake --install build --component CUDA --strip FROM base AS rocm-6 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH -ARG PARALLEL COPY CMakeLists.txt CMakePresets.json . 
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'ROCm 6' \ - && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \ - && cmake --install build --component HIP --strip --parallel ${PARALLEL} + && cmake --build --preset 'ROCm 6' -- -l $(nproc) \ + && cmake --install build --component HIP --strip RUN rm -f dist/lib/ollama/rocm/rocblas/library/*gfx90[06]* FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5 ARG CMAKEVERSION -RUN apt-get update && apt-get install -y curl ccache \ - && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +ARG NINJAVERSION +RUN apt-get update && apt-get install -y curl ccache unzip \ + && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 \ + && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux-aarch64.zip \ + && unzip /tmp/ninja.zip -d /usr/local/bin \ + && rm /tmp/ninja.zip +ENV CMAKE_GENERATOR=Ninja COPY CMakeLists.txt CMakePresets.json . 
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml -ARG PARALLEL RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'JetPack 5' \ - && cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \ - && cmake --install build --component CUDA --strip --parallel ${PARALLEL} + && cmake --build --preset 'JetPack 5' -- -l $(nproc) \ + && cmake --install build --component CUDA --strip FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6 ARG CMAKEVERSION -RUN apt-get update && apt-get install -y curl ccache \ - && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 +ARG NINJAVERSION +RUN apt-get update && apt-get install -y curl ccache unzip \ + && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 \ + && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux-aarch64.zip \ + && unzip /tmp/ninja.zip -d /usr/local/bin \ + && rm /tmp/ninja.zip +ENV CMAKE_GENERATOR=Ninja COPY CMakeLists.txt CMakePresets.json . 
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml -ARG PARALLEL RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'JetPack 6' \ - && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \ - && cmake --install build --component CUDA --strip --parallel ${PARALLEL} + && cmake --build --preset 'JetPack 6' -- -l $(nproc) \ + && cmake --install build --component CUDA --strip FROM base AS vulkan +ARG VULKANVERSION +RUN ln -s /usr/bin/python3 /usr/bin/python \ + && wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk.tar.xz \ + && tar xvf /tmp/vulkansdk.tar.xz -C /tmp \ + && /tmp/${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \ + && /tmp/${VULKANVERSION}/vulkansdk -j 8 shaderc \ + && cp -r /tmp/${VULKANVERSION}/x86_64/include/* /usr/local/include/ \ + && cp -r /tmp/${VULKANVERSION}/x86_64/lib/* /usr/local/lib \ + && cp -r /tmp/${VULKANVERSION}/x86_64/bin/* /usr/local/bin/ \ + && rm -rf /tmp/${VULKANVERSION} /tmp/vulkansdk.tar.xz COPY CMakeLists.txt CMakePresets.json . COPY ml/backend/ggml/ggml ml/backend/ggml/ggml RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'Vulkan' \ - && cmake --build --parallel --preset 'Vulkan' \ - && cmake --install build --component Vulkan --strip --parallel 8 + && cmake --build --preset 'Vulkan' -- -l $(nproc) \ + && cmake --install build --component Vulkan --strip FROM base AS mlx ARG CUDA13VERSION=13.0 @@ -138,7 +147,6 @@ ENV PATH=/usr/local/cuda-13/bin:$PATH ENV BLAS_INCLUDE_DIRS=/usr/include/openblas ENV LAPACK_INCLUDE_DIRS=/usr/include/openblas ENV CGO_LDFLAGS="-L/usr/local/cuda-13/lib64 -L/usr/local/cuda-13/targets/x86_64-linux/lib/stubs" -ARG PARALLEL WORKDIR /go/src/github.com/ollama/ollama COPY CMakeLists.txt CMakePresets.json . 
COPY ml/backend/ggml/ggml ml/backend/ggml/ggml @@ -150,8 +158,8 @@ ENV PATH=/usr/local/go/bin:$PATH RUN go mod download RUN --mount=type=cache,target=/root/.ccache \ cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \ - && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \ - && cmake --install build --component MLX --strip --parallel ${PARALLEL} + && cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) \ + && cmake --install build --component MLX --strip FROM base AS build WORKDIR /go/src/github.com/ollama/ollama @@ -189,7 +197,6 @@ FROM scratch AS rocm COPY --from=rocm-6 dist/lib/ollama /lib/ollama FROM ${FLAVOR} AS archive -ARG VULKANVERSION COPY --from=cpu dist/lib/ollama /lib/ollama COPY --from=build /bin/ollama /bin/ollama diff --git a/scripts/env.sh b/scripts/env.sh index 4f5641fd3..65a970bdc 100644 --- a/scripts/env.sh +++ b/scripts/env.sh @@ -16,7 +16,6 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \ --build-arg=OLLAMA_FAST_BUILD \ --build-arg=CUSTOM_CPU_FLAGS \ --build-arg=GPU_RUNNER_CPU_FLAGS \ - --build-arg=PARALLEL \ --build-arg=AMDGPU_TARGETS" echo "Building Ollama"