From 288077c3a3b69d717e7bf1af576d28885657826a Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <dhiltgen@users.noreply.github.com>
Date: Fri, 6 Mar 2026 16:36:22 -0800
Subject: [PATCH] build: smarter docker parallelism (#14653)

Our Dockerfile leverages parallel stages for more efficient builds.  However,
our old parallel settings were naive and lead to under/over utilization
depending on the capabilities of your build system.

This change switches to using Ninja for all our docker cmake builds to leverage
its smarter parallel logic.  We tell Ninja to target a load of nproc so each of
the build stages will share the load on the system aiming for full CPU use
without oversaturation.

The GPU parallelism settings are also adjusted to 4 to avoid a long-tail for
the last few GPU targets as they work through the long list of GPU
architectures.

This also fixes the Dockerfile to move Vulkan install to just the stage that
needs it instead of blocking most other GPU installs.  This should speed up CI
which always has a clean build cache.
---
 Dockerfile     | 91 +++++++++++++++++++++++++++-----------------------
 scripts/env.sh |  1 -
 2 files changed, 49 insertions(+), 43 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 43f511465..1057dd0f9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,18 @@
 # vim: filetype=dockerfile
 
 ARG FLAVOR=${TARGETARCH}
-ARG PARALLEL=8
 
 ARG ROCMVERSION=6.3.3
 ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
+ARG NINJAVERSION=1.12.1
 ARG VULKANVERSION=1.4.321.1
 
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
 RUN dnf install -y yum-utils ccache gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ gcc-toolset-11-binutils \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
 ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
-ARG VULKANVERSION
-RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
-    && tar xvf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
-    && dnf -y install ninja-build \
-    && ln -s /usr/bin/python3 /usr/bin/python \  
-    && /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
-    && /${VULKANVERSION}/vulkansdk -j 8 shaderc
-RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
-    && cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib
-ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
 
 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
@@ -33,100 +23,119 @@ ENV CC=clang CXX=clang++
 
 FROM base-${TARGETARCH} AS base
 ARG CMAKEVERSION
+ARG NINJAVERSION
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+RUN dnf install -y unzip \
+    && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux$([ "$(uname -m)" = "aarch64" ] && echo "-aarch64").zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin \
+    && rm /tmp/ninja.zip
+ENV CMAKE_GENERATOR=Ninja
 ENV LDFLAGS=-s
 
 FROM base AS cpu
 RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
 ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
-ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CPU' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CPU' \
-        && cmake --install build --component CPU --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'CPU' -- -l $(nproc) \
+        && cmake --install build --component CPU --strip
 
 FROM base AS cuda-11
 ARG CUDA11VERSION=11.8
 RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
 ENV PATH=/usr/local/cuda-11/bin:$PATH
-ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 11' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'CUDA 11' -- -l $(nproc) \
+        && cmake --install build --component CUDA --strip
 
 FROM base AS cuda-12
 ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
 ENV PATH=/usr/local/cuda-12/bin:$PATH
-ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 12' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'CUDA 12' -- -l $(nproc) \
+        && cmake --install build --component CUDA --strip
 
 
 FROM base AS cuda-13
 ARG CUDA13VERSION=13.0
 RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
 ENV PATH=/usr/local/cuda-13/bin:$PATH
-ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'CUDA 13' \
-        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'CUDA 13' -- -l $(nproc) \
+        && cmake --install build --component CUDA --strip
 
 
 FROM base AS rocm-6
 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
-ARG PARALLEL
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'ROCm 6' \
-        && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
-        && cmake --install build --component HIP --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'ROCm 6' -- -l $(nproc) \
+        && cmake --install build --component HIP --strip
 RUN rm -f dist/lib/ollama/rocm/rocblas/library/*gfx90[06]*
 
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
 ARG CMAKEVERSION
-RUN apt-get update && apt-get install -y curl ccache \
-    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+ARG NINJAVERSION
+RUN apt-get update && apt-get install -y curl ccache unzip \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 \
+    && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux-aarch64.zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin \
+    && rm /tmp/ninja.zip
+ENV CMAKE_GENERATOR=Ninja
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
-ARG PARALLEL
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'JetPack 5' \
-        && cmake --build --parallel ${PARALLEL} --preset 'JetPack 5' \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'JetPack 5' -- -l $(nproc) \
+        && cmake --install build --component CUDA --strip
 
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK6VERSION} AS jetpack-6
 ARG CMAKEVERSION
-RUN apt-get update && apt-get install -y curl ccache \
-    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
+ARG NINJAVERSION
+RUN apt-get update && apt-get install -y curl ccache unzip \
+    && curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1 \
+    && curl -fsSL -o /tmp/ninja.zip https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux-aarch64.zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin \
+    && rm /tmp/ninja.zip
+ENV CMAKE_GENERATOR=Ninja
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
-ARG PARALLEL
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'JetPack 6' \
-        && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \
-        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'JetPack 6' -- -l $(nproc) \
+        && cmake --install build --component CUDA --strip
 
 FROM base AS vulkan
+ARG VULKANVERSION
+RUN ln -s /usr/bin/python3 /usr/bin/python \
+    && wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk.tar.xz \
+    && tar xvf /tmp/vulkansdk.tar.xz -C /tmp \
+    && /tmp/${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
+    && /tmp/${VULKANVERSION}/vulkansdk -j 8 shaderc \
+    && cp -r /tmp/${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
+    && cp -r /tmp/${VULKANVERSION}/x86_64/lib/* /usr/local/lib \
+    && cp -r /tmp/${VULKANVERSION}/x86_64/bin/* /usr/local/bin/ \
+    && rm -rf /tmp/${VULKANVERSION} /tmp/vulkansdk.tar.xz
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'Vulkan' \
-        && cmake --build --parallel --preset 'Vulkan' \
-        && cmake --install build --component Vulkan --strip --parallel 8
+        && cmake --build --preset 'Vulkan' -- -l $(nproc) \
+        && cmake --install build --component Vulkan --strip
 
 FROM base AS mlx
 ARG CUDA13VERSION=13.0
@@ -138,7 +147,6 @@ ENV PATH=/usr/local/cuda-13/bin:$PATH
 ENV BLAS_INCLUDE_DIRS=/usr/include/openblas
 ENV LAPACK_INCLUDE_DIRS=/usr/include/openblas
 ENV CGO_LDFLAGS="-L/usr/local/cuda-13/lib64 -L/usr/local/cuda-13/targets/x86_64-linux/lib/stubs"
-ARG PARALLEL
 WORKDIR /go/src/github.com/ollama/ollama
 COPY CMakeLists.txt CMakePresets.json .
 COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
@@ -150,8 +158,8 @@ ENV PATH=/usr/local/go/bin:$PATH
 RUN go mod download
 RUN --mount=type=cache,target=/root/.ccache \
     cmake --preset 'MLX CUDA 13' -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DLAPACK_INCLUDE_DIRS=/usr/include/openblas \
-        && cmake --build --parallel ${PARALLEL} --preset 'MLX CUDA 13' \
-        && cmake --install build --component MLX --strip --parallel ${PARALLEL}
+        && cmake --build --preset 'MLX CUDA 13' -- -l $(nproc) \
+        && cmake --install build --component MLX --strip
 
 FROM base AS build
 WORKDIR /go/src/github.com/ollama/ollama
@@ -189,7 +197,6 @@ FROM scratch AS rocm
 COPY --from=rocm-6 dist/lib/ollama /lib/ollama
 
 FROM ${FLAVOR} AS archive
-ARG VULKANVERSION
 COPY --from=cpu dist/lib/ollama /lib/ollama
 COPY --from=build /bin/ollama /bin/ollama
 
diff --git a/scripts/env.sh b/scripts/env.sh
index 4f5641fd3..65a970bdc 100644
--- a/scripts/env.sh
+++ b/scripts/env.sh
@@ -16,7 +16,6 @@ OLLAMA_COMMON_BUILD_ARGS="--build-arg=VERSION \
     --build-arg=OLLAMA_FAST_BUILD \
     --build-arg=CUSTOM_CPU_FLAGS \
     --build-arg=GPU_RUNNER_CPU_FLAGS \
-    --build-arg=PARALLEL \
     --build-arg=AMDGPU_TARGETS"
 
 echo "Building Ollama"