diff --git a/Dockerfile b/Dockerfile index 44c07d16f..bddf5c41e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -161,6 +161,9 @@ ARG GOFLAGS="'-ldflags=-w -s'" ENV CGO_ENABLED=1 ARG CGO_CFLAGS ARG CGO_CXXFLAGS +RUN mkdir -p dist/bin +RUN --mount=type=cache,target=/root/.cache/go-build \ + go build -tags mlx -trimpath -buildmode=pie -o dist/bin/ollama-mlx . FROM base AS build WORKDIR /go/src/github.com/ollama/ollama @@ -182,6 +185,7 @@ COPY --from=cuda-12 dist/lib/ollama /lib/ollama/ COPY --from=cuda-13 dist/lib/ollama /lib/ollama/ COPY --from=vulkan dist/lib/ollama /lib/ollama/ COPY --from=mlx /go/src/github.com/ollama/ollama/dist/lib/ollama /lib/ollama/ +COPY --from=mlx /go/src/github.com/ollama/ollama/dist/bin/ /bin/ FROM --platform=linux/arm64 scratch AS arm64 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/ diff --git a/scripts/build_darwin.sh b/scripts/build_darwin.sh index c5294e04a..7b9937aa0 100755 --- a/scripts/build_darwin.sh +++ b/scripts/build_darwin.sh @@ -73,7 +73,7 @@ _build_darwin() { MLX_CGO_CFLAGS="-O3 -I$(pwd)/$BUILD_DIR/_deps/mlx-c-src -mmacosx-version-min=14.0" MLX_CGO_LDFLAGS="-L$(pwd)/$BUILD_DIR/lib/ollama -lmlxc -lmlx -Wl,-rpath,@executable_path -lc++ -framework Metal -framework Foundation -framework Accelerate -mmacosx-version-min=14.0" fi - GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX/imagegen ./x/imagegen/cmd/engine + GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 CGO_CFLAGS="$MLX_CGO_CFLAGS" CGO_LDFLAGS="$MLX_CGO_LDFLAGS" go build -tags mlx -o $INSTALL_PREFIX/ollama-mlx . GOOS=darwin GOARCH=$ARCH CGO_ENABLED=1 go build -o $INSTALL_PREFIX . done } @@ -82,12 +82,12 @@ _sign_darwin() { status "Creating universal binary..." 
mkdir -p dist/darwin lipo -create -output dist/darwin/ollama dist/darwin-*/ollama - lipo -create -output dist/darwin/imagegen dist/darwin-*/imagegen + lipo -create -output dist/darwin/ollama-mlx dist/darwin-*/ollama-mlx chmod +x dist/darwin/ollama - chmod +x dist/darwin/imagegen + chmod +x dist/darwin/ollama-mlx if [ -n "$APPLE_IDENTITY" ]; then - for F in dist/darwin/ollama dist/darwin-*/lib/ollama/* dist/darwin/imagegen; do + for F in dist/darwin/ollama dist/darwin-*/lib/ollama/* dist/darwin/ollama-mlx; do codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime $F done @@ -154,7 +154,7 @@ _build_macapp() { mkdir -p dist/Ollama.app/Contents/Resources if [ -d dist/darwin-amd64 ]; then lipo -create -output dist/Ollama.app/Contents/Resources/ollama dist/darwin-amd64/ollama dist/darwin-arm64/ollama - lipo -create -output dist/Ollama.app/Contents/Resources/imagegen dist/darwin-amd64/imagegen dist/darwin-arm64/imagegen + lipo -create -output dist/Ollama.app/Contents/Resources/ollama-mlx dist/darwin-amd64/ollama-mlx dist/darwin-arm64/ollama-mlx for F in dist/darwin-amd64/lib/ollama/*mlx*.dylib ; do lipo -create -output dist/darwin/$(basename $F) $F dist/darwin-arm64/lib/ollama/$(basename $F) done @@ -164,13 +164,13 @@ _build_macapp() { cp -a dist/darwin/ollama dist/Ollama.app/Contents/Resources/ollama cp dist/darwin/*.so dist/darwin/*.dylib dist/Ollama.app/Contents/Resources/ fi - cp -a dist/darwin/imagegen dist/Ollama.app/Contents/Resources/imagegen + cp -a dist/darwin/ollama-mlx dist/Ollama.app/Contents/Resources/ollama-mlx chmod a+x dist/Ollama.app/Contents/Resources/ollama # Sign if [ -n "$APPLE_IDENTITY" ]; then codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime dist/Ollama.app/Contents/Resources/ollama - for lib in dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/imagegen ; do + for lib in 
dist/Ollama.app/Contents/Resources/*.so dist/Ollama.app/Contents/Resources/*.dylib dist/Ollama.app/Contents/Resources/ollama-mlx ; do codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier ai.ollama.ollama --options=runtime ${lib} done codesign -f --timestamp -s "$APPLE_IDENTITY" --identifier com.electron.ollama --deep --options=runtime dist/Ollama.app @@ -178,7 +178,7 @@ _build_macapp() { rm -f dist/Ollama-darwin.zip ditto -c -k --keepParent dist/Ollama.app dist/Ollama-darwin.zip - (cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama imagegen *.so *.dylib) | gzip -9vc > dist/ollama-darwin.tgz + (cd dist/Ollama.app/Contents/Resources/; tar -cf - ollama ollama-mlx *.so *.dylib) | gzip -9vc > dist/ollama-darwin.tgz # Notarize and Staple if [ -n "$APPLE_IDENTITY" ]; then diff --git a/x/README.md b/x/README.md index 22da670a4..5af087380 100644 --- a/x/README.md +++ b/x/README.md @@ -1,24 +1,50 @@ -# Experimental Features +# Experimental Features ## MLX Backend We're working on a new experimental backend based on the [MLX project](https://github.com/ml-explore/mlx) -Support is currently limited to MacOS and Linux with CUDA GPUs. We're looking to add support for Windows CUDA soon, and other GPU vendors. To build: +Support is currently limited to macOS and Linux with CUDA GPUs. We're looking to add support for Windows CUDA soon, and other GPU vendors. -``` +### Building ollama-mlx + +The `ollama-mlx` binary is a separate build of Ollama with MLX support enabled. This enables experimental features like image generation. + +#### macOS (Apple Silicon and Intel) + +```bash +# Build MLX backend libraries cmake --preset MLX cmake --build --preset MLX --parallel cmake --install build --component MLX -go build -tags mlx . + +# Build ollama-mlx binary +go build -tags mlx -o ollama-mlx . ``` -On linux, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with the default Ollama NVIDIA GPU architectures enabled. 
+#### Linux (CUDA) + +On Linux, use the preset "MLX CUDA 13" or "MLX CUDA 12" to enable CUDA with the default Ollama NVIDIA GPU architectures enabled: + +```bash +# Build MLX backend libraries with CUDA support +cmake --preset 'MLX CUDA 13' +cmake --build --preset 'MLX CUDA 13' --parallel +cmake --install build --component MLX + +# Build ollama-mlx binary +CGO_CFLAGS="-O3 -I$(pwd)/build/_deps/mlx-c-src" \ +CGO_LDFLAGS="-L$(pwd)/build/lib/ollama -lmlxc -lmlx" \ +go build -tags mlx -o ollama-mlx . +``` + +#### Using build scripts + +The build scripts automatically create the `ollama-mlx` binary: + +- **macOS**: `./scripts/build_darwin.sh` produces `dist/darwin/ollama-mlx` +- **Linux**: `./scripts/build_linux.sh` produces `ollama-mlx` in the output archives ## Image Generation -Based on the experimental MLX backend, we're working on adding imagegen support. After running the cmake commands above: - -``` -go build -o imagegen ./x/imagegen/cmd/engine -``` +Image generation is built into the `ollama-mlx` binary. Run `ollama-mlx serve` to start the server with image generation support enabled. 
diff --git a/x/imagegen/server.go b/x/imagegen/server.go index e96bdc08a..e980b4944 100644 --- a/x/imagegen/server.go +++ b/x/imagegen/server.go @@ -70,7 +70,7 @@ func NewServer(modelName string) (*Server, error) { port = rand.Intn(65535-49152) + 49152 } - // Get the ollama executable path + // Get the ollama-mlx executable path (in same directory as current executable) exe, err := os.Executable() if err != nil { return nil, fmt.Errorf("unable to lookup executable path: %w", err) @@ -78,9 +78,10 @@ func NewServer(modelName string) (*Server, error) { if eval, err := filepath.EvalSymlinks(exe); err == nil { exe = eval } + mlxExe := filepath.Join(filepath.Dir(exe), "ollama-mlx") - // Spawn subprocess: ollama runner --image-engine --model --port - cmd := exec.Command(exe, "runner", "--image-engine", "--model", modelName, "--port", strconv.Itoa(port)) + // Spawn subprocess: ollama-mlx runner --image-engine --model --port + cmd := exec.Command(mlxExe, "runner", "--image-engine", "--model", modelName, "--port", strconv.Itoa(port)) cmd.Env = os.Environ() s := &Server{ @@ -113,7 +114,7 @@ func NewServer(modelName string) (*Server, error) { } }() - slog.Info("starting image runner subprocess", "model", modelName, "port", port) + slog.Info("starting ollama-mlx image runner subprocess", "exe", mlxExe, "model", modelName, "port", port) if err := cmd.Start(); err != nil { return nil, fmt.Errorf("failed to start image runner: %w", err) }