[GH-ISSUE #12303] offloaded 0/35 layers to GPU on gfx1103 #54687

Closed
opened 2026-04-29 06:54:56 -05:00 by GiteaMirror · 26 comments

Originally created by @Pekkari on GitHub (Sep 16, 2025).
Original GitHub issue: https://github.com/ollama/ollama/issues/12303

Originally assigned to: @dhiltgen on GitHub.

What is the issue?

Hi,

ROCm 7.0 is coming, and building it using TheRock now seems possible, both by following the Ubuntu instructions with the Ubuntu docker image and via the manylinux version, which is more appropriate for integrating with Ollama. So I ran a build of TheRock using the following instructions:

$ podman run --name rocm-build --read-only --cap-drop all -v /root/rocm -it ghcr.io/rocm/therock_build_manylinux_x86_64@sha256:543ba2609de3571d2c64f3872e5f1af42fdfa90d074a7baccb1db120c9514be2
$ cd /root/rocm/
$ git clone https://github.com/ROCm/TheRock.git
$ cd TheRock
$ ./build_tools/fetch_sources.py
$ eval "$(./build_tools/setup_ccache.py)"
$ python3 -m venv .venv
$ . .venv/bin/activate
$ pip install --cache-dir=/root/rocm/.cache  -r requirements.txt
$ cmake -B build -GNinja . -DTHEROCK_AMDGPU_TARGETS=gfx1103  -DBUILD_TESTING=OFF -DTHEROCK_ENABLE_RCCL=OFF
$ cmake --build build
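
Before wiring this into ollama, a quick sanity check that the build actually produced gfx1103 kernel objects (path assumed from TheRock's dist layout, which mirrors a normal ROCm install):

$ ls build/dist/rocm/lib/rocblas/library/ | grep gfx1103

If nothing matches here, the GPU may still be detected later, but rocBLAS will have no kernels for it.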

I copied the build/dist/rocm folder over to the root of the project and made these small modifications to the ollama source code to consume the ROCm installation during the podman build:

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d62c8f99..f6bb7c08 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -100,7 +100,7 @@ if(CMAKE_HIP_COMPILER)
 
     find_package(hip REQUIRED)
     if(NOT AMDGPU_TARGETS)
-        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[012]|120[01])$")
+        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[0123]|120[01])$")
     elseif(WIN32 AND WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX)
         list(FILTER AMDGPU_TARGETS EXCLUDE REGEX ${WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX})
     endif()
diff --git a/CMakePresets.json b/CMakePresets.json
index ab2cfe9d..845b223d 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -52,7 +52,7 @@
       "inherits": [ "ROCm" ],
       "cacheVariables": {
         "CMAKE_HIP_FLAGS": "-parallel-jobs=4",
-        "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"
+        "AMDGPU_TARGETS": "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1103;gfx1151;gfx1200;gfx1201;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"
       }
     }
   ],
diff --git a/Dockerfile b/Dockerfile
index 0dc3c126..b81c1889 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
 
 ARG FLAVOR=${TARGETARCH}
 
-ARG ROCMVERSION=6.3.3
+ARG ROCMVERSION=6.4.3
 ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
@@ -51,6 +51,7 @@ RUN --mount=type=cache,target=/root/.ccache \
 FROM base AS rocm-6
 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
+    --mount=type=bind,source=rocm,target=/opt/rocm,Z \
     cmake --preset 'ROCm 6' \
         && cmake --build --parallel --preset 'ROCm 6' \
         && cmake --install build --component HIP --strip --parallel 8
diff --git a/discover/amd_linux.go b/discover/amd_linux.go
index 0f2aa067..86b531a8 100644
--- a/discover/amd_linux.go
+++ b/discover/amd_linux.go
@@ -42,7 +42,7 @@ const (
 
 var (
        // Used to validate if the given ROCm lib is usable
-       ROCmLibGlobs          = []string{"libhipblas.so.2*", "rocblas"} // TODO - probably include more coverage of files here...
+       ROCmLibGlobs          = []string{"libhipblas.so.3*", "rocblas"} // TODO - probably include more coverage of files here...
        RocmStandardLocations = []string{"/opt/rocm/lib", "/usr/lib64"}
 )
 
@@ -438,7 +438,7 @@ func AMDValidateLibDir() (string, error) {
        }
 
        // Well known ollama installer path
-       installedRocmDir := "/usr/share/ollama/lib/rocm"
+       installedRocmDir := "/usr/lib/ollama/rocm"
        if rocmLibUsable(installedRocmDir) {
                return installedRocmDir, nil
        }
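
The CMakeLists.txt change just widens the character class so gfx1103 survives the filter; since the pattern is plain extended-regex syntax, it can be sanity-checked with grep -E:

$ echo gfx1103 | grep -E '^gfx(900|94[012]|101[02]|1030|110[0123]|120[01])$'
gfx1103

With the original 110[012] class the command prints nothing, i.e. gfx1103 would be filtered out of AMDGPU_TARGETS.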

With these, I can run a test build that doesn't require HSA_OVERRIDE_GFX_VERSION to detect the GPU:

podman run --name ollama-rocm --pod ollama -e OLLAMA_CONTEXT_LENGTH=8192 -v c47c4cabdfb84c606efb5887fea55b20b1673004cedcf577c80d89779a0618a3:/root/.ollama --device /dev/accel --device /dev/dri --device /dev/kfd --cap-drop all --read-only --ipc=host --shm-size=8G -it ollama:rocm-7
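
(As an aside, the --device flags above are what expose the GPU to the container; a quick way to confirm they landed is:

$ podman exec -it ollama-rocm ls -l /dev/kfd /dev/dri

which should show the kfd node and the dri render nodes.)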

Running this, I get the following output, which detects the GPU and attempts to use it for inference with gemma3:

time=2025-09-16T05:32:21.344Z level=INFO source=routes.go:1331 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:8192 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NEW_ESTIMATES:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]"
time=2025-09-16T05:32:21.346Z level=INFO source=images.go:477 msg="total blobs: 20"
time=2025-09-16T05:32:21.346Z level=INFO source=images.go:484 msg="total unused blobs removed: 0"
[GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
 - using env:   export GIN_MODE=release
 - using code:  gin.SetMode(gin.ReleaseMode)

[GIN-debug] HEAD   /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func1 (5 handlers)
[GIN-debug] GET    /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func2 (5 handlers)
[GIN-debug] HEAD   /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func3 (5 handlers)
[GIN-debug] GET    /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func4 (5 handlers)
[GIN-debug] POST   /api/pull                 --> github.com/ollama/ollama/server.(*Server).PullHandler-fm (5 handlers)
[GIN-debug] POST   /api/push                 --> github.com/ollama/ollama/server.(*Server).PushHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] GET    /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] POST   /api/show                 --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (5 handlers)
[GIN-debug] DELETE /api/delete               --> github.com/ollama/ollama/server.(*Server).DeleteHandler-fm (5 handlers)
[GIN-debug] POST   /api/create               --> github.com/ollama/ollama/server.(*Server).CreateHandler-fm (5 handlers)
[GIN-debug] POST   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).CreateBlobHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).HeadBlobHandler-fm (5 handlers)
[GIN-debug] POST   /api/copy                 --> github.com/ollama/ollama/server.(*Server).CopyHandler-fm (5 handlers)
[GIN-debug] GET    /api/ps                   --> github.com/ollama/ollama/server.(*Server).PsHandler-fm (5 handlers)
[GIN-debug] POST   /api/generate             --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (5 handlers)
[GIN-debug] POST   /api/chat                 --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (5 handlers)
[GIN-debug] POST   /api/embed                --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (5 handlers)
[GIN-debug] POST   /api/embeddings           --> github.com/ollama/ollama/server.(*Server).EmbeddingsHandler-fm (5 handlers)
[GIN-debug] POST   /v1/chat/completions      --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (6 handlers)
[GIN-debug] POST   /v1/completions           --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (6 handlers)
[GIN-debug] POST   /v1/embeddings            --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models                --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models/:model         --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (6 handlers)
time=2025-09-16T05:32:21.347Z level=INFO source=routes.go:1384 msg="Listening on [::]:11434 (version 0.0.0)"
time=2025-09-16T05:32:21.347Z level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-16T05:32:21.350Z level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
time=2025-09-16T05:32:21.352Z level=INFO source=amd_linux.go:390 msg="amdgpu is supported" gpu=0 gpu_type=gfx1103
time=2025-09-16T05:32:21.352Z level=INFO source=types.go:131 msg="inference compute" id=0 library=rocm variant="" compute=gfx1103 driver=0.0 name=1002:1900 total="16.0 GiB" available="9.3 GiB"
time=2025-09-16T05:32:21.352Z level=INFO source=routes.go:1425 msg="entering low vram mode" "total vram"="16.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/16 - 05:32:52 | 200 |     137.779µs |       127.0.0.1 | GET      "/api/version"
time=2025-09-16T05:33:09.672Z level=INFO source=server.go:398 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --model /root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 --port 46841"
time=2025-09-16T05:33:09.673Z level=INFO source=server.go:503 msg="system memory" total="46.9 GiB" free="11.7 GiB" free_swap="8.0 GiB"
time=2025-09-16T05:33:09.674Z level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 library=rocm parallel=1 required="5.4 GiB" gpus=1
time=2025-09-16T05:33:09.675Z level=INFO source=server.go:543 msg=offload library=rocm layers.requested=-1 layers.model=35 layers.offload=35 layers.split=[35] memory.available="[9.3 GiB]" memory.gpu_overhead="0 B" memory.required.full="5.4 GiB" memory.required.partial="5.4 GiB" memory.required.kv="334.0 MiB" memory.required.allocations="[5.4 GiB]" memory.weights.total="2.3 GiB" memory.weights.repeating="1.8 GiB" memory.weights.nonrepeating="525.0 MiB" memory.graph.full="517.0 MiB" memory.graph.partial="1.0 GiB" projector.weights="795.9 MiB" projector.graph="1.0 GiB"
time=2025-09-16T05:33:09.686Z level=INFO source=runner.go:1251 msg="starting ollama engine"
time=2025-09-16T05:33:09.687Z level=INFO source=runner.go:1286 msg="Server listening on 127.0.0.1:46841"
time=2025-09-16T05:33:09.698Z level=INFO source=runner.go:1170 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:8192 KvCacheType: NumThreads:8 GPULayers:35[ID:0 Layers:35(0..34)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-09-16T05:33:09.750Z level=INFO source=ggml.go:131 msg="" architecture=gemma3 file_type=Q4_K_M name="" description="" num_tensors=883 num_key_values=36
load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-16T05:33:09.796Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:498 msg="offloaded 0/35 layers to GPU"
time=2025-09-16T05:33:10.084Z level=INFO source=backend.go:315 msg="model weights" device=CPU size="3.6 GiB"
time=2025-09-16T05:33:10.084Z level=INFO source=backend.go:326 msg="kv cache" device=CPU size="334.0 MiB"
time=2025-09-16T05:33:10.084Z level=INFO source=backend.go:337 msg="compute graph" device=CPU size="1.1 GiB"
time=2025-09-16T05:33:10.084Z level=INFO source=backend.go:342 msg="total memory" size="5.0 GiB"
time=2025-09-16T05:33:10.084Z level=INFO source=sched.go:473 msg="loaded runners" count=1
time=2025-09-16T05:33:10.084Z level=INFO source=server.go:1250 msg="waiting for llama runner to start responding"
time=2025-09-16T05:33:10.084Z level=INFO source=server.go:1284 msg="waiting for server to become available" status="llm server loading model"
time=2025-09-16T05:33:11.853Z level=INFO source=server.go:1288 msg="llama runner started in 2.17 seconds"

However, the layers are reportedly not offloaded to the GPU, as this snippet shows:

time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:498 msg="offloaded 0/35 layers to GPU"

No other kind of error seems to be visible; the GPU is detected, but when the runner attempts to load layers onto it, nothing is offloaded.
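
One way to dig further is to rerun with debug logging, which should make the backend loader more talkative about why libggml-hip.so is skipped (OLLAMA_DEBUG is the same variable already visible in the server config above):

$ podman run ... -e OLLAMA_DEBUG=1 ... ollama:rocm-7    # same command as above, plus debug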

Thanks!

Relevant log output

time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-09-16T05:33:10.083Z level=INFO source=ggml.go:498 msg="offloaded 0/35 layers to GPU"

OS

Fedora 42

GPU

Radeon 780M Graphics
65:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI] HawkPoint1 (rev c5)

CPU

AMD Ryzen 7 8845HS

Ollama version

v0.11.10

GiteaMirror added the amd and bug labels 2026-04-29 06:54:57 -05:00

@rick-github commented on GitHub (Sep 16, 2025):

load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-16T05:33:09.796Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)

No ROCm backends found. What's in /usr/lib/ollama?


@Pekkari commented on GitHub (Sep 16, 2025):

$ podman exec -it ollama-rocm /bin/bash
root@ollama:/# ls /usr/lib/ollama/
libggml-base.so  libggml-cpu-alderlake.so  libggml-cpu-haswell.so  libggml-cpu-icelake.so  libggml-cpu-sandybridge.so  libggml-cpu-skylakex.so  libggml-cpu-sse42.so  libggml-cpu-x64.so  libggml-hip.so  rocm
root@ollama:/#

So both libggml-hip.so and the rocm folder are there:

# ls -la /usr/lib/ollama/rocm/
total 187552
drwxr-xr-x. 1 root root      882 Sep 15 14:18 .
drwxr-xr-x. 1 root root      376 Sep 15 15:08 ..
lrwxrwxrwx. 1 root root       19 Sep 15 14:18 libamd_comgr.so.3 -> libamd_comgr.so.3.0
-rwxr-xr-x. 1 root root 66865808 Sep  7 11:15 libamd_comgr.so.3.0
lrwxrwxrwx. 1 root root       35 Sep 15 14:18 libamdhip64.so.7 -> libamdhip64.so.7.1.25350-2806f9cdf7
-rwxr-xr-x. 1 root root 26888232 Sep  7 11:16 libamdhip64.so.7.1.25350-2806f9cdf7
lrwxrwxrwx. 1 root root       17 Sep 15 14:18 libhipblas.so.3 -> libhipblas.so.3.1
-rwxr-xr-x. 1 root root   933304 Sep  7 13:10 libhipblas.so.3.1
lrwxrwxrwx. 1 root root       19 Sep 15 14:18 libhipblaslt.so.1 -> libhipblaslt.so.1.1
-rwxr-xr-x. 1 root root 10169136 Sep  7 12:33 libhipblaslt.so.1.1
lrwxrwxrwx. 1 root root       26 Sep 15 14:18 libhsa-runtime64.so.1 -> libhsa-runtime64.so.1.18.0
-rwxr-xr-x. 1 root root  4148624 Sep  7 11:15 libhsa-runtime64.so.1.18.0
lrwxrwxrwx. 1 root root       17 Sep 15 14:18 librocblas.so.5 -> librocblas.so.5.1
-rwxr-xr-x. 1 root root 31645000 Sep  7 12:44 librocblas.so.5.1
-rwxr-xr-x. 1 root root   362609 Sep  7 10:10 librocm_sysdeps_drm.so.2
-rwxr-xr-x. 1 root root   240689 Sep  7 10:10 librocm_sysdeps_drm_amdgpu.so.1
-rwxr-xr-x. 1 root root  1203833 Sep  7 10:12 librocm_sysdeps_elf.so.1
-rwxr-xr-x. 1 root root   224913 Sep  7 10:10 librocm_sysdeps_numa.so.1
lrwxrwxrwx. 1 root root       32 Sep 15 14:18 librocprofiler-register.so.0 -> librocprofiler-register.so.0.5.0
-rwxr-xr-x. 1 root root   931896 Sep  7 10:10 librocprofiler-register.so.0.5.0
lrwxrwxrwx. 1 root root       19 Sep 15 14:18 librocsolver.so.0 -> librocsolver.so.0.7
-rwxr-xr-x. 1 root root 48391120 Sep  7 13:03 librocsolver.so.0.7
drwxr-xr-x. 1 root root       14 Sep 15 14:18 rocblas
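
For completeness, assuming ldd is available in the image, a check for unresolved sonames (which would explain the HIP backend silently failing to load):

$ podman exec -it ollama-rocm sh -c 'LD_LIBRARY_PATH=/usr/lib/ollama/rocm ldd /usr/lib/ollama/libggml-hip.so | grep "not found"'

An empty result means all dependencies resolve.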

@Pekkari commented on GitHub (Sep 16, 2025):

I spoke too soon; the release news just landed, so now it's possible to simply change the ROCm version from 6.3.3 to 7.0 and try your luck, with no self-built ROCm needed anymore, so I'm on my way to try it out. The small changes to the CMakeLists.txt file and discover/amd_linux.go are probably still needed to find the new versions.
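
(The version bump itself is a one-line change to the ARG in the stock Dockerfile, e.g.:

$ sed -i 's/ROCMVERSION=6.3.3/ROCMVERSION=7.0/' Dockerfile

matching the diff shown earlier.)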


@Pekkari commented on GitHub (Sep 16, 2025):

Still the same after building, but now the override variables are needed for it to show any sort of attempt to use the GPU:

time=2025-09-16T13:51:20.709Z level=INFO source=routes.go:1384 msg="Listening on [::]:11434 (version 0.0.0)"
time=2025-09-16T13:51:20.709Z level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-16T13:51:20.711Z level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
time=2025-09-16T13:51:20.712Z level=INFO source=amd_linux.go:393 msg="skipping rocm gfx compatibility check" HSA_OVERRIDE_GFX_VERSION=11.0.3
time=2025-09-16T13:51:20.712Z level=INFO source=types.go:131 msg="inference compute" id=0 library=rocm variant="" compute=gfx1103 driver=0.0 name=1002:1900 total="16.0 GiB" available="14.5 GiB"
time=2025-09-16T13:51:20.712Z level=INFO source=routes.go:1425 msg="entering low vram mode" "total vram"="16.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/16 - 13:52:18 | 200 |    1.262191ms |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/16 - 13:52:18 | 200 |      93.345µs |       127.0.0.1 | GET      "/api/ps"
[GIN] 2025/09/16 - 13:52:21 | 200 |      67.677µs |       127.0.0.1 | GET      "/api/version"
[GIN] 2025/09/16 - 13:52:22 | 200 |     597.423µs |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/16 - 13:52:22 | 200 |      13.485µs |       127.0.0.1 | GET      "/api/ps"
time=2025-09-16T13:53:16.969Z level=INFO source=server.go:398 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --model /root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 --port 45887"
time=2025-09-16T13:53:16.970Z level=INFO source=server.go:503 msg="system memory" total="46.9 GiB" free="14.7 GiB" free_swap="4.7 GiB"
time=2025-09-16T13:53:16.971Z level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 library=rocm parallel=1 required="5.4 GiB" gpus=1
time=2025-09-16T13:53:16.972Z level=INFO source=server.go:543 msg=offload library=rocm layers.requested=-1 layers.model=35 layers.offload=35 layers.split=[35] memory.available="[9.5 GiB]" memory.gpu_overhead="0 B" memory.required.full="5.4 GiB" memory.required.partial="5.4 GiB" memory.required.kv="334.0 MiB" memory.required.allocations="[5.4 GiB]" memory.weights.total="2.3 GiB" memory.weights.repeating="1.8 GiB" memory.weights.nonrepeating="525.0 MiB" memory.graph.full="517.0 MiB" memory.graph.partial="1.0 GiB" projector.weights="795.9 MiB" projector.graph="1.0 GiB"
time=2025-09-16T13:53:16.980Z level=INFO source=runner.go:1251 msg="starting ollama engine"
time=2025-09-16T13:53:16.980Z level=INFO source=runner.go:1286 msg="Server listening on 127.0.0.1:45887"
time=2025-09-16T13:53:16.983Z level=INFO source=runner.go:1170 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:8192 KvCacheType: NumThreads:8 GPULayers:35[ID:0 Layers:35(0..34)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-09-16T13:53:17.034Z level=INFO source=ggml.go:131 msg="" architecture=gemma3 file_type=Q4_K_M name="" description="" num_tensors=883 num_key_values=36
load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-16T13:53:17.040Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
time=2025-09-16T13:53:17.344Z level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-09-16T13:53:17.344Z level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-09-16T13:53:17.344Z level=INFO source=ggml.go:498 msg="offloaded 0/35 layers to GPU"
time=2025-09-16T13:53:17.344Z level=INFO source=backend.go:315 msg="model weights" device=CPU size="3.6 GiB"
time=2025-09-16T13:53:17.344Z level=INFO source=backend.go:326 msg="kv cache" device=CPU size="334.0 MiB"
time=2025-09-16T13:53:17.344Z level=INFO source=backend.go:337 msg="compute graph" device=CPU size="1.1 GiB"
time=2025-09-16T13:53:17.344Z level=INFO source=backend.go:342 msg="total memory" size="5.0 GiB"
time=2025-09-16T13:53:17.344Z level=INFO source=sched.go:473 msg="loaded runners" count=1
time=2025-09-16T13:53:17.344Z level=INFO source=server.go:1250 msg="waiting for llama runner to start responding"
time=2025-09-16T13:53:17.345Z level=INFO source=server.go:1284 msg="waiting for server to become available" status="llm server loading model"
time=2025-09-16T13:53:18.604Z level=INFO source=server.go:1288 msg="llama runner started in 1.63 seconds"
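
For reference, the override was passed like the other environment variables, which is what produces the "skipping rocm gfx compatibility check" line above:

$ podman run ... -e HSA_OVERRIDE_GFX_VERSION=11.0.3 ... ollama:rocm-7    # rest of the command as before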

@rick-github commented on GitHub (Sep 16, 2025):

load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-16T13:53:17.040Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)

ROCm backend still not found.


@Pekkari commented on GitHub (Sep 16, 2025):

> load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
> time=2025-09-16T13:53:17.040Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
>
> ROCm backend still not found.

Yes, this time it seems to be missing hsaco files: rocm is present in the expected path, but it only includes gfx1101 and gfx1102 hsaco files, no gfx1103, so it seems self-building is mandatory:

root@ollama:/# ls -la /usr/lib/ollama/
total 637372
drwxr-xr-x. 1 root root       376 Sep 16 13:49 .
drwxr-xr-x. 1 root root        12 Sep 16 13:49 ..
-rwxr-xr-x. 1 root root    628584 Sep 16 13:24 libggml-base.so
-rwxr-xr-x. 1 root root    771168 Sep 16 13:24 libggml-cpu-alderlake.so
-rwxr-xr-x. 1 root root    771168 Sep 16 13:24 libggml-cpu-haswell.so
-rwxr-xr-x. 1 root root    902240 Sep 16 13:24 libggml-cpu-icelake.so
-rwxr-xr-x. 1 root root    713888 Sep 16 13:24 libggml-cpu-sandybridge.so
-rwxr-xr-x. 1 root root    902240 Sep 16 13:24 libggml-cpu-skylakex.so
-rwxr-xr-x. 1 root root    533792 Sep 16 13:24 libggml-cpu-sse42.so
-rwxr-xr-x. 1 root root    525600 Sep 16 13:24 libggml-cpu-x64.so
-rwxr-xr-x. 1 root root 646895344 Sep 16 13:42 libggml-hip.so
drwxr-xr-x. 1 root root       960 Sep 16 13:42 rocm
root@ollama:/# ls -la /usr/lib/ollama/rocm/
total 903832
drwxr-xr-x. 1 root root       960 Sep 16 13:42 .
drwxr-xr-x. 1 root root       376 Sep 16 13:49 ..
lrwxrwxrwx. 1 root root        25 Sep 16 13:42 libamd_comgr.so.3 -> libamd_comgr.so.3.0.70000
-rwxr-xr-x. 1 root root 167238128 Aug 16 21:38 libamd_comgr.so.3.0.70000
lrwxrwxrwx. 1 root root        24 Sep 16 13:42 libamdhip64.so.7 -> libamdhip64.so.7.0.70000
-rwxr-xr-x. 1 root root  26340840 Aug 16 21:43 libamdhip64.so.7.0.70000
lrwxrwxrwx. 1 root root        15 Sep 16 13:42 libdrm.so.2 -> libdrm.so.2.4.0
-rwxr-xr-x. 1 root root     87904 Oct 11  2023 libdrm.so.2.4.0
lrwxrwxrwx. 1 root root        22 Sep 16 13:42 libdrm_amdgpu.so.1 -> libdrm_amdgpu.so.1.0.0
-rwxr-xr-x. 1 root root     45464 Oct 11  2023 libdrm_amdgpu.so.1.0.0
-rwxr-xr-x. 1 root root    109000 Aug 25 12:56 libelf-0.190.so
lrwxrwxrwx. 1 root root        15 Sep 16 13:42 libelf.so.1 -> libelf-0.190.so
lrwxrwxrwx. 1 root root        23 Sep 16 13:42 libhipblas.so.3 -> libhipblas.so.3.0.70000
-rwxr-xr-x. 1 root root    762840 Aug 17 00:33 libhipblas.so.3.0.70000
lrwxrwxrwx. 1 root root        25 Sep 16 13:42 libhipblaslt.so.1 -> libhipblaslt.so.1.0.70000
-rwxr-xr-x. 1 root root   8616816 Aug 16 23:49 libhipblaslt.so.1.0.70000
lrwxrwxrwx. 1 root root        30 Sep 16 13:42 libhsa-runtime64.so.1 -> libhsa-runtime64.so.1.18.70000
-rwxr-xr-x. 1 root root   3600672 Aug 16 21:38 libhsa-runtime64.so.1.18.70000
lrwxrwxrwx. 1 root root        16 Sep 16 13:42 libnuma.so.1 -> libnuma.so.1.0.0
-rwxr-xr-x. 1 root root     51400 Apr  6  2024 libnuma.so.1.0.0
lrwxrwxrwx. 1 root root        23 Sep 16 13:42 librocblas.so.5 -> librocblas.so.5.0.70000
-rwxr-xr-x. 1 root root  54067000 Aug 17 00:26 librocblas.so.5.0.70000
lrwxrwxrwx. 1 root root        32 Sep 16 13:42 librocprofiler-register.so.0 -> librocprofiler-register.so.0.5.0
-rwxr-xr-x. 1 root root    958088 Aug 16 21:14 librocprofiler-register.so.0.5.0
lrwxrwxrwx. 1 root root        25 Sep 16 13:42 librocsolver.so.0 -> librocsolver.so.0.6.70000
-rwxr-xr-x. 1 root root 663573328 Aug 17 00:30 librocsolver.so.0.6.70000
drwxr-xr-x. 1 root root        14 Sep 16 13:42 rocblas
-rw-r--r--. 1 root root   369422 Aug 16 23:57 TensileManifest.txt
root@ollama:/# ls -la /usr/lib/ollama/rocm/rocblas/library/
[...]
-rw-r--r--. 1 root root    75008 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1030.hsaco
-rw-r--r--. 1 root root    77064 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1100.hsaco
-rw-r--r--. 1 root root    77064 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1101.hsaco
-rw-r--r--. 1 root root    77064 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1102.hsaco
-rw-r--r--. 1 root root    77064 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1151.hsaco
-rw-r--r--. 1 root root    78608 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1200.hsaco
-rw-r--r--. 1 root root    78608 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1201.hsaco
-rw-r--r--. 1 root root    74240 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx908.hsaco
-rw-r--r--. 1 root root    71680 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx90a-xnack+.hsaco
-rw-r--r--. 1 root root    71680 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx90a-xnack-.hsaco
-rw-r--r--. 1 root root    70144 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx942-xnack+.hsaco
-rw-r--r--. 1 root root    70144 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx942-xnack-.hsaco
-rw-r--r--. 1 root root    70144 Aug 17 00:26 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx950.hsaco
-rw-r--r--. 1 root root    89105 Aug 17 00:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx908.co
-rw-r--r--. 1 root root   271298 Aug 17 00:22 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx908.dat
-rw-r--r--. 1 root root    63802 Aug 17 00:18 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx90a.co
-rw-r--r--. 1 root root    80836 Aug 17 00:21 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx90a.dat
-rw-r--r--. 1 root root   635682 Aug 17 00:17 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx942.co
-rw-r--r--. 1 root root  1231892 Aug 17 00:21 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx942.dat
-rw-r--r--. 1 root root    23551 Aug 17 00:16 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx950.co
-rw-r--r--. 1 root root    32968 Aug 17 00:21 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_gfx950.dat
-rw-r--r--. 1 root root    23109 Aug 17 00:21 TensileLibrary_lazy_gfx1030.dat
-rw-r--r--. 1 root root    24273 Aug 17 00:21 TensileLibrary_lazy_gfx1100.dat
-rw-r--r--. 1 root root    24273 Aug 17 00:21 TensileLibrary_lazy_gfx1101.dat
-rw-r--r--. 1 root root    24273 Aug 17 00:21 TensileLibrary_lazy_gfx1102.dat
-rw-r--r--. 1 root root    17716 Aug 17 00:21 TensileLibrary_lazy_gfx1151.dat
-rw-r--r--. 1 root root    17716 Aug 17 00:21 TensileLibrary_lazy_gfx1200.dat
-rw-r--r--. 1 root root    17716 Aug 17 00:21 TensileLibrary_lazy_gfx1201.dat
-rw-r--r--. 1 root root    34855 Aug 17 00:21 TensileLibrary_lazy_gfx908.dat
-rw-r--r--. 1 root root    62470 Aug 17 00:21 TensileLibrary_lazy_gfx90a.dat
-rw-r--r--. 1 root root    59159 Aug 17 00:21 TensileLibrary_lazy_gfx942.dat
-rw-r--r--. 1 root root    34184 Aug 17 00:21 TensileLibrary_lazy_gfx950.dat
-rw-r--r--. 1 root root   369422 Aug 16 23:57 TensileManifest.txt
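
A compact way to see which gfx targets the shipped kernels cover, run inside the container:

$ ls /usr/lib/ollama/rocm/rocblas/library/ | grep -oE 'gfx[0-9a]+' | sort -u

gfx1103 is absent from that list with the official 7.0 packages, which matches the CPU-only fallback.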


@Pekkari commented on GitHub (Sep 17, 2025):

I repeated the ROCm build today following the instructions listed above, and rebuilt Ollama, this time using this diff in the Dockerfile:

$ git diff Dockerfile
diff --git a/Dockerfile b/Dockerfile
index 0dc3c126..ad9c3005 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,8 +1,7 @@
 # vim: filetype=dockerfile
-
 ARG FLAVOR=${TARGETARCH}
 
-ARG ROCMVERSION=6.3.3
+ARG ROCMVERSION=7.0
 ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
@@ -51,9 +50,10 @@ RUN --mount=type=cache,target=/root/.ccache \
 FROM base AS rocm-6
 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
 RUN --mount=type=cache,target=/root/.ccache \
+    --mount=type=bind,source=rocm,target=/opt/rocm-7.0.0,Z \
     cmake --preset 'ROCm 6' \
         && cmake --build --parallel --preset 'ROCm 6' \
-        && cmake --install build --component HIP --strip --parallel 8
+        && cmake --install build --component HIP --strip --parallel 4
 
 FROM --platform=linux/arm64 nvcr.io/nvidia/l4t-jetpack:${JETPACK5VERSION} AS jetpack-5
 ARG CMAKEVERSION

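For what it's worth, the bind mount's relative source=rocm path resolves against the build context, so presumably the build was invoked from the repository root; a hypothetical invocation (the image tag matches the run command below, the rest is assumed):

$ podman build -t ollama:rocm-7 .
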
The build went fine, but the output still shows that no ROCm backend is found:

$ podman run --name ollama-rocm --pod ollama -e OLLAMA_CONTEXT_LENGTH=8192 -v c47c4cabdfb84c606efb5887fea55b20b1673004cedcf577c80d89779a0618a3:/root/.ollama --device /dev/accel --device /dev/dri --device /dev/kfd --cap-drop all --read-only --ipc=host --shm-size=8G -it ollama:rocm-7
time=2025-09-17T15:11:06.706Z level=INFO source=routes.go:1331 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:8192 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NEW_ESTIMATES:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]"
time=2025-09-17T15:11:06.709Z level=INFO source=images.go:477 msg="total blobs: 20"
time=2025-09-17T15:11:06.709Z level=INFO source=images.go:484 msg="total unused blobs removed: 0"
[GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
 - using env:   export GIN_MODE=release
 - using code:  gin.SetMode(gin.ReleaseMode)

[GIN-debug] HEAD   /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func1 (5 handlers)
[GIN-debug] GET    /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func2 (5 handlers)
[GIN-debug] HEAD   /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func3 (5 handlers)
[GIN-debug] GET    /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func4 (5 handlers)
[GIN-debug] POST   /api/pull                 --> github.com/ollama/ollama/server.(*Server).PullHandler-fm (5 handlers)
[GIN-debug] POST   /api/push                 --> github.com/ollama/ollama/server.(*Server).PushHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] GET    /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] POST   /api/show                 --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (5 handlers)
[GIN-debug] DELETE /api/delete               --> github.com/ollama/ollama/server.(*Server).DeleteHandler-fm (5 handlers)
[GIN-debug] POST   /api/create               --> github.com/ollama/ollama/server.(*Server).CreateHandler-fm (5 handlers)
[GIN-debug] POST   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).CreateBlobHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).HeadBlobHandler-fm (5 handlers)
[GIN-debug] POST   /api/copy                 --> github.com/ollama/ollama/server.(*Server).CopyHandler-fm (5 handlers)
[GIN-debug] GET    /api/ps                   --> github.com/ollama/ollama/server.(*Server).PsHandler-fm (5 handlers)
[GIN-debug] POST   /api/generate             --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (5 handlers)
[GIN-debug] POST   /api/chat                 --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (5 handlers)
[GIN-debug] POST   /api/embed                --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (5 handlers)
[GIN-debug] POST   /api/embeddings           --> github.com/ollama/ollama/server.(*Server).EmbeddingsHandler-fm (5 handlers)
[GIN-debug] POST   /v1/chat/completions      --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (6 handlers)
[GIN-debug] POST   /v1/completions           --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (6 handlers)
[GIN-debug] POST   /v1/embeddings            --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models                --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models/:model         --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (6 handlers)
time=2025-09-17T15:11:06.709Z level=INFO source=routes.go:1384 msg="Listening on [::]:11434 (version 0.0.0)"
time=2025-09-17T15:11:06.709Z level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-17T15:11:06.711Z level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
time=2025-09-17T15:11:06.712Z level=INFO source=amd_linux.go:390 msg="amdgpu is supported" gpu=0 gpu_type=gfx1103
time=2025-09-17T15:11:06.712Z level=INFO source=types.go:131 msg="inference compute" id=0 library=rocm variant="" compute=gfx1103 driver=0.0 name=1002:1900 total="16.0 GiB" available="9.8 GiB"
time=2025-09-17T15:11:06.712Z level=INFO source=routes.go:1425 msg="entering low vram mode" "total vram"="16.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/17 - 15:11:27 | 200 |    1.357804ms |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/17 - 15:11:27 | 200 |      64.732µs |       127.0.0.1 | GET      "/api/ps"
[GIN] 2025/09/17 - 15:11:28 | 200 |      35.678µs |       127.0.0.1 | GET      "/api/version"
[GIN] 2025/09/17 - 15:11:30 | 200 |     502.896µs |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/17 - 15:11:30 | 200 |       21.19µs |       127.0.0.1 | GET      "/api/ps"
time=2025-09-17T15:11:47.113Z level=INFO source=server.go:398 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --model /root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 --port 38257"
time=2025-09-17T15:11:47.114Z level=INFO source=server.go:503 msg="system memory" total="46.9 GiB" free="18.3 GiB" free_swap="5.7 GiB"
time=2025-09-17T15:11:47.115Z level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 library=rocm parallel=1 required="5.4 GiB" gpus=1
time=2025-09-17T15:11:47.117Z level=INFO source=server.go:543 msg=offload library=rocm layers.requested=-1 layers.model=35 layers.offload=35 layers.split=[35] memory.available="[9.6 GiB]" memory.gpu_overhead="0 B" memory.required.full="5.4 GiB" memory.required.partial="5.4 GiB" memory.required.kv="334.0 MiB" memory.required.allocations="[5.4 GiB]" memory.weights.total="2.3 GiB" memory.weights.repeating="1.8 GiB" memory.weights.nonrepeating="525.0 MiB" memory.graph.full="517.0 MiB" memory.graph.partial="1.0 GiB" projector.weights="795.9 MiB" projector.graph="1.0 GiB"
time=2025-09-17T15:11:47.126Z level=INFO source=runner.go:1251 msg="starting ollama engine"
time=2025-09-17T15:11:47.127Z level=INFO source=runner.go:1286 msg="Server listening on 127.0.0.1:38257"
time=2025-09-17T15:11:47.129Z level=INFO source=runner.go:1170 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:8192 KvCacheType: NumThreads:8 GPULayers:35[ID:0 Layers:35(0..34)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-09-17T15:11:47.183Z level=INFO source=ggml.go:131 msg="" architecture=gemma3 file_type=Q4_K_M name="" description="" num_tensors=883 num_key_values=36
load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-17T15:11:47.188Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
time=2025-09-17T15:11:47.497Z level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-09-17T15:11:47.497Z level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-09-17T15:11:47.497Z level=INFO source=ggml.go:498 msg="offloaded 0/35 layers to GPU"
time=2025-09-17T15:11:47.498Z level=INFO source=backend.go:315 msg="model weights" device=CPU size="3.6 GiB"
time=2025-09-17T15:11:47.498Z level=INFO source=backend.go:326 msg="kv cache" device=CPU size="334.0 MiB"
time=2025-09-17T15:11:47.498Z level=INFO source=backend.go:337 msg="compute graph" device=CPU size="1.1 GiB"
time=2025-09-17T15:11:47.498Z level=INFO source=backend.go:342 msg="total memory" size="5.0 GiB"
time=2025-09-17T15:11:47.498Z level=INFO source=sched.go:473 msg="loaded runners" count=1
time=2025-09-17T15:11:47.498Z level=INFO source=server.go:1250 msg="waiting for llama runner to start responding"
time=2025-09-17T15:11:47.498Z level=INFO source=server.go:1284 msg="waiting for server to become available" status="llm server loading model"
time=2025-09-17T15:11:48.502Z level=INFO source=server.go:1288 msg="llama runner started in 1.39 seconds"

And the gfx1103 files are indeed there now:

$ podman exec -it ollama-rocm /bin/bash
root@ollama:/# ls -la /usr/lib/ollama/rocm/
total 188188
drwxr-xr-x. 1 root root      882 Sep 17 15:08 .
drwxr-xr-x. 1 root root      376 Sep 17 15:10 ..
lrwxrwxrwx. 1 root root       19 Sep 17 15:08 libamd_comgr.so.3 -> libamd_comgr.so.3.0
-rwxr-xr-x. 1 root root 66869904 Sep 17 09:40 libamd_comgr.so.3.0
lrwxrwxrwx. 1 root root       35 Sep 17 15:08 libamdhip64.so.7 -> libamdhip64.so.7.1.25372-fc79758579
-rwxr-xr-x. 1 root root 26890344 Sep 17 09:42 libamdhip64.so.7.1.25372-fc79758579
lrwxrwxrwx. 1 root root       17 Sep 17 15:08 libhipblas.so.3 -> libhipblas.so.3.1
-rwxr-xr-x. 1 root root   933304 Sep 17 13:51 libhipblas.so.3.1
lrwxrwxrwx. 1 root root       19 Sep 17 15:08 libhipblaslt.so.1 -> libhipblaslt.so.1.1
-rwxr-xr-x. 1 root root 10755248 Sep 17 10:15 libhipblaslt.so.1.1
lrwxrwxrwx. 1 root root       26 Sep 17 15:08 libhsa-runtime64.so.1 -> libhsa-runtime64.so.1.18.0
-rwxr-xr-x. 1 root root  4148456 Sep 17 09:40 libhsa-runtime64.so.1.18.0
lrwxrwxrwx. 1 root root       17 Sep 17 15:08 librocblas.so.5 -> librocblas.so.5.1
-rwxr-xr-x. 1 root root 31640888 Sep 17 10:36 librocblas.so.5.1
-rwxr-xr-x. 1 root root   362609 Sep 16 15:10 librocm_sysdeps_drm.so.2
-rwxr-xr-x. 1 root root   240689 Sep 16 15:10 librocm_sysdeps_drm_amdgpu.so.1
-rwxr-xr-x. 1 root root  1195593 Sep 16 15:10 librocm_sysdeps_elf.so.1
-rwxr-xr-x. 1 root root   224913 Sep 16 15:10 librocm_sysdeps_numa.so.1
lrwxrwxrwx. 1 root root       32 Sep 17 15:08 librocprofiler-register.so.0 -> librocprofiler-register.so.0.5.0
-rwxr-xr-x. 1 root root   931896 Sep 16 15:10 librocprofiler-register.so.0.5.0
lrwxrwxrwx. 1 root root       19 Sep 17 15:08 librocsolver.so.0 -> librocsolver.so.0.7
-rwxr-xr-x. 1 root root 48460168 Sep 17 13:42 librocsolver.so.0.7
drwxr-xr-x. 1 root root       14 Sep 17 15:08 rocblas
root@ollama:/# ls -la /usr/lib/ollama/rocm/rocblas/library/
total 10652
drwxr-xr-x. 1 root root  16636 Sep 17 15:08 .
drwxr-xr-x. 1 root root     14 Sep 17 15:08 ..
-rw-r--r--. 1 root root 261704 Sep 17 10:20 Kernels.so-000-gfx1103.hsaco
-rw-r--r--. 1 root root  39136 Sep 17 10:20 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 181176 Sep 17 10:19 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  33590 Sep 17 10:20 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 150608 Sep 17 10:19 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  39480 Sep 17 10:20 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 180408 Sep 17 10:20 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  33758 Sep 17 10:20 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 149584 Sep 17 10:20 TensileLibrary_Type_4xi8I_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  50661 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 556064 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  48071 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 502992 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  35902 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 213872 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  48198 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 497616 Sep 17 10:20 TensileLibrary_Type_BB_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  49260 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 515760 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  29092 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 264584 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  15630 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  92432 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  25769 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 217080 Sep 17 10:20 TensileLibrary_Type_BS_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6570 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Ailk_BjlkC_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49840 Sep 17 10:19 TensileLibrary_Type_CC_Contraction_l_Ailk_BjlkC_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6558 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49800 Sep 17 10:19 TensileLibrary_Type_CC_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6574 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49800 Sep 17 10:19 TensileLibrary_Type_CC_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6598 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_AlikC_BjlkC_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  51416 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_AlikC_BjlkC_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6586 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_AlikC_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49840 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_AlikC_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6586 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_AlikC_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49072 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_AlikC_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6586 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Alik_BjlkC_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49840 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Alik_BjlkC_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6574 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49288 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6574 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49032 Sep 17 10:20 TensileLibrary_Type_CC_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6366 Sep 17 10:20 TensileLibrary_Type_DD_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  23000 Sep 17 10:19 TensileLibrary_Type_DD_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   7892 Sep 17 10:20 TensileLibrary_Type_DD_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  38200 Sep 17 10:19 TensileLibrary_Type_DD_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6382 Sep 17 10:20 TensileLibrary_Type_DD_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  23000 Sep 17 10:20 TensileLibrary_Type_DD_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6382 Sep 17 10:20 TensileLibrary_Type_DD_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  22488 Sep 17 10:20 TensileLibrary_Type_DD_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  24752 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 206976 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  18792 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 102504 Sep 17 10:19 TensileLibrary_Type_HH_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  18796 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 117368 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  18792 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 101736 Sep 17 10:20 TensileLibrary_Type_HH_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  67184 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 442376 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  47298 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 221304 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  41272 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 121952 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  41296 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 183944 Sep 17 10:20 TensileLibrary_Type_HH_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  42250 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 394856 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  25543 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 222496 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  18699 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 115280 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  28854 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 261512 Sep 17 10:20 TensileLibrary_Type_HS_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  65097 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 543848 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  52773 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 371568 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  62360 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 516072 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  60758 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root 459816 Sep 17 10:20 TensileLibrary_Type_I8I_HPA_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6386 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  61184 Sep 17 10:19 TensileLibrary_Type_SS_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6382 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  40424 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6386 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  39912 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6386 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  49664 Sep 17 10:20 TensileLibrary_Type_SS_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6602 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Ailk_BjlkC_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53424 Sep 17 10:19 TensileLibrary_Type_ZZ_Contraction_l_Ailk_BjlkC_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6598 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53384 Sep 17 10:19 TensileLibrary_Type_ZZ_Contraction_l_Ailk_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6606 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53384 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Ailk_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6630 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_AlikC_BjlkC_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53720 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_AlikC_BjlkC_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6618 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_AlikC_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53424 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_AlikC_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6618 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_AlikC_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53168 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_AlikC_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6618 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_BjlkC_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53424 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_BjlkC_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6606 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53384 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bjlk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root   6606 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback.dat
-rw-r--r--. 1 root root  53128 Sep 17 10:20 TensileLibrary_Type_ZZ_Contraction_l_Alik_Bljk_Cijk_Dijk_fallback_gfx1103.hsaco
-rw-r--r--. 1 root root  17653 Sep 17 10:20 TensileLibrary_lazy_gfx1103.dat
-rw-r--r--. 1 root root  16109 Sep 17 10:19 TensileManifest.txt

Thanks!

@waltercool commented on GitHub (Sep 24, 2025):

Did you find a solution or workaround for this? I have exactly the same problem with gfx1151 using ROCm 7.0.

@Pekkari commented on GitHub (Sep 24, 2025):

I didn't. I found this [review](https://www.phoronix.com/review/amd-rocm-7-strix-halo/4), so it currently seems that Vulkan inference works better on AMD platforms, though that of course may depend on the platform; it is a bit unfair to say it applies to ours, since we didn't get ROCm 7.0 working in the first place.

@waltercool commented on GitHub (Sep 24, 2025):

It may underperform, but [should work](https://www.phoronix.com/forums/forum/linux-graphics-x-org-drivers/open-source-amd-linux/1577749-amd-rocm-7-0-officially-released-with-many-significant-improvements?p=1577796#post1577796) anyway.

While gfx1151 is not officially supported by ROCm 7.0, it works "mostly fine" for me with ComfyUI.

@Pekkari commented on GitHub (Sep 24, 2025):

> It may underperform, but should work anyway.
>
> While gfx1151 is not officially supported by ROCm 7.0, it works "mostly fine" for me with ComfyUI.

Agreed; indeed, the review I pointed to covers that same target, so the Phoronix folks got it to work, but that was the Strix Halo, not mine; mine is an older one. From cmake/therock_amdgpu_targets.cmake:

therock_add_amdgpu_target(gfx1103 "AMD Radeon 780M Laptop iGPU" FAMILY igpu-all gfx110X-all gfx110X-igpu)

# gfx115X family
therock_add_amdgpu_target(gfx1150 "AMD Strix Point iGPU" FAMILY igpu-all gfx115X-all gfx115X-igpu
  EXCLUDE_TARGET_PROJECTS
    rccl  # https://github.com/ROCm/TheRock/issues/150
)
therock_add_amdgpu_target(gfx1151 "AMD Strix Halo iGPU" FAMILY igpu-all gfx115X-all gfx115X-igpu
  EXCLUDE_TARGET_PROJECTS
    rccl  # https://github.com/ROCm/TheRock/issues/150
)

So both should work even though they are not officially supported.
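For what it's worth, a commonly tried workaround when a target lacks prebuilt kernels is to make the HSA runtime report a nearby supported gfx version instead, via the HSA_OVERRIDE_GFX_VERSION variable that already shows up in the server config log above. Whether the gfx1102 kernels behave well on a gfx1103 part is an assumption, not something verified in this thread:

$ podman run --device /dev/dri --device /dev/kfd \
    -e HSA_OVERRIDE_GFX_VERSION=11.0.2 \
    -it ollama:rocm-7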

@dhiltgen commented on GitHub (Sep 24, 2025):

@Pekkari you should enable debug logging to see more details of what it's trying and, hopefully, why it's failing: `OLLAMA_DEBUG=1`

My suspicion is there's a dependency that's not in the LD_LIBRARY_PATH, so when we attempt to dlopen `libggml-hip.so` it fails due to that missing dependency. Use `ldd /usr/lib/ollama/libggml-hip.so` and then verify all the dependencies are in directories being passed to the runner subprocess.
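A minimal version of that check, run inside the container (paths as listed earlier in this thread; pointing LD_LIBRARY_PATH at the bundled rocm directory is an assumption):

$ ldd /usr/lib/ollama/libggml-hip.so | grep "not found"
# if anything is reported missing, expose its directory to the process, e.g.:
$ LD_LIBRARY_PATH=/usr/lib/ollama/rocm OLLAMA_DEBUG=1 ollama serve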

@waltercool commented on GitHub (Sep 24, 2025):

Hi @dhiltgen ,

When using vanilla ollama with [the iGPU restriction](https://github.com/ollama/ollama/blob/main/discover/amd_linux.go#L318) removed, it seems not to detect/use GTT. I have configured 512 MB UMA and 110 GB GTT.

time=2025-09-24T16:58:44.315Z level=DEBUG source=memory.go:269 msg="gpu has too little memory to allocate any layers" id=0 library=rocm variant="" compute=gfx1151 driver=0.0 name=1002:1586 total="512.0 MiB" available="70.7 MiB" minimum_memory=479199232 layer_size="392.9 MiB" gpu_zer_overhead="0 B" partial_offload="512.0 MiB" full_offload="512.0 MiB"
time=2025-09-24T16:58:44.316Z level=DEBUG source=memory.go:411 msg="insufficient VRAM to load any model layers"

So it falls back to CPU offloading.
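
As a side check, the amdgpu driver usually exposes the carve-out and GTT sizes through sysfs; assuming the iGPU is card0 (the node name varies per system), these should print the 512 MB UMA and the ~110 GB GTT in bytes:

$ cat /sys/class/drm/card0/device/mem_info_vram_total
$ cat /sys/class/drm/card0/device/mem_info_gtt_total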


When using ollama-linux-amd-apu, there seems to be a problem with the iGPU data structure here: https://github.com/rjmalagon/ollama-linux-amd-apu/blob/main/discover/amd_linux.go#L597 (note the reported free memory of ~17179869182 GiB below, which is roughly 2^64 bytes expressed in GiB, i.e. a 64-bit unsigned wraparound).

time=2025-09-24T17:02:38.376Z level=INFO source=amd_linux.go:495 msg="amdgpu is supported" gpu=0 gpu_type=gfx1151
time=2025-09-24T17:02:38.376Z level=DEBUG source=gpu.go:410 msg="updating system memory data" before.total="17.5 GiB" before.free="17179869182.5 GiB" before.free_swap="0 B" now.total="17.5 GiB" now.free="17179869182.4 GiB" now.free_swap="0 B"
time=2025-09-24T17:02:38.376Z level=DEBUG source=gpu.go:497 msg="problem refreshing ROCm free memory" error="failed to read sysfs node  open : no such file or directory"
time=2025-09-24T17:02:38.376Z level=DEBUG source=sched.go:188 msg="updating default concurrency" OLLAMA_MAX_LOADED_MODELS=3 gpu_count=1

@Pekkari commented on GitHub (Sep 25, 2025):

> @Pekkari you should enable debug logging to see more details of what it's trying and hopefully why it's failing: OLLAMA_DEBUG=1

Full output follows, since I can't narrow down what the issue is from it:

$ podman run --name ollama-rocm --pod ollama -e OLLAMA_DEBUG=1 -e OLLAMA_CONTEXT_LENGTH=8192 -v c47c4cabdfb84c606efb5887fea55b20b1673004cedcf577c80d89779a0618a3:/root/.ollama --device /dev/accel --device /dev/dri --device /dev/kfd --cap-drop all --read-only --ipc=host --shm-size=8G -it ollama:rocm-7
time=2025-09-25T08:27:22.065Z level=INFO source=routes.go:1331 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:8192 OLLAMA_DEBUG:DEBUG OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NEW_ESTIMATES:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]"
time=2025-09-25T08:27:22.065Z level=INFO source=images.go:477 msg="total blobs: 20"
time=2025-09-25T08:27:22.066Z level=INFO source=images.go:484 msg="total unused blobs removed: 0"
[GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
 - using env:   export GIN_MODE=release
 - using code:  gin.SetMode(gin.ReleaseMode)

[GIN-debug] HEAD   /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func1 (5 handlers)
[GIN-debug] GET    /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func2 (5 handlers)
[GIN-debug] HEAD   /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func3 (5 handlers)
[GIN-debug] GET    /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func4 (5 handlers)
[GIN-debug] POST   /api/pull                 --> github.com/ollama/ollama/server.(*Server).PullHandler-fm (5 handlers)
[GIN-debug] POST   /api/push                 --> github.com/ollama/ollama/server.(*Server).PushHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] GET    /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] POST   /api/show                 --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (5 handlers)
[GIN-debug] DELETE /api/delete               --> github.com/ollama/ollama/server.(*Server).DeleteHandler-fm (5 handlers)
[GIN-debug] POST   /api/create               --> github.com/ollama/ollama/server.(*Server).CreateHandler-fm (5 handlers)
[GIN-debug] POST   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).CreateBlobHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).HeadBlobHandler-fm (5 handlers)
[GIN-debug] POST   /api/copy                 --> github.com/ollama/ollama/server.(*Server).CopyHandler-fm (5 handlers)
[GIN-debug] GET    /api/ps                   --> github.com/ollama/ollama/server.(*Server).PsHandler-fm (5 handlers)
[GIN-debug] POST   /api/generate             --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (5 handlers)
[GIN-debug] POST   /api/chat                 --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (5 handlers)
[GIN-debug] POST   /api/embed                --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (5 handlers)
[GIN-debug] POST   /api/embeddings           --> github.com/ollama/ollama/server.(*Server).EmbeddingsHandler-fm (5 handlers)
[GIN-debug] POST   /v1/chat/completions      --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (6 handlers)
[GIN-debug] POST   /v1/completions           --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (6 handlers)
[GIN-debug] POST   /v1/embeddings            --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models                --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models/:model         --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (6 handlers)
time=2025-09-25T08:27:22.066Z level=INFO source=routes.go:1384 msg="Listening on [::]:11434 (version 0.0.0)"
time=2025-09-25T08:27:22.066Z level=DEBUG source=sched.go:121 msg="starting llm scheduler"
time=2025-09-25T08:27:22.066Z level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-25T08:27:22.067Z level=DEBUG source=gpu.go:98 msg="searching for GPU discovery libraries for NVIDIA"
time=2025-09-25T08:27:22.067Z level=DEBUG source=gpu.go:512 msg="Searching for GPU library" name=libcuda.so*
time=2025-09-25T08:27:22.067Z level=DEBUG source=gpu.go:536 msg="gpu library search" globs="[/usr/lib/ollama/libcuda.so* /usr/local/nvidia/lib/libcuda.so* /usr/local/nvidia/lib64/libcuda.so* /usr/local/cuda*/targets/*/lib/libcuda.so* /usr/lib/*-linux-gnu/nvidia/current/libcuda.so* /usr/lib/*-linux-gnu/libcuda.so* /usr/lib/wsl/lib/libcuda.so* /usr/lib/wsl/drivers/*/libcuda.so* /opt/cuda/lib*/libcuda.so* /usr/local/cuda/lib*/libcuda.so* /usr/lib*/libcuda.so* /usr/local/lib*/libcuda.so*]"
time=2025-09-25T08:27:22.068Z level=DEBUG source=gpu.go:569 msg="discovered GPU libraries" paths=[]
time=2025-09-25T08:27:22.068Z level=DEBUG source=gpu.go:512 msg="Searching for GPU library" name=libcudart.so*
time=2025-09-25T08:27:22.068Z level=DEBUG source=gpu.go:536 msg="gpu library search" globs="[/usr/lib/ollama/libcudart.so* /usr/local/nvidia/lib/libcudart.so* /usr/local/nvidia/lib64/libcudart.so* /usr/lib/ollama/cuda_v*/libcudart.so* /usr/local/cuda/lib64/libcudart.so* /usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so* /usr/lib/x86_64-linux-gnu/libcudart.so* /usr/lib/wsl/lib/libcudart.so* /usr/lib/wsl/drivers/*/libcudart.so* /opt/cuda/lib64/libcudart.so* /usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so* /usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so* /usr/lib/aarch64-linux-gnu/libcudart.so* /usr/local/cuda/lib*/libcudart.so* /usr/lib*/libcudart.so* /usr/local/lib*/libcudart.so*]"
time=2025-09-25T08:27:22.069Z level=DEBUG source=gpu.go:569 msg="discovered GPU libraries" paths=[]
time=2025-09-25T08:27:22.069Z level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:102 msg="evaluating amdgpu node /sys/class/kfd/kfd/topology/nodes/0/properties"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:122 msg="detected CPU /sys/class/kfd/kfd/topology/nodes/0/properties"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:102 msg="evaluating amdgpu node /sys/class/kfd/kfd/topology/nodes/1/properties"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:203 msg="mapping amdgpu to drm sysfs nodes" amdgpu=/sys/class/kfd/kfd/topology/nodes/1/properties vendor=4098 device=6400 unique_id=0
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:237 msg=matched amdgpu=/sys/class/kfd/kfd/topology/nodes/1/properties drm=/sys/class/drm/card1/device
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:343 msg="amdgpu memory" gpu=0 total="16.0 GiB"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:344 msg="amdgpu memory" gpu=0 available="9.8 GiB"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_common.go:16 msg="evaluating potential rocm lib dir /usr/lib/ollama/rocm"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_common.go:44 msg="detected ROCM next to ollama executable /usr/lib/ollama/rocm"
time=2025-09-25T08:27:22.069Z level=DEBUG source=amd_linux.go:375 msg="rocm supported GPUs" types=[gfx1103]
time=2025-09-25T08:27:22.069Z level=INFO source=amd_linux.go:390 msg="amdgpu is supported" gpu=0 gpu_type=gfx1103
time=2025-09-25T08:27:22.069Z level=INFO source=types.go:131 msg="inference compute" id=0 library=rocm variant="" compute=gfx1103 driver=0.0 name=1002:1900 total="16.0 GiB" available="9.8 GiB"
time=2025-09-25T08:27:22.069Z level=INFO source=routes.go:1425 msg="entering low vram mode" "total vram"="16.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/25 - 08:28:01 | 200 |     562.815µs |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/25 - 08:28:01 | 200 |     118.422µs |       127.0.0.1 | GET      "/api/ps"
[GIN] 2025/09/25 - 08:28:01 | 200 |      65.623µs |       127.0.0.1 | GET      "/api/version"
[GIN] 2025/09/25 - 08:28:07 | 200 |      31.159µs |       127.0.0.1 | GET      "/api/version"
[GIN] 2025/09/25 - 08:28:08 | 200 |     466.004µs |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/25 - 08:28:08 | 200 |      23.164µs |       127.0.0.1 | GET      "/api/ps"
[GIN] 2025/09/25 - 08:28:08 | 200 |     453.831µs |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/25 - 08:28:08 | 200 |      20.749µs |       127.0.0.1 | GET      "/api/ps"
time=2025-09-25T08:28:23.820Z level=DEBUG source=gpu.go:402 msg="updating system memory data" before.total="46.9 GiB" before.free="12.2 GiB" before.free_swap="8.0 GiB" now.total="46.9 GiB" now.free="11.6 GiB" now.free_swap="8.0 GiB"
time=2025-09-25T08:28:23.820Z level=DEBUG source=amd_linux.go:492 msg="updating rocm free memory" gpu=0 name=1002:1900 before="9.8 GiB" now="9.8 GiB"
time=2025-09-25T08:28:23.820Z level=DEBUG source=sched.go:188 msg="updating default concurrency" OLLAMA_MAX_LOADED_MODELS=3 gpu_count=1
time=2025-09-25T08:28:23.847Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.alignment default=32
time=2025-09-25T08:28:23.848Z level=DEBUG source=sched.go:208 msg="loading first model" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25
time=2025-09-25T08:28:23.953Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.alignment default=32
time=2025-09-25T08:28:23.955Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eot_token_id default=106
time=2025-09-25T08:28:23.955Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eos_token_ids default="&{size:0 values:[]}"
time=2025-09-25T08:28:23.957Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.attention.layer_norm_rms_epsilon default=9.999999974752427e-07
time=2025-09-25T08:28:23.957Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.local.freq_base default=10000
time=2025-09-25T08:28:23.957Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.global.freq_base default=1e+06
time=2025-09-25T08:28:23.957Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.freq_scale default=1
time=2025-09-25T08:28:23.957Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.mm_tokens_per_image default=256
time=2025-09-25T08:28:23.957Z level=DEBUG source=gpu.go:402 msg="updating system memory data" before.total="46.9 GiB" before.free="11.6 GiB" before.free_swap="8.0 GiB" now.total="46.9 GiB" now.free="11.6 GiB" now.free_swap="8.0 GiB"
time=2025-09-25T08:28:23.957Z level=DEBUG source=amd_linux.go:492 msg="updating rocm free memory" gpu=0 name=1002:1900 before="9.8 GiB" now="9.8 GiB"
time=2025-09-25T08:28:23.957Z level=DEBUG source=server.go:323 msg="adding gpu library" path=/usr/lib/ollama/rocm
time=2025-09-25T08:28:23.957Z level=DEBUG source=server.go:331 msg="adding gpu dependency paths" paths=[/usr/lib/ollama/rocm]
time=2025-09-25T08:28:23.957Z level=INFO source=server.go:398 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --model /root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 --port 44615"
time=2025-09-25T08:28:23.957Z level=DEBUG source=server.go:399 msg=subprocess PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin OLLAMA_DEBUG=1 OLLAMA_HOST=0.0.0.0:11434 LD_LIBRARY_PATH=/usr/lib/ollama/rocm:/usr/lib/ollama/rocm:/usr/lib/ollama:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama OLLAMA_CONTEXT_LENGTH=8192 OLLAMA_MAX_LOADED_MODELS=3 OLLAMA_LIBRARY_PATH=/usr/lib/ollama:/usr/lib/ollama/rocm ROCR_VISIBLE_DEVICES=0
time=2025-09-25T08:28:23.958Z level=DEBUG source=gpu.go:402 msg="updating system memory data" before.total="46.9 GiB" before.free="11.6 GiB" before.free_swap="8.0 GiB" now.total="46.9 GiB" now.free="11.6 GiB" now.free_swap="8.0 GiB"
time=2025-09-25T08:28:23.958Z level=DEBUG source=amd_linux.go:492 msg="updating rocm free memory" gpu=0 name=1002:1900 before="9.8 GiB" now="9.8 GiB"
time=2025-09-25T08:28:23.958Z level=INFO source=server.go:503 msg="system memory" total="46.9 GiB" free="11.6 GiB" free_swap="8.0 GiB"
time=2025-09-25T08:28:23.958Z level=DEBUG source=memory.go:181 msg=evaluating library=rocm gpu_count=1 available="[9.8 GiB]"
time=2025-09-25T08:28:23.959Z level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 library=rocm parallel=1 required="5.4 GiB" gpus=1
time=2025-09-25T08:28:23.959Z level=DEBUG source=memory.go:181 msg=evaluating library=rocm gpu_count=1 available="[9.8 GiB]"
time=2025-09-25T08:28:23.960Z level=INFO source=server.go:543 msg=offload library=rocm layers.requested=-1 layers.model=35 layers.offload=35 layers.split=[35] memory.available="[9.8 GiB]" memory.gpu_overhead="0 B" memory.required.full="5.4 GiB" memory.required.partial="5.4 GiB" memory.required.kv="334.0 MiB" memory.required.allocations="[5.4 GiB]" memory.weights.total="2.3 GiB" memory.weights.repeating="1.8 GiB" memory.weights.nonrepeating="525.0 MiB" memory.graph.full="517.0 MiB" memory.graph.partial="1.0 GiB" projector.weights="795.9 MiB" projector.graph="1.0 GiB"
time=2025-09-25T08:28:23.969Z level=INFO source=runner.go:1251 msg="starting ollama engine"
time=2025-09-25T08:28:23.970Z level=INFO source=runner.go:1286 msg="Server listening on 127.0.0.1:44615"
time=2025-09-25T08:28:23.971Z level=INFO source=runner.go:1170 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:8192 KvCacheType: NumThreads:8 GPULayers:35[ID:0 Layers:35(0..34)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-09-25T08:28:24.022Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.alignment default=32
time=2025-09-25T08:28:24.023Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.name default=""
time=2025-09-25T08:28:24.023Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.description default=""
time=2025-09-25T08:28:24.023Z level=INFO source=ggml.go:131 msg="" architecture=gemma3 file_type=Q4_K_M name="" description="" num_tensors=883 num_key_values=36
time=2025-09-25T08:28:24.023Z level=DEBUG source=ggml.go:94 msg="ggml backend load all from path" path=/usr/lib/ollama
load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-25T08:28:24.076Z level=DEBUG source=ggml.go:94 msg="ggml backend load all from path" path=/usr/lib/ollama/rocm
time=2025-09-25T08:28:24.077Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 compiler=cgo(gcc)
time=2025-09-25T08:28:24.080Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.pooling_type default=4294967295
time=2025-09-25T08:28:24.080Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eot_token_id default=106
time=2025-09-25T08:28:24.080Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eos_token_ids default="&{size:0 values:[]}"
time=2025-09-25T08:28:24.082Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.attention.layer_norm_rms_epsilon default=9.999999974752427e-07
time=2025-09-25T08:28:24.082Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.local.freq_base default=10000
time=2025-09-25T08:28:24.082Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.global.freq_base default=1e+06
time=2025-09-25T08:28:24.082Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.freq_scale default=1
time=2025-09-25T08:28:24.082Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.mm_tokens_per_image default=256
time=2025-09-25T08:28:24.271Z level=DEBUG source=ggml.go:794 msg="compute graph" nodes=972 splits=1
time=2025-09-25T08:28:24.407Z level=DEBUG source=ggml.go:794 msg="compute graph" nodes=1505 splits=1
time=2025-09-25T08:28:24.408Z level=INFO source=ggml.go:487 msg="offloading 0 repeating layers to GPU"
time=2025-09-25T08:28:24.408Z level=INFO source=ggml.go:491 msg="offloading output layer to CPU"
time=2025-09-25T08:28:24.408Z level=INFO source=ggml.go:498 msg="offloaded 0/35 layers to GPU"
time=2025-09-25T08:28:24.409Z level=INFO source=backend.go:315 msg="model weights" device=CPU size="3.6 GiB"
time=2025-09-25T08:28:24.409Z level=INFO source=backend.go:326 msg="kv cache" device=CPU size="334.0 MiB"
time=2025-09-25T08:28:24.409Z level=INFO source=backend.go:337 msg="compute graph" device=CPU size="1.1 GiB"
time=2025-09-25T08:28:24.409Z level=INFO source=backend.go:342 msg="total memory" size="5.0 GiB"
time=2025-09-25T08:28:24.409Z level=INFO source=sched.go:473 msg="loaded runners" count=1
time=2025-09-25T08:28:24.409Z level=INFO source=server.go:1250 msg="waiting for llama runner to start responding"
time=2025-09-25T08:28:24.409Z level=INFO source=server.go:1284 msg="waiting for server to become available" status="llm server loading model"
time=2025-09-25T08:28:24.662Z level=DEBUG source=server.go:1294 msg="model load progress 0.21"
time=2025-09-25T08:28:24.916Z level=DEBUG source=server.go:1294 msg="model load progress 0.42"
time=2025-09-25T08:28:25.169Z level=DEBUG source=server.go:1294 msg="model load progress 0.63"
time=2025-09-25T08:28:25.420Z level=DEBUG source=server.go:1294 msg="model load progress 0.89"
time=2025-09-25T08:28:25.671Z level=DEBUG source=server.go:1294 msg="model load progress 0.97"
time=2025-09-25T08:28:25.794Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.pooling_type default=4294967295
time=2025-09-25T08:28:25.922Z level=INFO source=server.go:1288 msg="llama runner started in 1.96 seconds"
time=2025-09-25T08:28:25.922Z level=DEBUG source=sched.go:485 msg="finished setting up" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192
time=2025-09-25T08:28:25.922Z level=DEBUG source=server.go:1387 msg="completion request" images=0 prompt=112 format=""
time=2025-09-25T08:28:25.943Z level=DEBUG source=vocabulary.go:52 msg="adding bos token to prompt" id=2
time=2025-09-25T08:28:25.943Z level=DEBUG source=cache.go:144 msg="loading cache slot" id=0 cache=0 prompt=21 used=0 remaining=21
[GIN] 2025/09/25 - 08:29:11 | 200 | 47.345281223s |       127.0.0.1 | POST     "/api/chat"
time=2025-09-25T08:29:11.039Z level=DEBUG source=sched.go:493 msg="context for request finished"
time=2025-09-25T08:29:11.039Z level=DEBUG source=sched.go:286 msg="runner with non-zero duration has gone idle, adding timer" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192 duration=5m0s
time=2025-09-25T08:29:11.039Z level=DEBUG source=sched.go:304 msg="after processing request finished event" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192 refCount=0
time=2025-09-25T08:29:11.255Z level=DEBUG source=sched.go:583 msg="evaluating already loaded" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25
time=2025-09-25T08:29:11.255Z level=DEBUG source=server.go:1387 msg="completion request" images=0 prompt=4850 format=""
time=2025-09-25T08:29:11.259Z level=DEBUG source=vocabulary.go:52 msg="adding bos token to prompt" id=2
time=2025-09-25T08:29:11.259Z level=DEBUG source=cache.go:144 msg="loading cache slot" id=0 cache=777 prompt=1005 used=4 remaining=1001
[GIN] 2025/09/25 - 08:29:33 | 200 | 21.890251578s |       127.0.0.1 | POST     "/api/chat"
time=2025-09-25T08:29:33.007Z level=DEBUG source=sched.go:377 msg="context for request finished" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192
time=2025-09-25T08:29:33.007Z level=DEBUG source=sched.go:286 msg="runner with non-zero duration has gone idle, adding timer" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192 duration=5m0s
time=2025-09-25T08:29:33.007Z level=DEBUG source=sched.go:304 msg="after processing request finished event" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192 refCount=0
time=2025-09-25T08:29:33.146Z level=DEBUG source=sched.go:583 msg="evaluating already loaded" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25
time=2025-09-25T08:29:33.146Z level=DEBUG source=server.go:1387 msg="completion request" images=0 prompt=5179 format=""
time=2025-09-25T08:29:33.151Z level=DEBUG source=vocabulary.go:52 msg="adding bos token to prompt" id=2
time=2025-09-25T08:29:33.151Z level=DEBUG source=cache.go:144 msg="loading cache slot" id=0 cache=1100 prompt=1071 used=0 remaining=1071
[GIN] 2025/09/25 - 08:29:51 | 200 | 18.265154778s |       127.0.0.1 | POST     "/api/chat"
time=2025-09-25T08:29:51.278Z level=DEBUG source=sched.go:377 msg="context for request finished" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192
time=2025-09-25T08:29:51.278Z level=DEBUG source=sched.go:286 msg="runner with non-zero duration has gone idle, adding timer" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192 duration=5m0s
time=2025-09-25T08:29:51.278Z level=DEBUG source=sched.go:304 msg="after processing request finished event" runner.name=registry.ollama.ai/library/gemma3:latest runner.inference=rocm runner.devices=1 runner.size="5.4 GiB" runner.vram="5.4 GiB" runner.parallel=1 runner.pid=22 runner.model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 runner.num_ctx=8192 refCount=0
time=2025-09-25T08:29:51.566Z level=DEBUG source=sched.go:583 msg="evaluating already loaded" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25
time=2025-09-25T08:29:51.566Z level=DEBUG source=server.go:1387 msg="completion request" images=0 prompt=4639 format=""
time=2025-09-25T08:29:51.569Z level=DEBUG source=vocabulary.go:52 msg="adding bos token to prompt" id=2
time=2025-09-25T08:29:51.569Z level=DEBUG source=cache.go:144 msg="loading cache slot" id=0 cache=1091 prompt=960 used=0 remaining=960

@dhiltgen commented on GitHub (Sep 26, 2025):

@Pekkari from the logs, it looks like it does try to load from the rocm runner directory:

time=2025-09-25T08:28:24.076Z level=DEBUG source=ggml.go:94 msg="ggml backend load all from path" path=/usr/lib/ollama/rocm

Did you check to see what the dependencies are with ldd?

@waltercool it looks like you have an iGPU; currently we only work properly on these if your BIOS allows you to dedicate more system memory to the GPU. We're working on improving this.

@Pekkari commented on GitHub (Sep 26, 2025):

> @Pekkari you should enable debug logging to see more details of what it's trying and hopefully why it's failing: OLLAMA_DEBUG=1
>
> My suspicion is that a dependency is missing from the LD_LIBRARY_PATH, so when we attempt to dlopen libggml-hip.so it fails due to that missing dependency. Run ldd /usr/lib/ollama/libggml-hip.so and then verify all the dependencies are in directories being passed to the runner subprocess.

And you are correct here. After adding the following to the library path:

-e LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama:/usr/lib/ollama/rocm

And modifying CMakeLists.txt with this patch:

$ git diff CMakeLists.txt
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d62c8f99..c8abccae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -100,7 +100,7 @@ if(CMAKE_HIP_COMPILER)
 
     find_package(hip REQUIRED)
     if(NOT AMDGPU_TARGETS)
-        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[012]|120[01])$")
+        list(FILTER AMDGPU_TARGETS INCLUDE REGEX "^gfx(900|94[012]|101[02]|1030|110[0123]|120[01])$")
     elseif(WIN32 AND WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX)
         list(FILTER AMDGPU_TARGETS EXCLUDE REGEX ${WINDOWS_AMDGPU_TARGETS_EXCLUDE_REGEX})
     endif()
@@ -122,7 +122,7 @@ if(CMAKE_HIP_COMPILER)
         )
         install(RUNTIME_DEPENDENCY_SET rocm
                 DIRECTORIES ${HIP_BIN_INSTALL_DIR} ${HIP_LIB_INSTALL_DIR}
-                PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register drm drm_amdgpu numa elf
+                PRE_INCLUDE_REGEXES hipblas rocblas amdhip64 rocsolver roctx64 rocroller rocm_sysdeps_z rocm_sysdeps_zstd rocm_sysdeps_bz2 amd_comgr hsa-runtime64 rocsparse tinfo rocprofiler-register LLVM.so.20.0git drm drm_amdgpu atomic numa elf
                 PRE_EXCLUDE_REGEXES ".*"
                 POST_EXCLUDE_REGEXES "system32"
             RUNTIME DESTINATION ${OLLAMA_HIP_INSTALL_DIR} COMPONENT HIP

The rocm backend became active:

$ podman run --name ollama-rocm --pod ollama -e LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama:/usr/lib/ollama/rocm -e OLLAMA_DEBUG=1 -e OLLAMA_CONTEXT_LENGTH=8192 -v c47c4cabdfb84c606efb5887fea55b20b1673004cedcf577c80d89779a0618a3:/root/.ollama --device /dev/accel --device /dev/dri --device /dev/kfd --cap-drop all --read-only --ipc=host --shm-size=8G -it ollama:rocm-7
time=2025-09-26T15:24:24.942Z level=INFO source=routes.go:1331 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:8192 OLLAMA_DEBUG:DEBUG OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NEW_ESTIMATES:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]"
time=2025-09-26T15:24:24.943Z level=INFO source=images.go:477 msg="total blobs: 20"
time=2025-09-26T15:24:24.943Z level=INFO source=images.go:484 msg="total unused blobs removed: 0"
[GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

[GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
 - using env:   export GIN_MODE=release
 - using code:  gin.SetMode(gin.ReleaseMode)

[GIN-debug] HEAD   /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func1 (5 handlers)
[GIN-debug] GET    /                         --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func2 (5 handlers)
[GIN-debug] HEAD   /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func3 (5 handlers)
[GIN-debug] GET    /api/version              --> github.com/ollama/ollama/server.(*Server).GenerateRoutes.func4 (5 handlers)
[GIN-debug] POST   /api/pull                 --> github.com/ollama/ollama/server.(*Server).PullHandler-fm (5 handlers)
[GIN-debug] POST   /api/push                 --> github.com/ollama/ollama/server.(*Server).PushHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] GET    /api/tags                 --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (5 handlers)
[GIN-debug] POST   /api/show                 --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (5 handlers)
[GIN-debug] DELETE /api/delete               --> github.com/ollama/ollama/server.(*Server).DeleteHandler-fm (5 handlers)
[GIN-debug] POST   /api/create               --> github.com/ollama/ollama/server.(*Server).CreateHandler-fm (5 handlers)
[GIN-debug] POST   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).CreateBlobHandler-fm (5 handlers)
[GIN-debug] HEAD   /api/blobs/:digest        --> github.com/ollama/ollama/server.(*Server).HeadBlobHandler-fm (5 handlers)
[GIN-debug] POST   /api/copy                 --> github.com/ollama/ollama/server.(*Server).CopyHandler-fm (5 handlers)
[GIN-debug] GET    /api/ps                   --> github.com/ollama/ollama/server.(*Server).PsHandler-fm (5 handlers)
[GIN-debug] POST   /api/generate             --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (5 handlers)
[GIN-debug] POST   /api/chat                 --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (5 handlers)
[GIN-debug] POST   /api/embed                --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (5 handlers)
[GIN-debug] POST   /api/embeddings           --> github.com/ollama/ollama/server.(*Server).EmbeddingsHandler-fm (5 handlers)
[GIN-debug] POST   /v1/chat/completions      --> github.com/ollama/ollama/server.(*Server).ChatHandler-fm (6 handlers)
[GIN-debug] POST   /v1/completions           --> github.com/ollama/ollama/server.(*Server).GenerateHandler-fm (6 handlers)
[GIN-debug] POST   /v1/embeddings            --> github.com/ollama/ollama/server.(*Server).EmbedHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models                --> github.com/ollama/ollama/server.(*Server).ListHandler-fm (6 handlers)
[GIN-debug] GET    /v1/models/:model         --> github.com/ollama/ollama/server.(*Server).ShowHandler-fm (6 handlers)
time=2025-09-26T15:24:24.943Z level=INFO source=routes.go:1384 msg="Listening on [::]:11434 (version 0.0.0)"
time=2025-09-26T15:24:24.943Z level=DEBUG source=sched.go:121 msg="starting llm scheduler"
time=2025-09-26T15:24:24.943Z level=INFO source=gpu.go:217 msg="looking for compatible GPUs"
time=2025-09-26T15:24:24.945Z level=DEBUG source=gpu.go:98 msg="searching for GPU discovery libraries for NVIDIA"
time=2025-09-26T15:24:24.945Z level=DEBUG source=gpu.go:512 msg="Searching for GPU library" name=libcuda.so*
time=2025-09-26T15:24:24.945Z level=DEBUG source=gpu.go:536 msg="gpu library search" globs="[/usr/lib/ollama/libcuda.so* /usr/local/nvidia/lib/libcuda.so* /usr/local/nvidia/lib64/libcuda.so* /usr/lib/ollama/libcuda.so* /usr/lib/ollama/rocm/libcuda.so* /usr/local/cuda*/targets/*/lib/libcuda.so* /usr/lib/*-linux-gnu/nvidia/current/libcuda.so* /usr/lib/*-linux-gnu/libcuda.so* /usr/lib/wsl/lib/libcuda.so* /usr/lib/wsl/drivers/*/libcuda.so* /opt/cuda/lib*/libcuda.so* /usr/local/cuda/lib*/libcuda.so* /usr/lib*/libcuda.so* /usr/local/lib*/libcuda.so*]"
time=2025-09-26T15:24:24.945Z level=DEBUG source=gpu.go:569 msg="discovered GPU libraries" paths=[]
time=2025-09-26T15:24:24.945Z level=DEBUG source=gpu.go:512 msg="Searching for GPU library" name=libcudart.so*
time=2025-09-26T15:24:24.945Z level=DEBUG source=gpu.go:536 msg="gpu library search" globs="[/usr/lib/ollama/libcudart.so* /usr/local/nvidia/lib/libcudart.so* /usr/local/nvidia/lib64/libcudart.so* /usr/lib/ollama/libcudart.so* /usr/lib/ollama/rocm/libcudart.so* /usr/lib/ollama/cuda_v*/libcudart.so* /usr/local/cuda/lib64/libcudart.so* /usr/lib/x86_64-linux-gnu/nvidia/current/libcudart.so* /usr/lib/x86_64-linux-gnu/libcudart.so* /usr/lib/wsl/lib/libcudart.so* /usr/lib/wsl/drivers/*/libcudart.so* /opt/cuda/lib64/libcudart.so* /usr/local/cuda*/targets/aarch64-linux/lib/libcudart.so* /usr/lib/aarch64-linux-gnu/nvidia/current/libcudart.so* /usr/lib/aarch64-linux-gnu/libcudart.so* /usr/local/cuda/lib*/libcudart.so* /usr/lib*/libcudart.so* /usr/local/lib*/libcudart.so*]"
time=2025-09-26T15:24:24.946Z level=DEBUG source=gpu.go:569 msg="discovered GPU libraries" paths=[]
time=2025-09-26T15:24:24.946Z level=WARN source=amd_linux.go:61 msg="ollama recommends running the https://www.amd.com/en/support/download/linux-drivers.html" error="amdgpu version file missing: /sys/module/amdgpu/version stat /sys/module/amdgpu/version: no such file or directory"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:102 msg="evaluating amdgpu node /sys/class/kfd/kfd/topology/nodes/0/properties"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:122 msg="detected CPU /sys/class/kfd/kfd/topology/nodes/0/properties"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:102 msg="evaluating amdgpu node /sys/class/kfd/kfd/topology/nodes/1/properties"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:203 msg="mapping amdgpu to drm sysfs nodes" amdgpu=/sys/class/kfd/kfd/topology/nodes/1/properties vendor=4098 device=6400 unique_id=0
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:237 msg=matched amdgpu=/sys/class/kfd/kfd/topology/nodes/1/properties drm=/sys/class/drm/card1/device
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:343 msg="amdgpu memory" gpu=0 total="16.0 GiB"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:344 msg="amdgpu memory" gpu=0 available="10.2 GiB"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_common.go:16 msg="evaluating potential rocm lib dir /usr/lib/ollama/rocm"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_common.go:44 msg="detected ROCM next to ollama executable /usr/lib/ollama/rocm"
time=2025-09-26T15:24:24.946Z level=DEBUG source=amd_linux.go:375 msg="rocm supported GPUs" types=[gfx1103]
time=2025-09-26T15:24:24.946Z level=INFO source=amd_linux.go:390 msg="amdgpu is supported" gpu=0 gpu_type=gfx1103
time=2025-09-26T15:24:24.946Z level=INFO source=types.go:131 msg="inference compute" id=0 library=rocm variant="" compute=gfx1103 driver=0.0 name=1002:1900 total="16.0 GiB" available="10.2 GiB"
time=2025-09-26T15:24:24.946Z level=INFO source=routes.go:1425 msg="entering low vram mode" "total vram"="16.0 GiB" threshold="20.0 GiB"
[GIN] 2025/09/26 - 15:25:17 | 200 |    1.580669ms |       127.0.0.1 | GET      "/api/tags"
[GIN] 2025/09/26 - 15:25:17 | 200 |       68.75µs |       127.0.0.1 | GET      "/api/ps"
time=2025-09-26T15:25:23.787Z level=DEBUG source=gpu.go:402 msg="updating system memory data" before.total="46.9 GiB" before.free="15.6 GiB" before.free_swap="6.1 GiB" now.total="46.9 GiB" now.free="15.5 GiB" now.free_swap="6.1 GiB"
time=2025-09-26T15:25:23.787Z level=DEBUG source=amd_linux.go:492 msg="updating rocm free memory" gpu=0 name=1002:1900 before="10.2 GiB" now="10.2 GiB"
time=2025-09-26T15:25:23.787Z level=DEBUG source=sched.go:188 msg="updating default concurrency" OLLAMA_MAX_LOADED_MODELS=3 gpu_count=1
time=2025-09-26T15:25:23.813Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.alignment default=32
time=2025-09-26T15:25:23.814Z level=DEBUG source=sched.go:208 msg="loading first model" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25
time=2025-09-26T15:25:23.915Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.alignment default=32
time=2025-09-26T15:25:23.916Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eot_token_id default=106
time=2025-09-26T15:25:23.916Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eos_token_ids default="&{size:0 values:[]}"
time=2025-09-26T15:25:23.918Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.attention.layer_norm_rms_epsilon default=9.999999974752427e-07
time=2025-09-26T15:25:23.918Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.local.freq_base default=10000
time=2025-09-26T15:25:23.918Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.global.freq_base default=1e+06
time=2025-09-26T15:25:23.918Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.freq_scale default=1
time=2025-09-26T15:25:23.918Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.mm_tokens_per_image default=256
time=2025-09-26T15:25:23.918Z level=DEBUG source=gpu.go:402 msg="updating system memory data" before.total="46.9 GiB" before.free="15.5 GiB" before.free_swap="6.1 GiB" now.total="46.9 GiB" now.free="15.5 GiB" now.free_swap="6.1 GiB"
time=2025-09-26T15:25:23.918Z level=DEBUG source=amd_linux.go:492 msg="updating rocm free memory" gpu=0 name=1002:1900 before="10.2 GiB" now="10.2 GiB"
time=2025-09-26T15:25:23.919Z level=DEBUG source=server.go:323 msg="adding gpu library" path=/usr/lib/ollama/rocm
time=2025-09-26T15:25:23.919Z level=DEBUG source=server.go:331 msg="adding gpu dependency paths" paths=[/usr/lib/ollama/rocm]
time=2025-09-26T15:25:23.919Z level=INFO source=server.go:398 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --model /root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 --port 37095"
time=2025-09-26T15:25:23.919Z level=DEBUG source=server.go:399 msg=subprocess LD_LIBRARY_PATH=/usr/lib/ollama/rocm:/usr/lib/ollama/rocm:/usr/lib/ollama:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/ollama:/usr/lib/ollama/rocm:/usr/lib/ollama PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin OLLAMA_HOST=0.0.0.0:11434 OLLAMA_CONTEXT_LENGTH=8192 OLLAMA_DEBUG=1 OLLAMA_MAX_LOADED_MODELS=3 OLLAMA_LIBRARY_PATH=/usr/lib/ollama:/usr/lib/ollama/rocm ROCR_VISIBLE_DEVICES=0
time=2025-09-26T15:25:23.919Z level=DEBUG source=gpu.go:402 msg="updating system memory data" before.total="46.9 GiB" before.free="15.5 GiB" before.free_swap="6.1 GiB" now.total="46.9 GiB" now.free="15.5 GiB" now.free_swap="6.1 GiB"
time=2025-09-26T15:25:23.919Z level=DEBUG source=amd_linux.go:492 msg="updating rocm free memory" gpu=0 name=1002:1900 before="10.2 GiB" now="10.2 GiB"
time=2025-09-26T15:25:23.919Z level=INFO source=server.go:503 msg="system memory" total="46.9 GiB" free="15.5 GiB" free_swap="6.1 GiB"
time=2025-09-26T15:25:23.919Z level=DEBUG source=memory.go:181 msg=evaluating library=rocm gpu_count=1 available="[10.2 GiB]"
time=2025-09-26T15:25:23.920Z level=INFO source=memory.go:36 msg="new model will fit in available VRAM across minimum required GPUs, loading" model=/root/.ollama/models/blobs/sha256-aeda25e63ebd698fab8638ffb778e68bed908b960d39d0becc650fa981609d25 library=rocm parallel=1 required="5.4 GiB" gpus=1
time=2025-09-26T15:25:23.920Z level=DEBUG source=memory.go:181 msg=evaluating library=rocm gpu_count=1 available="[10.2 GiB]"
time=2025-09-26T15:25:23.921Z level=INFO source=server.go:543 msg=offload library=rocm layers.requested=-1 layers.model=35 layers.offload=35 layers.split=[35] memory.available="[10.2 GiB]" memory.gpu_overhead="0 B" memory.required.full="5.4 GiB" memory.required.partial="5.4 GiB" memory.required.kv="334.0 MiB" memory.required.allocations="[5.4 GiB]" memory.weights.total="2.3 GiB" memory.weights.repeating="1.8 GiB" memory.weights.nonrepeating="525.0 MiB" memory.graph.full="517.0 MiB" memory.graph.partial="1.0 GiB" projector.weights="795.9 MiB" projector.graph="1.0 GiB"
time=2025-09-26T15:25:23.929Z level=INFO source=runner.go:1251 msg="starting ollama engine"
time=2025-09-26T15:25:23.929Z level=INFO source=runner.go:1286 msg="Server listening on 127.0.0.1:37095"
time=2025-09-26T15:25:23.932Z level=INFO source=runner.go:1170 msg=load request="{Operation:commit LoraPath:[] Parallel:1 BatchSize:512 FlashAttention:false KvSize:8192 KvCacheType: NumThreads:8 GPULayers:35[ID:0 Layers:35(0..34)] MultiUserCache:false ProjectorPath: MainGPU:0 UseMmap:false}"
time=2025-09-26T15:25:23.982Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.alignment default=32
time=2025-09-26T15:25:23.983Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.name default=""
time=2025-09-26T15:25:23.983Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=general.description default=""
time=2025-09-26T15:25:23.983Z level=INFO source=ggml.go:131 msg="" architecture=gemma3 file_type=Q4_K_M name="" description="" num_tensors=883 num_key_values=36
time=2025-09-26T15:25:23.983Z level=DEBUG source=ggml.go:94 msg="ggml backend load all from path" path=/usr/lib/ollama
ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
  Device 0: AMD Radeon Graphics, gfx1103 (0x1103), VMM: no, Wave Size: 32, ID: 0
load_backend: loaded ROCm backend from /usr/lib/ollama/libggml-hip.so
load_backend: loaded CPU backend from /usr/lib/ollama/libggml-cpu-icelake.so
time=2025-09-26T15:25:24.204Z level=DEBUG source=ggml.go:94 msg="ggml backend load all from path" path=/usr/lib/ollama/rocm
time=2025-09-26T15:25:24.204Z level=INFO source=ggml.go:104 msg=system CPU.0.SSE3=1 CPU.0.SSSE3=1 CPU.0.AVX=1 CPU.0.AVX2=1 CPU.0.F16C=1 CPU.0.FMA=1 CPU.0.BMI2=1 CPU.0.AVX512=1 CPU.0.AVX512_VBMI=1 CPU.0.AVX512_VNNI=1 CPU.0.LLAMAFILE=1 CPU.1.LLAMAFILE=1 ROCm.0.NO_VMM=1 ROCm.0.PEER_MAX_BATCH_SIZE=128 compiler=cgo(gcc)
time=2025-09-26T15:25:24.490Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.pooling_type default=4294967295
time=2025-09-26T15:25:24.490Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eot_token_id default=106
time=2025-09-26T15:25:24.490Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=tokenizer.ggml.eos_token_ids default="&{size:0 values:[]}"
time=2025-09-26T15:25:24.492Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.attention.layer_norm_rms_epsilon default=9.999999974752427e-07
time=2025-09-26T15:25:24.492Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.local.freq_base default=10000
time=2025-09-26T15:25:24.492Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.global.freq_base default=1e+06
time=2025-09-26T15:25:24.492Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.rope.freq_scale default=1
time=2025-09-26T15:25:24.492Z level=DEBUG source=ggml.go:210 msg="key with type not found" key=gemma3.mm_tokens_per_image default=256
time=2025-09-26T15:25:25.120Z level=DEBUG source=ggml.go:794 msg="compute graph" nodes=972 splits=1
time=2025-09-26T15:25:25.378Z level=DEBUG source=ggml.go:794 msg="compute graph" nodes=1505 splits=2
time=2025-09-26T15:25:25.379Z level=INFO source=ggml.go:487 msg="offloading 34 repeating layers to GPU"
time=2025-09-26T15:25:25.379Z level=INFO source=ggml.go:493 msg="offloading output layer to GPU"
time=2025-09-26T15:25:25.379Z level=INFO source=ggml.go:498 msg="offloaded 35/35 layers to GPU"
time=2025-09-26T15:25:25.379Z level=INFO source=backend.go:310 msg="model weights" device=ROCm0 size="3.1 GiB"
time=2025-09-26T15:25:25.379Z level=INFO source=backend.go:315 msg="model weights" device=CPU size="525.0 MiB"
time=2025-09-26T15:25:25.380Z level=INFO source=backend.go:321 msg="kv cache" device=ROCm0 size="334.0 MiB"
time=2025-09-26T15:25:25.380Z level=INFO source=backend.go:332 msg="compute graph" device=ROCm0 size="1.1 GiB"
time=2025-09-26T15:25:25.380Z level=INFO source=backend.go:337 msg="compute graph" device=CPU size="5.0 MiB"
time=2025-09-26T15:25:25.380Z level=INFO source=backend.go:342 msg="total memory" size="5.0 GiB"
[...]
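
For anyone reproducing this, a quick way to confirm the missing-dependency problem from the quoted advice is fully resolved (a sketch, assuming the default /usr/lib/ollama install layout):

$ LD_LIBRARY_PATH=/usr/lib/ollama:/usr/lib/ollama/rocm ldd /usr/lib/ollama/libggml-hip.so | grep "not found"

An empty result means every shared-library dependency resolves; any line printed names a library that still has to be added to the CMake runtime dependency set or to the search path.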

@Mannshoch commented on GitHub (Nov 12, 2025):

Any update on this issue? I would love to use an updated ROCm and Ubuntu.
I would also love to see kernel 6.14 with AMDXDNA.
(If I understood right, that should give a boost.)

@Pekkari commented on GitHub (Nov 13, 2025):

This ticket was about GPU inference; it didn't have anything to do with NPU inference. So far, the fixes contributed have gone out of date, and it's on my agenda to retry this with ROCm 7.9. As long as the build of the new version goes smoothly, the instructions here should still produce a usable ROCm to integrate with Ollama, and some of the patches in the set can be dropped.

@dhiltgen commented on GitHub (Mar 11, 2026):

Release 0.17.8 updates Linux to ROCm v7, which covers support for this GPU. Please give the RC a try (https://github.com/ollama/ollama/blob/main/docs/linux.mdx#installing-specific-versions) and let us know if you run into any problems.
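
Per the linked instructions, a specific release candidate can be installed by pinning OLLAMA_VERSION; for the container images used in this thread, the equivalent is pulling a pinned tag (the -rocm tag naming below is an assumption based on the usual release-tag convention):

$ curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.17.8-rc4 sh
$ podman pull docker.io/ollama/ollama:0.17.8-rc4-rocm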

@Pekkari commented on GitHub (Mar 14, 2026):

I'm afraid I did, look:

$ podman run --name ollama -e HSA_OVERRIDE_GFX_VERSION=11.0.0 --read-only --cap-drop ALL -p 127.0.0.1:11434:11434 -v ollama:/root/.ollama -v ollama-shader-cache:/root/.cache --device /dev/dri --device /dev/kfd --device /dev/accel --group-add video -it ollama/ollama:rocm
time=2026-03-14T13:18:07.068Z level=INFO source=routes.go:1727 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GGML_VK_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION:11.0.0 HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_EDITOR: OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NO_CLOUD:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false OLLAMA_VULKAN:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:127.0.0.0/8, ::1, localhost, privoxy.localhost, whatismyip.com, whatismyip.org]"
time=2026-03-14T13:18:07.068Z level=INFO source=routes.go:1729 msg="Ollama cloud disabled: false"
time=2026-03-14T13:18:07.069Z level=INFO source=images.go:477 msg="total blobs: 10"
time=2026-03-14T13:18:07.070Z level=INFO source=images.go:484 msg="total unused blobs removed: 0"
time=2026-03-14T13:18:07.070Z level=INFO source=routes.go:1782 msg="Listening on [::]:11434 (version 0.17.8-rc4)"
time=2026-03-14T13:18:07.070Z level=INFO source=runner.go:67 msg="discovering available GPUs..."
time=2026-03-14T13:18:07.070Z level=WARN source=runner.go:485 msg="user overrode visible devices" HSA_OVERRIDE_GFX_VERSION=11.0.0
time=2026-03-14T13:18:07.070Z level=WARN source=runner.go:489 msg="if GPUs are not correctly discovered, unset and try again"
time=2026-03-14T13:18:07.071Z level=INFO source=server.go:430 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --port 37435"
time=2026-03-14T13:18:07.157Z level=INFO source=server.go:430 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --port 40623"
time=2026-03-14T13:18:07.268Z level=INFO source=types.go:60 msg="inference compute" id=cpu library=cpu compute="" name=cpu description=cpu libdirs=ollama driver="" pci_id="" type="" total="60.6 GiB" available="60.6 GiB"
time=2026-03-14T13:18:07.268Z level=INFO source=routes.go:1832 msg="vram-based default context" total_vram="0 B" default_num_ctx=4096

and without overriding anything:

$ podman run --name ollama --read-only --cap-drop ALL -p 127.0.0.1:11434:11434 -v ollama:/root/.ollama -v ollama-shader-cache:/root/.cache --device /dev/dri --device /dev/kfd --device /dev/accel --group-add video -it ollama/ollama:rocm
time=2026-03-14T13:21:54.663Z level=INFO source=routes.go:1727 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GGML_VK_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_EDITOR: OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://0.0.0.0:11434 OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NO_CLOUD:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false OLLAMA_VULKAN:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:127.0.0.0/8, ::1, localhost, privoxy.localhost, whatismyip.com, whatismyip.org]"
time=2026-03-14T13:21:54.663Z level=INFO source=routes.go:1729 msg="Ollama cloud disabled: false"
time=2026-03-14T13:21:54.664Z level=INFO source=images.go:477 msg="total blobs: 10"
time=2026-03-14T13:21:54.664Z level=INFO source=images.go:484 msg="total unused blobs removed: 0"
time=2026-03-14T13:21:54.664Z level=INFO source=routes.go:1782 msg="Listening on [::]:11434 (version 0.17.8-rc4)"
time=2026-03-14T13:21:54.664Z level=INFO source=runner.go:67 msg="discovering available GPUs..."
time=2026-03-14T13:21:54.666Z level=INFO source=server.go:430 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --port 44089"
time=2026-03-14T13:21:54.738Z level=INFO source=server.go:430 msg="starting runner" cmd="/usr/bin/ollama runner --ollama-engine --port 38267"
time=2026-03-14T13:21:55.359Z level=INFO source=runner.go:464 msg="failure during GPU discovery" OLLAMA_LIBRARY_PATH="[/usr/lib/ollama /usr/lib/ollama/rocm]" extra_envs="map[GGML_CUDA_INIT:1 ROCR_VISIBLE_DEVICES:0]" error="runner crashed"
time=2026-03-14T13:21:55.360Z level=INFO source=types.go:60 msg="inference compute" id=cpu library=cpu compute="" name=cpu description=cpu libdirs=ollama driver="" pci_id="" type="" total="60.6 GiB" available="60.6 GiB"
time=2026-03-14T13:21:55.360Z level=INFO source=routes.go:1832 msg="vram-based default context" total_vram="0 B" default_num_ctx=4096
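
A next step that usually surfaces the underlying error is rerunning the same container with debug logging enabled, as was done earlier in this thread (a sketch reusing the invocation above):

$ podman run --name ollama -e OLLAMA_DEBUG=1 --read-only --cap-drop ALL -p 127.0.0.1:11434:11434 -v ollama:/root/.ollama -v ollama-shader-cache:/root/.cache --device /dev/dri --device /dev/kfd --device /dev/accel --group-add video -it ollama/ollama:rocm

With OLLAMA_DEBUG=1 set, the runner subprocess should log which backend libraries it attempts to load and why the ROCm initialization aborts, instead of just reporting "runner crashed".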

@Pekkari commented on GitHub (Mar 15, 2026):

Seems like librocblas.so is crashing:

$ coredumpctl dump --output ollama-crash.out
Hint: You are currently not seeing messages from other users and the system.
      Users in groups 'adm', 'systemd-journal', 'wheel' can see all messages.
      Pass -q to turn off this notice.
           PID: 26059 (ollama)
           UID: 1000 (fedora)
           GID: 1000 (fedora)
        Signal: 6 (ABRT)
     Timestamp: Sun 2026-03-15 11:10:21 UTC (3min 28s ago)
  Command Line: /usr/bin/ollama runner --ollama-engine --port 39639
    Executable: /usr/bin/ollama
 Control Group: /user.slice/user-1000.slice/user@1000.service/user.slice/libpod-8b34cb898559eb9afb1305f93f263775c9fb67c32806ab5c261a869dca97587c.scope/container
          Unit: user@1000.service
     User Unit: libpod-8b34cb898559eb9afb1305f93f263775c9fb67c32806ab5c261a869dca97587c.scope
         Slice: user-1000.slice
     Owner UID: 1000 (fedora)
       Boot ID: 4778ebc1ce4640e2a63016d20ee457be
    Machine ID: 6aa83f7036b540ff993185932c32331e
      Hostname: 8b34cb898559
       Storage: /var/lib/systemd/coredump/core.ollama.1000.4778ebc1ce4640e2a63016d20ee457be.26059.1773573021000000.zst (present)
  Size on Disk: 17.5M
       Message: Process 26059 (ollama) of user 1000 dumped core.
                
                Module /usr/lib/ollama/rocm/libggml-hip.so without build-id.
                Module /usr/lib/ollama/rocm/libggml-hip.so
                Module /usr/lib/ollama/rocm/librocroller.so.1.0.0 without build-id.
                Module /usr/lib/ollama/rocm/librocroller.so.1.0.0
                Module /usr/lib/x86_64-linux-gnu/libzstd.so.1.5.5 from deb libzstd-1.5.5+dfsg2-2build1.1.amd64
                Module /usr/lib/x86_64-linux-gnu/libgcc_s.so.1 from deb gcc-14-14.2.0-4ubuntu2~24.04.1.amd64
                Module /usr/lib/x86_64-linux-gnu/libstdc++.so.6.0.33 from deb gcc-14-14.2.0-4ubuntu2~24.04.1.amd64
                Stack trace of thread 26:
                #0  0x00007fb4f7ba7b2c n/a (/usr/lib/x86_64-linux-gnu/libc.so.6 + 0x9eb2c)
                #1  0x00007fb4f7b4e27e n/a (/usr/lib/x86_64-linux-gnu/libc.so.6 + 0x4527e)
                #2  0x00007fb4f7b318ff n/a (/usr/lib/x86_64-linux-gnu/libc.so.6 + 0x288ff)
                #3  0x00007fb4a75ab3fe n/a (/usr/lib/ollama/rocm/librocblas.so.5.2.70200 + 0x29743fe)
                ELF object binary architecture: AMD x86-64
More than one entry matches, ignoring rest.

Some coredump attached.

This dump was taken with the ollama 0.18.0 image, pulled today.
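
For a fuller backtrace than the journal summary above, the stored core can be opened directly in gdb (a sketch; symbol names will be limited, since several of the ROCm modules are reported above as lacking build-ids):

$ coredumpctl debug 26059
(gdb) thread apply all bt

coredumpctl debug launches gdb against the matching core file, and thread apply all bt prints a backtrace for every thread, including thread 26, which received the SIGABRT inside librocblas.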

@waltercool commented on GitHub (Mar 15, 2026):

Uhm, it works fine for me using the ollama binaries + ollama's ROCm package.

It also works fine for me using system ROCm 7.2 + building ollama from source.

Note that I'm using gfx1151, not gfx1103.

I would recommend you do some tests if possible:

  1. Upgrade your kernel to 6.18.4 or newer, and test.
  2. If that doesn't work, install ROCm on your system and do some PyTorch ROCm testing (https://gist.github.com/barneyjackson/6365d903ce0045e6d11a2c89316f5d20) with/without your overrides; a minimal smoke test is sketched after this list.
  • If 2 fails, then no luck; after all, gfx1103 is not officially supported, and you may need to use a working ROCm version instead.
  • If 2 works, then no clue.
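
A minimal smoke test for step 2, assuming a ROCm build of PyTorch is installed (ROCm devices are exposed through PyTorch's torch.cuda API):

$ python3 -c 'import torch; print(torch.version.hip, torch.cuda.is_available())'
$ HSA_OVERRIDE_GFX_VERSION=11.0.0 python3 -c 'import torch; print(torch.cuda.get_device_name(0))'

If torch.cuda.is_available() prints False, or the process aborts the same way the ollama runner does, the problem lies in the ROCm stack itself rather than in ollama.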

@Pekkari commented on GitHub (Mar 15, 2026):

> Uhm, it works fine for me using the ollama binaries + ollama's ROCm package.
>
> It also works fine for me using system ROCm 7.2 + building ollama from source.
>
> Note that I'm using gfx1151, not gfx1103.

Thanks for your attempt, but if you are not testing on the platform this ticket refers to, I'm afraid it isn't of much use. I'm happy to test anything that works on your end if you are testing on the same platform.


@waltercool commented on GitHub (Mar 15, 2026):

But your GPU is not officially supported by AMD, so I don't think people here will help you if that's the case.

I would recommend using the Vulkan backend instead.

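For reference, switching to the Vulkan backend is a one-variable change; a sketch using the `OLLAMA_VULKAN` variable that appears later in this thread, with `qwen2.5:3b` as an example model also mentioned below:

```
# Start the server with the Vulkan backend enabled, then run a model as usual:
$ OLLAMA_VULKAN=1 ollama serve
$ ollama run qwen2.5:3b "hello"
```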

@Pekkari commented on GitHub (Mar 15, 2026):

> But your GPU is not officially supported by AMD, so I don't think people here will help you if that's the case.
>
> I would recommend using the Vulkan backend instead.

Oh yes, that I do, no problem.


@altad3005 commented on GitHub (Apr 20, 2026):

Setup: Minisforum UM880 Plus, AMD Ryzen 7 8845HS / Radeon 780M (gfx1103) / 32GB DDR5
OS: Ubuntu 24.04 unprivileged LXC on Proxmox VE 9.1.7
Ollama: 0.21.0 (community-scripts install) + manual update via install.sh
Devices passed: /dev/dri/card0, /dev/dri/renderD128, /dev/kfd (a passthrough sketch follows this comment)

Results:

  • ROCm: GPU detected, prompt eval uses the GPU (~14-90 t/s), but generation (eval rate) stays on the CPU (~3 t/s). `offloaded 0/35 layers to GPU` despite the GPU being detected and VRAM being available.
  • Vulkan (OLLAMA_VULKAN=1): qwen2.5:3b → 12.79 t/s ✅, qwen2.5:7b → 5.57 t/s, qwen2.5:14b → 3 t/s (still CPU).

Conclusion: Vulkan works partially; smaller models benefit, larger ones still fall back to CPU. ROCm generation offload is completely broken for gfx1103 in an LXC context.
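For anyone reproducing this, a sketch of the device passthrough for an unprivileged LXC on Proxmox. The config path and the /dev/kfd major are assumptions (the kfd major is allocated dynamically), so check both on the host first:

```
# On the Proxmox host, check the actual device majors:
$ ls -l /dev/kfd /dev/dri/renderD128

# Then in /etc/pve/lxc/<CTID>.conf (CTID is your container ID), something like:
lxc.cgroup2.devices.allow: c 226:* rwm   # /dev/dri (DRM devices; major 226)
lxc.cgroup2.devices.allow: c 511:* rwm   # /dev/kfd -- substitute the major seen above
lxc.mount.entry: /dev/kfd dev/kfd none bind,optional,create=file
lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir
```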

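To confirm where a loaded model actually sits, a quick check; the log command assumes ollama runs as a systemd unit (adjust it if the community-scripts install logs elsewhere):

```
# The PROCESSOR column shows the split, e.g. "100% GPU" vs "100% CPU":
$ ollama ps

# The offload decision is logged by the server (assuming a systemd unit named ollama):
$ journalctl -u ollama | grep -i offloaded
```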