bring back sysfs based VRAM information for AMD (#12871)

* build: optimize dockerfile context for iterating

  This moves the copy of the source into the layer AFTER doing software installs, so we don't have to go through the RPM install for CUDA, etc. every time you touch a source file.

* amd: implement linux sysfs based VRAM lookup

  This adds a C++ implementation of sysfs DRM VRAM discovery for more accurate free VRAM data on Linux for AMD GPUs.
2025-12-05 19:16:53 -06:00 · 2025-11-17 15:40:58 -08:00
parent 399eacf486
commit 2f36d769aa
5 changed files with 186 additions and 22 deletions
--- a/14
+++ b/14
@@ -39,14 +39,14 @@ ENV CC=clang CXX=clang++
 FROM base-${TARGETARCH} AS base
 ARG CMAKEVERSION
 RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-$(uname -m).tar.gz | tar xz -C /usr/local --strip-components 1
-COPY CMakeLists.txt CMakePresets.json .
-COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 ENV LDFLAGS=-s

 FROM base AS cpu
 RUN dnf install -y gcc-toolset-11-gcc gcc-toolset-11-gcc-c++
 ENV PATH=/opt/rh/gcc-toolset-11/root/usr/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CPU' \
        && cmake --build --parallel ${PARALLEL} --preset 'CPU' \
@@ -57,6 +57,8 @@ ARG CUDA11VERSION=11.8
 RUN dnf install -y cuda-toolkit-${CUDA11VERSION//./-}
 ENV PATH=/usr/local/cuda-11/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 11' \
        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 11' \
@@ -67,6 +69,8 @@ ARG CUDA12VERSION=12.8
 RUN dnf install -y cuda-toolkit-${CUDA12VERSION//./-}
 ENV PATH=/usr/local/cuda-12/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 12' \
        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 12' \
@@ -78,6 +82,8 @@ ARG CUDA13VERSION=13.0
 RUN dnf install -y cuda-toolkit-${CUDA13VERSION//./-}
 ENV PATH=/usr/local/cuda-13/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'CUDA 13' \
        && cmake --build --parallel ${PARALLEL} --preset 'CUDA 13' \
@@ -87,6 +93,8 @@ RUN --mount=type=cache,target=/root/.ccache \
 FROM base AS rocm-6
 ENV PATH=/opt/rocm/hcc/bin:/opt/rocm/hip/bin:/opt/rocm/bin:/opt/rocm/hcc/bin:$PATH
 ARG PARALLEL
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'ROCm 6' \
        && cmake --build --parallel ${PARALLEL} --preset 'ROCm 6' \
@@ -118,6 +126,8 @@ RUN --mount=type=cache,target=/root/.ccache \
        && cmake --install build --component CUDA --strip --parallel ${PARALLEL}

 FROM base AS vulkan
+COPY CMakeLists.txt CMakePresets.json .
+COPY ml/backend/ggml/ggml ml/backend/ggml/ggml
 RUN --mount=type=cache,target=/root/.ccache \
    cmake --preset 'Vulkan' \
        && cmake --build --parallel --preset 'Vulkan' \
--- a/llama/patches/0024-GPU-discovery-enhancements.patch
+++ b/llama/patches/0024-GPU-discovery-enhancements.patch
@@ -20,10 +20,10 @@ fix vulkan PCI ID and ID handling
 ggml/src/ggml-cuda/vendors/hip.h     |   3 +
 ggml/src/ggml-impl.h                 |   8 +
 ggml/src/ggml-metal/ggml-metal.cpp   |   2 +
- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 209 +++++++++++--
- ggml/src/mem_hip.cpp                 | 452 +++++++++++++++++++++++++++
- ggml/src/mem_nvml.cpp                | 209 +++++++++++++
- 9 files changed, 926 insertions(+), 30 deletions(-)
+ ggml/src/ggml-vulkan/ggml-vulkan.cpp | 209 +++++++++--
+ ggml/src/mem_hip.cpp                 | 529 +++++++++++++++++++++++++++
+ ggml/src/mem_nvml.cpp                | 209 +++++++++++
+ 9 files changed, 1003 insertions(+), 30 deletions(-)
 create mode 100644 ggml/src/mem_hip.cpp
 create mode 100644 ggml/src/mem_nvml.cpp

@@ -58,7 +58,7 @@ index f9a6587f1..03f359ae9 100644
 
 target_include_directories(ggml-base PRIVATE .)
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index c9333689f..41b00af83 100644
+index c9333689f..f1a20e7fe 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
@@ -111,7 +111,7 @@ index c9333689f..41b00af83 100644
 +    if (ggml_hip_mgmt_init() == 0) {
 +        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
 +        if (status == 0) {
-+            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
+            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
 +            ggml_hip_mgmt_release();
 +            return;
 +        }
@@ -243,7 +243,7 @@ index 05ff6a5a6..032dee76d 100644
         /* .async                 = */ true,
         /* .host_buffer           = */ false,
 diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
-index 3a6bbe564..d2c278a35 100644
+index 3a6bbe564..ca02ea079 100644
 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
 +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -229,6 +229,7 @@ class vk_memory_logger;
@@ -337,7 +337,7 @@ index 3a6bbe564..d2c278a35 100644
 +            if (ggml_hip_mgmt_init() == 0) {
 +                int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
 +                if (status == 0) {
-+                    GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
+                    GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
 +                    ggml_hip_mgmt_release();
 +                    return;
 +                }
@@ -548,11 +548,12 @@ index 3a6bbe564..d2c278a35 100644
         }
 diff --git a/ggml/src/mem_hip.cpp b/ggml/src/mem_hip.cpp
 new file mode 100644
-index 000000000..5a7f5d465
+index 000000000..c1949b899
 --- /dev/null
 +++ b/ggml/src/mem_hip.cpp
-@@ -0,0 +1,452 @@
+@@ -0,0 +1,529 @@
 +#include "ggml.h"
+#include "ggml-impl.h"
 +
 +#ifdef _WIN32
 +// AMD Device Library eXtra (ADLX)
@@ -570,7 +571,6 @@ index 000000000..5a7f5d465
 +// Unused function parameters are commented out to avoid unnecessary type
 +// definitions.
 +
-+#include "ggml-impl.h"
 +#include <filesystem>
 +#include <mutex>
 +
@@ -990,15 +990,92 @@ index 000000000..5a7f5d465
 +
 +#else // #ifdef _WIN32
 +
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <filesystem>
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <glob.h>
+namespace fs = std::filesystem;
+
 +extern "C" {
 +
-+// TODO Linux implementation of accurate VRAM reporting
 +int ggml_hip_mgmt_init() { // Non-_WIN32 build: nothing is loaded or initialized here.
-+    return -1;
+    return 0; // Unconditional success; the callers shown in this diff only query memory when this returns 0.
 +}
 +void ggml_hip_mgmt_release() {} // No-op: ggml_hip_mgmt_init() in this branch acquires nothing that would need releasing.
 +int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) { // Fills *free and *total for the DRM device whose PCI_SLOT_NAME equals id; returns 0 on success, 1 otherwise.
-+    return -1;
+    GGML_LOG_INFO("%s searching for device %s\n", __func__, id); // NOTE(review): id is used here and below without a null check
+    const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent"; // uevent files that are scanned for the PCI slot name
+    const std::string drmTotalMemoryFile = "mem_info_vram_total"; // presumably a byte count exposed by the amdgpu driver -- TODO confirm
+    const std::string drmUsedMemoryFile = "mem_info_vram_used"; // presumably in the same unit as the total -- TODO confirm
+    const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
+
+    glob_t glob_result;
+    glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result); // NOTE(review): return value is ignored -- confirm glob_result is well-defined for the loop below when glob() fails
+
+    for (size_t i = 0; i < glob_result.gl_pathc; ++i) {
+        const char* device_file = glob_result.gl_pathv[i];
+        std::ifstream file(device_file);
+        if (!file.is_open()) {
+            std::cerr << "Failed to open sysfs node" << std::endl; // goes straight to stderr, unlike the GGML_LOG_* calls used elsewhere in this function
+            globfree(&glob_result);
+            return 1; // gives up on the whole search; the remaining glob matches are not tried
+        }
+
+        std::string line;
+        while (std::getline(file, line)) {
+            // Check for PCI_SLOT_NAME label (it must sit at the very start of the line)
+            if (line.find(drmUeventPCISlotLabel) == 0) {
+                std::istringstream iss(line.substr(drmUeventPCISlotLabel.size()));
+                std::string pciSlot;
+                iss >> pciSlot; // first whitespace-delimited token after the label
+                if (pciSlot == std::string(id)) { // exact, case-sensitive comparison with the caller-supplied id
+                    std::string dir = fs::path(device_file).parent_path().string(); // the VRAM files are read from the directory that holds this uevent file
+
+                    std::string totalFile = dir + "/" + drmTotalMemoryFile;
+                    std::ifstream totalFileStream(totalFile.c_str());
+                    if (!totalFileStream.is_open()) {
+                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
+                        file.close();
+                        globfree(&glob_result);
+                        return 1;
+                    }
+
+                    uint64_t memory;
+                    totalFileStream >> memory; // NOTE(review): extraction success is not checked
+                    *total = memory; // written before the "used" file is opened, so *total may already be modified when 1 is returned below
+
+                    std::string usedFile = dir + "/" + drmUsedMemoryFile;
+                    std::ifstream usedFileStream(usedFile.c_str());
+                    if (!usedFileStream.is_open()) {
+                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
+                        file.close();
+                        globfree(&glob_result);
+                        return 1;
+                    }
+
+                    uint64_t memoryUsed;
+                    usedFileStream >> memoryUsed; // NOTE(review): extraction success is not checked
+                    *free = memory - memoryUsed; // unsigned subtraction: wraps around if used exceeds total
+
+                    file.close();
+                    globfree(&glob_result);
+                    return 0; // success: both *total and *free have been written
+                }
+            }
+        }
+
+        file.close();
+    }
+    GGML_LOG_DEBUG("%s unable to find matching device\n", __func__);
+    globfree(&glob_result);
+    return 1; // no uevent file carried a PCI_SLOT_NAME equal to id
 +}
 +
 +} // extern "C"
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -3513,7 +3513,7 @@ static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t *
    if (ggml_hip_mgmt_init() == 0) {
        int status = ggml_hip_get_device_memory(ctx->pci_bus_id.c_str(), free, total);
        if (status == 0) {
-            GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
+            GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_bus_id.c_str(), *free, *total);
            ggml_hip_mgmt_release();
            return;
        }
--- a/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
+++ b/ml/backend/ggml/ggml/src/ggml-vulkan/ggml-vulkan.cpp
@@ -13212,7 +13212,7 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
            if (ggml_hip_mgmt_init() == 0) {
                int status = ggml_hip_get_device_memory(ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), free, total);
                if (status == 0) {
-                    GGML_LOG_DEBUG("%s device %s utilizing ADLX memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
+                    GGML_LOG_DEBUG("%s device %s utilizing AMD specific memory reporting free: %zu total: %zu\n", __func__, ctx->pci_id != "" ? ctx->pci_id.c_str() : ctx->uuid.c_str(), *free, *total);
                    ggml_hip_mgmt_release();
                    return;
                }
--- a/ml/backend/ggml/ggml/src/mem_hip.cpp
+++ b/ml/backend/ggml/ggml/src/mem_hip.cpp
@@ -1,4 +1,5 @@
 #include "ggml.h"
+#include "ggml-impl.h"

 #ifdef _WIN32
 // AMD Device Library eXtra (ADLX)
@@ -16,7 +17,6 @@
 // Unused function parameters are commented out to avoid unnecessary type
 // definitions.

-#include "ggml-impl.h"
 #include <filesystem>
 #include <mutex>

@@ -436,15 +436,92 @@ int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) {

 #else // #ifdef _WIN32

+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <filesystem>
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <glob.h>
+namespace fs = std::filesystem;
+
 extern "C" {

-// TODO Linux implementation of accurate VRAM reporting
 int ggml_hip_mgmt_init() { // Non-_WIN32 build: nothing is loaded or initialized here.
-    return -1;
+    return 0; // Unconditional success; the callers shown in this diff only query memory when this returns 0.
 }
 void ggml_hip_mgmt_release() {} // No-op: ggml_hip_mgmt_init() in this branch acquires nothing that would need releasing.
 int ggml_hip_get_device_memory(const char *id, size_t *free, size_t *total) { // Fills *free and *total for the DRM device whose PCI_SLOT_NAME equals id; returns 0 on success, 1 otherwise.
-    return -1;
+    GGML_LOG_INFO("%s searching for device %s\n", __func__, id); // NOTE(review): id is used here and below without a null check
+    const std::string drmDeviceGlob = "/sys/class/drm/card*/device/uevent"; // uevent files that are scanned for the PCI slot name
+    const std::string drmTotalMemoryFile = "mem_info_vram_total"; // presumably a byte count exposed by the amdgpu driver -- TODO confirm
+    const std::string drmUsedMemoryFile = "mem_info_vram_used"; // presumably in the same unit as the total -- TODO confirm
+    const std::string drmUeventPCISlotLabel = "PCI_SLOT_NAME=";
+
+    glob_t glob_result;
+    glob(drmDeviceGlob.c_str(), GLOB_NOSORT, NULL, &glob_result); // NOTE(review): return value is ignored -- confirm glob_result is well-defined for the loop below when glob() fails
+
+    for (size_t i = 0; i < glob_result.gl_pathc; ++i) {
+        const char* device_file = glob_result.gl_pathv[i];
+        std::ifstream file(device_file);
+        if (!file.is_open()) {
+            std::cerr << "Failed to open sysfs node" << std::endl; // goes straight to stderr, unlike the GGML_LOG_* calls used elsewhere in this function
+            globfree(&glob_result);
+            return 1; // gives up on the whole search; the remaining glob matches are not tried
+        }
+
+        std::string line;
+        while (std::getline(file, line)) {
+            // Check for PCI_SLOT_NAME label (it must sit at the very start of the line)
+            if (line.find(drmUeventPCISlotLabel) == 0) {
+                std::istringstream iss(line.substr(drmUeventPCISlotLabel.size()));
+                std::string pciSlot;
+                iss >> pciSlot; // first whitespace-delimited token after the label
+                if (pciSlot == std::string(id)) { // exact, case-sensitive comparison with the caller-supplied id
+                    std::string dir = fs::path(device_file).parent_path().string(); // the VRAM files are read from the directory that holds this uevent file
+
+                    std::string totalFile = dir + "/" + drmTotalMemoryFile;
+                    std::ifstream totalFileStream(totalFile.c_str());
+                    if (!totalFileStream.is_open()) {
+                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, totalFile.c_str());
+                        file.close();
+                        globfree(&glob_result);
+                        return 1;
+                    }
+
+                    uint64_t memory;
+                    totalFileStream >> memory; // NOTE(review): extraction success is not checked
+                    *total = memory; // written before the "used" file is opened, so *total may already be modified when 1 is returned below
+
+                    std::string usedFile = dir + "/" + drmUsedMemoryFile;
+                    std::ifstream usedFileStream(usedFile.c_str());
+                    if (!usedFileStream.is_open()) {
+                        GGML_LOG_DEBUG("%s Failed to read sysfs node %s\n", __func__, usedFile.c_str());
+                        file.close();
+                        globfree(&glob_result);
+                        return 1;
+                    }
+
+                    uint64_t memoryUsed;
+                    usedFileStream >> memoryUsed; // NOTE(review): extraction success is not checked
+                    *free = memory - memoryUsed; // unsigned subtraction: wraps around if used exceeds total
+
+                    file.close();
+                    globfree(&glob_result);
+                    return 0; // success: both *total and *free have been written
+                }
+            }
+        }
+
+        file.close();
+    }
+    GGML_LOG_DEBUG("%s unable to find matching device\n", __func__);
+    globfree(&glob_result);
+    return 1; // no uevent file carried a PCI_SLOT_NAME equal to id
 }

 } // extern "C"