mirror of
https://github.com/ollama/ollama.git
synced 2025-12-05 18:46:22 -06:00
discovery: only retry AMD GPUs (#12894)
* discovery: only retry AMD GPUs CUDA and Vulkan don't crash on unsupported devices, so retry isn't necessary. This also refactors the code to shift the Library specific logic into the ml package. * review comments
This commit is contained in:
@@ -27,7 +27,6 @@ var (
|
||||
deviceMu sync.Mutex
|
||||
devices []ml.DeviceInfo
|
||||
libDirs map[string]struct{}
|
||||
rocmDir string
|
||||
exe string
|
||||
bootstrapped bool
|
||||
)
|
||||
@@ -61,14 +60,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
libDirs[filepath.Dir(file)] = struct{}{}
|
||||
}
|
||||
|
||||
// Our current packaging model places ggml-hip in the main directory
|
||||
// but keeps rocm in an isolated directory. We have to add it to
|
||||
// the [LD_LIBRARY_]PATH so ggml-hip will load properly
|
||||
rocmDir = filepath.Join(ml.LibOllamaPath, "rocm")
|
||||
if _, err := os.Stat(rocmDir); err != nil {
|
||||
rocmDir = ""
|
||||
}
|
||||
|
||||
if len(libDirs) == 0 {
|
||||
libDirs[""] = struct{}{}
|
||||
}
|
||||
@@ -82,9 +73,20 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
// are enumerated, but not actually supported.
|
||||
// We run this in serial to avoid potentially initializing a GPU multiple
|
||||
// times concurrently leading to memory contention
|
||||
// TODO refactor so we group the lib dirs and do serial per version, but parallel for different libs
|
||||
for dir := range libDirs {
|
||||
// Typically bootstrapping takes < 1s, but on some systems, with devices
|
||||
// in low power/idle mode, initialization can take multiple seconds. We
|
||||
// set a longer timeout just for bootstrap discovery to reduce the chance
|
||||
// of giving up too quickly
|
||||
bootstrapTimeout := 30 * time.Second
|
||||
if runtime.GOOS == "windows" {
|
||||
// On Windows with Defender enabled, AV scanning of the DLLs
|
||||
// takes place sequentially and this can significantly increase
|
||||
// the time it takes too do the initial discovery pass.
|
||||
// Subsequent loads will be faster as the scan results are
|
||||
// cached
|
||||
bootstrapTimeout = 90 * time.Second
|
||||
}
|
||||
var dirs []string
|
||||
if dir != "" {
|
||||
if requested != "" && filepath.Base(dir) != requested {
|
||||
@@ -93,21 +95,11 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if dir == "" {
|
||||
dirs = []string{ml.LibOllamaPath}
|
||||
} else {
|
||||
dirs = []string{ml.LibOllamaPath, dir}
|
||||
} else {
|
||||
dirs = []string{ml.LibOllamaPath}
|
||||
}
|
||||
|
||||
// ROCm can take a long time on some systems, so give it more time before giving up
|
||||
if dir != "" && strings.Contains(filepath.Base(dir), "rocm") {
|
||||
bootstrapTimeout = 60 * time.Second
|
||||
}
|
||||
// Typically bootstrapping takes < 1s, but on some systems, with devices
|
||||
// in low power/idle mode, initialization can take multiple seconds. We
|
||||
// set a long timeout just for bootstrap discovery to reduce the chance
|
||||
// of giving up too quickly
|
||||
ctx1stPass, cancel := context.WithTimeout(ctx, bootstrapTimeout)
|
||||
defer cancel()
|
||||
|
||||
@@ -117,6 +109,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
|
||||
// In the second pass, we more deeply initialize the GPUs to weed out devices that
|
||||
// aren't supported by a given library. We run this phase in parallel to speed up discovery.
|
||||
// Only devices that need verification are included in this pass
|
||||
slog.Debug("evluating which if any devices to filter out", "initial_count", len(devices))
|
||||
ctx2ndPass, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
@@ -125,35 +118,16 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
supportedMu := sync.Mutex{}
|
||||
supported := make(map[string]map[string]map[string]int) // [Library][libDir][ID] = pre-deletion devices index
|
||||
for i := range devices {
|
||||
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
|
||||
if devices[i].Library == "Metal" {
|
||||
if !devices[i].NeedsInitValidation() {
|
||||
continue
|
||||
}
|
||||
slog.Debug("verifying GPU is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
|
||||
libDir := devices[i].LibraryPath[len(devices[i].LibraryPath)-1]
|
||||
slog.Debug("verifying device is supported", "library", libDir, "description", devices[i].Description, "compute", devices[i].Compute(), "id", devices[i].ID, "pci_id", devices[i].PCIID)
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
var envVar string
|
||||
id := devices[i].ID
|
||||
if devices[i].Library == "ROCm" {
|
||||
if runtime.GOOS != "linux" {
|
||||
envVar = "HIP_VISIBLE_DEVICES"
|
||||
} else {
|
||||
envVar = "ROCR_VISIBLE_DEVICES"
|
||||
}
|
||||
} else if devices[i].Library == "CUDA" {
|
||||
envVar = "CUDA_VISIBLE_DEVICES"
|
||||
} else if devices[i].Library == "Vulkan" {
|
||||
id = devices[i].FilteredID
|
||||
envVar = "GGML_VK_VISIBLE_DEVICES"
|
||||
} else {
|
||||
slog.Error("Unknown Library:" + devices[i].Library)
|
||||
}
|
||||
|
||||
extraEnvs := map[string]string{
|
||||
"GGML_CUDA_INIT": "1", // force deep initialization to trigger crash on unsupported GPUs
|
||||
envVar: id, // Filter to just this one GPU
|
||||
}
|
||||
extraEnvs := ml.GetVisibleDevicesEnv(devices[i : i+1])
|
||||
devices[i].AddInitValidation(extraEnvs)
|
||||
if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
|
||||
slog.Debug("filtering device which didn't fully initialize",
|
||||
"id", devices[i].ID,
|
||||
@@ -178,26 +152,28 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
wg.Wait()
|
||||
logutil.Trace("supported GPU library combinations before filtering", "supported", supported)
|
||||
|
||||
filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
|
||||
|
||||
// Mark for deletion any overlaps - favoring the library version that can cover all GPUs if possible
|
||||
filterOverlapByLibrary(supported, needsDelete)
|
||||
|
||||
// TODO if we ever support multiple ROCm library versions this algorithm will need to be adjusted to keep the rocmID numeric value correct
|
||||
rocmID := 0
|
||||
// Any Libraries that utilize numeric IDs need adjusting based on any possible filtering taking place
|
||||
postFilteredID := map[string]int{}
|
||||
for i := 0; i < len(needsDelete); i++ {
|
||||
if needsDelete[i] {
|
||||
logutil.Trace("removing unsupported or overlapping GPU combination", "libDir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1], "description", devices[i].Description, "compute", devices[i].Compute(), "pci_id", devices[i].PCIID)
|
||||
devices = append(devices[:i], devices[i+1:]...)
|
||||
needsDelete = append(needsDelete[:i], needsDelete[i+1:]...)
|
||||
i--
|
||||
} else if devices[i].Library == "ROCm" {
|
||||
} else {
|
||||
if _, ok := postFilteredID[devices[i].Library]; !ok {
|
||||
postFilteredID[devices[i].Library] = 0
|
||||
}
|
||||
if _, err := strconv.Atoi(devices[i].ID); err == nil {
|
||||
// Replace the numeric ID with the post-filtered IDs
|
||||
devices[i].FilteredID = devices[i].ID
|
||||
devices[i].ID = strconv.Itoa(rocmID)
|
||||
slog.Debug("adjusting filtering IDs", "FilterID", devices[i].ID, "new_ID", strconv.Itoa(postFilteredID[devices[i].Library]))
|
||||
devices[i].FilterID = devices[i].ID
|
||||
devices[i].ID = strconv.Itoa(postFilteredID[devices[i].Library])
|
||||
}
|
||||
rocmID++
|
||||
postFilteredID[devices[i].Library]++
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,7 +190,7 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
case ml.DuplicateDevice:
|
||||
// Different library, choose based on priority
|
||||
var droppedDevice ml.DeviceInfo
|
||||
if devices[i].Library == "CUDA" || devices[i].Library == "ROCm" {
|
||||
if devices[i].PreferredLibrary(devices[j]) {
|
||||
droppedDevice = devices[j]
|
||||
} else {
|
||||
droppedDevice = devices[i]
|
||||
@@ -363,38 +339,6 @@ func GPUDevices(ctx context.Context, runners []ml.FilteredRunnerDiscovery) []ml.
|
||||
return devices
|
||||
}
|
||||
|
||||
func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) {
|
||||
// Filter out Vulkan devices that share a PCI ID with a non-Vulkan device that is not marked for deletion
|
||||
for i := range devices {
|
||||
if devices[i].Library != "Vulkan" || needsDelete[i] {
|
||||
continue
|
||||
}
|
||||
if devices[i].PCIID == "" {
|
||||
continue
|
||||
}
|
||||
for j := range devices {
|
||||
if i == j {
|
||||
continue
|
||||
}
|
||||
if devices[j].PCIID == "" {
|
||||
continue
|
||||
}
|
||||
if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
|
||||
needsDelete[i] = true
|
||||
slog.Debug("filtering device with duplicate PCI ID",
|
||||
"id", devices[i].ID,
|
||||
"library", devices[i].Library,
|
||||
"libdir", devices[i].LibraryPath[len(devices[i].LibraryPath)-1],
|
||||
"pci_id", devices[i].PCIID,
|
||||
"kept_id", devices[j].ID,
|
||||
"kept_library", devices[j].Library,
|
||||
)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {
|
||||
// For multi-GPU systems, use the newest version that supports all the GPUs
|
||||
for _, byLibDirs := range supported {
|
||||
|
||||
@@ -41,7 +41,7 @@ func LogDetails(devices []ml.DeviceInfo) {
|
||||
}
|
||||
slog.Info("inference compute",
|
||||
"id", dev.ID,
|
||||
"filtered_id", dev.FilteredID,
|
||||
"filter_id", dev.FilterID,
|
||||
"library", dev.Library,
|
||||
"compute", dev.Compute(),
|
||||
"name", dev.Name,
|
||||
|
||||
@@ -14,24 +14,24 @@ Vulkan PCI and Memory
|
||||
|
||||
fix vulkan PCI ID and ID handling
|
||||
---
|
||||
ggml/include/ggml-backend.h | 8 +
|
||||
ggml/include/ggml-backend.h | 6 +
|
||||
ggml/src/CMakeLists.txt | 2 +
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 65 ++++
|
||||
ggml/src/ggml-cuda/vendors/hip.h | 3 +
|
||||
ggml/src/ggml-impl.h | 8 +
|
||||
ggml/src/ggml-metal/ggml-metal.cpp | 2 +
|
||||
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 212 +++++++++++--
|
||||
ggml/src/ggml-vulkan/ggml-vulkan.cpp | 209 +++++++++++--
|
||||
ggml/src/mem_hip.cpp | 452 +++++++++++++++++++++++++++
|
||||
ggml/src/mem_nvml.cpp | 209 +++++++++++++
|
||||
9 files changed, 931 insertions(+), 30 deletions(-)
|
||||
9 files changed, 926 insertions(+), 30 deletions(-)
|
||||
create mode 100644 ggml/src/mem_hip.cpp
|
||||
create mode 100644 ggml/src/mem_nvml.cpp
|
||||
|
||||
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
|
||||
index ba181d09d..809835243 100644
|
||||
index 69223c488..6510e0cba 100644
|
||||
--- a/ggml/include/ggml-backend.h
|
||||
+++ b/ggml/include/ggml-backend.h
|
||||
@@ -169,6 +169,14 @@ extern "C" {
|
||||
@@ -169,6 +169,12 @@ extern "C" {
|
||||
const char * device_id;
|
||||
// device capabilities
|
||||
struct ggml_backend_dev_caps caps;
|
||||
@@ -41,8 +41,6 @@ index ba181d09d..809835243 100644
|
||||
+ int compute_minor;
|
||||
+ int integrated;
|
||||
+ const char *library;
|
||||
+ // number with which the devices are accessed (Vulkan)
|
||||
+ const char *numeric_id;
|
||||
};
|
||||
|
||||
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
||||
@@ -60,7 +58,7 @@ index 0609c6503..aefe43bdd 100644
|
||||
|
||||
target_include_directories(ggml-base PRIVATE .)
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index 87c6c34a4..b075a18be 100644
|
||||
index 5787e8cd5..d232bf828 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -261,6 +261,16 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
||||
@@ -92,7 +90,7 @@ index 87c6c34a4..b075a18be 100644
|
||||
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
|
||||
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
|
||||
ggml_cuda_parse_uuid(prop, id).c_str());
|
||||
@@ -3484,6 +3499,11 @@ struct ggml_backend_cuda_device_context {
|
||||
@@ -3476,6 +3491,11 @@ struct ggml_backend_cuda_device_context {
|
||||
std::string description;
|
||||
std::string pci_bus_id;
|
||||
std::string id;
|
||||
@@ -104,7 +102,7 @@ index 87c6c34a4..b075a18be 100644
|
||||
};
|
||||
|
||||
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
|
||||
@@ -3504,6 +3524,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
|
||||
@@ -3496,6 +3516,28 @@ static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
|
||||
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
|
||||
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
||||
ggml_cuda_set_device(ctx->device);
|
||||
@@ -133,7 +131,7 @@ index 87c6c34a4..b075a18be 100644
|
||||
CUDA_CHECK(cudaMemGetInfo(free, total));
|
||||
}
|
||||
|
||||
@@ -3512,6 +3554,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
|
||||
@@ -3504,6 +3546,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
|
||||
return GGML_BACKEND_DEVICE_TYPE_GPU;
|
||||
}
|
||||
|
||||
@@ -141,7 +139,7 @@ index 87c6c34a4..b075a18be 100644
|
||||
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
||||
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
|
||||
|
||||
@@ -3525,6 +3568,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
||||
@@ -3517,6 +3560,19 @@ static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_back
|
||||
// If you need the memory data, call ggml_backend_dev_memory() explicitly.
|
||||
props->memory_total = props->memory_free = 0;
|
||||
|
||||
@@ -161,7 +159,7 @@ index 87c6c34a4..b075a18be 100644
|
||||
bool host_buffer = getenv("GGML_CUDA_NO_PINNED") == nullptr;
|
||||
#ifdef GGML_CUDA_NO_PEER_COPY
|
||||
bool events = false;
|
||||
@@ -4087,6 +4143,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
@@ -4079,6 +4135,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (!initialized) {
|
||||
ggml_backend_cuda_reg_context * ctx = new ggml_backend_cuda_reg_context;
|
||||
@@ -169,7 +167,7 @@ index 87c6c34a4..b075a18be 100644
|
||||
|
||||
for (int i = 0; i < ggml_cuda_info().device_count; i++) {
|
||||
ggml_backend_cuda_device_context * dev_ctx = new ggml_backend_cuda_device_context;
|
||||
@@ -4102,6 +4159,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
@@ -4094,6 +4151,14 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
|
||||
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID);
|
||||
dev_ctx->pci_bus_id = pci_bus_id;
|
||||
|
||||
@@ -225,10 +223,10 @@ index d0fb3bcca..b63edd0c1 100644
|
||||
}
|
||||
#endif
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
|
||||
index f2ff9f322..f356e4a0a 100644
|
||||
index 05ff6a5a6..032dee76d 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.cpp
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.cpp
|
||||
@@ -535,6 +535,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
|
||||
@@ -537,6 +537,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
|
||||
GGML_UNUSED(dev);
|
||||
}
|
||||
|
||||
@@ -236,7 +234,7 @@ index f2ff9f322..f356e4a0a 100644
|
||||
static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
|
||||
props->name = ggml_backend_metal_device_get_name(dev);
|
||||
props->description = ggml_backend_metal_device_get_description(dev);
|
||||
@@ -543,6 +544,7 @@ static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_bac
|
||||
@@ -545,6 +546,7 @@ static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, ggml_bac
|
||||
|
||||
ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
|
||||
@@ -245,7 +243,7 @@ index f2ff9f322..f356e4a0a 100644
|
||||
/* .async = */ true,
|
||||
/* .host_buffer = */ false,
|
||||
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
index ed83236f4..0bbcecd01 100644
|
||||
index bd3ece516..7cfb14a54 100644
|
||||
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
@@ -231,6 +231,7 @@ class vk_memory_logger;
|
||||
@@ -286,7 +284,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
// backend interface
|
||||
|
||||
#define UNUSED GGML_UNUSED
|
||||
@@ -12391,31 +12415,103 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
|
||||
@@ -12392,31 +12416,102 @@ void ggml_backend_vk_get_device_description(int device, char * description, size
|
||||
ggml_vk_get_device_description(dev_idx, description, description_size);
|
||||
}
|
||||
|
||||
@@ -309,7 +307,6 @@ index ed83236f4..0bbcecd01 100644
|
||||
+ std::string pci_id;
|
||||
+ std::string id;
|
||||
+ std::string uuid;
|
||||
+ std::string numeric_id;
|
||||
+ int major;
|
||||
+ int minor;
|
||||
+ int driver_major;
|
||||
@@ -407,7 +404,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -12448,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||
@@ -12449,8 +12544,13 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -422,7 +419,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
}
|
||||
|
||||
vk::PhysicalDeviceProperties2 props = {};
|
||||
@@ -12466,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||
@@ -12467,19 +12567,24 @@ static std::string ggml_backend_vk_get_device_pci_id(int device_idx) {
|
||||
|
||||
char pci_bus_id[16] = {};
|
||||
snprintf(pci_bus_id, sizeof(pci_bus_id), "%04x:%02x:%02x.%x", pci_domain, pci_bus, pci_device, pci_function);
|
||||
@@ -456,7 +453,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
|
||||
static const char * ggml_backend_vk_device_get_name(ggml_backend_dev_t dev) {
|
||||
ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
|
||||
@@ -12490,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
|
||||
@@ -12491,9 +12596,14 @@ static const char * ggml_backend_vk_device_get_description(ggml_backend_dev_t de
|
||||
return ctx->description.c_str();
|
||||
}
|
||||
|
||||
@@ -472,7 +469,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
}
|
||||
|
||||
static ggml_backend_buffer_type_t ggml_backend_vk_device_get_buffer_type(ggml_backend_dev_t dev) {
|
||||
@@ -12516,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||
@@ -12517,8 +12627,9 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||
|
||||
props->name = ggml_backend_vk_device_get_name(dev);
|
||||
props->description = ggml_backend_vk_device_get_description(dev);
|
||||
@@ -483,7 +480,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
ggml_backend_vk_device_get_memory(dev, &props->memory_free, &props->memory_total);
|
||||
props->caps = {
|
||||
/* .async = */ false,
|
||||
@@ -12525,6 +12637,14 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||
@@ -12526,6 +12637,13 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||
/* .buffer_from_host_ptr = */ false,
|
||||
/* .events = */ false,
|
||||
};
|
||||
@@ -494,11 +491,10 @@ index ed83236f4..0bbcecd01 100644
|
||||
+ props->driver_minor = ctx->driver_minor;
|
||||
+ props->integrated = ctx->is_integrated_gpu;
|
||||
+ props->library = GGML_VK_NAME;
|
||||
+ props->numeric_id = ctx->numeric_id.c_str();
|
||||
}
|
||||
|
||||
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||
@@ -12953,6 +13073,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
@@ -12954,6 +13072,8 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
static std::mutex mutex;
|
||||
std::lock_guard<std::mutex> lock(mutex);
|
||||
if (!initialized) {
|
||||
@@ -507,7 +503,7 @@ index ed83236f4..0bbcecd01 100644
|
||||
for (int i = 0; i < ggml_backend_vk_get_device_count(); i++) {
|
||||
ggml_backend_vk_device_context * ctx = new ggml_backend_vk_device_context;
|
||||
char desc[256];
|
||||
@@ -12961,12 +13083,42 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
@@ -12962,12 +13082,41 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
ctx->name = GGML_VK_NAME + std::to_string(i);
|
||||
ctx->description = desc;
|
||||
ctx->is_integrated_gpu = ggml_backend_vk_get_device_type(i) == vk::PhysicalDeviceType::eIntegratedGpu;
|
||||
@@ -547,7 +543,6 @@ index ed83236f4..0bbcecd01 100644
|
||||
+ // TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
||||
+ ctx->driver_major = 0;
|
||||
+ ctx->driver_minor = 0;
|
||||
+ ctx->numeric_id = std::to_string(i);
|
||||
}
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ index b63edd0c1..81cad8cf3 100644
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
index cc68e7968..27d6574da 100644
|
||||
index 7cfb14a54..a1c46d0b3 100644
|
||||
--- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
+++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp
|
||||
@@ -73,6 +73,7 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher();
|
||||
@@ -54,10 +54,10 @@ index cc68e7968..27d6574da 100644
|
||||
std::string id;
|
||||
std::string uuid;
|
||||
+ std::string luid;
|
||||
std::string numeric_id;
|
||||
int major;
|
||||
int minor;
|
||||
@@ -12449,8 +12451,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
|
||||
int driver_major;
|
||||
@@ -12448,8 +12450,22 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
|
||||
vk::PhysicalDeviceMemoryProperties memprops = vkdev.getMemoryProperties();
|
||||
vk::PhysicalDeviceProperties2 props2;
|
||||
vkdev.getProperties2(&props2);
|
||||
@@ -81,7 +81,7 @@ index cc68e7968..27d6574da 100644
|
||||
{
|
||||
// Use vendor specific management libraries for best VRAM reporting if available
|
||||
switch (props2.properties.vendorID) {
|
||||
@@ -12478,8 +12494,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
|
||||
@@ -12477,8 +12493,8 @@ void ggml_backend_vk_get_device_memory(ggml_backend_vk_device_context *ctx, size
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -91,7 +91,7 @@ index cc68e7968..27d6574da 100644
|
||||
*total = 0;
|
||||
*free = 0;
|
||||
vk::PhysicalDeviceMemoryBudgetPropertiesEXT mem_budget_props;
|
||||
@@ -13091,7 +13107,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
@@ -13089,7 +13105,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
/* .reg = */ reg,
|
||||
/* .context = */ ctx,
|
||||
});
|
||||
@@ -99,7 +99,7 @@ index cc68e7968..27d6574da 100644
|
||||
// Gather additional information about the device
|
||||
int dev_idx = vk_instance.device_indices[i];
|
||||
vk::PhysicalDeviceProperties props1;
|
||||
@@ -13114,6 +13129,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
@@ -13112,6 +13127,14 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
}
|
||||
}
|
||||
ctx->uuid = oss.str();
|
||||
@@ -1,6 +1,6 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <git@mxy.ng>
|
||||
Date: Web, 16 Oct 2025 20:37:19 -0700
|
||||
Date: Thu, 16 Oct 2025 20:37:19 -0700
|
||||
Subject: [PATCH] interleave multi rope
|
||||
|
||||
since ollama doesn't use mrope for anything else, change it to mean the
|
||||
@@ -85,7 +85,7 @@ index 375a0c7fd..9866c96b4 100644
|
||||
// end of mrope
|
||||
|
||||
diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
|
||||
index 111286b49..6fc2b42f8 100644
|
||||
index 111286b49..633dc20ff 100644
|
||||
--- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
|
||||
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp
|
||||
@@ -31,19 +31,13 @@ void main() {
|
||||
@@ -730,10 +730,6 @@ func (b *Backend) BackendDevices() []ml.DeviceInfo {
|
||||
info.PCIID = C.GoString(props.device_id)
|
||||
}
|
||||
info.LibraryPath = ggml.LibPaths()
|
||||
if props.numeric_id != nil {
|
||||
info.FilteredID = C.GoString(props.numeric_id)
|
||||
}
|
||||
|
||||
C.ggml_backend_dev_memory(dev, &props.memory_free, &props.memory_total)
|
||||
info.TotalMemory = (uint64)(props.memory_total)
|
||||
info.FreeMemory = (uint64)(props.memory_free)
|
||||
|
||||
2
ml/backend/ggml/ggml/include/ggml-backend.h
vendored
2
ml/backend/ggml/ggml/include/ggml-backend.h
vendored
@@ -175,8 +175,6 @@ extern "C" {
|
||||
int compute_minor;
|
||||
int integrated;
|
||||
const char *library;
|
||||
// number with which the devices are accessed (Vulkan)
|
||||
const char *numeric_id;
|
||||
};
|
||||
|
||||
GGML_API const char * ggml_backend_dev_name(ggml_backend_dev_t device);
|
||||
|
||||
@@ -12435,7 +12435,6 @@ struct ggml_backend_vk_device_context {
|
||||
std::string id;
|
||||
std::string uuid;
|
||||
std::string luid;
|
||||
std::string numeric_id;
|
||||
int major;
|
||||
int minor;
|
||||
int driver_major;
|
||||
@@ -12661,7 +12660,6 @@ static void ggml_backend_vk_device_get_props(ggml_backend_dev_t dev, struct ggml
|
||||
props->driver_minor = ctx->driver_minor;
|
||||
props->integrated = ctx->is_integrated_gpu;
|
||||
props->library = GGML_VK_NAME;
|
||||
props->numeric_id = ctx->numeric_id.c_str();
|
||||
}
|
||||
|
||||
static ggml_backend_t ggml_backend_vk_device_init(ggml_backend_dev_t dev, const char * params) {
|
||||
@@ -13142,7 +13140,6 @@ static ggml_backend_dev_t ggml_backend_vk_reg_get_device(ggml_backend_reg_t reg,
|
||||
// TODO regex parse driver_props.driverInfo for a X.Y or X.Y.Z version string
|
||||
ctx->driver_major = 0;
|
||||
ctx->driver_minor = 0;
|
||||
ctx->numeric_id = std::to_string(i);
|
||||
}
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
35
ml/device.go
35
ml/device.go
@@ -257,7 +257,7 @@ type DeviceInfo struct {
|
||||
|
||||
// FilterID is populated with the unfiltered device ID if a numeric ID is used
|
||||
// so the device can be included.
|
||||
FilteredID string `json:"filtered_id,omitempty"`
|
||||
FilterID string `json:"filter_id,omitempty"`
|
||||
|
||||
// Integrated is set true for integrated GPUs, false for Discrete GPUs
|
||||
Integrated bool `json:"integration,omitempty"`
|
||||
@@ -455,6 +455,35 @@ func GetVisibleDevicesEnv(l []DeviceInfo) map[string]string {
|
||||
return env
|
||||
}
|
||||
|
||||
// NeedsInitValidation returns true if the device in question has the potential
|
||||
// to crash at inference time and requires deeper validation before we include
|
||||
// it in the supported devices list.
|
||||
func (d DeviceInfo) NeedsInitValidation() bool {
|
||||
// At this time the only library we know needs a 2nd pass is ROCm since
|
||||
// rocblas will crash on unsupported devices. We want to find those crashes
|
||||
// during bootstrap discovery so we can eliminate those GPUs before the user
|
||||
// tries to run inference on them
|
||||
return d.Library == "ROCm"
|
||||
}
|
||||
|
||||
// Set the init validation environment variable
|
||||
func (d DeviceInfo) AddInitValidation(env map[string]string) {
|
||||
env["GGML_CUDA_INIT"] = "1" // force deep initialization to trigger crash on unsupported GPUs
|
||||
}
|
||||
|
||||
// PreferredLibrary returns true if this library is preferred over the other input
|
||||
// library
|
||||
// Used to filter out Vulkan in favor of CUDA or ROCm
|
||||
func (d DeviceInfo) PreferredLibrary(other DeviceInfo) bool {
|
||||
// TODO in the future if we find Vulkan is better than ROCm on some devices
|
||||
// that implementation can live here.
|
||||
|
||||
if d.Library == "CUDA" || d.Library == "ROCm" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) {
|
||||
var envVar string
|
||||
switch d.Library {
|
||||
@@ -472,8 +501,8 @@ func (d DeviceInfo) updateVisibleDevicesEnv(env map[string]string) {
|
||||
if existing {
|
||||
v = v + ","
|
||||
}
|
||||
if d.FilteredID != "" {
|
||||
v = v + d.FilteredID
|
||||
if d.FilterID != "" {
|
||||
v = v + d.FilterID
|
||||
} else {
|
||||
v = v + d.ID
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user