mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-20 12:47:00 +00:00
ml/backend/ggml: update model loading for hybrid/multi backends
Use a strategy similar to llama.cpp's for deciding where tensors should be allocated. This will be improved later to take usable memory into account before assigning tensors.
This commit is contained in:
21
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
21
ml/backend/ggml/ggml/src/ggml-backend-reg.cpp
vendored
@@ -207,13 +207,7 @@ struct ggml_backend_registry {
|
||||
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
||||
register_device(ggml_backend_reg_dev_get(reg, i), score);
|
||||
}
|
||||
}
|
||||
|
||||
// Register a backend device, keeping the device list sorted by preference.
// `score` is the caller-supplied priority; higher scores sort first.
void register_device(ggml_backend_dev_t device, int score = -1) {
#ifndef NDEBUG
    GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
    devices.push_back({device, score});
    // Keep devices ordered by descending score; a stable sort preserves
    // registration order among devices with equal scores.
    std::stable_sort(devices.begin(), devices.end(),
        [](const auto & lhs, const auto & rhs) {
            return lhs.second > rhs.second;
        }
    );
}
|
||||
|
||||
// Register a backend device.
// `score` is the caller-supplied priority; it is biased here by device class
// so that, when devices are later ordered by score, CPU/GPU devices rank
// above plain accelerators, and both rank above anything else.
void register_device(ggml_backend_dev_t device, int score = -1) {
    switch (ggml_backend_dev_type(device)) {
        case GGML_BACKEND_DEVICE_TYPE_CPU:
        case GGML_BACKEND_DEVICE_TYPE_GPU:
            score += 1 << 16;
            // NOTE(review): CPU/GPU also receive the ACCEL bonus below,
            // which is what puts them ahead of pure accelerator devices —
            // the fall-through looks deliberate; annotated to silence
            // -Wimplicit-fallthrough.
            [[fallthrough]];
        case GGML_BACKEND_DEVICE_TYPE_ACCEL:
            score += 1 << 20;
            break;
        default:
            // Any other device type keeps the caller-supplied score.
            break;
    }

#ifndef NDEBUG
    GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
#endif
    devices.push_back({device, score});
}
|
||||
|
||||
ggml_backend_reg_t load_backend(const std::filesystem::path & path, bool silent) {
|
||||
dl_handle_ptr handle { dl_load_library(path) };
|
||||
if (!handle) {
|
||||
|
||||
Reference in New Issue
Block a user