mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-16 10:47:01 +00:00
llama: update llama.cpp vendor code to commit d7cfe1ff (#9356)
This commit is contained in:
5
llama/llama.cpp/src/llama-context.cpp
vendored
5
llama/llama.cpp/src/llama-context.cpp
vendored
@@ -1,5 +1,8 @@
|
||||
#include "llama-context.h"
|
||||
|
||||
#include "llama-impl.h"
|
||||
#include "llama-mmap.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
@@ -513,7 +516,7 @@ size_t llama_output_reserve(struct llama_context & lctx, size_t n_outputs) {
|
||||
|
||||
auto * buft = ggml_backend_cpu_buffer_type();
|
||||
// try to use the host buffer of the device where the output tensor is allocated for faster transfer to system memory
|
||||
- auto * output_dev = lctx.model.dev_output.dev;
|
||||
+ auto * output_dev = lctx.model.dev_output();
|
||||
auto * output_dev_host_buft = output_dev ? ggml_backend_dev_host_buffer_type(output_dev) : nullptr;
|
||||
if (output_dev_host_buft) {
|
||||
buft = output_dev_host_buft;
|
||||
|
||||
Reference in New Issue
Block a user