mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-20 20:57:01 +00:00
llama: update vendor code to commit ba1cb19c (#8101)
This commit is contained in:
8
llama/ggml-cuda/convert.cu
vendored
8
llama/ggml-cuda/convert.cu
vendored
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
|
||||
* llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
@@ -52,7 +52,7 @@ static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __
|
||||
|
||||
template <bool need_check>
|
||||
static __global__ void dequantize_block_q8_0_f16(const void * __restrict__ vx, half * __restrict__ y, const int64_t k) {
|
||||
#if __CUDA_ARCH__ >= CC_PASCAL
|
||||
#if __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
|
||||
constexpr int nint = CUDA_Q8_0_NE_ALIGN/sizeof(int) + WARP_SIZE;
|
||||
|
||||
const int64_t i0 = CUDA_Q8_0_NE_ALIGN*blockIdx.x;
|
||||
@@ -90,7 +90,7 @@ static __global__ void dequantize_block_q8_0_f16(const void * __restrict__ vx, h
|
||||
GGML_UNUSED(y);
|
||||
GGML_UNUSED(k);
|
||||
NO_DEVICE_CODE;
|
||||
#endif // __CUDA_ARCH__ >= CC_PASCAL
|
||||
#endif // __CUDA_ARCH__ >= GGML_CUDA_CC_PASCAL
|
||||
}
|
||||
|
||||
template<typename dst_t>
|
||||
@@ -625,7 +625,7 @@ to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
|
||||
case GGML_TYPE_Q5_1:
|
||||
return dequantize_block_cuda<QK5_1, QR5_1, dequantize_q5_1>;
|
||||
case GGML_TYPE_Q8_0:
|
||||
if (ggml_cuda_info().devices[ggml_cuda_get_device()].cc >= CC_PASCAL) {
|
||||
if (ggml_cuda_info().devices[ggml_cuda_get_device()].cc >= GGML_CUDA_CC_PASCAL) {
|
||||
return dequantize_block_q8_0_f16_cuda;
|
||||
}
|
||||
return dequantize_block_cuda<QK8_0, QR8_0, dequantize_q8_0>;
|
||||
|
||||
Reference in New Issue
Block a user