llama: update vendored code to commit 40c6d79f (#7875)

This commit is contained in:
Jeffrey Morgan
2024-12-10 19:21:34 -08:00
committed by GitHub
parent a37f4a86a7
commit 527cc97899
289 changed files with 58552 additions and 41806 deletions

View File

@@ -1,5 +1,5 @@
/**
* llama.cpp - commit 3f1ae2e32cde00c39b96be6d01c2997c29bae555 - do not edit this file
* llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
*
* MIT License
*
@@ -34,8 +34,6 @@ void ggml_cuda_op_mul_mat_q(
const int64_t ne00 = src0->ne[0];
const int64_t nb01 = src0->nb[1];
const int64_t ne10 = src1->ne[0];
const int64_t ne11 = src1->ne[1];
GGML_ASSERT(ne10 % QK8_1 == 0);
@@ -43,7 +41,7 @@ void ggml_cuda_op_mul_mat_q(
const int64_t ne0 = dst->ne[0];
const int64_t row_diff = row_high - row_low;
const int64_t stride00 = nb01 / ggml_type_size(src0->type);
const int64_t stride00 = ne00 / ggml_blck_size(src0->type);
int id = ggml_cuda_get_device();
const int compute_capability = ggml_cuda_info().devices[id].cc;
@@ -176,5 +174,5 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
return cc < CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
}
return cc < CC_RDNA3 || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
return (cc < CC_RDNA3 && cc != CC_CDNA && cc != CC_VEGA20) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
}