llama: update vendor code to commit ba1cb19c (#8101)

This commit is contained in:
Jeffrey Morgan
2024-12-14 14:55:51 -08:00
committed by GitHub
parent 60f75560a2
commit 7a81daf026
273 changed files with 3194 additions and 1900 deletions

View File

@@ -1,5 +1,5 @@
/**
* llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
* llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
*
* MIT License
*
@@ -2923,6 +2923,53 @@ kernel void kernel_pad_f32(
}
}
kernel void kernel_pad_reflect_1d_f32(
device const char * src0,
device char * dst,
constant int64_t & ne00,
constant int64_t & ne01,
constant int64_t & ne02,
constant int64_t & ne03,
constant int64_t & ne0,
constant uint64_t & nb00,
constant uint64_t & nb01,
constant uint64_t & nb02,
constant uint64_t & nb03,
constant uint64_t & nb0,
constant uint64_t & nb1,
constant uint64_t & nb2,
constant uint64_t & nb3,
constant int32_t & p0,
constant int32_t & p1,
uint3 tgpig[[threadgroup_position_in_grid]],
uint3 tgpg[[threadgroups_per_grid]],
uint3 tpitg[[thread_position_in_threadgroup]],
uint3 ntg[[threads_per_threadgroup]]) {
const int64_t i3 = tgpig.z;
const int64_t i2 = tgpig.y;
const int64_t i1 = tgpig.x;
const int64_t i03 = i3;
const int64_t i02 = i2;
const int64_t i01 = i1;
device const float * src0_ptr = (device const float *) (src0 + i03*nb03 + i02*nb02 + i01*nb01);
device float * dst_ptr = (device float *) (dst + i3*nb3 + i2*nb2 + i1*nb1);
if (i1 < ne01 && i2 < ne02 && i3 < ne03) {
for (int i0 = tpitg.x; i0 < ne0; i0 += ntg.x) {
if (i0 < p0) {
dst_ptr[i0] = src0_ptr[p0 - i0];
} else if (i0 < ne0 - p1) {
dst_ptr[i0] = src0_ptr[i0 - p0];
} else {
dst_ptr[i0] = src0_ptr[(ne0 - p1 - p0) - (p1 + 1 - (ne0 - i0)) - 1];
}
}
}
}
kernel void kernel_unpad_f32(
device const char * src0,
device char * dst,
@@ -3951,6 +3998,38 @@ template [[host_name("kernel_flash_attn_ext_vec_q8_0_h256")]] kernel flash_attn_
#undef FA_TYPES
template<typename T>
kernel void kernel_set(
constant ggml_metal_kargs_set & args,
device const char * src0,
device const char * src1,
device char * dst,
uint3 tgpig[[threadgroup_position_in_grid]],
ushort3 tpitg[[thread_position_in_threadgroup]],
ushort3 ntg[[threads_per_threadgroup]]) {
const int i13 = tgpig[2];
const int i12 = tgpig[1];
const int i11 = tgpig[0];
const int64_t n = i13*args.ne12*args.ne11*args.ne10 + i12*args.ne11*args.ne10 + i11*args.ne10;
const int64_t i3 = n / (args.ne12*args.ne11*args.ne10);
const int64_t i2 = (n - i3*args.ne12*args.ne11*args.ne10) / (args.ne11*args.ne10);
const int64_t i1 = (n - i3*args.ne12*args.ne11*args.ne10 - i2*args.ne11*args.ne10) / args.ne10;
device T * dst_data = (device T *) (dst + i3*args.nb3 + i2*args.nb2 + i1*args.nb1 + args.offs);
for (int64_t i10 = tpitg.x; i10 < args.ne10; i10 += ntg.x) {
device const T * src = (device T *) (src1 + i13*args.nb13 + i12*args.nb12 + i11*args.nb11 + i10*args.nb10);
dst_data[i10] = (T) src[0];
}
}
typedef decltype(kernel_set<float>) kernel_set_t;
template [[host_name("kernel_set_f32")]] kernel kernel_set_t kernel_set<float>;
template [[host_name("kernel_set_i32")]] kernel kernel_set_t kernel_set<int32_t>;
template<typename T0, typename T1>
kernel void kernel_cpy(
constant ggml_metal_kargs_cpy & args,