mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-21 05:07:04 +00:00
llama: update vendor code to commit ba1cb19c (#8101)
This commit is contained in:
81
llama/ggml-metal.metal
vendored
81
llama/ggml-metal.metal
vendored
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* llama.cpp - commit 40c6d79fb52f995f47507fedfeaae2ac05d9b35c - do not edit this file
|
||||
* llama.cpp - commit ba1cb19cdd0d92e012e0f6e009e0620f854b6afd - do not edit this file
|
||||
*
|
||||
* MIT License
|
||||
*
|
||||
@@ -2923,6 +2923,53 @@ kernel void kernel_pad_f32(
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_pad_reflect_1d_f32(
|
||||
device const char * src0,
|
||||
device char * dst,
|
||||
constant int64_t & ne00,
|
||||
constant int64_t & ne01,
|
||||
constant int64_t & ne02,
|
||||
constant int64_t & ne03,
|
||||
constant int64_t & ne0,
|
||||
constant uint64_t & nb00,
|
||||
constant uint64_t & nb01,
|
||||
constant uint64_t & nb02,
|
||||
constant uint64_t & nb03,
|
||||
constant uint64_t & nb0,
|
||||
constant uint64_t & nb1,
|
||||
constant uint64_t & nb2,
|
||||
constant uint64_t & nb3,
|
||||
constant int32_t & p0,
|
||||
constant int32_t & p1,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tgpg[[threadgroups_per_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
|
||||
const int64_t i3 = tgpig.z;
|
||||
const int64_t i2 = tgpig.y;
|
||||
const int64_t i1 = tgpig.x;
|
||||
|
||||
const int64_t i03 = i3;
|
||||
const int64_t i02 = i2;
|
||||
const int64_t i01 = i1;
|
||||
|
||||
device const float * src0_ptr = (device const float *) (src0 + i03*nb03 + i02*nb02 + i01*nb01);
|
||||
device float * dst_ptr = (device float *) (dst + i3*nb3 + i2*nb2 + i1*nb1);
|
||||
|
||||
if (i1 < ne01 && i2 < ne02 && i3 < ne03) {
|
||||
for (int i0 = tpitg.x; i0 < ne0; i0 += ntg.x) {
|
||||
if (i0 < p0) {
|
||||
dst_ptr[i0] = src0_ptr[p0 - i0];
|
||||
} else if (i0 < ne0 - p1) {
|
||||
dst_ptr[i0] = src0_ptr[i0 - p0];
|
||||
} else {
|
||||
dst_ptr[i0] = src0_ptr[(ne0 - p1 - p0) - (p1 + 1 - (ne0 - i0)) - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_unpad_f32(
|
||||
device const char * src0,
|
||||
device char * dst,
|
||||
@@ -3951,6 +3998,38 @@ template [[host_name("kernel_flash_attn_ext_vec_q8_0_h256")]] kernel flash_attn_
|
||||
|
||||
#undef FA_TYPES
|
||||
|
||||
template<typename T>
|
||||
kernel void kernel_set(
|
||||
constant ggml_metal_kargs_set & args,
|
||||
device const char * src0,
|
||||
device const char * src1,
|
||||
device char * dst,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
ushort3 tpitg[[thread_position_in_threadgroup]],
|
||||
ushort3 ntg[[threads_per_threadgroup]]) {
|
||||
const int i13 = tgpig[2];
|
||||
const int i12 = tgpig[1];
|
||||
const int i11 = tgpig[0];
|
||||
|
||||
const int64_t n = i13*args.ne12*args.ne11*args.ne10 + i12*args.ne11*args.ne10 + i11*args.ne10;
|
||||
|
||||
const int64_t i3 = n / (args.ne12*args.ne11*args.ne10);
|
||||
const int64_t i2 = (n - i3*args.ne12*args.ne11*args.ne10) / (args.ne11*args.ne10);
|
||||
const int64_t i1 = (n - i3*args.ne12*args.ne11*args.ne10 - i2*args.ne11*args.ne10) / args.ne10;
|
||||
|
||||
device T * dst_data = (device T *) (dst + i3*args.nb3 + i2*args.nb2 + i1*args.nb1 + args.offs);
|
||||
|
||||
for (int64_t i10 = tpitg.x; i10 < args.ne10; i10 += ntg.x) {
|
||||
device const T * src = (device T *) (src1 + i13*args.nb13 + i12*args.nb12 + i11*args.nb11 + i10*args.nb10);
|
||||
dst_data[i10] = (T) src[0];
|
||||
}
|
||||
}
|
||||
|
||||
typedef decltype(kernel_set<float>) kernel_set_t;
|
||||
|
||||
template [[host_name("kernel_set_f32")]] kernel kernel_set_t kernel_set<float>;
|
||||
template [[host_name("kernel_set_i32")]] kernel kernel_set_t kernel_set<int32_t>;
|
||||
|
||||
template<typename T0, typename T1>
|
||||
kernel void kernel_cpy(
|
||||
constant ggml_metal_kargs_cpy & args,
|
||||
|
||||
Reference in New Issue
Block a user