mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
* Enable CUDA Graphs for gemma3n. Similar to https://github.com/ggml-org/llama.cpp/pull/14741, though ollama has a slightly different model graph than llama.cpp which requires different workaround checks. * Remove residual check by reshaping differently in gemma3n model This should make the heuristics more robust
5090 lines
240 KiB
Diff
5090 lines
240 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Aman Gupta <amangupta052@gmail.com>
|
|
Date: Sun, 22 Jun 2025 12:39:54 +0800
|
|
Subject: [PATCH] CUDA: add mean operation (#14313)
|
|
|
|
* CUDA: add mean operation
|
|
|
|
* add back sum_rows_f32_cuda
|
|
|
|
* Review: early exit if col!=0
|
|
---
|
|
ggml/src/ggml-cuda/common.cuh | 20 +
|
|
ggml/src/ggml-cuda/ggml-cuda.cu | 5 +
|
|
ggml/src/ggml-cuda/mean.cu | 19 +
|
|
ggml/src/ggml-cuda/mean.cuh | 3 +
|
|
ggml/src/ggml-cuda/sumrows.cu | 23 +-
|
|
ggml/src/ggml-cuda/sumrows.cuh | 1 -
|
|
tests/test-backend-ops.cpp | 2990 ++++++++++++++++---------------
|
|
7 files changed, 1554 insertions(+), 1507 deletions(-)
|
|
create mode 100644 ggml/src/ggml-cuda/mean.cu
|
|
create mode 100644 ggml/src/ggml-cuda/mean.cuh
|
|
|
|
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
|
|
index 64fb4ff4..5b9a0fe3 100644
|
|
--- a/ggml/src/ggml-cuda/common.cuh
|
|
+++ b/ggml/src/ggml-cuda/common.cuh
|
|
@@ -362,6 +362,26 @@ static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) {
|
|
#endif // FP16_AVAILABLE
|
|
}
|
|
|
|
+// Row reduction kernel template - compute sum (norm=false) or mean (norm=true)
|
|
+template<bool norm>
|
|
+static __global__ void reduce_rows_f32(const float * x, float * dst, const int ncols) {
|
|
+ const int row = blockIdx.x;
|
|
+ const int col = threadIdx.x;
|
|
+
|
|
+ float sum = 0.0f;
|
|
+ for (int i = col; i < ncols; i += blockDim.x) {
|
|
+ sum += x[row * ncols + i];
|
|
+ }
|
|
+
|
|
+ sum = warp_reduce_sum(sum);
|
|
+
|
|
+ if (col != 0) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ dst[row] = norm ? sum / ncols : sum;
|
|
+}
|
|
+
|
|
template<int width = WARP_SIZE>
|
|
static __device__ __forceinline__ float warp_reduce_max(float x) {
|
|
#pragma unroll
|
|
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
index d6960174..2b9fabf4 100644
|
|
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
|
@@ -35,6 +35,7 @@
|
|
#include "ggml-cuda/ssm-scan.cuh"
|
|
#include "ggml-cuda/sum.cuh"
|
|
#include "ggml-cuda/sumrows.cuh"
|
|
+#include "ggml-cuda/mean.cuh"
|
|
#include "ggml-cuda/tsembd.cuh"
|
|
#include "ggml-cuda/unary.cuh"
|
|
#include "ggml-cuda/upscale.cuh"
|
|
@@ -2322,6 +2323,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
|
case GGML_OP_SUM_ROWS:
|
|
ggml_cuda_op_sum_rows(ctx, dst);
|
|
break;
|
|
+ case GGML_OP_MEAN:
|
|
+ ggml_cuda_op_mean(ctx, dst);
|
|
+ break;
|
|
case GGML_OP_SSM_CONV:
|
|
ggml_cuda_op_ssm_conv(ctx, dst);
|
|
break;
|
|
@@ -3211,6 +3215,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
|
case GGML_OP_POOL_2D:
|
|
case GGML_OP_SUM:
|
|
case GGML_OP_SUM_ROWS:
|
|
+ case GGML_OP_MEAN:
|
|
case GGML_OP_ARGSORT:
|
|
case GGML_OP_ACC:
|
|
return true;
|
|
diff --git a/ggml/src/ggml-cuda/mean.cu b/ggml/src/ggml-cuda/mean.cu
|
|
new file mode 100644
|
|
index 00000000..4b238a39
|
|
--- /dev/null
|
|
+++ b/ggml/src/ggml-cuda/mean.cu
|
|
@@ -0,0 +1,19 @@
|
|
+#include "mean.cuh"
|
|
+
|
|
+void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
+ const ggml_tensor * src0 = dst->src[0];
|
|
+ const float * src0_d = (const float *) src0->data;
|
|
+ float * dst_d = (float *) dst->data;
|
|
+ cudaStream_t stream = ctx.stream();
|
|
+
|
|
+ GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
+ GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
+ GGML_ASSERT(ggml_is_contiguous(src0));
|
|
+
|
|
+ const int64_t ncols = src0->ne[0];
|
|
+ const int64_t nrows = ggml_nrows(src0);
|
|
+
|
|
+ const dim3 block_dims(WARP_SIZE, 1, 1);
|
|
+ const dim3 block_nums(nrows, 1, 1);
|
|
+ reduce_rows_f32</*norm*/ true><<<block_nums, block_dims, 0, stream>>>(src0_d, dst_d, ncols);
|
|
+}
|
|
diff --git a/ggml/src/ggml-cuda/mean.cuh b/ggml/src/ggml-cuda/mean.cuh
|
|
new file mode 100644
|
|
index 00000000..2b9b1043
|
|
--- /dev/null
|
|
+++ b/ggml/src/ggml-cuda/mean.cuh
|
|
@@ -0,0 +1,3 @@
|
|
+#include "common.cuh"
|
|
+
|
|
+void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
diff --git a/ggml/src/ggml-cuda/sumrows.cu b/ggml/src/ggml-cuda/sumrows.cu
|
|
index 38dbf1b5..2eee08fa 100644
|
|
--- a/ggml/src/ggml-cuda/sumrows.cu
|
|
+++ b/ggml/src/ggml-cuda/sumrows.cu
|
|
@@ -1,25 +1,9 @@
|
|
#include "sumrows.cuh"
|
|
|
|
-static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) {
|
|
- const int row = blockIdx.x;
|
|
- const int col = threadIdx.x;
|
|
-
|
|
- float sum = 0.0f;
|
|
- for (int i = col; i < ncols; i += blockDim.x) {
|
|
- sum += x[row * ncols + i];
|
|
- }
|
|
-
|
|
- sum = warp_reduce_sum(sum);
|
|
-
|
|
- if (col == 0) {
|
|
- dst[row] = sum;
|
|
- }
|
|
-}
|
|
-
|
|
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) {
|
|
const dim3 block_dims(WARP_SIZE, 1, 1);
|
|
const dim3 block_nums(nrows, 1, 1);
|
|
- k_sum_rows_f32<<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
|
|
+ reduce_rows_f32</*norm*/false><<<block_nums, block_dims, 0, stream>>>(x, dst, ncols);
|
|
}
|
|
|
|
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
@@ -35,5 +19,8 @@ void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
|
const int64_t ncols = src0->ne[0];
|
|
const int64_t nrows = ggml_nrows(src0);
|
|
|
|
- sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream);
|
|
+ const dim3 block_dims(WARP_SIZE, 1, 1);
|
|
+ const dim3 block_nums(nrows, 1, 1);
|
|
+
|
|
+ reduce_rows_f32</*norm=*/false><<<block_nums, block_dims, 0, stream>>>(src0_d, dst_d, ncols);
|
|
}
|
|
diff --git a/ggml/src/ggml-cuda/sumrows.cuh b/ggml/src/ggml-cuda/sumrows.cuh
|
|
index 191db1c1..3431c599 100644
|
|
--- a/ggml/src/ggml-cuda/sumrows.cuh
|
|
+++ b/ggml/src/ggml-cuda/sumrows.cuh
|
|
@@ -1,5 +1,4 @@
|
|
#include "common.cuh"
|
|
|
|
void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream);
|
|
-
|
|
void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
|
|
index 543db934..58bdc874 100644
|
|
--- a/tests/test-backend-ops.cpp
|
|
+++ b/tests/test-backend-ops.cpp
|
|
@@ -9,16 +9,14 @@
|
|
// Quick start for adding a new GGML op: Go to section 2 and create a struct that inherits from test_case,
|
|
// then go to section 3 and add an instantiation of your struct.
|
|
|
|
-
|
|
// ##############################
|
|
// ## Section 1: General Setup ##
|
|
// ##############################
|
|
|
|
-
|
|
-#include <ggml.h>
|
|
#include <ggml-alloc.h>
|
|
#include <ggml-backend.h>
|
|
#include <ggml-cpp.h>
|
|
+#include <ggml.h>
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
@@ -37,24 +35,26 @@
|
|
#include <vector>
|
|
|
|
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
|
- size_t nels = ggml_nelements(tensor);
|
|
+ size_t nels = ggml_nelements(tensor);
|
|
std::vector<float> data(nels);
|
|
{
|
|
// parallel initialization
|
|
- static const size_t n_threads = std::thread::hardware_concurrency();
|
|
+ static const size_t n_threads = std::thread::hardware_concurrency();
|
|
// static RNG initialization (revisit if n_threads stops being constant)
|
|
static std::vector<std::default_random_engine> generators = []() {
|
|
- std::random_device rd;
|
|
+ std::random_device rd;
|
|
std::vector<std::default_random_engine> vec;
|
|
vec.reserve(n_threads);
|
|
//for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
|
|
- for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
|
|
+ for (size_t i = 0; i < n_threads; i++) {
|
|
+ vec.emplace_back(rd());
|
|
+ }
|
|
return vec;
|
|
}();
|
|
|
|
auto init_thread = [&](size_t ith, size_t start, size_t end) {
|
|
std::uniform_real_distribution<float> distribution(min, max);
|
|
- auto & gen = generators[ith];
|
|
+ auto & gen = generators[ith];
|
|
for (size_t i = start; i < end; i++) {
|
|
data[i] = distribution(gen);
|
|
}
|
|
@@ -63,8 +63,8 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|
std::vector<std::future<void>> tasks;
|
|
tasks.reserve(n_threads);
|
|
for (size_t i = 0; i < n_threads; i++) {
|
|
- size_t start = i*nels/n_threads;
|
|
- size_t end = (i+1)*nels/n_threads;
|
|
+ size_t start = i * nels / n_threads;
|
|
+ size_t end = (i + 1) * nels / n_threads;
|
|
tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
|
|
}
|
|
for (auto & t : tasks) {
|
|
@@ -77,13 +77,13 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16 || tensor->type == GGML_TYPE_BF16) {
|
|
GGML_ASSERT(nels % ggml_blck_size(tensor->type) == 0);
|
|
|
|
- // dummy importance matrix
|
|
+ // dummy importance matrix
|
|
std::vector<float> imatrix(tensor->ne[0], 1.0f);
|
|
- const float * im = imatrix.data();
|
|
+ const float * im = imatrix.data();
|
|
if (!ggml_quantize_requires_imatrix(tensor->type)) {
|
|
// when the imatrix is optional, we want to test both quantization with and without imatrix
|
|
// use one of the random numbers to decide
|
|
- if (data[0] > 0.5f*(min + max)) {
|
|
+ if (data[0] > 0.5f * (min + max)) {
|
|
im = nullptr;
|
|
}
|
|
}
|
|
@@ -92,21 +92,21 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|
{
|
|
// parallel quantization by block
|
|
size_t blck_size = ggml_blck_size(tensor->type);
|
|
- size_t n_blocks = nels / blck_size;
|
|
+ size_t n_blocks = nels / blck_size;
|
|
|
|
auto quantize_thread = [&](size_t start, size_t end) {
|
|
- ggml_quantize_chunk(tensor->type, data.data(), dataq.data(),
|
|
- start * blck_size, end - start, blck_size, im);
|
|
+ ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), start * blck_size, end - start, blck_size,
|
|
+ im);
|
|
};
|
|
|
|
- const size_t min_blocks_per_thread = 1;
|
|
- const size_t n_threads = std::min<size_t>(std::thread::hardware_concurrency()/2,
|
|
- std::max<size_t>(1, n_blocks / min_blocks_per_thread));
|
|
+ const size_t min_blocks_per_thread = 1;
|
|
+ const size_t n_threads = std::min<size_t>(std::thread::hardware_concurrency() / 2,
|
|
+ std::max<size_t>(1, n_blocks / min_blocks_per_thread));
|
|
std::vector<std::future<void>> tasks;
|
|
tasks.reserve(n_threads);
|
|
for (size_t i = 0; i < n_threads; i++) {
|
|
- size_t start = i*n_blocks/n_threads;
|
|
- size_t end = (i+1)*n_blocks/n_threads;
|
|
+ size_t start = i * n_blocks / n_threads;
|
|
+ size_t end = (i + 1) * n_blocks / n_threads;
|
|
tasks.push_back(std::async(std::launch::async, quantize_thread, start, end));
|
|
}
|
|
for (auto & t : tasks) {
|
|
@@ -119,9 +119,9 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
|
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
|
} else if (tensor->type == GGML_TYPE_I64) {
|
|
// Integers with a size of 8 bytes can be set by mirroring the float data, the specific values are again not really meaningful.
|
|
- const size_t nbytes_half = ggml_nbytes(tensor)/2;
|
|
- ggml_backend_tensor_set(tensor, data.data(), 0*nbytes_half, nbytes_half);
|
|
- ggml_backend_tensor_set(tensor, data.data(), 1*nbytes_half, nbytes_half);
|
|
+ const size_t nbytes_half = ggml_nbytes(tensor) / 2;
|
|
+ ggml_backend_tensor_set(tensor, data.data(), 0 * nbytes_half, nbytes_half);
|
|
+ ggml_backend_tensor_set(tensor, data.data(), 1 * nbytes_half, nbytes_half);
|
|
} else {
|
|
GGML_ABORT("fatal error");
|
|
}
|
|
@@ -134,31 +134,31 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
|
|
std::vector<uint8_t> buf(ggml_nbytes(t));
|
|
ggml_backend_tensor_get(t, buf.data(), 0, ggml_nbytes(t));
|
|
|
|
- const auto * tt = ggml_get_type_traits(t->type);
|
|
- size_t bs = ggml_blck_size(t->type);
|
|
+ const auto * tt = ggml_get_type_traits(t->type);
|
|
+ size_t bs = ggml_blck_size(t->type);
|
|
std::vector<float> vq(ggml_blck_size(t->type));
|
|
- bool quantized = ggml_is_quantized(t->type);
|
|
+ bool quantized = ggml_is_quantized(t->type);
|
|
|
|
// access elements by index to avoid gaps in views
|
|
for (int64_t i3 = 0; i3 < t->ne[3]; i3++) {
|
|
for (int64_t i2 = 0; i2 < t->ne[2]; i2++) {
|
|
for (int64_t i1 = 0; i1 < t->ne[1]; i1++) {
|
|
for (int64_t i0 = 0; i0 < t->ne[0]; i0 += bs) {
|
|
- size_t i = i3*t->nb[3] + i2*t->nb[2] + i1*t->nb[1] + i0/bs*t->nb[0];
|
|
+ size_t i = i3 * t->nb[3] + i2 * t->nb[2] + i1 * t->nb[1] + i0 / bs * t->nb[0];
|
|
if (t->type == GGML_TYPE_F16) {
|
|
- tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t*)&buf[i]));
|
|
+ tv.push_back(ggml_fp16_to_fp32(*(ggml_fp16_t *) &buf[i]));
|
|
} else if (t->type == GGML_TYPE_BF16) {
|
|
- tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t*)&buf[i]));
|
|
+ tv.push_back(ggml_bf16_to_fp32(*(ggml_bf16_t *) &buf[i]));
|
|
} else if (t->type == GGML_TYPE_F32) {
|
|
tv.push_back(*(float *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I64) {
|
|
- tv.push_back((float)*(int64_t *) &buf[i]);
|
|
+ tv.push_back((float) *(int64_t *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I32) {
|
|
- tv.push_back((float)*(int32_t *) &buf[i]);
|
|
+ tv.push_back((float) *(int32_t *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I16) {
|
|
- tv.push_back((float)*(int16_t *) &buf[i]);
|
|
+ tv.push_back((float) *(int16_t *) &buf[i]);
|
|
} else if (t->type == GGML_TYPE_I8) {
|
|
- tv.push_back((float)*(int8_t *) &buf[i]);
|
|
+ tv.push_back((float) *(int8_t *) &buf[i]);
|
|
} else if (quantized) {
|
|
tt->to_float(&buf[i], vq.data(), bs);
|
|
tv.insert(tv.end(), vq.begin(), vq.end());
|
|
@@ -195,7 +195,8 @@ static double nmse(const float * a, const float * b, size_t n) {
|
|
// n: number of values to compare.
|
|
// expected_vals: optional vector of expected values for a. If expected_vals is not empty, filter out all comparisons where
|
|
// a does not match any of the expected values. Needed for noncontinuous gradients where the numerical calculation can fail.
|
|
-static double mean_abs_asymm(const float * a, const float * b, const size_t n, const std::vector<float> & expected_vals) {
|
|
+static double mean_abs_asymm(const float * a, const float * b, const size_t n,
|
|
+ const std::vector<float> & expected_vals) {
|
|
double sum = 0.0f;
|
|
|
|
size_t nvalid = 0;
|
|
@@ -219,18 +220,16 @@ static double mean_abs_asymm(const float * a, const float * b, const size_t n, c
|
|
nvalid++;
|
|
}
|
|
|
|
- return sum/nvalid;
|
|
+ return sum / nvalid;
|
|
}
|
|
|
|
// utils for printing the variables of the test cases
|
|
|
|
-template<typename T>
|
|
-static std::string var_to_str(const T & x) {
|
|
+template <typename T> static std::string var_to_str(const T & x) {
|
|
return std::to_string(x);
|
|
}
|
|
|
|
-template<typename T, size_t N>
|
|
-static std::string var_to_str(const T (&x)[N]) {
|
|
+template <typename T, size_t N> static std::string var_to_str(const T (&x)[N]) {
|
|
std::string s = "[";
|
|
for (size_t i = 0; i < N; i++) {
|
|
if (i > 0) {
|
|
@@ -242,8 +241,7 @@ static std::string var_to_str(const T (&x)[N]) {
|
|
return s;
|
|
}
|
|
|
|
-template<typename T, size_t N>
|
|
-static std::string var_to_str(const std::array<T, N> & x) {
|
|
+template <typename T, size_t N> static std::string var_to_str(const std::array<T, N> & x) {
|
|
std::string s = "[";
|
|
for (size_t i = 0; i < N; i++) {
|
|
if (i > 0) {
|
|
@@ -265,41 +263,50 @@ static std::string var_to_str(ggml_prec prec) {
|
|
|
|
static std::string var_to_str(ggml_op_pool pool) {
|
|
switch (pool) {
|
|
- case GGML_OP_POOL_AVG: return "avg";
|
|
- case GGML_OP_POOL_MAX: return "max";
|
|
- default: return std::to_string(pool);
|
|
+ case GGML_OP_POOL_AVG:
|
|
+ return "avg";
|
|
+ case GGML_OP_POOL_MAX:
|
|
+ return "max";
|
|
+ default:
|
|
+ return std::to_string(pool);
|
|
}
|
|
}
|
|
|
|
static std::string var_to_str(ggml_scale_mode mode) {
|
|
switch (mode) {
|
|
- case GGML_SCALE_MODE_NEAREST: return "nearest";
|
|
- case GGML_SCALE_MODE_BILINEAR: return "bilinear";
|
|
- default: return std::to_string(mode);
|
|
+ case GGML_SCALE_MODE_NEAREST:
|
|
+ return "nearest";
|
|
+ case GGML_SCALE_MODE_BILINEAR:
|
|
+ return "bilinear";
|
|
+ default:
|
|
+ return std::to_string(mode);
|
|
}
|
|
}
|
|
|
|
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
|
|
|
|
-#define VARS_TO_STR1(a) VAR_TO_STR(a)
|
|
-#define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b)
|
|
-#define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c)
|
|
-#define VARS_TO_STR4(a, b, c, d) VAR_TO_STR(a) + "," + VARS_TO_STR3(b, c, d)
|
|
-#define VARS_TO_STR5(a, b, c, d, e) VAR_TO_STR(a) + "," + VARS_TO_STR4(b, c, d, e)
|
|
-#define VARS_TO_STR6(a, b, c, d, e, f) VAR_TO_STR(a) + "," + VARS_TO_STR5(b, c, d, e, f)
|
|
-#define VARS_TO_STR7(a, b, c, d, e, f, g) VAR_TO_STR(a) + "," + VARS_TO_STR6(b, c, d, e, f, g)
|
|
-#define VARS_TO_STR8(a, b, c, d, e, f, g, h) VAR_TO_STR(a) + "," + VARS_TO_STR7(b, c, d, e, f, g, h)
|
|
-#define VARS_TO_STR9(a, b, c, d, e, f, g, h, i) VAR_TO_STR(a) + "," + VARS_TO_STR8(b, c, d, e, f, g, h, i)
|
|
-#define VARS_TO_STR10(a, b, c, d, e, f, g, h, i, j) VAR_TO_STR(a) + "," + VARS_TO_STR9(b, c, d, e, f, g, h, i, j)
|
|
+#define VARS_TO_STR1(a) VAR_TO_STR(a)
|
|
+#define VARS_TO_STR2(a, b) VAR_TO_STR(a) + "," + VAR_TO_STR(b)
|
|
+#define VARS_TO_STR3(a, b, c) VAR_TO_STR(a) + "," + VARS_TO_STR2(b, c)
|
|
+#define VARS_TO_STR4(a, b, c, d) VAR_TO_STR(a) + "," + VARS_TO_STR3(b, c, d)
|
|
+#define VARS_TO_STR5(a, b, c, d, e) VAR_TO_STR(a) + "," + VARS_TO_STR4(b, c, d, e)
|
|
+#define VARS_TO_STR6(a, b, c, d, e, f) VAR_TO_STR(a) + "," + VARS_TO_STR5(b, c, d, e, f)
|
|
+#define VARS_TO_STR7(a, b, c, d, e, f, g) VAR_TO_STR(a) + "," + VARS_TO_STR6(b, c, d, e, f, g)
|
|
+#define VARS_TO_STR8(a, b, c, d, e, f, g, h) VAR_TO_STR(a) + "," + VARS_TO_STR7(b, c, d, e, f, g, h)
|
|
+#define VARS_TO_STR9(a, b, c, d, e, f, g, h, i) VAR_TO_STR(a) + "," + VARS_TO_STR8(b, c, d, e, f, g, h, i)
|
|
+#define VARS_TO_STR10(a, b, c, d, e, f, g, h, i, j) VAR_TO_STR(a) + "," + VARS_TO_STR9(b, c, d, e, f, g, h, i, j)
|
|
#define VARS_TO_STR11(a, b, c, d, e, f, g, h, i, j, k) VAR_TO_STR(a) + "," + VARS_TO_STR10(b, c, d, e, f, g, h, i, j, k)
|
|
-#define VARS_TO_STR12(a, b, c, d, e, f, g, h, i, j, k, l) VAR_TO_STR(a) + "," + VARS_TO_STR11(b, c, d, e, f, g, h, i, j, k, l)
|
|
+#define VARS_TO_STR12(a, b, c, d, e, f, g, h, i, j, k, l) \
|
|
+ VAR_TO_STR(a) + "," + VARS_TO_STR11(b, c, d, e, f, g, h, i, j, k, l)
|
|
|
|
#ifdef GGML_USE_SYCL
|
|
static bool inline _isinf(float f) {
|
|
- return (*(uint32_t *)&f & 0x7fffffff) == 0x7f800000;
|
|
+ return (*(uint32_t *) &f & 0x7fffffff) == 0x7f800000;
|
|
}
|
|
#else
|
|
-static bool inline _isinf(float f) { return std::isinf(f); }
|
|
+static bool inline _isinf(float f) {
|
|
+ return std::isinf(f);
|
|
+}
|
|
#endif
|
|
|
|
// accept FLT_MAX as infinity
|
|
@@ -320,45 +327,29 @@ enum test_mode {
|
|
struct test_case {
|
|
virtual ~test_case() {}
|
|
|
|
- virtual std::string op_desc(ggml_tensor * t) {
|
|
- return ggml_op_desc(t);
|
|
- }
|
|
+ virtual std::string op_desc(ggml_tensor * t) { return ggml_op_desc(t); }
|
|
|
|
- virtual std::string vars() {
|
|
- return "";
|
|
- }
|
|
+ virtual std::string vars() { return ""; }
|
|
|
|
virtual ggml_tensor * build_graph(ggml_context * ctx) = 0;
|
|
|
|
- virtual double max_nmse_err() {
|
|
- return 1e-7;
|
|
- }
|
|
+ virtual double max_nmse_err() { return 1e-7; }
|
|
|
|
- virtual double max_maa_err() {
|
|
- return 1e-4;
|
|
- }
|
|
+ virtual double max_maa_err() { return 1e-4; }
|
|
|
|
- virtual float grad_eps() {
|
|
- return 1e-1f;
|
|
- }
|
|
+ virtual float grad_eps() { return 1e-1f; }
|
|
|
|
// If false, estimate gradient with 2 points, neglects 3rd order derivative and higher.
|
|
// If true, estimate gradient with 4 points, neglects 5th order derivative and higher.
|
|
- virtual bool grad_precise() {
|
|
- return false;
|
|
- }
|
|
+ virtual bool grad_precise() { return false; }
|
|
|
|
// Skip gradient checks if total number of gradients to be checked is larger than this (to speed up the tests).
|
|
- virtual int64_t grad_nmax() {
|
|
- return 10000;
|
|
- }
|
|
+ virtual int64_t grad_nmax() { return 10000; }
|
|
|
|
// No effect if empty.
|
|
// If not empty, skip all gradient checks where the numerical result does not match any of the values.
|
|
// Needed for dealing with noncontinuous gradients (e.g. ReLU) where estimation using finite differences is unreliable.
|
|
- virtual std::vector<float> grad_expect() {
|
|
- return {};
|
|
- }
|
|
+ virtual std::vector<float> grad_expect() { return {}; }
|
|
|
|
virtual void initialize_tensors(ggml_context * ctx) {
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != nullptr; t = ggml_get_next_tensor(ctx, t)) {
|
|
@@ -426,7 +417,8 @@ struct test_case {
|
|
return t;
|
|
}
|
|
|
|
- ggml_tensor * ggml_new_tensor_4d(ggml_context * ctx, ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
|
|
+ ggml_tensor * ggml_new_tensor_4d(ggml_context * ctx, ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2,
|
|
+ int64_t ne3) {
|
|
ggml_tensor * t = ::ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
|
|
add_sentinel(ctx);
|
|
return t;
|
|
@@ -436,7 +428,7 @@ struct test_case {
|
|
mode = MODE_TEST;
|
|
|
|
ggml_init_params params = {
|
|
- /* .mem_size = */ ggml_tensor_overhead()*128 + ggml_graph_overhead(),
|
|
+ /* .mem_size = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
|
|
/* .mem_base = */ NULL,
|
|
/* .no_alloc = */ true,
|
|
};
|
|
@@ -461,7 +453,7 @@ struct test_case {
|
|
|
|
// check if the backends support the ops
|
|
bool supported = true;
|
|
- for (ggml_backend_t backend : {backend1, backend2}) {
|
|
+ for (ggml_backend_t backend : { backend1, backend2 }) {
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
if (!ggml_backend_supports_op(backend, t)) {
|
|
printf("not supported [%s] ", ggml_backend_name(backend));
|
|
@@ -501,23 +493,18 @@ struct test_case {
|
|
|
|
// compare
|
|
struct callback_userdata {
|
|
- bool ok;
|
|
- double max_err;
|
|
+ bool ok;
|
|
+ double max_err;
|
|
ggml_backend_t backend1;
|
|
ggml_backend_t backend2;
|
|
};
|
|
|
|
- callback_userdata ud {
|
|
- true,
|
|
- max_nmse_err(),
|
|
- backend1,
|
|
- backend2
|
|
- };
|
|
+ callback_userdata ud{ true, max_nmse_err(), backend1, backend2 };
|
|
|
|
auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
|
|
- callback_userdata * ud = (callback_userdata *) user_data;
|
|
- const char * bn1 = ggml_backend_name(ud->backend1);
|
|
- const char * bn2 = ggml_backend_name(ud->backend2);
|
|
+ callback_userdata * ud = (callback_userdata *) user_data;
|
|
+ const char * bn1 = ggml_backend_name(ud->backend1);
|
|
+ const char * bn2 = ggml_backend_name(ud->backend2);
|
|
|
|
if (t1->op == GGML_OP_NONE) {
|
|
// sentinels must be unchanged
|
|
@@ -599,11 +586,11 @@ struct test_case {
|
|
static const size_t graph_nodes = 8192;
|
|
|
|
ggml_init_params params = {
|
|
- /* .mem_size = */ ggml_tensor_overhead()*128 + ggml_graph_overhead_custom(graph_nodes, false),
|
|
+ /* .mem_size = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead_custom(graph_nodes, false),
|
|
/* .mem_base = */ NULL,
|
|
/* .no_alloc = */ true,
|
|
};
|
|
- ggml_context_ptr ctx(ggml_init(params)); // smart ptr
|
|
+ ggml_context_ptr ctx(ggml_init(params)); // smart ptr
|
|
GGML_ASSERT(ctx);
|
|
|
|
ggml_tensor * out = build_graph(ctx.get());
|
|
@@ -624,14 +611,14 @@ struct test_case {
|
|
|
|
// align while also leaving some margin for variations in parameters
|
|
int align = 8;
|
|
- int last = (len + align - 1) / align * align;
|
|
+ int last = (len + align - 1) / align * align;
|
|
if (last - len < 5) {
|
|
last += align;
|
|
}
|
|
printf("%*s", last - len, "");
|
|
|
|
// allocate
|
|
- ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
|
|
+ ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
|
|
|
|
if (buf == NULL) {
|
|
printf("failed to allocate tensors\n");
|
|
@@ -648,26 +635,27 @@ struct test_case {
|
|
// warmup run
|
|
ggml_status status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
|
|
// determine number of runs
|
|
- int n_runs;
|
|
+ int n_runs;
|
|
bool is_cpu = ggml_backend_dev_type(ggml_backend_get_device(backend)) == GGML_BACKEND_DEVICE_TYPE_CPU;
|
|
if (op_flops(out) > 0) {
|
|
// based on flops
|
|
- const uint64_t GFLOP = 1000 * 1000 * 1000;
|
|
- const uint64_t target_flops_cpu = 8ULL * GFLOP;
|
|
+ const uint64_t GFLOP = 1000 * 1000 * 1000;
|
|
+ const uint64_t target_flops_cpu = 8ULL * GFLOP;
|
|
const uint64_t target_flops_gpu = 100ULL * GFLOP;
|
|
- uint64_t target_flops = is_cpu ? target_flops_cpu : target_flops_gpu;
|
|
+ uint64_t target_flops = is_cpu ? target_flops_cpu : target_flops_gpu;
|
|
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
|
} else {
|
|
// based on memory size
|
|
- const size_t GB = 1ULL << 30;
|
|
- const size_t target_size_cpu = 8 * GB;
|
|
+ const size_t GB = 1ULL << 30;
|
|
+ const size_t target_size_cpu = 8 * GB;
|
|
const size_t target_size_gpu = 32 * GB;
|
|
- size_t target_size = is_cpu ? target_size_cpu : target_size_gpu;
|
|
+ size_t target_size = is_cpu ? target_size_cpu : target_size_gpu;
|
|
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
|
}
|
|
|
|
@@ -677,8 +665,8 @@ struct test_case {
|
|
}
|
|
|
|
// calculate memory
|
|
- size_t mem = n_runs * op_size(out);
|
|
- auto tensor_op_size = [](ggml_tensor * t) {
|
|
+ size_t mem = n_runs * op_size(out);
|
|
+ auto tensor_op_size = [](ggml_tensor * t) {
|
|
size_t size = ggml_nbytes(t);
|
|
// add source tensors
|
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
|
@@ -697,13 +685,14 @@ struct test_case {
|
|
|
|
// run
|
|
int64_t total_time_us = 0;
|
|
- int64_t total_mem = 0;
|
|
- int total_runs = 0;
|
|
+ int64_t total_mem = 0;
|
|
+ int total_runs = 0;
|
|
do {
|
|
- int64_t start_time = ggml_time_us();
|
|
- ggml_status status = ggml_backend_graph_compute(backend, gf);
|
|
+ int64_t start_time = ggml_time_us();
|
|
+ ggml_status status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
int64_t end_time = ggml_time_us();
|
|
@@ -711,15 +700,13 @@ struct test_case {
|
|
total_time_us += end_time - start_time;
|
|
total_mem += mem;
|
|
total_runs += n_runs;
|
|
- } while (total_time_us < 1000*1000); // run for at least 1 second
|
|
+ } while (total_time_us < 1000 * 1000); // run for at least 1 second
|
|
|
|
- printf(" %8d runs - %8.2f us/run - ",
|
|
- total_runs,
|
|
- (double)total_time_us / total_runs);
|
|
+ printf(" %8d runs - %8.2f us/run - ", total_runs, (double) total_time_us / total_runs);
|
|
|
|
if (op_flops(out) > 0) {
|
|
double flops_per_sec = (op_flops(out) * total_runs) / (total_time_us / 1e6);
|
|
- auto format_flops = [](double flops) -> std::string {
|
|
+ auto format_flops = [](double flops) -> std::string {
|
|
char buf[256];
|
|
if (flops >= 1e12) {
|
|
snprintf(buf, sizeof(buf), "%6.2f TFLOP", flops / 1e12);
|
|
@@ -732,14 +719,12 @@ struct test_case {
|
|
}
|
|
return buf;
|
|
};
|
|
- printf("%s/run - \033[1;34m%sS\033[0m",
|
|
- format_flops(op_flops(out)).c_str(),
|
|
- format_flops(flops_per_sec).c_str());
|
|
+ printf("%s/run - \033[1;34m%sS\033[0m", format_flops(op_flops(out)).c_str(),
|
|
+ format_flops(flops_per_sec).c_str());
|
|
|
|
} else {
|
|
- printf("%8zu kB/run - \033[1;34m%7.2f GB/s\033[0m",
|
|
- op_size(out) / 1024,
|
|
- total_mem / (total_time_us / 1e6) / 1024.0 / 1024.0 / 1024.0);
|
|
+ printf("%8zu kB/run - \033[1;34m%7.2f GB/s\033[0m", op_size(out) / 1024,
|
|
+ total_mem / (total_time_us / 1e6) / 1024.0 / 1024.0 / 1024.0);
|
|
}
|
|
printf("\n");
|
|
|
|
@@ -747,15 +732,16 @@ struct test_case {
|
|
}
|
|
|
|
bool eval_grad(ggml_backend_t backend, const char * op_name) {
|
|
- mode = MODE_GRAD;
|
|
+ mode = MODE_GRAD;
|
|
const std::vector<float> expect = grad_expect();
|
|
|
|
ggml_init_params params = {
|
|
- /* .mem_size = */ ggml_tensor_overhead()*128 + 2*ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, true),
|
|
+ /* .mem_size = */ ggml_tensor_overhead() * 128 +
|
|
+ 2 * ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, true),
|
|
/* .mem_base = */ NULL,
|
|
/* .no_alloc = */ true,
|
|
};
|
|
- ggml_context_ptr ctx(ggml_init(params)); // smart ptr
|
|
+ ggml_context_ptr ctx(ggml_init(params)); // smart ptr
|
|
GGML_ASSERT(ctx);
|
|
|
|
gf = ggml_new_graph_custom(ctx.get(), GGML_DEFAULT_GRAPH_SIZE, true);
|
|
@@ -777,7 +763,7 @@ struct test_case {
|
|
}
|
|
|
|
// check if the backend supports the ops
|
|
- bool supported = true;
|
|
+ bool supported = true;
|
|
bool any_params = false;
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
|
|
if (!ggml_backend_supports_op(backend, t)) {
|
|
@@ -814,7 +800,6 @@ struct test_case {
|
|
return true;
|
|
}
|
|
|
|
-
|
|
if (!ggml_is_scalar(out)) {
|
|
out = ggml_sum(ctx.get(), out);
|
|
ggml_set_name(out, "sum_of_out");
|
|
@@ -826,7 +811,8 @@ struct test_case {
|
|
ggml_build_backward_expand(ctx.get(), gb, nullptr);
|
|
if (expect.size() != 1 || expect[0] != 0.0f) {
|
|
GGML_ASSERT(ggml_graph_n_nodes(gb) > ggml_graph_n_nodes(gf));
|
|
- for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) {
|
|
+ for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL;
|
|
+ t = ggml_get_next_tensor(ctx.get(), t)) {
|
|
GGML_ASSERT(!(t->flags & GGML_TENSOR_FLAG_PARAM) || ggml_graph_get_grad(gb, t)->op != GGML_OP_NONE);
|
|
}
|
|
}
|
|
@@ -849,44 +835,47 @@ struct test_case {
|
|
}
|
|
|
|
// allocate
|
|
- ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
|
|
+ ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr
|
|
if (buf == NULL) {
|
|
printf("failed to allocate tensors [%s] ", ggml_backend_name(backend));
|
|
return false;
|
|
}
|
|
|
|
- initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients).
|
|
- ggml_graph_reset(gb); // Sets gradients to 1 if loss, 0 otherwise.
|
|
+ initialize_tensors(ctx.get()); // Randomizes all tensors (including gradients).
|
|
+ ggml_graph_reset(gb); // Sets gradients to 1 if loss, 0 otherwise.
|
|
|
|
ggml_status status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
status = ggml_backend_graph_compute(backend, gb);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
|
|
bool ok = true;
|
|
- for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr; t = ggml_get_next_tensor(ctx.get(), t)) {
|
|
+ for (struct ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != nullptr;
|
|
+ t = ggml_get_next_tensor(ctx.get(), t)) {
|
|
if (!(t->flags & GGML_TENSOR_FLAG_PARAM)) {
|
|
continue;
|
|
}
|
|
|
|
- const char * bn = ggml_backend_name(backend);
|
|
+ const char * bn = ggml_backend_name(backend);
|
|
const int64_t ne = ggml_nelements(t);
|
|
|
|
- std::vector<float> ga;
|
|
+ std::vector<float> ga;
|
|
struct ggml_tensor * grad = ggml_graph_get_grad(gb, t);
|
|
if (grad) {
|
|
ga = tensor_to_float(grad);
|
|
} else {
|
|
- ga.resize(ne); // default value is 0.0f
|
|
+ ga.resize(ne); // default value is 0.0f
|
|
}
|
|
|
|
- for (int64_t i = 0; i < ne; ++i) { // gradient algebraic
|
|
+ for (int64_t i = 0; i < ne; ++i) { // gradient algebraic
|
|
// check for nans
|
|
if (!std::isfinite(ga[i])) {
|
|
printf("[%s] nonfinite gradient at index %" PRId64 " (%s=%f) ", ggml_op_desc(t), i, bn, ga[i]);
|
|
@@ -898,58 +887,63 @@ struct test_case {
|
|
break;
|
|
}
|
|
|
|
- std::vector<float> gn(ne); // gradient numeric
|
|
+ std::vector<float> gn(ne); // gradient numeric
|
|
GGML_ASSERT(ga.size() == gn.size());
|
|
|
|
- std::vector<float> x0 = tensor_to_float(t); // original t data
|
|
+ std::vector<float> x0 = tensor_to_float(t); // original t data
|
|
GGML_ASSERT(ggml_is_scalar(out));
|
|
GGML_ASSERT(out->type == GGML_TYPE_F32);
|
|
|
|
const float eps = grad_eps();
|
|
for (int64_t i = 0; i < ne; ++i) {
|
|
- const float xiu = x0[i] + 1.0f*eps; // x, index i, up
|
|
- const float xiuh = x0[i] + 0.5f*eps; // x, index i, up half
|
|
- const float xidh = x0[i] - 0.5f*eps; // x, index i, down half
|
|
- const float xid = x0[i] - 1.0f*eps; // x, index i, down
|
|
+ const float xiu = x0[i] + 1.0f * eps; // x, index i, up
|
|
+ const float xiuh = x0[i] + 0.5f * eps; // x, index i, up half
|
|
+ const float xidh = x0[i] - 0.5f * eps; // x, index i, down half
|
|
+ const float xid = x0[i] - 1.0f * eps; // x, index i, down
|
|
|
|
- float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
|
|
+ float fu, fuh, fdh, fd; // output values for xiu, xiuh, xid, xidh
|
|
|
|
- ggml_backend_tensor_set(t, &xiu, i*sizeof(float), sizeof(float));
|
|
+ ggml_backend_tensor_set(t, &xiu, i * sizeof(float), sizeof(float));
|
|
status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
ggml_backend_tensor_get(out, &fu, 0, ggml_nbytes(out));
|
|
|
|
- ggml_backend_tensor_set(t, &xid, i*sizeof(float), sizeof(float));
|
|
+ ggml_backend_tensor_set(t, &xid, i * sizeof(float), sizeof(float));
|
|
status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
ggml_backend_tensor_get(out, &fd, 0, ggml_nbytes(out));
|
|
|
|
if (grad_precise()) {
|
|
- ggml_backend_tensor_set(t, &xiuh, i*sizeof(float), sizeof(float));
|
|
+ ggml_backend_tensor_set(t, &xiuh, i * sizeof(float), sizeof(float));
|
|
status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
ggml_backend_tensor_get(out, &fuh, 0, ggml_nbytes(out));
|
|
|
|
- ggml_backend_tensor_set(t, &xidh, i*sizeof(float), sizeof(float));
|
|
+ ggml_backend_tensor_set(t, &xidh, i * sizeof(float), sizeof(float));
|
|
status = ggml_backend_graph_compute(backend, gf);
|
|
if (status != GGML_STATUS_SUCCESS) {
|
|
- fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__, ggml_status_to_string(status));
|
|
+ fprintf(stderr, "%s: ggml_backend_graph_compute failed. status=%s \n", __func__,
|
|
+ ggml_status_to_string(status));
|
|
return false;
|
|
}
|
|
ggml_backend_tensor_get(out, &fdh, 0, ggml_nbytes(out));
|
|
|
|
- gn[i] = (8.0*(double)fuh + (double)fd - (8.0*(double)fdh + (double)fu)) / (6.0*(double)eps);
|
|
+ gn[i] =
|
|
+ (8.0 * (double) fuh + (double) fd - (8.0 * (double) fdh + (double) fu)) / (6.0 * (double) eps);
|
|
} else {
|
|
- gn[i] = (fu - fd) / (2.0f*eps);
|
|
+ gn[i] = (fu - fd) / (2.0f * eps);
|
|
}
|
|
|
|
ggml_backend_tensor_set(t, x0.data(), 0, ggml_nbytes(t));
|
|
@@ -980,82 +974,77 @@ struct test_case {
|
|
}
|
|
};
|
|
|
|
-
|
|
// ###################################
|
|
// ## Section 2: GGML Op Defintions ##
|
|
// ###################################
|
|
|
|
-
|
|
// The following is an example showing the bare minimum for creating a test for a GGML op.
|
|
|
|
// GGML_OP_EXAMPLE
|
|
struct test_example : public test_case {
|
|
// Always define these 2 or variants thereof:
|
|
- const ggml_type type; // The type of the input tensors.
|
|
- const std::array<int64_t, 4> ne; // The shape of the input tensors.
|
|
+ const ggml_type type; // The type of the input tensors.
|
|
+ const std::array<int64_t, 4> ne; // The shape of the input tensors.
|
|
+
|
|
// For some ops it's necessary to define multiple types or shapes for the inputs.
|
|
// Or they may need additional parameters.
|
|
|
|
// Put all parameters needed to fully define the test into one of the VARS_TO_STR macros.
|
|
// In most cases these are just the properties of the struct that you defined above.
|
|
// This is needed for info prints.
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR2(type, ne);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR2(type, ne); }
|
|
|
|
// Define a constructor for the struct.
|
|
// In most cases it will be sufficient to have the same arguments as the struct has properties
|
|
// and just use initializer lists.
|
|
- test_example(ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
|
|
- : type(type), ne(ne) {}
|
|
+ test_example(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}
|
|
|
|
// Define how a simple GGML compute graph can be constructed for the new GGML op.
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
// Step 1: create input tensors that don't depend on any other tensors:
|
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
- ggml_set_name(a, "a"); // Setting names is optional but it's useful for debugging.
|
|
+ ggml_set_name(a, "a"); // Setting names is optional but it's useful for debugging.
|
|
|
|
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
ggml_set_name(b, "b");
|
|
|
|
// Step 2: use the op that you want to test in the GGML compute graph.
|
|
- ggml_tensor * out = ggml_add(ctx, a, b); // For this example we're just doing a simple addition.
|
|
+ ggml_tensor * out = ggml_add(ctx, a, b); // For this example we're just doing a simple addition.
|
|
ggml_set_name(out, "out");
|
|
|
|
// Step 3: return the output tensor.
|
|
return out;
|
|
}
|
|
+
|
|
// In order to also check the gradients for your op, add calls like ggml_set_param(a)
|
|
// immediately after you create the tensors.
|
|
// This is optional and only makes sense if a backward pass has actually been implemented for the new op.
|
|
};
|
|
|
|
-
|
|
// GGML_OP_UNARY
|
|
struct test_unary : public test_case {
|
|
- const ggml_unary_op op;
|
|
- const ggml_type type;
|
|
+ const ggml_unary_op op;
|
|
+ const ggml_type type;
|
|
const std::array<int64_t, 4> ne_a;
|
|
- int v; // view (1 : non-contiguous a)
|
|
+ int v; // view (1 : non-contiguous a)
|
|
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR3(type, ne_a, v);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR3(type, ne_a, v); }
|
|
|
|
- test_unary(ggml_unary_op op,
|
|
- ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne_a = {128, 2, 2, 2},
|
|
- int v = 0)
|
|
- : op(op), type(type), ne_a(ne_a), v(v) {}
|
|
+ test_unary(ggml_unary_op op, ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 128, 2, 2, 2 },
|
|
+ int v = 0) :
|
|
+ op(op),
|
|
+ type(type),
|
|
+ ne_a(ne_a),
|
|
+ v(v) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG ||
|
|
- op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU;
|
|
+ op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU;
|
|
|
|
ggml_tensor * a;
|
|
if (v & 1) {
|
|
- auto ne = ne_a; ne[0] *= 3;
|
|
+ auto ne = ne_a;
|
|
+ ne[0] *= 3;
|
|
a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
if (grad_supported) {
|
|
ggml_set_param(a);
|
|
@@ -1085,40 +1074,40 @@ struct test_unary : public test_case {
|
|
}
|
|
}
|
|
|
|
- float grad_eps() override {
|
|
- return 15.0f;
|
|
- }
|
|
+ float grad_eps() override { return 15.0f; }
|
|
|
|
std::vector<float> grad_expect() override {
|
|
if (op == GGML_UNARY_OP_ABS) {
|
|
- return {-1.0f, 1.0f};
|
|
+ return { -1.0f, 1.0f };
|
|
}
|
|
if (op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_STEP) {
|
|
- return {0.0f};
|
|
+ return { 0.0f };
|
|
}
|
|
if (op == GGML_UNARY_OP_RELU) {
|
|
- return {0.0f, 1.0f};
|
|
+ return { 0.0f, 1.0f };
|
|
}
|
|
return {};
|
|
}
|
|
-
|
|
};
|
|
|
|
// GGML_OP_GET_ROWS
|
|
struct test_get_rows : public test_case {
|
|
const ggml_type type;
|
|
- const int n; // cols
|
|
- const int m; // rows
|
|
- const int r; // rows to get
|
|
- const int b; // batch size
|
|
- const bool v; // view (non-contiguous src1)
|
|
-
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR6(type, n, m, r, b, v);
|
|
- }
|
|
-
|
|
- test_get_rows(ggml_type type = GGML_TYPE_F32, int n = 10, int m = 5, int r = 3, int b = 1, bool v = false)
|
|
- : type(type), n(n), m(m), r(r), b(b), v(v) {}
|
|
+ const int n; // cols
|
|
+ const int m; // rows
|
|
+ const int r; // rows to get
|
|
+ const int b; // batch size
|
|
+ const bool v; // view (non-contiguous src1)
|
|
+
|
|
+ std::string vars() override { return VARS_TO_STR6(type, n, m, r, b, v); }
|
|
+
|
|
+ test_get_rows(ggml_type type = GGML_TYPE_F32, int n = 10, int m = 5, int r = 3, int b = 1, bool v = false) :
|
|
+ type(type),
|
|
+ n(n),
|
|
+ m(m),
|
|
+ r(r),
|
|
+ b(b),
|
|
+ v(v) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
ggml_tensor * in = ggml_new_tensor_3d(ctx, type, n, m, b);
|
|
@@ -1127,7 +1116,7 @@ struct test_get_rows : public test_case {
|
|
ggml_tensor * rows = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, r, b);
|
|
ggml_set_name(rows, "rows");
|
|
if (v) {
|
|
- rows = ggml_view_2d(ctx, rows, r/2, b, rows->nb[1], 0);
|
|
+ rows = ggml_view_2d(ctx, rows, r / 2, b, rows->nb[1], 0);
|
|
ggml_set_name(rows, "view_of_rows");
|
|
}
|
|
|
|
@@ -1146,10 +1135,12 @@ struct test_get_rows : public test_case {
|
|
void initialize_tensors(ggml_context * ctx) override {
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
if (t->type == GGML_TYPE_I32) {
|
|
- if (ggml_is_view_op(t->op)) { continue; }
|
|
+ if (ggml_is_view_op(t->op)) {
|
|
+ continue;
|
|
+ }
|
|
// rows
|
|
- std::vector<int> data(r*b);
|
|
- for (int i = 0; i < r*b; i++) {
|
|
+ std::vector<int> data(r * b);
|
|
+ for (int i = 0; i < r * b; i++) {
|
|
data[i] = rand() % m;
|
|
}
|
|
ggml_backend_tensor_set(t, data.data(), 0, r * b * sizeof(int));
|
|
@@ -1163,18 +1154,21 @@ struct test_get_rows : public test_case {
|
|
// GGML_OP_GET_ROWS_BACK
|
|
struct test_get_rows_back : public test_case {
|
|
const ggml_type type;
|
|
- const int n; // cols
|
|
- const int m; // rows
|
|
- const int r; // rows to get
|
|
- const int b; // batch size
|
|
- const bool v; // view (non-contiguous src1)
|
|
-
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR6(type, n, m, r, b, v);
|
|
- }
|
|
-
|
|
- test_get_rows_back(ggml_type type = GGML_TYPE_F32, int n = 10, int m = 5, int r = 3, int b = 1, bool v = false)
|
|
- : type(type), n(n), m(m), r(r), b(b), v(v) {}
|
|
+ const int n; // cols
|
|
+ const int m; // rows
|
|
+ const int r; // rows to get
|
|
+ const int b; // batch size
|
|
+ const bool v; // view (non-contiguous src1)
|
|
+
|
|
+ std::string vars() override { return VARS_TO_STR6(type, n, m, r, b, v); }
|
|
+
|
|
+ test_get_rows_back(ggml_type type = GGML_TYPE_F32, int n = 10, int m = 5, int r = 3, int b = 1, bool v = false) :
|
|
+ type(type),
|
|
+ n(n),
|
|
+ m(m),
|
|
+ r(r),
|
|
+ b(b),
|
|
+ v(v) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
ggml_tensor * in_forward = ggml_new_tensor_3d(ctx, type, n, m, b);
|
|
@@ -1183,7 +1177,7 @@ struct test_get_rows_back : public test_case {
|
|
ggml_tensor * rows = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, r, b);
|
|
ggml_set_name(rows, "rows");
|
|
if (v) {
|
|
- rows = ggml_view_2d(ctx, rows, r/2, b, rows->nb[1], 0);
|
|
+ rows = ggml_view_2d(ctx, rows, r / 2, b, rows->nb[1], 0);
|
|
ggml_set_name(rows, "view_of_rows");
|
|
}
|
|
|
|
@@ -1199,10 +1193,12 @@ struct test_get_rows_back : public test_case {
|
|
void initialize_tensors(ggml_context * ctx) override {
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
if (t->type == GGML_TYPE_I32) {
|
|
- if (ggml_is_view_op(t->op)) { continue; }
|
|
+ if (ggml_is_view_op(t->op)) {
|
|
+ continue;
|
|
+ }
|
|
// rows
|
|
- std::vector<int> data(r*b);
|
|
- for (int i = 0; i < r*b; i++) {
|
|
+ std::vector<int> data(r * b);
|
|
+ for (int i = 0; i < r * b; i++) {
|
|
data[i] = rand() % m;
|
|
}
|
|
ggml_backend_tensor_set(t, data.data(), 0, r * b * sizeof(int));
|
|
@@ -1215,16 +1211,12 @@ struct test_get_rows_back : public test_case {
|
|
|
|
// GGML_OP_ARGMAX
|
|
struct test_argmax : public test_case {
|
|
- const ggml_type type;
|
|
+ const ggml_type type;
|
|
const std::array<int64_t, 4> ne;
|
|
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR2(type, ne);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR2(type, ne); }
|
|
|
|
- test_argmax(ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {10, 100, 1, 1})
|
|
- : type(type), ne(ne) {}
|
|
+ test_argmax(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 100, 1, 1 }) : type(type), ne(ne) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
@@ -1237,7 +1229,7 @@ struct test_argmax : public test_case {
|
|
}
|
|
|
|
void initialize_tensors(ggml_context * ctx) override {
|
|
- std::random_device rd;
|
|
+ std::random_device rd;
|
|
std::default_random_engine rng(rd());
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
if (t->type == GGML_TYPE_F32) {
|
|
@@ -1256,23 +1248,19 @@ struct test_argmax : public test_case {
|
|
}
|
|
}
|
|
|
|
- double max_nmse_err() override {
|
|
- return 0.0;
|
|
- }
|
|
+ double max_nmse_err() override { return 0.0; }
|
|
};
|
|
|
|
// GGML_OP_COUNT_EQUAL
|
|
struct test_count_equal : public test_case {
|
|
- const ggml_type type;
|
|
+ const ggml_type type;
|
|
const std::array<int64_t, 4> ne;
|
|
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR2(type, ne);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR2(type, ne); }
|
|
|
|
- test_count_equal(ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {4, 500, 1, 1})
|
|
- : type(type), ne(ne) {}
|
|
+ test_count_equal(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 4, 500, 1, 1 }) :
|
|
+ type(type),
|
|
+ ne(ne) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
@@ -1293,32 +1281,28 @@ struct test_count_equal : public test_case {
|
|
return out;
|
|
}
|
|
|
|
- double max_nmse_err() override {
|
|
- return 0.0;
|
|
- }
|
|
+ double max_nmse_err() override { return 0.0; }
|
|
};
|
|
|
|
// GGML_OP_REPEAT
|
|
struct test_repeat : public test_case {
|
|
- const ggml_type type;
|
|
+ const ggml_type type;
|
|
const std::array<int64_t, 4> ne;
|
|
- const std::array<int, 4> nr;
|
|
+ const std::array<int, 4> nr;
|
|
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR3(type, ne, nr);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR3(type, ne, nr); }
|
|
|
|
- size_t op_size(ggml_tensor * t) override {
|
|
- return ggml_nbytes(t) * 2;
|
|
- }
|
|
+ size_t op_size(ggml_tensor * t) override { return ggml_nbytes(t) * 2; }
|
|
|
|
- test_repeat(ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {10, 5, 4, 3},
|
|
- std::array<int, 4> nr = {2, 2, 2, 2})
|
|
- : type(type), ne(ne), nr(nr) {}
|
|
+ test_repeat(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 },
|
|
+ std::array<int, 4> nr = { 2, 2, 2, 2 }) :
|
|
+ type(type),
|
|
+ ne(ne),
|
|
+ nr(nr) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
- ggml_tensor * target = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
|
|
+ ggml_tensor * target =
|
|
+ ggml_new_tensor_4d(ctx, type, ne[0] * nr[0], ne[1] * nr[1], ne[2] * nr[2], ne[3] * nr[3]);
|
|
ggml_set_name(target, "target");
|
|
|
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
@@ -1334,27 +1318,24 @@ struct test_repeat : public test_case {
|
|
|
|
// GGML_OP_REPEAT_BACK
|
|
struct test_repeat_back : public test_case {
|
|
- const ggml_type type;
|
|
+ const ggml_type type;
|
|
const std::array<int64_t, 4> ne;
|
|
- const std::array<int, 4> nr;
|
|
- const bool v; // whether src is a noncontiguous view
|
|
+ const std::array<int, 4> nr;
|
|
+ const bool v; // whether src is a noncontiguous view
|
|
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR4(type, ne, nr, v);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR4(type, ne, nr, v); }
|
|
|
|
- size_t op_size(ggml_tensor * t) override {
|
|
- return ggml_nbytes(t) * 2;
|
|
- }
|
|
+ size_t op_size(ggml_tensor * t) override { return ggml_nbytes(t) * 2; }
|
|
|
|
- test_repeat_back(ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {8, 6, 4, 2},
|
|
- std::array<int, 4> nr = {2, 2, 2, 2},
|
|
- bool v = false)
|
|
- : type(type), ne(ne), nr(nr), v(v) {}
|
|
+ test_repeat_back(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 8, 6, 4, 2 },
|
|
+ std::array<int, 4> nr = { 2, 2, 2, 2 }, bool v = false) :
|
|
+ type(type),
|
|
+ ne(ne),
|
|
+ nr(nr),
|
|
+ v(v) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
- ggml_tensor * src = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
|
|
+ ggml_tensor * src = ggml_new_tensor_4d(ctx, type, ne[0] * nr[0], ne[1] * nr[1], ne[2] * nr[2], ne[3] * nr[3]);
|
|
ggml_set_name(src, "src");
|
|
|
|
if (v) {
|
|
@@ -1387,22 +1368,25 @@ struct test_repeat_back : public test_case {
|
|
|
|
// GGML_OP_DUP
|
|
struct test_dup : public test_case {
|
|
- const ggml_type type;
|
|
+ const ggml_type type;
|
|
const std::array<int64_t, 4> ne;
|
|
const std::array<int64_t, 4> permute;
|
|
- bool _use_permute;
|
|
+ bool _use_permute;
|
|
|
|
std::string vars() override {
|
|
std::string v = VARS_TO_STR2(type, ne);
|
|
- if (_use_permute) v += "," + VAR_TO_STR(permute);
|
|
+ if (_use_permute) {
|
|
+ v += "," + VAR_TO_STR(permute);
|
|
+ }
|
|
return v;
|
|
}
|
|
|
|
- test_dup(ggml_type type = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {10, 10, 20, 1},
|
|
- std::array<int64_t, 4> permute = {0, 0, 0, 0})
|
|
- : type(type), ne(ne), permute(permute),
|
|
- _use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
|
+ test_dup(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 10, 20, 1 },
|
|
+ std::array<int64_t, 4> permute = { 0, 0, 0, 0 }) :
|
|
+ type(type),
|
|
+ ne(ne),
|
|
+ permute(permute),
|
|
+ _use_permute(permute[0] + permute[1] + permute[2] + permute[3] > 0) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
|
|
@@ -1423,22 +1407,21 @@ struct test_dup : public test_case {
|
|
|
|
// GGML_OP_SET
|
|
struct test_set : public test_case {
|
|
- const ggml_type type_src;
|
|
- const ggml_type type_dst;
|
|
+ const ggml_type type_src;
|
|
+ const ggml_type type_dst;
|
|
const std::array<int64_t, 4> ne;
|
|
- const int dim;
|
|
+ const int dim;
|
|
|
|
- std::string vars() override {
|
|
- return VARS_TO_STR4(type_src, type_dst, ne, dim);
|
|
- }
|
|
+ std::string vars() override { return VARS_TO_STR4(type_src, type_dst, ne, dim); }
|
|
|
|
- size_t op_size(ggml_tensor * t) override {
|
|
- return ggml_nbytes(t) + ggml_nbytes(t->src[0]);
|
|
- }
|
|
+ size_t op_size(ggml_tensor * t) override { return ggml_nbytes(t) + ggml_nbytes(t->src[0]); }
|
|
|
|
test_set(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
|
|
- std::array<int64_t, 4> ne = {6, 5, 4, 3}, int dim = 1)
|
|
- : type_src(type_src), type_dst(type_dst), ne(ne), dim(dim) {}
|
|
+ std::array<int64_t, 4> ne = { 6, 5, 4, 3 }, int dim = 1) :
|
|
+ type_src(type_src),
|
|
+ type_dst(type_dst),
|
|
+ ne(ne),
|
|
+ dim(dim) {}
|
|
|
|
ggml_tensor * build_graph(ggml_context * ctx) override {
|
|
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
|
|
@@ -1449,17 +1432,17 @@ struct test_set : public test_case {
|
|
for (int i = 0; i < dim; ++i) {
|
|
ne_dst[i] *= 2;
|
|
}
|
|
- ggml_tensor* dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
|
|
+ ggml_tensor * dst = ggml_new_tensor(ctx, type_dst, 4, ne_dst.data());
|
|
ggml_set_param(dst);
|
|
ggml_set_name(dst, "dst");
|
|
|
|
size_t offset = 0;
|
|
for (int i = 0; i < dim; ++i) {
|
|
- offset += ((ne_dst[i] - ne[i])/2)*dst->nb[i];
|
|
+ offset += ((ne_dst[i] - ne[i]) / 2) * dst->nb[i];
|
|
}
|
|
ggml_tensor * out = ggml_set(ctx, dst, src,
|
|
- // The backward pass requires setting a contiguous region:
|
|
- src->nb[1], src->nb[2], src->nb[3], offset);
|
|
+ // The backward pass requires setting a contiguous region:
|
|
+ src->nb[1], src->nb[2], src->nb[3], offset);
|
|
ggml_set_name(out, "out");
|
|
|
|
return out;
|
|
@@ -1468,33 +1451,30 @@ struct test_set : public test_case {

// GGML_OP_CPY
struct test_cpy : public test_case {
- const ggml_type type_src;
- const ggml_type type_dst;
+ const ggml_type type_src;
+ const ggml_type type_dst;
const std::array<int64_t, 4> ne;
const std::array<int64_t, 4> permute_src;
const std::array<int64_t, 4> permute_dst;
- bool _src_use_permute;
- bool _dst_use_permute;
+ bool _src_use_permute;
+ bool _dst_use_permute;

- std::string vars() override {
- return VARS_TO_STR5(type_src, type_dst, ne, permute_src, permute_dst);
- }
+ std::string vars() override { return VARS_TO_STR5(type_src, type_dst, ne, permute_src, permute_dst); }

- double max_nmse_err() override {
- return 1e-6;
- }
+ double max_nmse_err() override { return 1e-6; }

- size_t op_size(ggml_tensor * t) override {
- return ggml_nbytes(t) + ggml_nbytes(t->src[0]);
- }
+ size_t op_size(ggml_tensor * t) override { return ggml_nbytes(t) + ggml_nbytes(t->src[0]); }

test_cpy(ggml_type type_src = GGML_TYPE_F32, ggml_type type_dst = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 10, 1},
- std::array<int64_t, 4> permute_src = {0, 0, 0, 0},
- std::array<int64_t, 4> permute_dst = {0, 0, 0, 0})
- : type_src(type_src), type_dst(type_dst), ne(ne), permute_src(permute_src), permute_dst(permute_dst),
- _src_use_permute(permute_src[0] + permute_src[1] + permute_src[2] + permute_src[3] > 0),
- _dst_use_permute(permute_dst[0] + permute_dst[1] + permute_dst[2] + permute_dst[3] > 0) {}
+ std::array<int64_t, 4> ne = { 10, 10, 10, 1 }, std::array<int64_t, 4> permute_src = { 0, 0, 0, 0 },
+ std::array<int64_t, 4> permute_dst = { 0, 0, 0, 0 }) :
+ type_src(type_src),
+ type_dst(type_dst),
+ ne(ne),
+ permute_src(permute_src),
+ permute_dst(permute_dst),
+ _src_use_permute(permute_src[0] + permute_src[1] + permute_src[2] + permute_src[3] > 0),
+ _dst_use_permute(permute_dst[0] + permute_dst[1] + permute_dst[2] + permute_dst[3] > 0) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * src = ggml_new_tensor(ctx, type_src, 4, ne.data());
@@ -1523,16 +1503,12 @@ struct test_cpy : public test_case {

// GGML_OP_CONT
struct test_cont : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_cont(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 10, 1})
- : type(type), ne(ne) {}
+ test_cont(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 10, 10, 1 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1555,26 +1531,24 @@ struct test_cont : public test_case {
// GGML_OP_DIV
struct test_bin_bcast : public test_case {
using op_t = ggml_tensor * (*) (ggml_context *, ggml_tensor *, ggml_tensor *);
- op_t op;
- const ggml_type type;
+ op_t op;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const std::array<int, 4> nr;
+ const std::array<int, 4> nr;

- std::string vars() override {
- return VARS_TO_STR3(type, ne, nr);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne, nr); }

- size_t op_size(ggml_tensor * t) override {
- return ggml_nbytes(t) * 3;
- }
+ size_t op_size(ggml_tensor * t) override { return ggml_nbytes(t) * 3; }

- test_bin_bcast(op_t op, ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 1, 1},
- std::array<int, 4> nr = {1, 2, 1, 1})
- : op(op), type(type), ne(ne), nr(nr) {}
+ test_bin_bcast(op_t op, ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 10, 1, 1 },
+ std::array<int, 4> nr = { 1, 2, 1, 1 }) :
+ op(op),
+ type(type),
+ ne(ne),
+ nr(nr) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
- ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
+ ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0] * nr[0], ne[1] * nr[1], ne[2] * nr[2], ne[3] * nr[3]);
ggml_set_name(a, "a");

ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1604,31 +1578,21 @@ struct test_bin_bcast : public test_case {
}
}

- float grad_eps() override {
- return 0.1f * (op == ggml_mul ? ne[0]*ne[1]*ne[2]*ne[3] : 1);
- }
+ float grad_eps() override { return 0.1f * (op == ggml_mul ? ne[0] * ne[1] * ne[2] * ne[3] : 1); }

- bool grad_precise() override {
- return op == ggml_div;
- }
+ bool grad_precise() override { return op == ggml_div; }

- double max_maa_err() override {
- return op == ggml_add ? 1e-4 : 1e-3;
- }
+ double max_maa_err() override { return op == ggml_add ? 1e-4 : 1e-3; }
};

// GGML_OP_ADD1
struct test_add1 : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_add1(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_add1(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1645,25 +1609,21 @@ struct test_add1 : public test_case {
return out;
}

- float grad_eps() override {
- return 0.1f * ne[0]*ne[1]*ne[2]*ne[3];
- }
+ float grad_eps() override { return 0.1f * ne[0] * ne[1] * ne[2] * ne[3]; }
};

// GGML_OP_SCALE
struct test_scale : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- float scale;
+ float scale;

- std::string vars() override {
- return VARS_TO_STR3(type, ne, scale);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne, scale); }

- test_scale(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 10, 10},
- float scale = 2.0f)
- : type(type), ne(ne), scale(scale) {}
+ test_scale(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 10, 10, 10 }, float scale = 2.0f) :
+ type(type),
+ ne(ne),
+ scale(scale) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1679,18 +1639,16 @@ struct test_scale : public test_case {

// GGML_OP_SILU_BACK
struct test_silu_back : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- float eps;
+ float eps;

- std::string vars() override {
- return VARS_TO_STR3(type, ne, eps);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne, eps); }

- test_silu_back(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {64, 5, 4, 3},
- float eps = 1e-6f)
- : type(type), ne(ne), eps(eps) {}
+ test_silu_back(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 64, 5, 4, 3 }, float eps = 1e-6f) :
+ type(type),
+ ne(ne),
+ eps(eps) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1705,34 +1663,32 @@ struct test_silu_back : public test_case {
return out;
}

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_NORM
struct test_norm : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const bool v; // whether a is a non-contiguous view
- const float eps;
+ const bool v; // whether a is a non-contiguous view
+ const float eps;

- std::string vars() override {
- return VARS_TO_STR4(type, ne, v, eps);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne, v, eps); }

- test_norm(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {64, 5, 4, 3},
- bool v = false,
- float eps = 1e-6f)
- : type(type), ne(ne), v(v), eps(eps) {}
+ test_norm(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 64, 5, 4, 3 }, bool v = false,
+ float eps = 1e-6f) :
+ type(type),
+ ne(ne),
+ v(v),
+ eps(eps) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a");

if (v) {
- a = ggml_view_4d(ctx, a, a->ne[0]/2, a->ne[1]/2, a->ne[2]/2, a->ne[3]/2, a->nb[1], a->nb[2], a->nb[3], 0);
+ a = ggml_view_4d(ctx, a, a->ne[0] / 2, a->ne[1] / 2, a->ne[2] / 2, a->ne[3] / 2, a->nb[1], a->nb[2],
+ a->nb[3], 0);
ggml_set_name(a, "view of a");
}

@@ -1745,20 +1701,19 @@ struct test_norm : public test_case {

// GGML_OP_RMS_NORM
struct test_rms_norm : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const bool v; // whether a is a non-contiguous view
- const float eps;
+ const bool v; // whether a is a non-contiguous view
+ const float eps;

- std::string vars() override {
- return VARS_TO_STR4(type, ne, v, eps);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne, v, eps); }

- test_rms_norm(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {64, 5, 4, 3},
- bool v = false,
- float eps = 1e-6f)
- : type(type), ne(ne), v(v), eps(eps) {}
+ test_rms_norm(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 64, 5, 4, 3 }, bool v = false,
+ float eps = 1e-6f) :
+ type(type),
+ ne(ne),
+ v(v),
+ eps(eps) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1766,7 +1721,8 @@ struct test_rms_norm : public test_case {
ggml_set_name(a, "a");

if (v) {
- a = ggml_view_4d(ctx, a, a->ne[0]/2, a->ne[1]/2, a->ne[2]/2, a->ne[3]/2, a->nb[1], a->nb[2], a->nb[3], 0);
+ a = ggml_view_4d(ctx, a, a->ne[0] / 2, a->ne[1] / 2, a->ne[2] / 2, a->ne[3] / 2, a->nb[1], a->nb[2],
+ a->nb[3], 0);
ggml_set_name(a, "view of a");
}

@@ -1782,29 +1738,23 @@ struct test_rms_norm : public test_case {
}
}

- float grad_eps() override {
- return 1.0f;
- }
+ float grad_eps() override { return 1.0f; }

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_RMS_NORM_BACK
struct test_rms_norm_back : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const float eps;
+ const float eps;

- std::string vars() override {
- return VARS_TO_STR3(type, ne, eps);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne, eps); }

- test_rms_norm_back(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {64, 5, 4, 3},
- float eps = 1e-6f)
- : type(type), ne(ne), eps(eps) {}
+ test_rms_norm_back(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 64, 5, 4, 3 }, float eps = 1e-6f) :
+ type(type),
+ ne(ne),
+ eps(eps) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -1828,18 +1778,17 @@ struct test_rms_norm_back : public test_case {

// GGML_OP_SSM_CONV
struct test_ssm_conv : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
const std::array<int64_t, 4> ne_b;

- std::string vars() override {
- return VARS_TO_STR3(type, ne_a, ne_b);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne_a, ne_b); }

- test_ssm_conv(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {10, 10, 10, 1},
- std::array<int64_t, 4> ne_b = {3, 3, 1, 1})
- : type(type), ne_a(ne_a), ne_b(ne_b) {}
+ test_ssm_conv(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 10, 10, 10, 1 },
+ std::array<int64_t, 4> ne_b = { 3, 3, 1, 1 }) :
+ type(type),
+ ne_a(ne_a),
+ ne_b(ne_b) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -1858,21 +1807,27 @@ struct test_ssm_scan : public test_case {
const int64_t n_seq_tokens;
const int64_t n_seqs;

- std::string vars() override {
- return VARS_TO_STR5(type, d_state, d_inner, n_seq_tokens, n_seqs);
- }
+ std::string vars() override { return VARS_TO_STR5(type, d_state, d_inner, n_seq_tokens, n_seqs); }

- test_ssm_scan(ggml_type type = GGML_TYPE_F32,
- int64_t d_state = 32, int64_t d_inner = 32, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
- : type(type), d_state(d_state), d_inner(d_inner), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
+ test_ssm_scan(ggml_type type = GGML_TYPE_F32, int64_t d_state = 32, int64_t d_inner = 32, int64_t n_seq_tokens = 32,
+ int64_t n_seqs = 32) :
+ type(type),
+ d_state(d_state),
+ d_inner(d_inner),
+ n_seq_tokens(n_seq_tokens),
+ n_seqs(n_seqs) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
- ggml_tensor * s = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, n_seqs, 1 }.data());
- ggml_tensor * x = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
- ggml_tensor * dt = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
- ggml_tensor * A = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, 1 , 1 }.data());
- ggml_tensor * B = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
- ggml_tensor * C = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
+ ggml_tensor * s = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, n_seqs, 1 }.data());
+ ggml_tensor * x =
+ ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
+ ggml_tensor * dt =
+ ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_inner, n_seq_tokens, n_seqs, 1 }.data());
+ ggml_tensor * A = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, d_inner, 1, 1 }.data())
+ ggml_tensor * B =
+ ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
+ ggml_tensor * C =
+ ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ d_state, n_seq_tokens, n_seqs, 1 }.data());
ggml_tensor * out = ggml_ssm_scan(ctx, s, x, dt, A, B, C);
return out;
}
@@ -1887,22 +1842,26 @@ struct test_rwkv_wkv6 : public test_case {
const int64_t n_seq_tokens;
const int64_t n_seqs;

- std::string vars() override {
- return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs);
- }
+ std::string vars() override { return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs); }

- test_rwkv_wkv6(ggml_type type = GGML_TYPE_F32,
- int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
- : type(type), head_count(head_count), head_size(head_size), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
+ test_rwkv_wkv6(ggml_type type = GGML_TYPE_F32, int64_t head_count = 32, int64_t head_size = 64,
+ int64_t n_seq_tokens = 32, int64_t n_seqs = 32) :
+ type(type),
+ head_count(head_count),
+ head_size(head_size),
+ n_seq_tokens(n_seq_tokens),
+ n_seqs(n_seqs) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
const int64_t n_tokens = n_seq_tokens * n_seqs;
- ggml_tensor * r = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * tf = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size, head_count }.data());
- ggml_tensor * td = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * s = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
+ ggml_tensor * r = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * tf = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size, head_count }.data());
+ ggml_tensor * td =
+ ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * s =
+ ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
ggml_tensor * out = ggml_rwkv_wkv6(ctx, k, v, r, tf, td, s);
return out;
}
@@ -1917,21 +1876,24 @@ struct test_gla : public test_case {
const int64_t n_seq_tokens;
const int64_t n_seqs;

- std::string vars() override {
- return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs);
- }
+ std::string vars() override { return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs); }

- test_gla(ggml_type type = GGML_TYPE_F32,
- int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
- : type(type), head_count(head_count), head_size(head_size), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
+ test_gla(ggml_type type = GGML_TYPE_F32, int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32,
+ int64_t n_seqs = 32) :
+ type(type),
+ head_count(head_count),
+ head_size(head_size),
+ n_seq_tokens(n_seq_tokens),
+ n_seqs(n_seqs) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
const int64_t n_tokens = n_seq_tokens * n_seqs;
- ggml_tensor * q = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * g = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * s = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
+ ggml_tensor * q = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * g = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * s =
+ ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
ggml_tensor * out = ggml_gated_linear_attn(ctx, k, v, q, g, s, pow(head_size, -0.5));
return out;
}
@@ -1946,26 +1908,29 @@ struct test_rwkv_wkv7 : public test_case {
const int64_t n_seq_tokens;
const int64_t n_seqs;

- std::string vars() override {
- return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs);
- }
+ std::string vars() override { return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs); }

- test_rwkv_wkv7(ggml_type type = GGML_TYPE_F32,
- int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
- : type(type), head_count(head_count), head_size(head_size), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
+ test_rwkv_wkv7(ggml_type type = GGML_TYPE_F32, int64_t head_count = 32, int64_t head_size = 64,
+ int64_t n_seq_tokens = 32, int64_t n_seqs = 32) :
+ type(type),
+ head_count(head_count),
+ head_size(head_size),
+ n_seq_tokens(n_seq_tokens),
+ n_seqs(n_seqs) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
const int64_t n_tokens = n_seq_tokens * n_seqs;
- ggml_tensor * r = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * w = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * a = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
- ggml_tensor * b = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * r = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * w = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * a = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
+ ggml_tensor * b = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
// Outputs may become NaN with long seqlen without these normalization
- a = ggml_l2_norm(ctx, a, 1e-7F);
- b = ggml_l2_norm(ctx, b, 1e-7F);
- ggml_tensor * s = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
+ a = ggml_l2_norm(ctx, a, 1e-7F);
+ b = ggml_l2_norm(ctx, b, 1e-7F);
+ ggml_tensor * s =
+ ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
ggml_tensor * out = ggml_rwkv_wkv7(ctx, r, w, k, v, a, b, s);
return out;
}
@@ -1973,40 +1938,39 @@ struct test_rwkv_wkv7 : public test_case {

// GGML_OP_MUL_MAT
struct test_mul_mat : public test_case {
- const ggml_type type_a;
- const ggml_type type_b;
- const int64_t m;
- const int64_t n;
- const int64_t k;
- const std::array<int64_t, 2> bs; // dims 3 and 4
- const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
- const std::array<int64_t, 4> per; // permutation of dimensions
- const bool v; // whether a and b are non-contiguous views
+ const ggml_type type_a;
+ const ggml_type type_b;
+ const int64_t m;
+ const int64_t n;
+ const int64_t k;
+ const std::array<int64_t, 2> bs; // dims 3 and 4
+ const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
+ const std::array<int64_t, 4> per; // permutation of dimensions
+ const bool v; // whether a and b are non-contiguous views

- std::string vars() override {
- return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v);
- }
+ std::string vars() override { return VARS_TO_STR9(type_a, type_b, m, n, k, bs, nr, per, v); }

- double max_nmse_err() override {
- return 5e-4;
- }
+ double max_nmse_err() override { return 5e-4; }

- int64_t grad_nmax() override {
- return 20000;
- }
+ int64_t grad_nmax() override { return 20000; }

uint64_t op_flops(ggml_tensor * t) override {
GGML_UNUSED(t);
return 2 * m * n * k * bs[0] * nr[0] * bs[1] * nr[1];
}

- test_mul_mat(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
- int64_t m = 32, int64_t n = 32, int64_t k = 32,
- std::array<int64_t, 2> bs = {10, 10},
- std::array<int64_t, 2> nr = {2, 2},
- std::array<int64_t, 4> per = {0, 1, 2, 3},
- bool v = false)
- : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), per(per), v(v) {}
+ test_mul_mat(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32, int64_t m = 32, int64_t n = 32,
+ int64_t k = 32, std::array<int64_t, 2> bs = { 10, 10 }, std::array<int64_t, 2> nr = { 2, 2 },
+ std::array<int64_t, 4> per = { 0, 1, 2, 3 }, bool v = false) :
+ type_a(type_a),
+ type_b(type_b),
+ m(m),
+ n(n),
+ k(k),
+ bs(bs),
+ nr(nr),
+ per(per),
+ v(v) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
@@ -2016,13 +1980,13 @@ struct test_mul_mat : public test_case {
const int npermuted = (per[0] != 0) + (per[1] != 1) + (per[2] != 2) + (per[3] != 3);
if (npermuted > 0) {
GGML_ASSERT(npermuted == 2);
- GGML_ASSERT(!v); // not handled
+ GGML_ASSERT(!v); // not handled
GGML_ASSERT(!ggml_is_quantized(type_a) || per[0] == 0);
GGML_ASSERT(!ggml_is_quantized(type_b) || per[0] == 0);

// Create tensors with the permuted dimensions, then permute them back to the dimensions given by m,n,k.
- const int64_t ne_a[4] = {k, m, bs[0], bs[1]};
- const int64_t ne_b[4] = {k, n, bs[0]*nr[0], bs[1]*nr[1]};
+ const int64_t ne_a[4] = { k, m, bs[0], bs[1] };
+ const int64_t ne_b[4] = { k, n, bs[0] * nr[0], bs[1] * nr[1] };

a = ggml_new_tensor_4d(ctx, type_a, ne_a[per[0]], ne_a[per[1]], ne_a[per[2]], ne_a[per[3]]);
b = ggml_new_tensor_4d(ctx, type_b, ne_b[per[0]], ne_b[per[1]], ne_b[per[2]], ne_b[per[3]]);
@@ -2041,8 +2005,8 @@ struct test_mul_mat : public test_case {
ggml_set_name(b, "b_permuted");
} else {
if (v) {
- a = ggml_new_tensor_4d(ctx, type_a, k*2, m, bs[0], bs[1]);
- b = ggml_new_tensor_4d(ctx, type_b, k*2, n, bs[0]*nr[0], bs[1]*nr[1]);
+ a = ggml_new_tensor_4d(ctx, type_a, k * 2, m, bs[0], bs[1]);
+ b = ggml_new_tensor_4d(ctx, type_b, k * 2, n, bs[0] * nr[0], bs[1] * nr[1]);

if (!ggml_is_quantized(type_a)) {
if (bs[1] == 1 && nr[1] == 1) {
@@ -2051,11 +2015,11 @@ struct test_mul_mat : public test_case {
ggml_set_param(b);
}

- a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
- b = ggml_view_4d(ctx, b, k, n, bs[0]*nr[0], bs[1]*nr[1], b->nb[1], b->nb[2], b->nb[3], 0);
+ a = ggml_view_4d(ctx, a, k, m, bs[0], bs[1], a->nb[1], a->nb[2], a->nb[3], 0);
+ b = ggml_view_4d(ctx, b, k, n, bs[0] * nr[0], bs[1] * nr[1], b->nb[1], b->nb[2], b->nb[3], 0);
} else {
- a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
- b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+ a = ggml_new_tensor_4d(ctx, type_a, k, m, bs[0], bs[1]);
+ b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0] * nr[0], bs[1] * nr[1]);

if (!ggml_is_quantized(type_a)) {
if (bs[1] == 1 && nr[1] == 1) {
@@ -2079,33 +2043,34 @@ struct test_mul_mat : public test_case {
struct test_mul_mat_id : public test_case {
const ggml_type type_a;
const ggml_type type_b;
- const int n_mats;
- const int n_used;
- const bool b; // broadcast b matrix
- const int64_t m;
- const int64_t n;
- const int64_t k;
+ const int n_mats;
+ const int n_used;
+ const bool b; // broadcast b matrix
+ const int64_t m;
+ const int64_t n;
+ const int64_t k;

- std::string vars() override {
- return VARS_TO_STR8(type_a, type_b, n_mats, n_used, b, m, n, k);
- }
+ std::string vars() override { return VARS_TO_STR8(type_a, type_b, n_mats, n_used, b, m, n, k); }

- double max_nmse_err() override {
- return 5e-4;
- }
+ double max_nmse_err() override { return 5e-4; }

uint64_t op_flops(ggml_tensor * t) override {
GGML_UNUSED(t);
return 2 * m * k * n * n_used;
}

- test_mul_mat_id(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
- int n_mats = 8, int n_used = 2, bool b = false,
- int64_t m = 32, int64_t n = 32, int64_t k = 32)
- : type_a(type_a), type_b(type_b), n_mats(n_mats), n_used(n_used), b(b),
- m(m), n(n), k(k) {
- GGML_ASSERT(n_used <= n_mats);
- }
+ test_mul_mat_id(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32, int n_mats = 8, int n_used = 2,
+ bool b = false, int64_t m = 32, int64_t n = 32, int64_t k = 32) :
+ type_a(type_a),
+ type_b(type_b),
+ n_mats(n_mats),
+ n_used(n_used),
+ b(b),
+ m(m),
+ n(n),
+ k(k) {
+ GGML_ASSERT(n_used <= n_mats);
+ }

ggml_tensor * build_graph(ggml_context * ctx) override {
// C^T = A * B^T: (k, m) * (k, n) => (m, n)
@@ -2129,11 +2094,13 @@ struct test_mul_mat_id : public test_case {
}

void initialize_tensors(ggml_context * ctx) override {
- std::random_device rd;
+ std::random_device rd;
std::default_random_engine rng(rd());
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
if (t->type == GGML_TYPE_I32) {
- if (ggml_is_view_op(t->op)) { continue; }
+ if (ggml_is_view_op(t->op)) {
+ continue;
+ }
// ids
for (int64_t r = 0; r < ggml_nrows(t); r++) {
std::vector<int32_t> data(t->ne[0]);
@@ -2152,29 +2119,30 @@ struct test_mul_mat_id : public test_case {

// GGML_OP_OUT_PROD
struct test_out_prod : public test_case {
- const ggml_type type_a;
- const ggml_type type_b;
- const int64_t m;
- const int64_t n;
- const int64_t k;
- const std::array<int64_t, 2> bs; // dims 3 and 4
- const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
- const bool trans_b;
+ const ggml_type type_a;
+ const ggml_type type_b;
+ const int64_t m;
+ const int64_t n;
+ const int64_t k;
+ const std::array<int64_t, 2> bs; // dims 3 and 4
+ const std::array<int64_t, 2> nr; // repeat in dims 3 and 4
+ const bool trans_b;

- std::string vars() override {
- return VARS_TO_STR8(type_a, type_b, m, n, k, bs, nr, trans_b);
- }
+ std::string vars() override { return VARS_TO_STR8(type_a, type_b, m, n, k, bs, nr, trans_b); }

- double max_nmse_err() override {
- return 5e-4;
- }
+ double max_nmse_err() override { return 5e-4; }

- test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
- int64_t m = 32, int64_t n = 32, int64_t k = 32,
- std::array<int64_t, 2> bs = {10, 10},
- std::array<int64_t, 2> nr = {2, 2},
- bool trans_b = false)
- : type_a(type_a), type_b(type_b), m(m), n(n), k(k), bs(bs), nr(nr), trans_b(trans_b) {}
+ test_out_prod(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32, int64_t m = 32, int64_t n = 32,
+ int64_t k = 32, std::array<int64_t, 2> bs = { 10, 10 }, std::array<int64_t, 2> nr = { 2, 2 },
+ bool trans_b = false) :
+ type_a(type_a),
+ type_b(type_b),
+ m(m),
+ n(n),
+ k(k),
+ bs(bs),
+ nr(nr),
+ trans_b(trans_b) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor_4d(ctx, type_a, m, k, bs[0], bs[1]);
@@ -2182,10 +2150,10 @@ struct test_out_prod : public test_case {

ggml_tensor * b;
if (trans_b) {
- b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0]*nr[0], bs[1]*nr[1]);
+ b = ggml_new_tensor_4d(ctx, type_b, k, n, bs[0] * nr[0], bs[1] * nr[1]);
b = ggml_transpose(ctx, b);
} else {
- b = ggml_new_tensor_4d(ctx, type_b, n, k, bs[0]*nr[0], bs[1]*nr[1]);
+ b = ggml_new_tensor_4d(ctx, type_b, n, k, bs[0] * nr[0], bs[1] * nr[1]);
}
ggml_set_name(b, "b");

@@ -2198,16 +2166,12 @@ struct test_out_prod : public test_case {

// GGML_OP_SQR
struct test_sqr : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_sqr(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_sqr(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2221,22 +2185,18 @@ struct test_sqr : public test_case {
}

float grad_eps() override {
- return 0.1f * 0.25f*ne[0]*ne[1]*ne[2]*ne[3]; // 10% of expected value of sum.
+ return 0.1f * 0.25f * ne[0] * ne[1] * ne[2] * ne[3]; // 10% of expected value of sum.
}
};

// GGML_OP_SQRT
struct test_sqrt : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_sqrt(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 3, 3, 2})
- : type(type), ne(ne) {}
+ test_sqrt(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 3, 3, 2 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2256,27 +2216,19 @@ struct test_sqrt : public test_case {
}
}

- float grad_eps() override {
- return 20.0f;
- }
+ float grad_eps() override { return 20.0f; }

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_LOG
struct test_log : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_log(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_log(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2296,23 +2248,17 @@ struct test_log : public test_case {
}
}

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_SIN
struct test_sin : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_sin(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 2, 2, 2})
- : type(type), ne(ne) {}
+ test_sin(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 2, 2, 2 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2327,35 +2273,25 @@ struct test_sin : public test_case {

void initialize_tensors(ggml_context * ctx) override {
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
- init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
+ init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
}
}

- double max_maa_err() override {
- return 1e-3;
- }
+ double max_maa_err() override { return 1e-3; }

- float grad_eps() override {
- return 0.2f;
- }
+ float grad_eps() override { return 0.2f; }

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_COS
struct test_cos : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_cos(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 2, 2, 2})
- : type(type), ne(ne) {}
+ test_cos(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 2, 2, 2 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2370,38 +2306,32 @@ struct test_cos : public test_case {

void initialize_tensors(ggml_context * ctx) override {
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
- init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
+ init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi].
}
}

- double max_maa_err() override {
- return 1e-3;
- }
+ double max_maa_err() override { return 1e-3; }

- float grad_eps() override {
- return 0.2f;
- }
+ float grad_eps() override { return 0.2f; }

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_CLAMP
struct test_clamp : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- float min;
- float max;
+ float min;
+ float max;

- std::string vars() override {
- return VARS_TO_STR4(type, ne, min, max);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne, min, max); }

- test_clamp(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3},
- float min = -0.5f, float max = 0.5f)
- : type(type), ne(ne), min(min), max(max) {}
+ test_clamp(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }, float min = -0.5f,
+ float max = 0.5f) :
+ type(type),
+ ne(ne),
+ min(min),
+ max(max) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2413,29 +2343,23 @@ struct test_clamp : public test_case {
return out;
}

- float grad_eps() override {
- return 1e-2f;
- }
+ float grad_eps() override { return 1e-2f; }

- std::vector<float> grad_expect() override {
- return {0.0f, 1.0f};
- }
+ std::vector<float> grad_expect() override { return { 0.0f, 1.0f }; }
};

// GGML_OP_DIAG_MASK_INF
struct test_diag_mask_inf : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const int n_past;
+ const int n_past;

- std::string vars() override {
- return VARS_TO_STR3(type, ne, n_past);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne, n_past); }

- test_diag_mask_inf(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 10, 3, 2},
- int n_past = 5)
- : type(type), ne(ne), n_past(n_past) {}
+ test_diag_mask_inf(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 10, 3, 2 }, int n_past = 5) :
+ type(type),
+ ne(ne),
+ n_past(n_past) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2451,30 +2375,27 @@ struct test_diag_mask_inf : public test_case {

// GGML_OP_SOFT_MAX
struct test_soft_max : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const bool mask;
- const ggml_type m_prec;
- const float scale;
- const float max_bias;
+ const bool mask;
+ const ggml_type m_prec;
+ const float scale;
+ const float max_bias;

- std::string vars() override {
- return VARS_TO_STR6(type, ne, mask, m_prec, scale, max_bias);
- }
+ std::string vars() override { return VARS_TO_STR6(type, ne, mask, m_prec, scale, max_bias); }

// the 1024 test with bias occasionally fails:
// SOFT_MAX(type=f32,ne=[1024,16,1,1],mask=1,scale=1.000000,max_bias=8.000000): [SOFT_MAX] NMSE = 0.000000103 > 0.000000100 FAIL
- virtual double max_nmse_err() override {
- return 1e-6;
- }
+ virtual double max_nmse_err() override { return 1e-6; }

- test_soft_max(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3},
- bool mask = false,
- ggml_type m_prec = GGML_TYPE_F32,
- float scale = 1.0f,
- float max_bias = 0.0f)
- : type(type), ne(ne), mask(mask), m_prec(m_prec), scale(scale), max_bias(max_bias) {}
+ test_soft_max(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }, bool mask = false,
+ ggml_type m_prec = GGML_TYPE_F32, float scale = 1.0f, float max_bias = 0.0f) :
+ type(type),
+ ne(ne),
+ mask(mask),
+ m_prec(m_prec),
+ scale(scale),
+ max_bias(max_bias) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2493,27 +2414,24 @@ struct test_soft_max : public test_case {
return out;
}

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_SOFT_MAX_BACK
struct test_soft_max_back : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const float scale;
- const float max_bias;
+ const float scale;
+ const float max_bias;

- std::string vars() override {
- return VARS_TO_STR4(type, ne, scale, max_bias);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne, scale, max_bias); }

- test_soft_max_back(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3},
- float scale = 1.0f,
- float max_bias = 0.0f)
- : type(type), ne(ne), scale(scale), max_bias(max_bias) {}
+ test_soft_max_back(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }, float scale = 1.0f,
+ float max_bias = 0.0f) :
+ type(type),
+ ne(ne),
+ scale(scale),
+ max_bias(max_bias) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2531,33 +2449,45 @@ struct test_soft_max_back : public test_case {

// GGML_OP_ROPE + GGML_OP_ROPE_BACK
struct test_rope : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
- int n_dims;
- int mode;
- int n_ctx; // used to generate positions
- float fs; // freq_scale
- float ef; // ext_factor
- float af; // attn_factor
- bool ff;
- int v; // view (1 : non-contiguous a)
- bool forward;
+ int n_dims;
+ int mode;
+ int n_ctx; // used to generate positions
+ float fs; // freq_scale
+ float ef; // ext_factor
+ float af; // attn_factor
+ bool ff;
+ int v; // view (1 : non-contiguous a)
+ bool forward;

std::string vars() override {
// forward can be inferred from the op, does not need to be printed
return VARS_TO_STR10(type, ne_a, n_dims, mode, n_ctx, fs, ef, af, ff, v);
}

- test_rope(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {10, 5, 3, 1},
- int n_dims = 10, int mode = 0, int n_ctx = 512, float fs = 1.0f,
- float ef = 0.0f, float af = 0.0f, bool ff = false, int v = 0, bool forward = true)
- : type(type), ne_a(ne_a), n_dims(n_dims), mode(mode), n_ctx(n_ctx), fs(fs), ef(ef), af(af), ff(ff), v(v), forward(forward) {}
+ test_rope(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 10, 5, 3, 1 }, int n_dims = 10,
+ int mode = 0, int n_ctx = 512, float fs = 1.0f, float ef = 0.0f, float af = 0.0f, bool ff = false,
+ int v = 0, bool forward = true) :
+ type(type),
+ ne_a(ne_a),
+ n_dims(n_dims),
+ mode(mode),
+ n_ctx(n_ctx),
+ fs(fs),
+ ef(ef),
+ af(af),
+ ff(ff),
+ v(v),
+ forward(forward) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a;
if (v & 1) {
- auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
+ auto ne = ne_a;
+ ne[0] *= 2;
+ ne[1] *= 4;
+ ne[2] *= 3;
a = ggml_new_tensor(ctx, type, 4, ne.data());
if (forward) {
ggml_set_param(a);
@@ -2574,7 +2504,7 @@ struct test_rope : public test_case {
ggml_set_name(a, "a");
}

- const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
+ const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;

ggml_tensor * pos;
@@ -2587,32 +2517,37 @@ struct test_rope : public test_case {

ggml_tensor * freq = nullptr;
if (ff) {
- freq = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_dims/2);
+ freq = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_dims / 2);
ggml_set_name(freq, "freq");
}

ggml_tensor * out;
if (is_mrope) {
if (is_vision) {
- GGML_ASSERT(n_dims/4 > 0);
- int rope_sections[4] = {n_dims/4, n_dims/4, 0, 0}; // Vision-RoPE only use first two dimension for image (x, y) coordinate
+ GGML_ASSERT(n_dims / 4 > 0);
+ int rope_sections[4] = { n_dims / 4, n_dims / 4, 0,
+ 0 }; // Vision-RoPE only use first two dimension for image (x, y) coordinate
if (forward) {
- out = ggml_rope_multi (ctx, a, pos, freq, n_dims/2, rope_sections, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
+ out = ggml_rope_multi(ctx, a, pos, freq, n_dims / 2, rope_sections, mode, 0, 10000.0f, fs, ef, af,
+ 1.0f, 1.0f);
} else {
- out = ggml_rope_multi_back(ctx, a, pos, freq, n_dims/2, rope_sections, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
+ out = ggml_rope_multi_back(ctx, a, pos, freq, n_dims / 2, rope_sections, mode, 0, 10000.0f, fs, ef,
+ af, 1.0f, 1.0f);
}
} else {
- GGML_ASSERT(n_dims/3 > 0);
- int rope_sections[4] = {n_dims/3, n_dims/3, n_dims/3, 0};
+ GGML_ASSERT(n_dims / 3 > 0);
+ int rope_sections[4] = { n_dims / 3, n_dims / 3, n_dims / 3, 0 };
if (forward) {
- out = ggml_rope_multi (ctx, a, pos, freq, n_dims, rope_sections, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
+ out = ggml_rope_multi(ctx, a, pos, freq, n_dims, rope_sections, mode, 0, 10000.0f, fs, ef, af, 1.0f,
+ 1.0f);
} else {
- out = ggml_rope_multi_back(ctx, a, pos, freq, n_dims, rope_sections, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
+ out = ggml_rope_multi_back(ctx, a, pos, freq, n_dims, rope_sections, mode, 0, 10000.0f, fs, ef, af,
+ 1.0f, 1.0f);
}
}
} else {
if (forward) {
- out = ggml_rope_ext (ctx, a, pos, freq, n_dims, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
+ out = ggml_rope_ext(ctx, a, pos, freq, n_dims, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
} else {
out = ggml_rope_ext_back(ctx, a, pos, freq, n_dims, mode, 0, 10000.0f, fs, ef, af, 1.0f, 1.0f);
}
@@ -2628,14 +2563,14 @@ struct test_rope : public test_case {
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
if (t->type == GGML_TYPE_I32) {
// pos
- const int num_pos_ids = (mode & GGML_ROPE_TYPE_MROPE) ? ne_a[2] * 4 : ne_a[2];
+ const int num_pos_ids = (mode & GGML_ROPE_TYPE_MROPE) ? ne_a[2] * 4 : ne_a[2];
std::vector<int> data(num_pos_ids);
for (int i = 0; i < num_pos_ids; i++) {
data[i] = rand() % n_ctx;
}
ggml_backend_tensor_set(t, data.data(), 0, num_pos_ids * sizeof(int));
} else {
- if (t->ne[0] == n_dims/2) {
+ if (t->ne[0] == n_dims / 2) {
// frequency factors in the range [0.9f, 1.1f]
init_tensor_uniform(t, 0.9f, 1.1f);
} else {
@@ -2645,41 +2580,40 @@ struct test_rope : public test_case {
}
}

- double max_maa_err() override {
- return 1e-3;
- }
+ double max_maa_err() override { return 1e-3; }

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_POOL2D
struct test_pool2d : public test_case {
- enum ggml_op_pool pool_type;
- const ggml_type type_input;
+ enum ggml_op_pool pool_type;
+ const ggml_type type_input;
const std::array<int64_t, 4> ne_input;
// kernel size
- const int k0;
- const int k1;
+ const int k0;
+ const int k1;
// stride
- const int s0;
- const int s1;
+ const int s0;
+ const int s1;
// padding
- const int p0;
- const int p1;
-
- std::string vars() override {
- return VARS_TO_STR9(pool_type, type_input, ne_input, k0, k1, s0, s1, p0, p1);
- }
-
- test_pool2d(ggml_op_pool pool_type = GGML_OP_POOL_AVG,
- ggml_type type_input = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1]
- int k0 = 3, int k1 = 3,
- int s0 = 1, int s1 = 1,
- int p0 = 1, int p1 = 1)
- : pool_type(pool_type), type_input(type_input), ne_input(ne_input), k0(k0), k1(k1), s0(s0), s1(s1), p0(p0), p1(p1) {}
+ const int p0;
+ const int p1;
+
+ std::string vars() override { return VARS_TO_STR9(pool_type, type_input, ne_input, k0, k1, s0, s1, p0, p1); }
+
+ test_pool2d(ggml_op_pool pool_type = GGML_OP_POOL_AVG, ggml_type type_input = GGML_TYPE_F32,
+ std::array<int64_t, 4> ne_input = { 10, 10, 3, 1 }, // [input_width, input_height, input_channels, 1]
+ int k0 = 3, int k1 = 3, int s0 = 1, int s1 = 1, int p0 = 1, int p1 = 1) :
+ pool_type(pool_type),
+ type_input(type_input),
+ ne_input(ne_input),
+ k0(k0),
+ k1(k1),
+ s0(s0),
+ s1(s1),
+ p0(p0),
+ p1(p1) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
@@ -2698,18 +2632,21 @@ struct test_conv_transpose_1d : public test_case {
const std::array<int64_t, 4> ne_input;
const std::array<int64_t, 4> ne_kernel;

- const int s0; // stride
- const int p0; // padding
- const int d0; // dilation
+ const int s0; // stride
+ const int p0; // padding
+ const int d0; // dilation

- std::string vars() override {
- return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0);
- }
+ std::string vars() override { return VARS_TO_STR5(ne_input, ne_kernel, s0, p0, d0); }

- test_conv_transpose_1d(std::array<int64_t, 4> ne_input = {197, 32, 1, 1}, // [input_width, input_height, input_channels, 1]
- std::array<int64_t, 4> ne_kernel = {16, 32, 32, 1}, // [kernel_width, kernel_height, input_channels, 1]
- int s0 = 1, int p0 = 0, int d0 = 1)
- : ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), p0(p0), d0(d0) {}
+ test_conv_transpose_1d(
+ std::array<int64_t, 4> ne_input = { 197, 32, 1, 1 }, // [input_width, input_height, input_channels, 1]
+ std::array<int64_t, 4> ne_kernel = { 16, 32, 32, 1 }, // [kernel_width, kernel_height, input_channels, 1]
+ int s0 = 1, int p0 = 0, int d0 = 1) :
+ ne_input(ne_input),
+ ne_kernel(ne_kernel),
+ s0(s0),
+ p0(p0),
+ d0(d0) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
@@ -2727,35 +2664,44 @@ struct test_conv_transpose_1d : public test_case {

// GGML_OP_IM2COL
struct test_im2col : public test_case {
- const ggml_type type_input;
- const ggml_type type_kernel;
- const ggml_type dst_type;
+ const ggml_type type_input;
+ const ggml_type type_kernel;
+ const ggml_type dst_type;
const std::array<int64_t, 4> ne_input;
const std::array<int64_t, 4> ne_kernel;
// stride
- const int s0;
- const int s1;
+ const int s0;
+ const int s1;
// padding
- const int p0;
- const int p1;
+ const int p0;
+ const int p1;
// dilation
- const int d0;
- const int d1;
+ const int d0;
+ const int d1;
// mode
- const bool is_2D;
+ const bool is_2D;

std::string vars() override {
return VARS_TO_STR12(type_input, type_kernel, dst_type, ne_input, ne_kernel, s0, s1, p0, p1, d0, d1, is_2D);
}

- test_im2col(ggml_type type_input = GGML_TYPE_F32, ggml_type type_kernel = GGML_TYPE_F16, ggml_type dst_type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1]
- std::array<int64_t, 4> ne_kernel = {3, 3, 3, 1}, // [kernel_width, kernel_height, input_channels, 1]
- int s0 = 1, int s1 = 1,
- int p0 = 1, int p1 = 1,
- int d0 = 1, int d1 = 1,
- bool is_2D = true)
- : type_input(type_input), type_kernel(type_kernel), dst_type(dst_type), ne_input(ne_input), ne_kernel(ne_kernel), s0(s0), s1(s1), p0(p0), p1(p1), d0(d0), d1(d1), is_2D(is_2D) {}
+ test_im2col(ggml_type type_input = GGML_TYPE_F32, ggml_type type_kernel = GGML_TYPE_F16,
+ ggml_type dst_type = GGML_TYPE_F32,
+ std::array<int64_t, 4> ne_input = { 10, 10, 3, 1 }, // [input_width, input_height, input_channels, 1]
+ std::array<int64_t, 4> ne_kernel = { 3, 3, 3, 1 }, // [kernel_width, kernel_height, input_channels, 1]
+ int s0 = 1, int s1 = 1, int p0 = 1, int p1 = 1, int d0 = 1, int d1 = 1, bool is_2D = true) :
+ type_input(type_input),
+ type_kernel(type_kernel),
+ dst_type(dst_type),
+ ne_input(ne_input),
+ ne_kernel(ne_kernel),
+ s0(s0),
+ s1(s1),
+ p0(p0),
+ p1(p1),
+ d0(d0),
+ d1(d1),
+ is_2D(is_2D) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * input = ggml_new_tensor(ctx, type_input, 4, ne_input.data());
@@ -2776,19 +2722,22 @@ struct test_im2col : public test_case {
struct test_conv_2d_dw : public test_case {
const std::array<int64_t, 4> ne_input;
const std::array<int64_t, 4> ne_kernel;
- const int stride;
- const int padding;
- const int dilation;
- const bool cwhn;
-
- std::string vars() override {
- return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn);
- }
-
- test_conv_2d_dw(std::array<int64_t, 4> ne_input = {64, 64, 16, 1},
- std::array<int64_t, 4> ne_kernel = {3, 3, 1, 16},
- int stride = 1, int padding = 0, int dilation = 1, bool cwhn = false)
- : ne_input(ne_input), ne_kernel(ne_kernel), stride(stride), padding(padding), dilation(dilation), cwhn(cwhn) {}
+ const int stride;
+ const int padding;
+ const int dilation;
+ const bool cwhn;
+
+ std::string vars() override { return VARS_TO_STR6(ne_input, ne_kernel, stride, padding, dilation, cwhn); }
+
+ test_conv_2d_dw(std::array<int64_t, 4> ne_input = { 64, 64, 16, 1 },
+ std::array<int64_t, 4> ne_kernel = { 3, 3, 1, 16 }, int stride = 1, int padding = 0,
+ int dilation = 1, bool cwhn = false) :
+ ne_input(ne_input),
+ ne_kernel(ne_kernel),
+ stride(stride),
+ padding(padding),
+ dilation(dilation),
+ cwhn(cwhn) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data());
@@ -2800,15 +2749,14 @@ struct test_conv_2d_dw : public test_case {
if (cwhn) {
// change memory layout to channel-most-contiguous (CWHN),
// then permute it back so NE matches the original input
- input = ggml_cont(ctx, ggml_permute(ctx, input, 1, 2, 0, 3));
- input = ggml_permute(ctx, input, 2, 0, 1, 3);
+ input = ggml_cont(ctx, ggml_permute(ctx, input, 1, 2, 0, 3));
+ input = ggml_permute(ctx, input, 2, 0, 1, 3);
kernel = ggml_cont(ctx, ggml_permute(ctx, kernel, 2, 3, 1, 0));
kernel = ggml_permute(ctx, kernel, 3, 2, 0, 1);
}

- ggml_tensor * out = ggml_conv_2d_dw_direct(
- ctx, kernel, input,
- stride, stride, padding, padding, dilation, dilation);
+ ggml_tensor * out =
+ ggml_conv_2d_dw_direct(ctx, kernel, input, stride, stride, padding, padding, dilation, dilation);
ggml_set_name(out, "out");
return out;
}
@@ -2816,28 +2764,31 @@ struct test_conv_2d_dw : public test_case {

// GGML_OP_CONCAT
struct test_concat : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
- const int64_t ne_b_d;
- const int dim;
- const int v; // view (1 << 0: non-cont a, 1 << 1: non-cont b)
+ const int64_t ne_b_d;
+ const int dim;
+ const int v; // view (1 << 0: non-cont a, 1 << 1: non-cont b)

- std::string vars() override {
- return VARS_TO_STR5(type, ne_a, ne_b_d, dim, v);
- }
+ std::string vars() override { return VARS_TO_STR5(type, ne_a, ne_b_d, dim, v); }

- test_concat(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {10, 5, 5, 5},
- int64_t ne_b_d = 5,
- int dim = 2, int v = 0)
- : type(type), ne_a(ne_a), ne_b_d(ne_b_d), dim(dim), v(v) {}
+ test_concat(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 10, 5, 5, 5 }, int64_t ne_b_d = 5,
+ int dim = 2, int v = 0) :
+ type(type),
+ ne_a(ne_a),
+ ne_b_d(ne_b_d),
+ dim(dim),
+ v(v) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
auto ne_b = ne_a;
ne_b[dim] = ne_b_d;
ggml_tensor * a;
if (v & 1) {
- auto ne = ne_a; ne[0] *= 2; ne[1] *= 4; ne[2] *= 3;
+ auto ne = ne_a;
+ ne[0] *= 2;
+ ne[1] *= 4;
+ ne[2] *= 3;
a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a");

@@ -2849,7 +2800,10 @@ struct test_concat : public test_case {
}
ggml_tensor * b;
if (v & 2) {
- auto ne = ne_b; ne[0] *= 3; ne[1] *= 2; ne[2] *= 4;
+ auto ne = ne_b;
+ ne[0] *= 3;
+ ne[1] *= 2;
+ ne[2] *= 4;
b = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(b, "b");

@@ -2869,18 +2823,17 @@ struct test_concat : public test_case {

// GGML_OP_ARGSORT
struct test_argsort : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- ggml_sort_order order;
+ ggml_sort_order order;

- std::string vars() override {
- return VARS_TO_STR3(type, ne, order);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne, order); }

- test_argsort(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {16, 10, 10, 10},
- ggml_sort_order order = GGML_SORT_ORDER_ASC)
- : type(type), ne(ne), order(order) {}
+ test_argsort(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 16, 10, 10, 10 },
+ ggml_sort_order order = GGML_SORT_ORDER_ASC) :
+ type(type),
+ ne(ne),
+ order(order) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2893,7 +2846,7 @@ struct test_argsort : public test_case {
|
|
}
|
|
|
|
void initialize_tensors(ggml_context * ctx) override {
|
|
- std::random_device rd;
|
|
+ std::random_device rd;
|
|
std::default_random_engine rng(rd());
|
|
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
|
if (t->type == GGML_TYPE_I32) {
|
|
@@ -2903,7 +2856,7 @@ struct test_argsort : public test_case {
|
|
data[i] = rand();
|
|
}
|
|
std::shuffle(data.begin(), data.end(), rng);
|
|
- ggml_backend_tensor_set(t, data.data(), 0, ne[0]*ne[1]*ne[2]*ne[3] * sizeof(int));
|
|
+ ggml_backend_tensor_set(t, data.data(), 0, ne[0] * ne[1] * ne[2] * ne[3] * sizeof(int));
|
|
} else if (t->type == GGML_TYPE_F32) {
|
|
// initialize with unique values to avoid ties
|
|
for (int64_t r = 0; r < ggml_nrows(t); r++) {
|
|
@@ -2923,16 +2876,12 @@ struct test_argsort : public test_case {

// GGML_OP_SUM
struct test_sum : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_sum(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_sum(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2945,23 +2894,17 @@ struct test_sum : public test_case {
return out;
}

- float grad_eps() override {
- return 0.1f * sqrtf(ne[0]*ne[1]*ne[2]*ne[3]);
- }
+ float grad_eps() override { return 0.1f * sqrtf(ne[0] * ne[1] * ne[2] * ne[3]); }
};

// GGML_OP_SUM_ROWS
struct test_sum_rows : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_sum_rows(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_sum_rows(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2977,16 +2920,12 @@ struct test_sum_rows : public test_case {

// GGML_OP_MEAN
struct test_mean : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_mean(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_mean(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) : type(type), ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -2999,27 +2938,26 @@ struct test_mean : public test_case {
return out;
}

- float grad_eps() override {
- return 0.1f * ne[0]*ne[1]*ne[2]*ne[3];
- }
+ float grad_eps() override { return 0.1f * ne[0] * ne[1] * ne[2] * ne[3]; }
};

// GGML_OP_UPSCALE
struct test_upscale : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const int32_t scale_factor;
- const bool transpose;
- const ggml_scale_mode mode;
+ const int32_t scale_factor;
+ const bool transpose;
+ const ggml_scale_mode mode;

- std::string vars() override {
- return VARS_TO_STR5(type, ne, scale_factor, mode, transpose);
- }
+ std::string vars() override { return VARS_TO_STR5(type, ne, scale_factor, mode, transpose); }

- test_upscale(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {512, 512, 3, 1},
- int32_t scale_factor = 2, ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST, bool transpose = false)
- : type(type), ne(ne), scale_factor(scale_factor), transpose(transpose), mode(mode) {}
+ test_upscale(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 512, 512, 3, 1 },
+ int32_t scale_factor = 2, ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST, bool transpose = false) :
+ type(type),
+ ne(ne),
+ scale_factor(scale_factor),
+ transpose(transpose),
+ mode(mode) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3039,26 +2977,25 @@ struct test_upscale : public test_case {

// GGML_OP_UPSCALE (ext)
struct test_upscale_ext : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
const std::array<int64_t, 4> ne_tgt;
- const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
+ const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;

- std::string vars() override {
- return VARS_TO_STR4(type, ne, ne_tgt, mode);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne, ne_tgt, mode); }

- test_upscale_ext(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {2, 5, 7, 11},
- std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
- ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
- : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
+ test_upscale_ext(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 2, 5, 7, 11 },
+ std::array<int64_t, 4> ne_tgt = { 5, 7, 11, 13 }, ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST) :
+ type(type),
+ ne(ne),
+ ne_tgt(ne_tgt),
+ mode(mode) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
ggml_set_name(a, "a");

- ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
+ ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1], ne_tgt[2], ne_tgt[3], mode);
ggml_set_name(out, "out");

return out;
@@ -3067,20 +3004,19 @@ struct test_upscale_ext : public test_case {

// GGML_OP_GROUP_NORM
struct test_group_norm : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const int32_t num_groups;
- const float eps;
+ const int32_t num_groups;
+ const float eps;

- std::string vars() override {
- return VARS_TO_STR4(type, ne, num_groups, eps);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne, num_groups, eps); }

- test_group_norm(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {64, 64, 320, 1},
- int32_t num_groups = 32,
- float eps = 1e-6f)
- : type(type), ne(ne), num_groups(num_groups), eps(eps) {}
+ test_group_norm(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 64, 64, 320, 1 },
+ int32_t num_groups = 32, float eps = 1e-6f) :
+ type(type),
+ ne(ne),
+ num_groups(num_groups),
+ eps(eps) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3095,18 +3031,16 @@ struct test_group_norm : public test_case {

// GGML_OP_L2_NORM
struct test_l2_norm : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;
- const float eps;
+ const float eps;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_l2_norm(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {64, 64, 320, 1},
- float eps = 1e-12f)
- : type(type), ne(ne), eps(eps) {}
+ test_l2_norm(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 64, 64, 320, 1 }, float eps = 1e-12f) :
+ type(type),
+ ne(ne),
+ eps(eps) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3121,18 +3055,17 @@ struct test_l2_norm : public test_case {

// GGML_OP_ACC
struct test_acc : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
const std::array<int64_t, 4> ne_b;

- std::string vars() override {
- return VARS_TO_STR3(type, ne_a, ne_b);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne_a, ne_b); }

- test_acc(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {256, 17, 1, 1},
- std::array<int64_t, 4> ne_b = {256, 16, 1, 1})
- : type(type), ne_a(ne_a), ne_b(ne_b) {}
+ test_acc(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 256, 17, 1, 1 },
+ std::array<int64_t, 4> ne_b = { 256, 16, 1, 1 }) :
+ type(type),
+ ne_a(ne_a),
+ ne_b(ne_b) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -3152,19 +3085,19 @@ struct test_acc : public test_case {

// GGML_OP_PAD
struct test_pad : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
- const int pad_0;
- const int pad_1;
+ const int pad_0;
+ const int pad_1;

- std::string vars() override {
- return VARS_TO_STR4(type, ne_a, pad_0, pad_1);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne_a, pad_0, pad_1); }

- test_pad(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {512, 512, 1, 1},
- int pad_0 = 1, int pad_1 = 1)
- : type(type), ne_a(ne_a), pad_0(pad_0), pad_1(pad_1) {}
+ test_pad(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 512, 512, 1, 1 }, int pad_0 = 1,
+ int pad_1 = 1) :
+ type(type),
+ ne_a(ne_a),
+ pad_0(pad_0),
+ pad_1(pad_1) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -3179,19 +3112,19 @@ struct test_pad : public test_case {

// GGML_OP_PAD_REFLECT_1D
struct test_pad_reflect_1d : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
- const int pad_0;
- const int pad_1;
+ const int pad_0;
+ const int pad_1;

- std::string vars() override {
- return VARS_TO_STR4(type, ne_a, pad_0, pad_1);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne_a, pad_0, pad_1); }

- test_pad_reflect_1d(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {512, 34, 2, 1},
- int pad_0 = 10, int pad_1 = 9)
- : type(type), ne_a(ne_a), pad_0(pad_0), pad_1(pad_1) {}
+ test_pad_reflect_1d(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 512, 34, 2, 1 }, int pad_0 = 10,
+ int pad_1 = 9) :
+ type(type),
+ ne_a(ne_a),
+ pad_0(pad_0),
+ pad_1(pad_1) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 2, ne_a.data());
@@ -3207,17 +3140,17 @@ struct test_pad_reflect_1d : public test_case {
// GGML_OP_ARANGE
struct test_arange : public test_case {
const ggml_type type;
- const float start;
- const float stop;
- const float step;
+ const float start;
+ const float stop;
+ const float step;

- std::string vars() override {
- return VARS_TO_STR4(type, start, stop, step);
- }
+ std::string vars() override { return VARS_TO_STR4(type, start, stop, step); }

- test_arange(ggml_type type = GGML_TYPE_F32,
- float start = 0.f, float stop = 10.f, float step = 1.f)
- : type(type), start(start), stop(stop), step(step) {}
+ test_arange(ggml_type type = GGML_TYPE_F32, float start = 0.f, float stop = 10.f, float step = 1.f) :
+ type(type),
+ start(start),
+ stop(stop),
+ step(step) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * out = ggml_arange(ctx, start, stop, step);
@@ -3229,19 +3162,19 @@ struct test_arange : public test_case {

// GGML_OP_TIMESTEP_EMBEDDING
struct test_timestep_embedding : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
- const int dim;
- const int max_period;
+ const int dim;
+ const int max_period;

- std::string vars() override {
- return VARS_TO_STR4(type, ne_a, dim, max_period);
- }
+ std::string vars() override { return VARS_TO_STR4(type, ne_a, dim, max_period); }

- test_timestep_embedding(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {2, 1, 1, 1},
- int dim = 320, int max_period=10000)
- : type(type), ne_a(ne_a), dim(dim), max_period(max_period) {}
+ test_timestep_embedding(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 2, 1, 1, 1 }, int dim = 320,
+ int max_period = 10000) :
+ type(type),
+ ne_a(ne_a),
+ dim(dim),
+ max_period(max_period) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -3256,18 +3189,17 @@ struct test_timestep_embedding : public test_case {

// GGML_OP_LEAKY_RELU
struct test_leaky_relu : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne_a;
- const float negative_slope;
+ const float negative_slope;

- std::string vars() override {
- return VARS_TO_STR3(type, ne_a, negative_slope);
- }
+ std::string vars() override { return VARS_TO_STR3(type, ne_a, negative_slope); }

- test_leaky_relu(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne_a = {10, 5, 4, 3},
- float negative_slope = 0.1f)
- : type(type), ne_a(ne_a), negative_slope(negative_slope) {}
+ test_leaky_relu(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne_a = { 10, 5, 4, 3 },
+ float negative_slope = 0.1f) :
+ type(type),
+ ne_a(ne_a),
+ negative_slope(negative_slope) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -3282,66 +3214,77 @@ struct test_leaky_relu : public test_case {

// GGML_OP_FLASH_ATTN_EXT
struct test_flash_attn_ext : public test_case {
- const int64_t hsk; // K head size
- const int64_t hsv; // V head size
- const int64_t nh; // num heads
- const int64_t nr; // repeat in Q, tests for grouped-query attention
- const int64_t kv; // kv size
- const int64_t nb; // batch size
+ const int64_t hsk; // K head size
+ const int64_t hsv; // V head size
+ const int64_t nh; // num heads
+ const int64_t nr; // repeat in Q, tests for grouped-query attention
+ const int64_t kv; // kv size
+ const int64_t nb; // batch size

- const bool mask; // use mask
+ const bool mask; // use mask

- const float max_bias; // ALiBi
- const float logit_softcap; // Gemma 2
+ const float max_bias; // ALiBi
+ const float logit_softcap; // Gemma 2

- const ggml_prec prec;
- const ggml_type type_KV;
+ const ggml_prec prec;
+ const ggml_type type_KV;
std::array<int32_t, 4> permute;

std::string vars() override {
return VARS_TO_STR12(hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, permute);
}

- double max_nmse_err() override {
- return 5e-4;
- }
+ double max_nmse_err() override { return 5e-4; }

uint64_t op_flops(ggml_tensor * t) override {
GGML_UNUSED(t);
// Just counting matmul costs:
// Q*K^T is nb x hsk x kv, P*V is nb x kv x hsv, per head
- return 2 * nh*nr * nb * (hsk + hsv) * kv;
- }
-
- test_flash_attn_ext(int64_t hsk = 128, int64_t hsv = 128, int64_t nh = 32, int64_t nr = 1, int64_t kv = 96, int64_t nb = 8,
- bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_prec prec = GGML_PREC_F32,
- ggml_type type_KV = GGML_TYPE_F16, std::array<int32_t, 4> permute = {0, 1, 2, 3})
- : hsk(hsk), hsv(hsv), nh(nh), nr(nr), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), prec(prec), type_KV(type_KV), permute(permute) {}
+ return 2 * nh * nr * nb * (hsk + hsv) * kv;
+ }
+
+ test_flash_attn_ext(int64_t hsk = 128, int64_t hsv = 128, int64_t nh = 32, int64_t nr = 1, int64_t kv = 96,
+ int64_t nb = 8, bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f,
+ ggml_prec prec = GGML_PREC_F32, ggml_type type_KV = GGML_TYPE_F16,
+ std::array<int32_t, 4> permute = { 0, 1, 2, 3 }) :
+ hsk(hsk),
+ hsv(hsv),
+ nh(nh),
+ nr(nr),
+ kv(kv),
+ nb(nb),
+ mask(mask),
+ max_bias(max_bias),
+ logit_softcap(logit_softcap),
+ prec(prec),
+ type_KV(type_KV),
+ permute(permute) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
const int64_t hsk_padded = GGML_PAD(hsk, ggml_blck_size(type_KV));
const int64_t hsv_padded = GGML_PAD(hsv, ggml_blck_size(type_KV));

- auto const &create_permuted = [&](ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) -> ggml_tensor * {
- int64_t ne[4] = {ne0, ne1, ne2, ne3};
+ const auto & create_permuted = [&](ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2,
+ int64_t ne3) -> ggml_tensor * {
+ int64_t ne[4] = { ne0, ne1, ne2, ne3 };
int64_t ne_perm[4];
for (int i = 0; i < 4; ++i) {
ne_perm[permute[i]] = ne[i];
}
ggml_tensor * t = ggml_new_tensor_4d(ctx, type, ne_perm[0], ne_perm[1], ne_perm[2], ne_perm[3]);
- if (permute != std::array<int32_t, 4>{0, 1, 2, 3}) {
+ if (permute != std::array<int32_t, 4>{ 0, 1, 2, 3 }) {
t = ggml_permute(ctx, t, permute[0], permute[1], permute[2], permute[3]);
}
return t;
};

- ggml_tensor * q = create_permuted(GGML_TYPE_F32, hsk_padded, nb, nh*nr, 1);
+ ggml_tensor * q = create_permuted(GGML_TYPE_F32, hsk_padded, nb, nh * nr, 1);
ggml_set_name(q, "q");

- ggml_tensor * k = create_permuted(type_KV, hsk_padded, kv, nh, 1);
+ ggml_tensor * k = create_permuted(type_KV, hsk_padded, kv, nh, 1);
ggml_set_name(k, "k");

- ggml_tensor * v = create_permuted(type_KV, hsv_padded, kv, nh, 1);
+ ggml_tensor * v = create_permuted(type_KV, hsv_padded, kv, nh, 1);
ggml_set_name(v, "v");

ggml_tensor * m = nullptr;
@@ -3350,30 +3293,26 @@ struct test_flash_attn_ext : public test_case {
ggml_set_name(m, "m");
}

- ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f/sqrtf(hsk), max_bias, logit_softcap);
+ ggml_tensor * out = ggml_flash_attn_ext(ctx, q, k, v, m, 1.0f / sqrtf(hsk), max_bias, logit_softcap);
ggml_flash_attn_ext_set_prec(out, prec);
ggml_set_name(out, "out");

return out;
}

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_CROSS_ENTROPY_LOSS
struct test_cross_entropy_loss : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_cross_entropy_loss(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_cross_entropy_loss(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) :
+ type(type),
+ ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * logits = ggml_new_tensor(ctx, type, 4, ne.data());
@@ -3401,27 +3340,21 @@ struct test_cross_entropy_loss : public test_case {
}
}

- float grad_eps() override {
- return 1.0f;
- }
+ float grad_eps() override { return 1.0f; }

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

// GGML_OP_CROSS_ENTROPY_LOSS_BACK
struct test_cross_entropy_loss_back : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_cross_entropy_loss_back(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_cross_entropy_loss_back(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) :
+ type(type),
+ ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * grad = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
@@ -3446,20 +3379,18 @@ struct test_cross_entropy_loss_back : public test_case {

// GGML_OP_OPT_STEP_ADAMW
struct test_opt_step_adamw : public test_case {
- const ggml_type type;
+ const ggml_type type;
const std::array<int64_t, 4> ne;

- std::string vars() override {
- return VARS_TO_STR2(type, ne);
- }
+ std::string vars() override { return VARS_TO_STR2(type, ne); }

- test_opt_step_adamw(ggml_type type = GGML_TYPE_F32,
- std::array<int64_t, 4> ne = {10, 5, 4, 3})
- : type(type), ne(ne) {}
+ test_opt_step_adamw(ggml_type type = GGML_TYPE_F32, std::array<int64_t, 4> ne = { 10, 5, 4, 3 }) :
+ type(type),
+ ne(ne) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
- ggml_set_param(a); // Despite tensor a having gradients the output tensor will not.
+ ggml_set_param(a); // Despite tensor a having gradients the output tensor will not.
ggml_set_name(a, "a");

ggml_tensor * grad = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2], ne[3]);
@@ -3482,13 +3413,11 @@ struct test_opt_step_adamw : public test_case {

void initialize_tensors(ggml_context * ctx) override {
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
- init_tensor_uniform(t, 0.0f, 1.0f); // grad_v and adamw_params need non-negative values.
+ init_tensor_uniform(t, 0.0f, 1.0f); // grad_v and adamw_params need non-negative values.
}
}

- bool grad_precise() override {
- return true;
- }
+ bool grad_precise() override { return true; }
};

enum llm_norm_type {
@@ -3497,30 +3426,30 @@ enum llm_norm_type {
};

struct llama_hparams {
- uint32_t n_vocab;
- uint32_t n_embd;
- uint32_t n_head;
- uint32_t n_head_kv;
+ uint32_t n_vocab;
+ uint32_t n_embd;
+ uint32_t n_head;
+ uint32_t n_head_kv;
static constexpr uint32_t n_layer = 1;
- uint32_t n_rot;
- uint32_t n_embd_head; // dimension of values (d_v)
- uint32_t n_ff;
+ uint32_t n_rot;
+ uint32_t n_embd_head; // dimension of values (d_v)
+ uint32_t n_ff;

float f_norm_eps;
float f_norm_rms_eps;

// cparams
- static constexpr uint32_t n_ctx = 512; // user-specified context size
+ static constexpr uint32_t n_ctx = 512; // user-specified context size
static constexpr uint32_t n_ctx_orig = n_ctx;

// batch
int32_t n_tokens;

// llm_build_context
- static constexpr int32_t n_kv = 32; // size of KV cache to consider (n_kv <= n_ctx
- static constexpr int32_t kv_head = 1; // index of where we store new KV data in the cache
+ static constexpr int32_t n_kv = 32; // size of KV cache to consider (n_kv <= n_ctx
+ static constexpr int32_t kv_head = 1; // index of where we store new KV data in the cache

- uint32_t n_embd_gqa() const { // dimension of key embeddings across all k-v heads
+ uint32_t n_embd_gqa() const { // dimension of key embeddings across all k-v heads
return n_embd_head * n_head_kv;
}
};
@@ -3529,21 +3458,19 @@ struct llama_hparams {

struct test_llm : public test_case {
llama_hparams hp;

-protected:
- test_llm(llama_hparams hp)
- : hp(std::move(hp)) {
- }
+ protected:
+ test_llm(llama_hparams hp) : hp(std::move(hp)) {}

-public:
- struct ggml_tensor * llm_build_norm(
- struct ggml_context * ctx,
- struct ggml_tensor * cur,
- struct ggml_tensor * mw,
- struct ggml_tensor * mb,
- llm_norm_type type) {
+ public:
+ struct ggml_tensor * llm_build_norm(struct ggml_context * ctx, struct ggml_tensor * cur, struct ggml_tensor * mw,
+ struct ggml_tensor * mb, llm_norm_type type) {
switch (type) {
- case LLM_NORM: cur = ggml_norm (ctx, cur, hp.f_norm_eps); break;
- case LLM_NORM_RMS: cur = ggml_rms_norm(ctx, cur, hp.f_norm_rms_eps); break;
+ case LLM_NORM:
+ cur = ggml_norm(ctx, cur, hp.f_norm_eps);
+ break;
+ case LLM_NORM_RMS:
+ cur = ggml_rms_norm(ctx, cur, hp.f_norm_rms_eps);
+ break;
}
cur = ggml_mul(ctx, cur, mw);
if (mb) {
@@ -3552,42 +3479,30 @@ public:
return cur;
}

- void llm_build_kv_store(
- struct ggml_context * ctx,
- struct ggml_tensor * k_l,
- struct ggml_tensor * v_l,
- struct ggml_tensor * k_cur,
- struct ggml_tensor * v_cur) {
+ void llm_build_kv_store(struct ggml_context * ctx, struct ggml_tensor * k_l, struct ggml_tensor * v_l,
+ struct ggml_tensor * k_cur, struct ggml_tensor * v_cur) {
// compute the transposed [n_tokens, n_embd] V matrix
struct ggml_tensor * v_cur_t = ggml_transpose(ctx, ggml_reshape_2d(ctx, v_cur, hp.n_embd_gqa(), hp.n_tokens));

- struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, k_l, hp.n_tokens*hp.n_embd_gqa(),
- (ggml_row_size(k_l->type, hp.n_embd_gqa()))*hp.kv_head);
+ struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, k_l, hp.n_tokens * hp.n_embd_gqa(),
+ (ggml_row_size(k_l->type, hp.n_embd_gqa())) *hp.kv_head);

- struct ggml_tensor * v_cache_view = ggml_view_2d(ctx, v_l, hp.n_tokens, hp.n_embd_gqa(),
- ( hp.n_ctx)*ggml_element_size(v_l),
- (hp.kv_head)*ggml_element_size(v_l));
+ struct ggml_tensor * v_cache_view =
+ ggml_view_2d(ctx, v_l, hp.n_tokens, hp.n_embd_gqa(), (hp.n_ctx) * ggml_element_size(v_l),
+ (hp.kv_head) * ggml_element_size(v_l));

// important: storing RoPE-ed version of K in the KV cache!
- ggml_cpy(ctx, k_cur, k_cache_view);
+ ggml_cpy(ctx, k_cur, k_cache_view);
ggml_cpy(ctx, v_cur_t, v_cache_view);
}

- struct ggml_tensor * llm_build_kqv(
- struct ggml_context * ctx,
- struct ggml_tensor * k_l,
- struct ggml_tensor * v_l,
- struct ggml_tensor * q_cur,
- struct ggml_tensor * kq_mask,
- float kq_scale) {
+ struct ggml_tensor * llm_build_kqv(struct ggml_context * ctx, struct ggml_tensor * k_l, struct ggml_tensor * v_l,
+ struct ggml_tensor * q_cur, struct ggml_tensor * kq_mask, float kq_scale) {
struct ggml_tensor * q = ggml_permute(ctx, q_cur, 0, 2, 1, 3);

struct ggml_tensor * k =
- ggml_view_3d(ctx, k_l,
- hp.n_embd_head, hp.n_kv, hp.n_head_kv,
- ggml_row_size(k_l->type, hp.n_embd_gqa()),
- ggml_row_size(k_l->type, hp.n_embd_head),
- 0);
+ ggml_view_3d(ctx, k_l, hp.n_embd_head, hp.n_kv, hp.n_head_kv, ggml_row_size(k_l->type, hp.n_embd_gqa()),
+ ggml_row_size(k_l->type, hp.n_embd_head), 0);

struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);

@@ -3595,20 +3510,17 @@ public:

// split cached v into n_head heads
struct ggml_tensor * v =
- ggml_view_3d(ctx, v_l,
- hp.n_kv, hp.n_embd_head, hp.n_head_kv,
- ggml_element_size(v_l)*hp.n_ctx,
- ggml_element_size(v_l)*hp.n_ctx*hp.n_embd_head,
- 0);
+ ggml_view_3d(ctx, v_l, hp.n_kv, hp.n_embd_head, hp.n_head_kv, ggml_element_size(v_l) * hp.n_ctx,
+ ggml_element_size(v_l) * hp.n_ctx * hp.n_embd_head, 0);

struct ggml_tensor * kqv = ggml_mul_mat(ctx, v, kq);

struct ggml_tensor * kqv_merged = ggml_permute(ctx, kqv, 0, 2, 1, 3);

- struct ggml_tensor * cur = ggml_cont_2d(ctx, kqv_merged, hp.n_embd_head*hp.n_head, hp.n_tokens);
+ struct ggml_tensor * cur = ggml_cont_2d(ctx, kqv_merged, hp.n_embd_head * hp.n_head, hp.n_tokens);

struct ggml_tensor * wo = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_embd);
- cur = ggml_mul_mat(ctx, wo, cur);
+ cur = ggml_mul_mat(ctx, wo, cur);

return cur;
}
@@ -3631,12 +3543,12 @@ public:

// Llama
struct test_llama : public test_llm {
- static constexpr float freq_base = 10000.0f;
- static constexpr float freq_scale = 1.0f;
- static constexpr float ext_factor = 0.0f;
+ static constexpr float freq_base = 10000.0f;
+ static constexpr float freq_scale = 1.0f;
+ static constexpr float ext_factor = 0.0f;
static constexpr float attn_factor = 1.0f;
- static constexpr float beta_fast = 32.0f;
- static constexpr float beta_slow = 1.0f;
+ static constexpr float beta_fast = 32.0f;
+ static constexpr float beta_slow = 1.0f;

std::string op_desc(ggml_tensor * t) override {
GGML_UNUSED(t);
@@ -3648,24 +3560,21 @@ struct test_llama : public test_llm {
return VARS_TO_STR1(n_tokens);
}

- double max_nmse_err() override {
- return 2e-3;
- }
+ double max_nmse_err() override { return 2e-3; }

- test_llama(int n_tokens = 1)
- : test_llm({
- /*n_vocab =*/ 32000,
- /*n_embd =*/ 3200,
- /*n_head =*/ 32,
- /*n_head_kv =*/ 32,
- /*n_rot =*/ 100,
- /*n_embd_head =*/ 100,
- /*n_ff =*/ 8640,
- /*f_norm_eps =*/ 0.f,
- /*f_norm_rms_eps =*/ 1e-5f,
- /*n_tokens =*/ n_tokens,
- }) {
- }
+ test_llama(int n_tokens = 1) :
+ test_llm({
+ /*n_vocab =*/32000,
+ /*n_embd =*/3200,
+ /*n_head =*/32,
+ /*n_head_kv =*/32,
+ /*n_rot =*/100,
+ /*n_embd_head =*/100,
+ /*n_ff =*/8640,
+ /*f_norm_eps =*/0.f,
+ /*f_norm_rms_eps =*/1e-5f,
+ /*n_tokens =*/n_tokens,
+ }) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
struct ggml_tensor * cur;
@@ -3687,7 +3596,7 @@ struct test_llama : public test_llm {

// norm
ggml_tensor * attn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
- cur = llm_build_norm(ctx, inpL, attn_norm, nullptr, LLM_NORM_RMS);
+ cur = llm_build_norm(ctx, inpL, attn_norm, nullptr, LLM_NORM_RMS);

// self-attention
{
@@ -3700,37 +3609,33 @@ struct test_llama : public test_llm {
struct ggml_tensor * Kcur = ggml_mul_mat(ctx, wk, cur);
struct ggml_tensor * Vcur = ggml_mul_mat(ctx, wv, cur);

- Qcur = ggml_rope_ext(
- ctx, ggml_reshape_3d(ctx, Qcur, hp.n_embd_head, hp.n_head, hp.n_tokens), inp_pos, nullptr,
- hp.n_rot, 0, hp.n_ctx_orig, freq_base, freq_scale,
- ext_factor, attn_factor, beta_fast, beta_slow
- );
+ Qcur = ggml_rope_ext(ctx, ggml_reshape_3d(ctx, Qcur, hp.n_embd_head, hp.n_head, hp.n_tokens), inp_pos,
+ nullptr, hp.n_rot, 0, hp.n_ctx_orig, freq_base, freq_scale, ext_factor,
+ attn_factor, beta_fast, beta_slow);

- Kcur = ggml_rope_ext(
- ctx, ggml_reshape_3d(ctx, Kcur, hp.n_embd_head, hp.n_head_kv, hp.n_tokens), inp_pos, nullptr,
- hp.n_rot, 0, hp.n_ctx_orig, freq_base, freq_scale,
- ext_factor, attn_factor, beta_fast, beta_slow
- );
+ Kcur = ggml_rope_ext(ctx, ggml_reshape_3d(ctx, Kcur, hp.n_embd_head, hp.n_head_kv, hp.n_tokens),
+ inp_pos, nullptr, hp.n_rot, 0, hp.n_ctx_orig, freq_base, freq_scale, ext_factor,
+ attn_factor, beta_fast, beta_slow);

llm_build_kv_store(ctx, k_l, v_l, Kcur, Vcur);

- cur = llm_build_kqv(ctx, k_l, v_l, Qcur, KQ_mask, 1.0f/sqrtf(float(hp.n_embd_head)));
+ cur = llm_build_kqv(ctx, k_l, v_l, Qcur, KQ_mask, 1.0f / sqrtf(float(hp.n_embd_head)));
}

struct ggml_tensor * ffn_inp = ggml_add(ctx, cur, inpSA);

// feed-forward network
ggml_tensor * ffn_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
- cur = llm_build_norm(ctx, ffn_inp, ffn_norm, nullptr, LLM_NORM_RMS);
+ cur = llm_build_norm(ctx, ffn_inp, ffn_norm, nullptr, LLM_NORM_RMS);

- ggml_tensor * ffn_gate = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_ff);
- ggml_tensor * ffn_down = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_ff, hp.n_embd);
- ggml_tensor * ffn_up = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_ff);
- struct ggml_tensor * tmp = ggml_mul_mat(ctx, ffn_up, cur);
- cur = ggml_mul_mat(ctx, ffn_gate, cur);
- cur = ggml_silu(ctx, cur);
- cur = ggml_mul(ctx, cur, tmp);
- cur = ggml_mul_mat(ctx, ffn_down, cur);
+ ggml_tensor * ffn_gate = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_ff);
+ ggml_tensor * ffn_down = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_ff, hp.n_embd);
+ ggml_tensor * ffn_up = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_ff);
+ struct ggml_tensor * tmp = ggml_mul_mat(ctx, ffn_up, cur);
+ cur = ggml_mul_mat(ctx, ffn_gate, cur);
+ cur = ggml_silu(ctx, cur);
+ cur = ggml_mul(ctx, cur, tmp);
+ cur = ggml_mul_mat(ctx, ffn_down, cur);

cur = ggml_add(ctx, cur, ffn_inp);

@@ -3741,11 +3646,11 @@ struct test_llama : public test_llm {
cur = inpL;

ggml_tensor * output_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
- cur = llm_build_norm(ctx, cur, output_norm, nullptr, LLM_NORM_RMS);
+ cur = llm_build_norm(ctx, cur, output_norm, nullptr, LLM_NORM_RMS);

// lm_head
ggml_tensor * output = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_vocab);
- cur = ggml_mul_mat(ctx, output, cur);
+ cur = ggml_mul_mat(ctx, output, cur);

return cur;
}
@@ -3753,12 +3658,12 @@ struct test_llama : public test_llm {

// Falcon
struct test_falcon : public test_llm {
- static constexpr float freq_base = 10000.0f;
- static constexpr float freq_scale = 1.0f;
- static constexpr float ext_factor = 0.0f;
+ static constexpr float freq_base = 10000.0f;
+ static constexpr float freq_scale = 1.0f;
+ static constexpr float ext_factor = 0.0f;
static constexpr float attn_factor = 1.0f;
- static constexpr float beta_fast = 32.0f;
- static constexpr float beta_slow = 1.0f;
+ static constexpr float beta_fast = 32.0f;
+ static constexpr float beta_slow = 1.0f;

std::string op_desc(ggml_tensor * t) override {
GGML_UNUSED(t);
@@ -3770,24 +3675,21 @@ struct test_falcon : public test_llm {
return VARS_TO_STR1(n_tokens);
}

- double max_nmse_err() override {
- return 2e-3;
- }
+ double max_nmse_err() override { return 2e-3; }

- test_falcon(int n_tokens = 1)
- : test_llm({
- /*n_vocab =*/ 32000,
- /*n_embd =*/ 3200,
- /*n_head =*/ 50,
- /*n_head_kv =*/ 1,
- /*n_rot =*/ 64,
- /*n_embd_head =*/ 64,
- /*n_ff =*/ 8640,
- /*f_norm_eps =*/ 1e-5f,
- /*f_norm_rms_eps =*/ 0.f,
- /*n_tokens =*/ n_tokens,
- }) {
- }
+ test_falcon(int n_tokens = 1) :
+ test_llm({
+ /*n_vocab =*/32000,
+ /*n_embd =*/3200,
+ /*n_head =*/50,
+ /*n_head_kv =*/1,
+ /*n_rot =*/64,
+ /*n_embd_head =*/64,
+ /*n_ff =*/8640,
+ /*f_norm_eps =*/1e-5f,
+ /*f_norm_rms_eps =*/0.f,
+ /*n_tokens =*/n_tokens,
+ }) {}

ggml_tensor * build_graph(ggml_context * ctx) override {
struct ggml_tensor * cur;
@@ -3808,37 +3710,38 @@ struct test_falcon : public test_llm {
// norm
ggml_tensor * attn_norm_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
ggml_tensor * attn_norm_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
- ggml_tensor * attn_norm = llm_build_norm(ctx, inpL, attn_norm_w, attn_norm_b, LLM_NORM);
+ ggml_tensor * attn_norm = llm_build_norm(ctx, inpL, attn_norm_w, attn_norm_b, LLM_NORM);

// self-attention
{
cur = attn_norm;

- ggml_tensor * wqkv = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_embd + 2*hp.n_embd_gqa());
+ ggml_tensor * wqkv =
+ ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_embd + 2 * hp.n_embd_gqa());

cur = ggml_mul_mat(ctx, wqkv, cur);

- struct ggml_tensor * Qcur = ggml_cont(ctx, ggml_view_2d(ctx, cur, hp.n_embd, hp.n_tokens, cur->nb[1], 0*sizeof(float)*(hp.n_embd)));
- struct ggml_tensor * Kcur = ggml_cont(ctx, ggml_view_2d(ctx, cur, hp.n_embd_gqa(), hp.n_tokens, cur->nb[1], 1*sizeof(float)*(hp.n_embd)));
- struct ggml_tensor * Vcur = ggml_cont(ctx, ggml_view_2d(ctx, cur, hp.n_embd_gqa(), hp.n_tokens, cur->nb[1], 1*sizeof(float)*(hp.n_embd + hp.n_embd_gqa())));
+ struct ggml_tensor * Qcur = ggml_cont(
+ ctx, ggml_view_2d(ctx, cur, hp.n_embd, hp.n_tokens, cur->nb[1], 0 * sizeof(float) * (hp.n_embd)));
+ struct ggml_tensor * Kcur = ggml_cont(ctx, ggml_view_2d(ctx, cur, hp.n_embd_gqa(), hp.n_tokens,
+ cur->nb[1], 1 * sizeof(float) * (hp.n_embd)));
+ struct ggml_tensor * Vcur =
+ ggml_cont(ctx, ggml_view_2d(ctx, cur, hp.n_embd_gqa(), hp.n_tokens, cur->nb[1],
+ 1 * sizeof(float) * (hp.n_embd + hp.n_embd_gqa())));

- Qcur = ggml_reshape_3d(ctx, Qcur, hp.n_embd_head, hp.n_head, hp.n_tokens);
+ Qcur = ggml_reshape_3d(ctx, Qcur, hp.n_embd_head, hp.n_head, hp.n_tokens);
Kcur = ggml_reshape_3d(ctx, Kcur, hp.n_embd_head, hp.n_head_kv, hp.n_tokens);

// using mode = 2 for neox mode
- Qcur = ggml_rope_ext(
- ctx, Qcur, inp_pos, nullptr, hp.n_rot, 2, hp.n_ctx_orig,
- freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
- );
+ Qcur = ggml_rope_ext(ctx, Qcur, inp_pos, nullptr, hp.n_rot, 2, hp.n_ctx_orig, freq_base, freq_scale,
+ ext_factor, attn_factor, beta_fast, beta_slow);

- Kcur = ggml_rope_ext(
- ctx, Kcur, inp_pos, nullptr, hp.n_rot, 2, hp.n_ctx_orig,
- freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
- );
+ Kcur = ggml_rope_ext(ctx, Kcur, inp_pos, nullptr, hp.n_rot, 2, hp.n_ctx_orig, freq_base, freq_scale,
+ ext_factor, attn_factor, beta_fast, beta_slow);

llm_build_kv_store(ctx, k_l, v_l, Kcur, Vcur);

- cur = llm_build_kqv(ctx, k_l, v_l, Qcur, KQ_mask, 1.0f/sqrtf(float(hp.n_embd_head)));
+ cur = llm_build_kqv(ctx, k_l, v_l, Qcur, KQ_mask, 1.0f / sqrtf(float(hp.n_embd_head)));
}

struct ggml_tensor * ffn_inp = cur;
@@ -3847,10 +3750,10 @@ struct test_falcon : public test_llm {
{
ggml_tensor * ffn_up = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_embd, hp.n_ff);
ggml_tensor * ffn_down = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, hp.n_ff, hp.n_embd);
- cur = attn_norm;
- cur = ggml_mul_mat(ctx, ffn_up, cur);
- cur = ggml_gelu(ctx, cur);
- cur = ggml_mul_mat(ctx, ffn_down, cur);
+ cur = attn_norm;
+ cur = ggml_mul_mat(ctx, ffn_up, cur);
+ cur = ggml_gelu(ctx, cur);
+ cur = ggml_mul_mat(ctx, ffn_down, cur);
}

cur = ggml_add(ctx, cur, ffn_inp);
@@ -3865,65 +3768,80 @@ struct test_falcon : public test_llm {

ggml_tensor * output_norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
ggml_tensor * output_norm_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hp.n_embd);
- cur = llm_build_norm(ctx, cur, output_norm, output_norm_b, LLM_NORM);
+ cur = llm_build_norm(ctx, cur, output_norm, output_norm_b, LLM_NORM);

// lm_head
ggml_tensor * output = ggml_new_tensor_2d(ctx, GGML_TYPE_Q8_0, hp.n_embd, hp.n_vocab);
- cur = ggml_mul_mat(ctx, output, cur);
+ cur = ggml_mul_mat(ctx, output, cur);

return cur;
}
};

-
// ###########################################
// ## Section 3: GGML Op Test Instantiation ##
// ###########################################
static const ggml_type all_types[] = {
- GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16,
- GGML_TYPE_Q4_0, GGML_TYPE_Q4_1,
- GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+ GGML_TYPE_F32,
+ GGML_TYPE_F16,
+ GGML_TYPE_BF16,
+ GGML_TYPE_Q4_0,
+ GGML_TYPE_Q4_1,
+ GGML_TYPE_Q5_0,
+ GGML_TYPE_Q5_1,
GGML_TYPE_Q8_0,
- GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
- GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
+ GGML_TYPE_Q2_K,
+ GGML_TYPE_Q3_K,
+ GGML_TYPE_Q4_K,
+ GGML_TYPE_Q5_K,
GGML_TYPE_Q6_K,
// GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
- GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
- GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
- GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+ GGML_TYPE_IQ2_XXS,
+ GGML_TYPE_IQ2_XS,
+ GGML_TYPE_IQ2_S,
+ GGML_TYPE_IQ3_XXS,
+ GGML_TYPE_IQ1_S,
+ GGML_TYPE_IQ1_M,
+ GGML_TYPE_IQ4_NL,
+ GGML_TYPE_IQ3_S,
+ GGML_TYPE_IQ4_XS,
};

-static const ggml_type base_types[] = {
- GGML_TYPE_F32, GGML_TYPE_F16,
- GGML_TYPE_Q8_0, // for I8MM tests
- GGML_TYPE_Q4_0,
- GGML_TYPE_Q4_1, // for I8MM tests
- GGML_TYPE_Q4_K,
- GGML_TYPE_IQ2_XXS
-};
+static const ggml_type base_types[] = { GGML_TYPE_F32, GGML_TYPE_F16,
+ GGML_TYPE_Q8_0, // for I8MM tests
+ GGML_TYPE_Q4_0,
+ GGML_TYPE_Q4_1, // for I8MM tests
+ GGML_TYPE_Q4_K, GGML_TYPE_IQ2_XXS };

static const ggml_type other_types[] = {
GGML_TYPE_Q4_1,
- GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+ GGML_TYPE_Q5_0,
+ GGML_TYPE_Q5_1,
GGML_TYPE_Q8_0,
- GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
+ GGML_TYPE_Q2_K,
+ GGML_TYPE_Q3_K,
GGML_TYPE_Q5_K,
GGML_TYPE_Q6_K,
// GGML_TYPE_TQ1_0, GGML_TYPE_TQ2_0, // TODO: implement for all backends
- GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
- GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
- GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+ GGML_TYPE_IQ2_XS,
+ GGML_TYPE_IQ2_S,
+ GGML_TYPE_IQ3_XXS,
+ GGML_TYPE_IQ1_S,
+ GGML_TYPE_IQ1_M,
+ GGML_TYPE_IQ4_NL,
+ GGML_TYPE_IQ3_S,
+ GGML_TYPE_IQ4_XS,
GGML_TYPE_BF16,
};

// Test cases for evaluation: should try to cover edge cases while using small input sizes to keep the runtime low
static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
std::vector<std::unique_ptr<test_case>> test_cases;
- std::default_random_engine rng(0);
+ std::default_random_engine rng(0);

// unary ops
- for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
- for (int v : {0, 1}) {
+ for (ggml_type type : { GGML_TYPE_F16, GGML_TYPE_F32 }) {
+ for (int v : { 0, 1 }) {
for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) {
test_cases.emplace_back(new test_unary((ggml_unary_op) op, type, { 128, 2, 2, 2 }, v));
test_cases.emplace_back(new test_unary((ggml_unary_op) op, type, { 5, 7, 11, 13 }, v));
@@ -3933,37 +3851,38 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {

test_cases.emplace_back(new test_get_rows(GGML_TYPE_F32, 1, 8, 2, 1, false));
for (ggml_type type : all_types) {
- for (int b : {1, 7}) {
- for (bool v : {false, true}) {
+ for (int b : { 1, 7 }) {
+ for (bool v : { false, true }) {
test_cases.emplace_back(new test_get_rows(type, 256, 5, 4, b, v));
}
}
}
- for (int b : {1, 7}) {
- for (bool v : {false, true}) {
+ for (int b : { 1, 7 }) {
+ for (bool v : { false, true }) {
test_cases.emplace_back(new test_get_rows(GGML_TYPE_I32, 256, 5, 4, b, v));
}
}

test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_F32, 1, 8, 2, 1, false));
for (ggml_type type : all_types) {
- for (bool v : {false, true}) {
+ for (bool v : { false, true }) {
test_cases.emplace_back(new test_get_rows_back(type, 256, 5, 4, 1, v));
}
}
- for (bool v : {false, true}) {
+ for (bool v : { false, true }) {
test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_I32, 256, 5, 4, 1, v));
}

- for (ggml_type type_input : {GGML_TYPE_F32}) {
- for (ggml_op_pool pool_type : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) {
- for (int k0 : {1, 3}) {
- for (int k1 : {1, 3}) {
- for (int s0 : {1, 2}) {
- for (int s1 : {1, 2}) {
- for (int p0 : {0, 1}) {
- for (int p1 : {0, 1}) {
- test_cases.emplace_back(new test_pool2d(pool_type, type_input, {10, 10, 3, 1}, k0, k1, s0, s1, p0, p1));
+ for (ggml_type type_input : { GGML_TYPE_F32 }) {
+ for (ggml_op_pool pool_type : { GGML_OP_POOL_AVG, GGML_OP_POOL_MAX }) {
+ for (int k0 : { 1, 3 }) {
+ for (int k1 : { 1, 3 }) {
+ for (int s0 : { 1, 2 }) {
+ for (int s1 : { 1, 2 }) {
+ for (int p0 : { 0, 1 }) {
+ for (int p1 : { 0, 1 }) {
+ test_cases.emplace_back(new test_pool2d(pool_type, type_input, { 10, 10, 3, 1 }, k0,
+ k1, s0, s1, p0, p1));
}
}
}
@@ -3974,15 +3893,17 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}

// im2col 1D
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false));
- for (int s0 : {1, 3}) {
- for (int p0 : {0, 3}) {
- for (int d0 : {1, 3}) {
- test_cases.emplace_back(new test_im2col(
- GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {20, 2, 2, 1}, {3, 2, 2, 1},
- s0, 0, p0, 0, d0, 0, false));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, { 3000, 128, 1, 1 },
+ { 3, 128, 1280, 1 }, 1, 0, 1, 0, 1, 0, false));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, { 3000, 128, 1, 1 },
+ { 3, 128, 1280, 1 }, 1, 0, 1, 0, 1, 0, false));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 3000, 128, 1, 1 },
+ { 3, 128, 1280, 1 }, 1, 0, 1, 0, 1, 0, false));
+ for (int s0 : { 1, 3 }) {
+ for (int p0 : { 0, 3 }) {
+ for (int d0 : { 1, 3 }) {
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, { 20, 2, 2, 1 },
+ { 3, 2, 2, 1 }, s0, 0, p0, 0, d0, 0, false));
}
}
}
@@ -3991,15 +3912,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32));
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32));
test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16));
- for (int s0 : {1, 3}) {
- for (int s1 : {1, 3}) {
- for (int p0 : {0, 3}) {
- for (int p1 : {0, 3}) {
- for (int d0 : {1, 3}) {
- for (int d1 : {1, 3}) {
- test_cases.emplace_back(new test_im2col(
- GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {20, 20, 2, 2}, {3, 3, 2, 2},
- s0, s1, p0, p1, d0, d1, true));
+ for (int s0 : { 1, 3 }) {
+ for (int s1 : { 1, 3 }) {
+ for (int p0 : { 0, 3 }) {
+ for (int p1 : { 0, 3 }) {
+ for (int d0 : { 1, 3 }) {
+ for (int d1 : { 1, 3 }) {
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32,
+ { 20, 20, 2, 2 }, { 3, 3, 2, 2 }, s0, s1, p0, p1,
+ d0, d1, true));
}
}
}
@@ -4008,14 +3929,22 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}

// extra tests for im2col 2D
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 32}, {3, 3, 1, 32}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 32}, {3, 3, 2, 32}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 1024}, {3, 3, 1, 1024}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 1024}, {3, 3, 2, 1024}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 2048}, {3, 3, 1, 2048}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 2048}, {3, 3, 2, 2048}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 2560}, {3, 3, 1, 2560}, 1, 1, 1, 1, 1, 1, true));
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 2560}, {3, 3, 2, 2560}, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 1, 32 },
+ { 3, 3, 1, 32 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 2, 32 },
+ { 3, 3, 2, 32 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 1, 1024 },
+ { 3, 3, 1, 1024 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 2, 1024 },
+ { 3, 3, 2, 1024 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 1, 2048 },
+ { 3, 3, 1, 2048 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 2, 2048 },
+ { 3, 3, 2, 2048 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 1, 2560 },
+ { 3, 3, 1, 2560 }, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, { 12, 12, 2, 2560 },
+ { 3, 3, 2, 2560 }, 1, 1, 1, 1, 1, 1, true));

// sycl backend will limit task global_range < MAX_INT
// test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
@@ -4024,65 +3953,65 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
// test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));
// test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {1024, 1024, 256, 1}, {3, 3, 256, 1}, 1, 1, 1, 1, 1, 1, true));

- test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, false));
- test_cases.emplace_back(new test_conv_2d_dw({17, 34, 9, 1}, {3, 3, 1, 9}, 1, 0, 1, true));
- test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, false));
- test_cases.emplace_back(new test_conv_2d_dw({32, 8, 64, 1}, {3, 3, 1, 64}, 2, 1, 1, true));
+ test_cases.emplace_back(new test_conv_2d_dw({ 17, 34, 9, 1 }, { 3, 3, 1, 9 }, 1, 0, 1, false));
+ test_cases.emplace_back(new test_conv_2d_dw({ 17, 34, 9, 1 }, { 3, 3, 1, 9 }, 1, 0, 1, true));
+ test_cases.emplace_back(new test_conv_2d_dw({ 32, 8, 64, 1 }, { 3, 3, 1, 64 }, 2, 1, 1, false));
+ test_cases.emplace_back(new test_conv_2d_dw({ 32, 8, 64, 1 }, { 3, 3, 1, 64 }, 2, 1, 1, true));

test_cases.emplace_back(new test_conv_transpose_1d());
- test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 3, 0, 1));
- test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 2, 0, 1));
- test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {2,3,2,1}, 1, 0, 1));
- test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 2, 0, 1));
- test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,2,2,1}, 1, 0, 1));
- test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1));
- test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1));
-
- test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 500, 1, 1}));
- test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1}));
-
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32, 1, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {100, 10, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 12, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {2000, 10, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {5438, 3, 1, 1}));
-
- for (int ne3 : {1, 3}) { // CUDA backward pass only supports ne3 == 1
- test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 1}));
- test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
- test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 2, 1, 1}));
- test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 2, 1}));
- test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, {10, 5, 4, ne3}, {1, 1, 1, 2}));
- test_cases.emplace_back(new test_repeat(GGML_TYPE_I32, {10, 5, 4, ne3}, {2, 1, 1, 1}));
- test_cases.emplace_back(new test_repeat(GGML_TYPE_I16, {10, 5, 4, ne3}, {1, 1, 1, 2}));
- }
-
- for (bool view : {false, true}) {
- test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 1, 1}, view));
- test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {2, 1, 1, 1}, view));
- test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 2, 1, 1}, view));
- test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 2, 1}, view));
- test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, {8, 6, 4, 2}, {1, 1, 1, 2}, view));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 3, 2, 1, 1 }, { 2, 3, 2, 1 }, 3, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 3, 2, 1, 1 }, { 2, 3, 2, 1 }, 2, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 3, 2, 1, 1 }, { 2, 3, 2, 1 }, 1, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 3, 2, 1, 1 }, { 3, 2, 2, 1 }, 2, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 3, 2, 1, 1 }, { 3, 2, 2, 1 }, 1, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 3, 2, 1, 1 }, { 3, 1, 2, 1 }, 1, 0, 1));
+ test_cases.emplace_back(new test_conv_transpose_1d({ 2, 1, 1, 1 }, { 3, 1, 1, 1 }, 1, 0, 1));
+
+ test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, { 4, 500, 1, 1 }));
+ test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, { 4, 5000, 1, 1 }));
+
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 32, 1, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 100, 10, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 1024, 10, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 1024, 12, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 2000, 10, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 5438, 3, 1, 1 }));
+
+ for (int ne3 : { 1, 3 }) { // CUDA backward pass only supports ne3 == 1
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, { 10, 5, 4, ne3 }, { 1, 1, 1, 1 }));
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, { 10, 5, 4, ne3 }, { 2, 1, 1, 1 }));
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, { 10, 5, 4, ne3 }, { 1, 2, 1, 1 }));
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, { 10, 5, 4, ne3 }, { 1, 1, 2, 1 }));
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_F32, { 10, 5, 4, ne3 }, { 1, 1, 1, 2 }));
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_I32, { 10, 5, 4, ne3 }, { 2, 1, 1, 1 }));
+ test_cases.emplace_back(new test_repeat(GGML_TYPE_I16, { 10, 5, 4, ne3 }, { 1, 1, 1, 2 }));
+ }
+
+ for (bool view : { false, true }) {
+ test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, { 8, 6, 4, 2 }, { 1, 1, 1, 1 }, view));
+ test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, { 8, 6, 4, 2 }, { 2, 1, 1, 1 }, view));
+ test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, { 8, 6, 4, 2 }, { 1, 2, 1, 1 }, view));
+ test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, { 8, 6, 4, 2 }, { 1, 1, 2, 1 }, view));
+ test_cases.emplace_back(new test_repeat_back(GGML_TYPE_F32, { 8, 6, 4, 2 }, { 1, 1, 1, 2 }, view));
}

test_cases.emplace_back(new test_dup(GGML_TYPE_F32));
test_cases.emplace_back(new test_dup(GGML_TYPE_F16));
test_cases.emplace_back(new test_dup(GGML_TYPE_I32));
test_cases.emplace_back(new test_dup(GGML_TYPE_I16));
- test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {0, 2, 1, 3}));
|
|
- test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {0, 2, 1, 3})); // dup by rows
|
|
- test_cases.emplace_back(new test_dup(GGML_TYPE_F32, {10, 10, 5, 1}, {1, 0, 2, 3}));
|
|
- test_cases.emplace_back(new test_dup(GGML_TYPE_F16, {10, 10, 5, 1}, {1, 0, 2, 3})); // dup dst not-contiguous
|
|
- test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {0, 2, 1, 3}));
|
|
- test_cases.emplace_back(new test_dup(GGML_TYPE_I16, {10, 8, 3, 1}, {1, 2, 0, 3}));
|
|
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F32, { 10, 10, 5, 1 }, { 0, 2, 1, 3 }));
|
|
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F16, { 10, 10, 5, 1 }, { 0, 2, 1, 3 })); // dup by rows
|
|
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F32, { 10, 10, 5, 1 }, { 1, 0, 2, 3 }));
|
|
+ test_cases.emplace_back(new test_dup(GGML_TYPE_F16, { 10, 10, 5, 1 }, { 1, 0, 2, 3 })); // dup dst not-contiguous
|
|
+ test_cases.emplace_back(new test_dup(GGML_TYPE_I16, { 10, 8, 3, 1 }, { 0, 2, 1, 3 }));
|
|
+ test_cases.emplace_back(new test_dup(GGML_TYPE_I16, { 10, 8, 3, 1 }, { 1, 2, 0, 3 }));
|
|
|
|
for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
|
|
- test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, {6, 5, 4, 3}, dim));
|
|
+ test_cases.emplace_back(new test_set(GGML_TYPE_F32, GGML_TYPE_F32, { 6, 5, 4, 3 }, dim));
|
|
}
|
|
|
|
for (int dim = 1; dim < GGML_MAX_DIMS; ++dim) {
|
|
- test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, {6, 5, 4, 3}, dim));
|
|
+ test_cases.emplace_back(new test_set(GGML_TYPE_I32, GGML_TYPE_I32, { 6, 5, 4, 3 }, dim));
|
|
}
|
|
|
|
// same-type copy
|
|
@@ -4090,75 +4019,76 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
const auto nk = ggml_blck_size(type);

for (int k = 1; k < 4; ++k) {
- test_cases.emplace_back(new test_cpy(type, type, {k*nk, 2, 3, 4}));
- test_cases.emplace_back(new test_cpy(type, type, {k*nk, 2, 3, 4}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_cpy(type, type, {k*nk, 2, 3, 4}, {0, 3, 1, 2}, {0, 2, 1, 3}));
+ test_cases.emplace_back(new test_cpy(type, type, { k * nk, 2, 3, 4 }));
+ test_cases.emplace_back(new test_cpy(type, type, { k * nk, 2, 3, 4 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(new test_cpy(type, type, { k * nk, 2, 3, 4 }, { 0, 3, 1, 2 }, { 0, 2, 1, 3 }));
}
}

- for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_F32}) {
+ for (ggml_type type_src : { GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_F32 }) {
for (ggml_type type_dst : all_types) {
- test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
- test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows
+ test_cases.emplace_back(new test_cpy(type_src, type_dst, { 256, 4, 4, 4 }));
+ test_cases.emplace_back(new test_cpy(type_src, type_dst, { 256, 2, 3, 4 }, { 0, 2, 1, 3 })); // cpy by rows
}
}
for (ggml_type type_src : all_types) {
- for (ggml_type type_dst : {GGML_TYPE_F32}) {
- test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 4, 4, 4}));
- test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {0, 2, 1, 3})); // cpy by rows
+ for (ggml_type type_dst : { GGML_TYPE_F32 }) {
+ test_cases.emplace_back(new test_cpy(type_src, type_dst, { 256, 4, 4, 4 }));
+ test_cases.emplace_back(new test_cpy(type_src, type_dst, { 256, 2, 3, 4 }, { 0, 2, 1, 3 })); // cpy by rows
}
}
- for (ggml_type type_src : {GGML_TYPE_F16, GGML_TYPE_F32}) {
- for (ggml_type type_dst : {GGML_TYPE_F16, GGML_TYPE_F32}) {
- test_cases.emplace_back(new test_cpy(type_src, type_dst, {256, 2, 3, 4}, {1, 0, 2, 3})); // cpy not-contiguous
+ for (ggml_type type_src : { GGML_TYPE_F16, GGML_TYPE_F32 }) {
+ for (ggml_type type_dst : { GGML_TYPE_F16, GGML_TYPE_F32 }) {
+ test_cases.emplace_back(
+ new test_cpy(type_src, type_dst, { 256, 2, 3, 4 }, { 1, 0, 2, 3 })); // cpy not-contiguous
}
}

test_cases.emplace_back(new test_cont());
- test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}));
- test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, { 2, 1, 1, 1 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, { 2, 1, 3, 5 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F32, { 2, 3, 5, 7 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, { 2, 1, 1, 1 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, { 2, 1, 3, 5 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_F16, { 2, 3, 5, 7 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, { 2, 1, 1, 1 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, { 2, 1, 3, 5 }));
+ test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, { 2, 3, 5, 7 }));

auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
- for (auto op : {ggml_add, ggml_sub, ggml_mul, ggml_div}) {
+ for (auto op : { ggml_add, ggml_sub, ggml_mul, ggml_div }) {
test_cases.emplace_back(new test_bin_bcast(op, type, ne, nr));
}
};
- for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
- add_test_bin_bcast(type, {1, 1, 8, 1}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {1, 1, 1, 1}, {32, 1, 1, 1});
- add_test_bin_bcast(type, {1, 1, 320, 320}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {10, 5, 1, 1}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {10, 5, 4, 1}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {2, 1, 1, 1});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 2, 1, 1});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 1, 2, 1});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 1, 1, 2});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 1, 2, 2});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 2, 2, 2});
- add_test_bin_bcast(type, {10, 5, 4, 3}, {2, 2, 2, 2});
+ for (ggml_type type : { GGML_TYPE_F16, GGML_TYPE_F32 }) {
+ add_test_bin_bcast(type, { 1, 1, 8, 1 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 1, 1 }, { 32, 1, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 320, 320 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 10, 5, 1, 1 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 10, 5, 4, 1 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 2, 1, 1, 1 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 1, 2, 1, 1 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 1, 1, 2, 1 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 1, 1, 1, 2 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 1, 1, 2, 2 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 1, 2, 2, 2 });
+ add_test_bin_bcast(type, { 10, 5, 4, 3 }, { 2, 2, 2, 2 });

// stable diffusion
- add_test_bin_bcast(type, {1280, 1, 1, 1}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {1280, 1, 1, 1}, {1, 16, 16, 1});
- add_test_bin_bcast(type, {1280, 16, 16, 1}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {1280, 1, 1, 1}, {1, 256, 1, 1});
- add_test_bin_bcast(type, {1, 1, 1280, 1}, {16, 16, 1, 1});
- add_test_bin_bcast(type, {16, 16, 1280, 1}, {1, 1, 1, 1});
- add_test_bin_bcast(type, {1, 1, 1920, 1}, {16, 16, 1, 1});
- add_test_bin_bcast(type, {1, 1, 2560, 1}, {16, 16, 1, 1});
- add_test_bin_bcast(type, {1, 1, 1280, 1}, {32, 32, 1, 1});
- add_test_bin_bcast(type, {1, 1, 1920, 1}, {32, 32, 1, 1});
- add_test_bin_bcast(type, {1, 1, 640, 1}, {32, 32, 1, 1});
- add_test_bin_bcast(type, {5120, 1, 1, 1}, {1, 256, 1, 1});
- add_test_bin_bcast(type, {640, 1, 1, 1}, {1, 1, 1, 1});
+ add_test_bin_bcast(type, { 1280, 1, 1, 1 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 1280, 1, 1, 1 }, { 1, 16, 16, 1 });
+ add_test_bin_bcast(type, { 1280, 16, 16, 1 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 1280, 1, 1, 1 }, { 1, 256, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 1280, 1 }, { 16, 16, 1, 1 });
+ add_test_bin_bcast(type, { 16, 16, 1280, 1 }, { 1, 1, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 1920, 1 }, { 16, 16, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 2560, 1 }, { 16, 16, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 1280, 1 }, { 32, 32, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 1920, 1 }, { 32, 32, 1, 1 });
+ add_test_bin_bcast(type, { 1, 1, 640, 1 }, { 32, 32, 1, 1 });
+ add_test_bin_bcast(type, { 5120, 1, 1, 1 }, { 1, 256, 1, 1 });
+ add_test_bin_bcast(type, { 640, 1, 1, 1 }, { 1, 1, 1, 1 });
//add_test_bin_bcast(type, {3, 3, 2560, 1280}, {1, 1, 1, 1});
//add_test_bin_bcast(type, {3, 3, 2560, 1280}, {2, 1, 1, 1});
}
@@ -4167,20 +4097,20 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_scale());
test_cases.emplace_back(new test_silu_back());

- for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) {
- for (bool v : {false, true}) {
- test_cases.emplace_back(new test_norm (GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
- test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
+ for (float eps : { 0.0f, 1e-6f, 1e-4f, 1e-1f }) {
+ for (bool v : { false, true }) {
+ test_cases.emplace_back(new test_norm(GGML_TYPE_F32, { 64, 5, 4, 3 }, v, eps));
+ test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, { 64, 5, 4, 3 }, v, eps));
}
- test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
- test_cases.emplace_back(new test_l2_norm (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+ test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, { 64, 5, 4, 3 }, eps));
+ test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, { 64, 5, 4, 3 }, eps));
}

- test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f));
+ test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, { 64, 5, 4, 3 }, 1e-12f));

- test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 1, 1}, {4, 1536, 1, 1}));
- test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {8, 1536, 1, 1}, {4, 1536, 1, 1}));
- test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 4, 1}, {4, 1536, 1, 1}));
+ test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, { 4, 1536, 1, 1 }, { 4, 1536, 1, 1 }));
+ test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, { 8, 1536, 1, 1 }, { 4, 1536, 1, 1 }));
+ test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, { 4, 1536, 4, 1 }, { 4, 1536, 1, 1 }));

test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1024, 32, 4));

@@ -4201,59 +4131,60 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {

for (ggml_type type_a : all_types) {
for (int i = 1; i < 10; ++i) {
- test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 16, i, 256, { 1, 1 }, { 1, 1 }));
}
}

#if 1
for (ggml_type type_a : base_types) {
- for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
+ for (ggml_type type_b : { GGML_TYPE_F32, GGML_TYPE_F16 }) {
// test cases without permutation
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {2, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 1}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 1}, {2, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {2, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {1, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {2, 2}));
-
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {2, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {1, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 1}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 1}, {2, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {2, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {1, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {2, 2}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1 }, { 2, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1 }, { 1, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 3, 1 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 3, 1 }, { 2, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 3, 2 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 3, 2 }, { 2, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 3, 2 }, { 1, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 3, 2 }, { 2, 2 }));
+
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 1, 1 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 1, 1 }, { 2, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 1, 1 }, { 1, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 3, 1 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 3, 1 }, { 2, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 3, 2 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 3, 2 }, { 2, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 3, 2 }, { 1, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 3, 2 }, { 2, 2 }));

// test cases with permutation
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 2, 3 }, { 1, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 2, 3 }, { 1, 1 }, { 0, 1, 3, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 2, 3 }, { 1, 1 }, { 0, 3, 2, 1 }));

- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, { 2, 3 }, { 1, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, { 2, 3 }, { 1, 1 }, { 0, 1, 3, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, { 2, 3 }, { 1, 1 }, { 0, 3, 2, 1 }));

- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 1, 3, 2}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 3, 2, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 2, 3 }, { 1, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 2, 3 }, { 1, 1 }, { 0, 1, 3, 2 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, { 2, 3 }, { 1, 1 }, { 0, 3, 2, 1 }));

// test cases with large ne00/ne10 to cover stream-k fixup
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 1024, {3, 2}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 1024, {3, 2}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 1024, {3, 2}, {1, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 1024, { 3, 2 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 1024, { 3, 2 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 1024, { 3, 2 }, { 1, 1 }));
}
}
for (ggml_type type_a : other_types) {
- for (ggml_type type_b : {GGML_TYPE_F32}) {
+ for (ggml_type type_b : { GGML_TYPE_F32 }) {
if (ggml_blck_size(type_a) != 256) {
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), {1, 1}, {1, 1}));
+ test_cases.emplace_back(
+ new test_mul_mat(type_a, type_b, 16, 1, ggml_blck_size(type_a), { 1, 1 }, { 1, 1 }));
}
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1 }, { 1, 1 }));
}
}
#else
@@ -4265,31 +4196,35 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
std::uniform_int_distribution<> dist_k(1, 16);
for (int i = 0; i < 1000; i++) {
for (ggml_type type_a : all_types) {
- for (ggml_type type_b : {GGML_TYPE_F32}) {
+ for (ggml_type type_b : { GGML_TYPE_F32 }) {
int m = dist_m(rng);
int n = dist_n(rng);
int k = dist_k(rng) * ggml_blck_size(type_a);
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, m, n, k, { 1, 1}, {1, 1}));
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, m, n, k, { 1, 1 }, { 1, 1 }));
}
}
}
#endif

- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 128, { 8, 1}, {1, 1}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 128, { 8, 1}, {4, 1}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 64, { 8, 1}, {4, 1}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 64, { 8, 1}, {4, 1}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 193, {1, 1}, {4, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 67, {1, 1}, {4, 1}, {0, 2, 1, 3}));
-
- for (auto bs : {1,2,4,8}) {
- for (auto nr : {1,4}) {
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 128, { 8, 1 }, { 1, 1 }));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 128, { 8, 1 }, { 4, 1 }));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 64, { 8, 1 }, { 4, 1 }));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 64, { 8, 1 }, { 4, 1 }));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1 }, { 4, 1 }));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1 }, { 4, 1 }));
+ test_cases.emplace_back(
+ new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 193, { 1, 1 }, { 4, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(
+ new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056, 1, 67, { 1, 1 }, { 4, 1 }, { 0, 2, 1, 3 }));
+
+ for (auto bs : { 1, 2, 4, 8 }) {
+ for (auto nr : { 1, 4 }) {
for (uint32_t m = 0; m < 2; ++m) {
for (uint32_t k = 0; k < 2; ++k) {
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056 + m, 1, 128 + k, {bs, 1}, {nr, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128 + m, 1, 1056 + k, {bs, 1}, {nr, 1}, {0, 1, 2, 3}, true));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056 + m, 1, 128 + k,
+ { bs, 1 }, { nr, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128 + m, 1, 1056 + k,
+ { bs, 1 }, { nr, 1 }, { 0, 1, 2, 3 }, true));
}
}
}
@@ -4302,11 +4237,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
// test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 512, 262144, 9216, {1, 1}, {1, 1}));

for (ggml_type type_a : base_types) {
- for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
- for (int n_mats : {4, 8}) {
- for (int n_used : {1, 2, 4}) {
- for (bool b : {false, true}) {
- for (int n : {1, 32, 129}) {
+ for (ggml_type type_b : { GGML_TYPE_F32 /*, GGML_TYPE_F16 */ }) {
+ for (int n_mats : { 4, 8 }) {
+ for (int n_used : { 1, 2, 4 }) {
+ for (bool b : { false, true }) {
+ for (int n : { 1, 32, 129 }) {
int m = 512;
int k = 256;
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, n_used, b, m, n, k));
@@ -4318,11 +4253,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}

for (ggml_type type_a : other_types) {
- for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
- for (int n_mats : {4}) {
- for (int n_used : {2}) {
- for (bool b : {false}) {
- for (int n : {1, 32}) {
+ for (ggml_type type_b : { GGML_TYPE_F32 /*, GGML_TYPE_F16 */ }) {
+ for (int n_mats : { 4 }) {
+ for (int n_used : { 2 }) {
+ for (bool b : { false }) {
+ for (int n : { 1, 32 }) {
int m = 512;
int k = 256;
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, n_used, b, m, n, k));
@@ -4334,14 +4269,15 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}

for (ggml_type type_a : base_types) {
- for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
- for (int n : {1, 16}) {
- for (int k : {1, 16}) {
- for (int bs2 : {1, 3}) {
- for (int bs3 : {1, 3}) {
- for (int nr2 : {1, 2}) {
- for (int nr3 : {1, 2}) {
- test_cases.emplace_back(new test_out_prod(type_a, type_b, 256, n, k, {bs2, bs3}, {nr2, nr3}));
+ for (ggml_type type_b : { GGML_TYPE_F32, GGML_TYPE_F16 }) {
+ for (int n : { 1, 16 }) {
+ for (int k : { 1, 16 }) {
+ for (int bs2 : { 1, 3 }) {
+ for (int bs3 : { 1, 3 }) {
+ for (int nr2 : { 1, 2 }) {
+ for (int nr3 : { 1, 2 }) {
+ test_cases.emplace_back(
+ new test_out_prod(type_a, type_b, 256, n, k, { bs2, bs3 }, { nr2, nr3 }));
}
}
}
@@ -4351,7 +4287,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}
}

- for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
+ for (ggml_type type : { GGML_TYPE_F16, GGML_TYPE_F32 }) {
test_cases.emplace_back(new test_sqr(type));
test_cases.emplace_back(new test_sqrt(type));
test_cases.emplace_back(new test_log(type));
@@ -4360,9 +4296,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_clamp(type));
}

- test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));
- test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 3, 1}, 5));
- test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 3, 2}, 5));
+ test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, { 10, 10, 1, 1 }, 5));
+ test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, { 10, 10, 3, 1 }, 5));
+ test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, { 10, 10, 3, 2 }, 5));

#if 0
std::uniform_int_distribution<> dist_ne1(1, 50);
@@ -4379,78 +4315,101 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
exponent <<= 1;
}
#endif
- for (bool mask : {false, true}) {
- for (float max_bias : {0.0f, 8.0f}) {
- if (!mask && max_bias > 0.0f) continue;
- for (float scale : {1.0f, 0.1f}) {
- for (int64_t ne0 : {16, 1024}) {
- for (int64_t ne1 : {16, 1024}) {
+ for (bool mask : { false, true }) {
+ for (float max_bias : { 0.0f, 8.0f }) {
+ if (!mask && max_bias > 0.0f) {
+ continue;
+ }
+ for (float scale : { 1.0f, 0.1f }) {
+ for (int64_t ne0 : { 16, 1024 }) {
+ for (int64_t ne1 : { 16, 1024 }) {
if (mask) {
- for (ggml_type m_prec : {GGML_TYPE_F32, GGML_TYPE_F16}) {
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, m_prec, scale, max_bias));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, scale, max_bias));
+ for (ggml_type m_prec : { GGML_TYPE_F32, GGML_TYPE_F16 }) {
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { ne0, ne1, 1, 1 }, mask,
+ m_prec, scale, max_bias));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { ne0 - 1, ne1 - 1, 1, 1 },
+ mask, m_prec, scale, max_bias));
}
} else {
/* The precision of mask here doesn't matter as boolean mask is false */
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { ne0, ne1, 1, 1 }, mask,
+ GGML_TYPE_F32, scale, max_bias));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { ne0 - 1, ne1 - 1, 1, 1 }, mask,
+ GGML_TYPE_F32, scale, max_bias));
}
}
}
}
}
}
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, GGML_TYPE_F32, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 8.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 8.0f));
-
- for (float max_bias : {0.0f, 8.0f}) {
- for (float scale : {1.0f, 0.1f}) {
- for (int64_t ne0 : {16, 1024}) {
- for (int64_t ne1 : {16, 1024}) {
- test_cases.emplace_back(new test_soft_max_back(GGML_TYPE_F32, {ne0, ne1, 1, 1}, scale, max_bias));
- test_cases.emplace_back(new test_soft_max_back(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, scale, max_bias));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 16, 2, 32, 1 }, true, GGML_TYPE_F32, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 16, 2, 32, 1 }, true, GGML_TYPE_F16, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 16, 2, 32, 1 }, false, GGML_TYPE_F32, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 32, 2, 32, 1 }, true, GGML_TYPE_F32, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 32, 2, 32, 1 }, true, GGML_TYPE_F16, 0.1f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 32, 2, 32, 1 }, true, GGML_TYPE_F32, 0.1f, 8.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 32, 2, 32, 1 }, true, GGML_TYPE_F16, 0.1f, 8.0f));
+
+ for (float max_bias : { 0.0f, 8.0f }) {
+ for (float scale : { 1.0f, 0.1f }) {
+ for (int64_t ne0 : { 16, 1024 }) {
+ for (int64_t ne1 : { 16, 1024 }) {
+ test_cases.emplace_back(new test_soft_max_back(GGML_TYPE_F32, { ne0, ne1, 1, 1 }, scale, max_bias));
+ test_cases.emplace_back(
+ new test_soft_max_back(GGML_TYPE_F32, { ne0 - 1, ne1 - 1, 1, 1 }, scale, max_bias));
}
}
}
}

- for (bool fw : {true, false}) { // fw == forward
+ for (bool fw : { true, false }) { // fw == forward
bool all = true;

for (float v : { 0, 1 }) {
for (float fs : { 1.0f, 1.4245f }) {
for (float ef : { 0.0f, 0.7465f }) {
for (float af : { 1.0f, 1.4245f }) {
- for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
- for (bool ff : {false, true}) { // freq_factors
- test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, 0, 512, fs, ef, af, ff, v, fw)); // llama 7B
+ for (ggml_type type : { GGML_TYPE_F32, GGML_TYPE_F16 }) {
+ for (bool ff : { false, true }) { // freq_factors
+ test_cases.emplace_back(new test_rope(type, { 128, 32, 2, 1 }, 128, 0, 512, fs, ef, af,
+ ff, v, fw)); // llama 7B

if (all) {
- test_cases.emplace_back(new test_rope(type, {128, 40, 2, 1}, 128, 0, 512, fs, ef, af, ff, v, fw)); // llama 13B
- test_cases.emplace_back(new test_rope(type, {128, 52, 2, 1}, 128, 0, 512, fs, ef, af, ff, v, fw)); // llama 30B
- test_cases.emplace_back(new test_rope(type, {128, 64, 2, 1}, 128, 0, 512, fs, ef, af, ff, v, fw)); // llama 65B
+ test_cases.emplace_back(new test_rope(type, { 128, 40, 2, 1 }, 128, 0, 512, fs, ef,
+ af, ff, v, fw)); // llama 13B
+ test_cases.emplace_back(new test_rope(type, { 128, 52, 2, 1 }, 128, 0, 512, fs, ef,
+ af, ff, v, fw)); // llama 30B
+ test_cases.emplace_back(new test_rope(type, { 128, 64, 2, 1 }, 128, 0, 512, fs, ef,
+ af, ff, v, fw)); // llama 65B
}

if (all) {
- test_cases.emplace_back(new test_rope(type, { 64, 1, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 7B)
- test_cases.emplace_back(new test_rope(type, { 64, 71, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 7B)
- test_cases.emplace_back(new test_rope(type, { 64, 8, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B)
- test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, 2, 512, fs, ef, af, ff, v, fw)); // neox (stablelm)
- test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, 2, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
+ test_cases.emplace_back(new test_rope(type, { 64, 1, 2, 1 }, 64, 2, 512, fs, ef, af,
+ ff, v, fw)); // neox (falcon 7B)
+ test_cases.emplace_back(new test_rope(type, { 64, 71, 2, 1 }, 64, 2, 512, fs, ef,
+ af, ff, v, fw)); // neox (falcon 7B)
+ test_cases.emplace_back(new test_rope(type, { 64, 8, 2, 1 }, 64, 2, 512, fs, ef, af,
+ ff, v, fw)); // neox (falcon 40B)
+ test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1 }, 20, 2, 512, fs, ef,
+ af, ff, v, fw)); // neox (stablelm)
+ test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1 }, 32, 2, 512, fs, ef,
+ af, ff, v, fw)); // neox (phi-2)
}

if (all) {
- test_cases.emplace_back(new test_rope(type, {128, 12, 2, 1}, 128, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl 2B)
- test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 128, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl 7B)
- test_cases.emplace_back(new test_rope(type, { 80, 16, 2, 1}, 80, GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl ViT)
+ test_cases.emplace_back(new test_rope(type, { 128, 12, 2, 1 }, 128,
+ GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v,
+ fw)); // rope_multi,m-rope (qwen2vl 2B)
+ test_cases.emplace_back(new test_rope(type, { 128, 28, 2, 1 }, 128,
+ GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v,
+ fw)); // rope_multi,m-rope (qwen2vl 7B)
+ test_cases.emplace_back(new test_rope(type, { 80, 16, 2, 1 }, 80,
+ GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v,
+ fw)); // rope_multi,m-rope (qwen2vl ViT)
}

- test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B)
+ test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1 }, 64, 2, 512, fs, ef, af,
+ ff, v, fw)); // neox (falcon 40B)
}
}

@@ -4462,29 +4421,34 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}

for (int v : { 0, 1, 2, 3 }) {
- for (int dim : { 0, 1, 2, 3, }) {
- test_cases.emplace_back(new test_concat(GGML_TYPE_F32, {11, 12, 13, 14}, 7, dim, v));
- test_cases.emplace_back(new test_concat(GGML_TYPE_I32, {11, 12, 13, 14}, 7, dim, v));
+ for (int dim : {
+ 0,
+ 1,
+ 2,
+ 3,
+ }) {
+ test_cases.emplace_back(new test_concat(GGML_TYPE_F32, { 11, 12, 13, 14 }, 7, dim, v));
+ test_cases.emplace_back(new test_concat(GGML_TYPE_I32, { 11, 12, 13, 14 }, 7, dim, v));
}
}

- for (ggml_sort_order order : {GGML_SORT_ORDER_ASC, GGML_SORT_ORDER_DESC}) {
- test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {8, 1, 1, 1}, order));
- test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {16, 10, 10, 10}, order));
- test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {60, 10, 10, 10}, order)); // qwen
+ for (ggml_sort_order order : { GGML_SORT_ORDER_ASC, GGML_SORT_ORDER_DESC }) {
+ test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, { 8, 1, 1, 1 }, order));
+ test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, { 16, 10, 10, 10 }, order));
+ test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, { 60, 10, 10, 10 }, order)); // qwen
}

- for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
- test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
- test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
- test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode));
+ for (ggml_scale_mode mode : { GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR }) {
+ test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 2 }, 2, mode));
+ test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, { 512, 512, 3, 2 }, 2, mode, true));
+ test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, { 2, 5, 7, 11 }, { 5, 7, 11, 13 }, mode));
}

test_cases.emplace_back(new test_sum());
test_cases.emplace_back(new test_sum_rows());
test_cases.emplace_back(new test_mean());
- test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {64, 64, 320, 1}));
- test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
+ test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, { 64, 64, 320, 1 }));
+ test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, { 9, 9, 1280, 1 }));
test_cases.emplace_back(new test_acc());
test_cases.emplace_back(new test_pad());
test_cases.emplace_back(new test_pad_reflect_1d());
@@ -4494,30 +4458,60 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {

for (int hsk : { 64, 80, 128, 192, 256, 576 }) {
for (int hsv : { 64, 80, 128, 192, 256, 512 }) {
- if (hsk != 192 && hsk != 576 && hsk != hsv) continue;
- if (hsk == 192 && (hsv != 128 && hsv != 192)) continue;
- if (hsk == 576 && hsv != 512) continue; // DeepSeek MLA
+ if (hsk != 192 && hsk != 576 && hsk != hsv) {
+ continue;
+ }
+ if (hsk == 192 && (hsv != 128 && hsv != 192)) {
+ continue;
+ }
+ if (hsk == 576 && hsv != 512) {
+ continue; // DeepSeek MLA
+ }

- for (bool mask : { true, false } ) {
+ for (bool mask : { true, false }) {
for (float max_bias : { 0.0f, 8.0f }) {
- if (!mask && max_bias > 0.0f) continue;
- for (float logit_softcap : {0.0f, 10.0f}) {
- if (hsk != 128 && logit_softcap != 0.0f) continue;
- for (int nh : { 4, }) {
+ if (!mask && max_bias > 0.0f) {
+ continue;
+ }
+ for (float logit_softcap : { 0.0f, 10.0f }) {
+ if (hsk != 128 && logit_softcap != 0.0f) {
+ continue;
+ }
+ for (int nh : {
+ 4,
+ }) {
for (int nr : { 1, 4, 16 }) {
- if (nr == 16 && hsk != 128) continue;
- for (int kv : { 512, 1024, }) {
- if (nr != 1 && kv != 512) continue;
- for (int nb : { 1, 3, 32, 35, }) {
- for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) {
- if (hsk != 128 && prec == GGML_PREC_DEFAULT) continue;
- for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) {
- test_cases.emplace_back(new test_flash_attn_ext(
- hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV));
+ if (nr == 16 && hsk != 128) {
+ continue;
+ }
+ for (int kv : {
+ 512,
+ 1024,
+ }) {
+ if (nr != 1 && kv != 512) {
+ continue;
+ }
+ for (int nb : {
+ 1,
+ 3,
+ 32,
+ 35,
+ }) {
+ for (ggml_prec prec : { GGML_PREC_F32, GGML_PREC_DEFAULT }) {
+ if (hsk != 128 && prec == GGML_PREC_DEFAULT) {
+ continue;
+ }
+ for (ggml_type type_KV :
+ { GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0 }) {
+ test_cases.emplace_back(
+ new test_flash_attn_ext(hsk, hsv, nh, nr, kv, nb, mask, max_bias,
+ logit_softcap, prec, type_KV));
// run fewer test cases permuted
- if (mask == true && max_bias == 0.0f && logit_softcap == 0 && kv == 512) {
+ if (mask == true && max_bias == 0.0f && logit_softcap == 0 &&
+ kv == 512) {
test_cases.emplace_back(new test_flash_attn_ext(
- hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, {0, 2, 1, 3}));
+ hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec,
+ type_KV, { 0, 2, 1, 3 }));
}
}
}
@@ -4531,12 +4525,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
}
}

- test_cases.emplace_back(new test_cross_entropy_loss (GGML_TYPE_F32, { 10, 5, 4, 3}));
- test_cases.emplace_back(new test_cross_entropy_loss (GGML_TYPE_F32, {30000, 1, 1, 1}));
- test_cases.emplace_back(new test_cross_entropy_loss_back(GGML_TYPE_F32, { 10, 5, 4, 3}));
- test_cases.emplace_back(new test_cross_entropy_loss_back(GGML_TYPE_F32, {30000, 1, 1, 1}));
+ test_cases.emplace_back(new test_cross_entropy_loss(GGML_TYPE_F32, { 10, 5, 4, 3 }));
+ test_cases.emplace_back(new test_cross_entropy_loss(GGML_TYPE_F32, { 30000, 1, 1, 1 }));
+ test_cases.emplace_back(new test_cross_entropy_loss_back(GGML_TYPE_F32, { 10, 5, 4, 3 }));
+ test_cases.emplace_back(new test_cross_entropy_loss_back(GGML_TYPE_F32, { 30000, 1, 1, 1 }));

- test_cases.emplace_back(new test_opt_step_adamw(GGML_TYPE_F32, {10, 5, 4, 3}));
+ test_cases.emplace_back(new test_opt_step_adamw(GGML_TYPE_F32, { 10, 5, 4, 3 }))

// these tests are disabled to save execution time, but they can be handy for debugging
#if 0
@@ -4553,58 +4547,77 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
std::vector<std::unique_ptr<test_case>> test_cases;

- test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 1, 1, 1}));
- test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {4096, 1, 1, 1}, {1, 512, 1, 1}));
+ test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, { 4096, 1, 1, 1 }, { 1, 1, 1, 1 }));
+ test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, { 4096, 1, 1, 1 }, { 1, 512, 1, 1 }));

- test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, {512, 3072, 1, 1}));
- test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {3072, 512, 2, 1}, {0, 2, 1, 3}));
+ test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F16, { 512, 3072, 1, 1 }));
+ test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, { 8192, 512, 2, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, { 3072, 512, 2, 1 }, { 0, 2, 1, 3 }));

- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
- test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 4096, 4096, 5, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 77, 4096, 5, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 1024, 1024, 10, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 77, 1024, 10, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 256, 256, 20, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 64, 64, 20, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));
+ test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, { 77, 64, 20, 1 }, false, GGML_TYPE_F32, 1.0f, 0.0f));

- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32, 10, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1}));
- test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32000, 512, 1, 1}));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 32, 10, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 1024, 10, 1, 1 }));
+ test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, { 32000, 512, 1, 1 }));

- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, {8, 1}, {4, 1}, {0, 2, 1, 3}));
- test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, {8, 1}, {4, 1}, {0, 1, 2, 3}, true));
+ test_cases.emplace_back(
+ new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16416, 1, 128, { 8, 1 }, { 4, 1 }, { 0, 2, 1, 3 }));
+ test_cases.emplace_back(
+ new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 1, 16416, { 8, 1 }, { 4, 1 }, { 0, 1, 2, 3 }, true));

- for (int bs : {1, 2, 3, 4, 5, 8, 512}) {
+ for (int bs : { 1, 2, 3, 4, 5, 8, 512 }) {
for (ggml_type type_a : all_types) {
- for (ggml_type type_b : {GGML_TYPE_F32}) {
- test_cases.emplace_back(new test_mul_mat(type_a, type_b, 4096, bs, 14336, {1, 1}, {1, 1}));
+ for (ggml_type type_b : { GGML_TYPE_F32 }) {
+ test_cases.emplace_back(new test_mul_mat(type_a, type_b, 4096, bs, 14336, { 1, 1 }, { 1, 1 }));
}
}
}

- for (int K : {3, 5}) {
- for (int IC : {256, 2560}) {
- for (int IW_IH : {32, 64, 256}) {
+ for (int K : { 3, 5 }) {
+ for (int IC : { 256, 2560 }) {
+ for (int IW_IH : { 32, 64, 256 }) {
if (IC == 2560 && IW_IH == 256) {
// too big
continue;
}
- test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {IW_IH, IW_IH, IC, 1}, {K, K, IC, 1}, 1, 1, 1, 1, 1, 1, true));
+ test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32,
+ { IW_IH, IW_IH, IC, 1 }, { K, K, IC, 1 }, 1, 1, 1, 1, 1, 1,
+ true));
}
}
}

- for (int kv : { 4096, 8192, 16384, }) {
- for (int hs : { 64, 128, }) {
- for (int nr : { 1, 4, }) {
- test_cases.emplace_back(new test_flash_attn_ext(hs, hs, 8, nr, kv, 1, true, 0, 0, GGML_PREC_F32, GGML_TYPE_F16));
+ for (int kv : {
+ 4096,
+ 8192,
+ 16384,
+ }) {
+ for (int hs : {
+ 64,
+ 128,
+ }) {
+ for (int nr : {
+ 1,
+ 4,
+ }) {
+ test_cases.emplace_back(
+ new test_flash_attn_ext(hs, hs, 8, nr, kv, 1, true, 0, 0, GGML_PREC_F32, GGML_TYPE_F16));
}
}
}

- test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
- test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
+ test_cases.emplace_back(new test_conv_2d_dw({ 512, 512, 256, 1 }, { 3, 3, 1, 256 }, 1, 1, 1, false));
+ test_cases.emplace_back(new test_conv_2d_dw({ 512, 512, 256, 1 }, { 3, 3, 1, 256 }, 1, 1, 1, true));
+
+ test_cases.emplace_back(new test_conv_transpose_2d({ 256, 256, 256, 1 }, { 3, 3, 16, 256 }, 1));
+
+ test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 256, 256, 3, 1 }));

return test_cases;
}
@@ -4685,10 +4698,10 @@ static void usage(char ** argv) {
}

int main(int argc, char ** argv) {
- test_mode mode = MODE_TEST;
+ test_mode mode = MODE_TEST;
const char * op_name_filter = nullptr;
const char * backend_filter = nullptr;
- const char * params_filter = nullptr;
+ const char * params_filter = nullptr;

for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "test") == 0) {
@@ -4752,14 +4765,15 @@ int main(int argc, char ** argv) {
GGML_ASSERT(backend != NULL);

ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
- auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
+ auto ggml_backend_set_n_threads_fn =
+ (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
if (ggml_backend_set_n_threads_fn) {
// TODO: better value for n_threads
ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency());
}

printf(" Device description: %s\n", ggml_backend_dev_description(dev));
- size_t free, total; // NOLINT
+ size_t free, total; // NOLINT
ggml_backend_dev_memory(dev, &free, &total);
printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024);
printf("\n");