From 66fca1b685c5ae0b945f071e00982093a4de235b Mon Sep 17 00:00:00 2001 From: Shang Chieh Tseng Date: Wed, 29 Oct 2025 15:24:08 +0800 Subject: [PATCH] Remove remaining MMA/WMMA template instances for CC 3.7 optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete 24 tensor core template instance files that were missed in the initial optimization: - 19 fattn-mma-f16 template instances (various ncols1/ncols2 combinations) - 5 fattn-wmma-f16 template instances (kqfloat and kqhalf variants) These files implement tensor core operations (MMA/WMMA) which require Compute Capability 7.0+ and are not available on Tesla K80 (CC 3.7). Removing them completes the CC 3.7-only optimization. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../fattn-mma-f16-instance-ncols1_1-ncols2_16.cu | 5 ----- .../fattn-mma-f16-instance-ncols1_1-ncols2_8.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_16-ncols2_1.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_16-ncols2_2.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_16-ncols2_4.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_2-ncols2_16.cu | 5 ----- .../fattn-mma-f16-instance-ncols1_2-ncols2_4.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_2-ncols2_8.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_32-ncols2_1.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_32-ncols2_2.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_4-ncols2_16.cu | 5 ----- .../fattn-mma-f16-instance-ncols1_4-ncols2_2.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_4-ncols2_4.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_4-ncols2_8.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_64-ncols2_1.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_8-ncols2_1.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_8-ncols2_2.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_8-ncols2_4.cu | 10 ---------- .../fattn-mma-f16-instance-ncols1_8-ncols2_8.cu | 10 ---------- .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 10 ---------- .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 9 --------- .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 10 ---------- .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 10 ---------- .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 8 -------- 24 files changed, 222 deletions(-) delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu delete mode 100644 ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu deleted file mode 100644 index fb26abeb..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +++ /dev/null @@ -1,5 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(576, 512, 1, 16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu deleted file mode 100644 index dc168290..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 1, 8); -DECL_FATTN_MMA_F16_CASE(80, 80, 1, 8); -DECL_FATTN_MMA_F16_CASE(96, 96, 1, 8); -DECL_FATTN_MMA_F16_CASE(112, 112, 1, 8); -DECL_FATTN_MMA_F16_CASE(128, 128, 1, 8); -DECL_FATTN_MMA_F16_CASE(256, 256, 1, 8); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu deleted file mode 100644 index 9d3cfd8e..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 16, 1); -DECL_FATTN_MMA_F16_CASE(80, 80, 16, 1); -DECL_FATTN_MMA_F16_CASE(96, 96, 16, 1); -DECL_FATTN_MMA_F16_CASE(112, 112, 16, 1); -DECL_FATTN_MMA_F16_CASE(128, 128, 16, 1); -DECL_FATTN_MMA_F16_CASE(256, 256, 16, 1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu deleted file mode 100644 index 2e1883af..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 16, 2); -DECL_FATTN_MMA_F16_CASE(80, 80, 16, 2); -DECL_FATTN_MMA_F16_CASE(96, 96, 16, 2); -DECL_FATTN_MMA_F16_CASE(112, 112, 16, 2); -DECL_FATTN_MMA_F16_CASE(128, 128, 16, 2); -DECL_FATTN_MMA_F16_CASE(256, 256, 16, 2); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu deleted file mode 100644 index 2074e954..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 16, 4); -DECL_FATTN_MMA_F16_CASE(80, 80, 16, 4); -DECL_FATTN_MMA_F16_CASE(96, 96, 16, 4); -DECL_FATTN_MMA_F16_CASE(112, 112, 16, 4); -DECL_FATTN_MMA_F16_CASE(128, 128, 16, 4); -DECL_FATTN_MMA_F16_CASE(256, 256, 16, 4); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu deleted file mode 100644 index f011a208..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +++ /dev/null @@ -1,5 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(576, 512, 2, 16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu deleted file mode 100644 index 24c64cf0..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 2, 4); -DECL_FATTN_MMA_F16_CASE(80, 80, 2, 4); -DECL_FATTN_MMA_F16_CASE(96, 96, 2, 4); -DECL_FATTN_MMA_F16_CASE(112, 112, 2, 4); -DECL_FATTN_MMA_F16_CASE(128, 128, 2, 4); -DECL_FATTN_MMA_F16_CASE(256, 256, 2, 4); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu deleted file mode 100644 index 163b1d93..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 2, 8); -DECL_FATTN_MMA_F16_CASE(80, 80, 2, 8); -DECL_FATTN_MMA_F16_CASE(96, 96, 2, 8); -DECL_FATTN_MMA_F16_CASE(112, 112, 2, 8); -DECL_FATTN_MMA_F16_CASE(128, 128, 2, 8); -DECL_FATTN_MMA_F16_CASE(256, 256, 2, 8); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu deleted file mode 100644 index 0543532e..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 32, 1); -DECL_FATTN_MMA_F16_CASE(80, 80, 32, 1); -DECL_FATTN_MMA_F16_CASE(96, 96, 32, 1); -DECL_FATTN_MMA_F16_CASE(112, 112, 32, 1); -DECL_FATTN_MMA_F16_CASE(128, 128, 32, 1); -DECL_FATTN_MMA_F16_CASE(256, 256, 32, 1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu deleted file mode 100644 index 407b6cf4..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 32, 2); -DECL_FATTN_MMA_F16_CASE(80, 80, 32, 2); -DECL_FATTN_MMA_F16_CASE(96, 96, 32, 2); -DECL_FATTN_MMA_F16_CASE(112, 112, 32, 2); -DECL_FATTN_MMA_F16_CASE(128, 128, 32, 2); -DECL_FATTN_MMA_F16_CASE(256, 256, 32, 2); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu deleted file mode 100644 index f5fd0e23..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +++ /dev/null @@ -1,5 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(576, 512, 4, 16); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu deleted file mode 100644 index 5e466850..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 4, 2); -DECL_FATTN_MMA_F16_CASE(80, 80, 4, 2); -DECL_FATTN_MMA_F16_CASE(96, 96, 4, 2); -DECL_FATTN_MMA_F16_CASE(112, 112, 4, 2); -DECL_FATTN_MMA_F16_CASE(128, 128, 4, 2); -DECL_FATTN_MMA_F16_CASE(256, 256, 4, 2); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu deleted file mode 100644 index 1ada657f..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 4, 4); -DECL_FATTN_MMA_F16_CASE(80, 80, 4, 4); -DECL_FATTN_MMA_F16_CASE(96, 96, 4, 4); -DECL_FATTN_MMA_F16_CASE(112, 112, 4, 4); -DECL_FATTN_MMA_F16_CASE(128, 128, 4, 4); -DECL_FATTN_MMA_F16_CASE(256, 256, 4, 4); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu deleted file mode 100644 index bad296b4..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 4, 8); -DECL_FATTN_MMA_F16_CASE(80, 80, 4, 8); -DECL_FATTN_MMA_F16_CASE(96, 96, 4, 8); -DECL_FATTN_MMA_F16_CASE(112, 112, 4, 8); -DECL_FATTN_MMA_F16_CASE(128, 128, 4, 8); -DECL_FATTN_MMA_F16_CASE(256, 256, 4, 8); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu deleted file mode 100644 index 0d7a9c72..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 64, 1); -DECL_FATTN_MMA_F16_CASE(80, 80, 64, 1); -DECL_FATTN_MMA_F16_CASE(96, 96, 64, 1); -DECL_FATTN_MMA_F16_CASE(112, 112, 64, 1); -DECL_FATTN_MMA_F16_CASE(128, 128, 64, 1); -DECL_FATTN_MMA_F16_CASE(256, 256, 64, 1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu deleted file mode 100644 index 9d5a9976..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 8, 1); -DECL_FATTN_MMA_F16_CASE(80, 80, 8, 1); -DECL_FATTN_MMA_F16_CASE(96, 96, 8, 1); -DECL_FATTN_MMA_F16_CASE(112, 112, 8, 1); -DECL_FATTN_MMA_F16_CASE(128, 128, 8, 1); -DECL_FATTN_MMA_F16_CASE(256, 256, 8, 1); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu deleted file mode 100644 index a6e6f093..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 8, 2); -DECL_FATTN_MMA_F16_CASE(80, 80, 8, 2); -DECL_FATTN_MMA_F16_CASE(96, 96, 8, 2); -DECL_FATTN_MMA_F16_CASE(112, 112, 8, 2); -DECL_FATTN_MMA_F16_CASE(128, 128, 8, 2); -DECL_FATTN_MMA_F16_CASE(256, 256, 8, 2); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu deleted file mode 100644 index 86d4ffae..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 8, 4); -DECL_FATTN_MMA_F16_CASE(80, 80, 8, 4); -DECL_FATTN_MMA_F16_CASE(96, 96, 8, 4); -DECL_FATTN_MMA_F16_CASE(112, 112, 8, 4); -DECL_FATTN_MMA_F16_CASE(128, 128, 8, 4); -DECL_FATTN_MMA_F16_CASE(256, 256, 8, 4); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu deleted file mode 100644 index 680a13ca..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-mma-f16.cuh" - -DECL_FATTN_MMA_F16_CASE(64, 64, 8, 8); -DECL_FATTN_MMA_F16_CASE(80, 80, 8, 8); -DECL_FATTN_MMA_F16_CASE(96, 96, 8, 8); -DECL_FATTN_MMA_F16_CASE(112, 112, 8, 8); -DECL_FATTN_MMA_F16_CASE(128, 128, 8, 8); -DECL_FATTN_MMA_F16_CASE(256, 256, 8, 8); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu deleted file mode 100644 index 2d94e65c..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, float); -DECL_FATTN_WMMA_F16_CASE(80, 16, float); -DECL_FATTN_WMMA_F16_CASE(96, 16, float); -DECL_FATTN_WMMA_F16_CASE(112, 16, float); -DECL_FATTN_WMMA_F16_CASE(128, 16, float); -DECL_FATTN_WMMA_F16_CASE(256, 16, float); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu deleted file mode 100644 index c3d9df3c..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +++ /dev/null @@ -1,9 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, float); -DECL_FATTN_WMMA_F16_CASE(80, 32, float); -DECL_FATTN_WMMA_F16_CASE(96, 32, float); -DECL_FATTN_WMMA_F16_CASE(112, 32, float); -DECL_FATTN_WMMA_F16_CASE(128, 32, float); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu deleted file mode 100644 index bb680e40..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 16, half); -DECL_FATTN_WMMA_F16_CASE(80, 16, half); -DECL_FATTN_WMMA_F16_CASE(96, 16, half); -DECL_FATTN_WMMA_F16_CASE(112, 16, half); -DECL_FATTN_WMMA_F16_CASE(128, 16, half); -DECL_FATTN_WMMA_F16_CASE(256, 16, half); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu deleted file mode 100644 index 073f71b1..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +++ /dev/null @@ -1,10 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 32, half); -DECL_FATTN_WMMA_F16_CASE(80, 32, half); -DECL_FATTN_WMMA_F16_CASE(96, 32, half); -DECL_FATTN_WMMA_F16_CASE(112, 32, half); -DECL_FATTN_WMMA_F16_CASE(128, 32, half); -DECL_FATTN_WMMA_F16_CASE(256, 32, half); diff --git a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu deleted file mode 100644 index d30710c5..00000000 --- a/ml/backend/ggml/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +++ /dev/null @@ -1,8 +0,0 @@ -// This file has been autogenerated by generate_cu_files.py, do not edit manually. - -#include "../fattn-wmma-f16.cuh" - -DECL_FATTN_WMMA_F16_CASE(64, 8, half); -DECL_FATTN_WMMA_F16_CASE(96, 8, half); -DECL_FATTN_WMMA_F16_CASE(128, 8, half); -DECL_FATTN_WMMA_F16_CASE(256, 8, half);