Fix gpt-oss model architecture to match GGUF tensor format
The gpt-oss model architecture code expected fused tensors (attn_qkv, ffn_gate_up_exps), but the actual GGUF files contain separate tensors (attn_q/k/v, ffn_gate_exps/up_exps), causing nil pointer panics during model loading.

Changes:
- model/models/gptoss/model.go: Updated AttentionBlock to use separate Query/Key/Value fields instead of fused QKV, modified Forward() to compute the projections separately
- model/models/gptoss/model.go: Updated MLPBlock to use separate Gate/Up fields instead of fused GateUp, simplified the Forward() logic
- fs/ggml/type.go: Reorganized the MXFP4 tensor type constant ordering
- ml/backend/ggml/ggml/include/ggml.h: Moved GGML_TYPE_MXFP4 to the end of the enum to match the GGUF file format specification
- ml/backend/ggml/ggml/src/ggml.c: Updated the type name array to match the reordered enum
- CLAUDE.md: Documented the gpt-oss model compatibility fix

Result: gpt-oss:20b now loads and runs successfully on a Tesla K80, with all 25 layers offloading to the GPU correctly.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
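For the first two model.go items above, here is a minimal, self-contained Go sketch of the shape of the fix. It assumes the struct-tag style of tensor binding used elsewhere in ollama's Go model code; the Tensor and Linear types and the exact field names are illustrative stand-ins, not the actual ml/nn API.

package main

import "fmt"

// Placeholder types standing in for ollama's ml/nn tensor and layer types.
type Tensor struct{ name string }

type Linear struct{ Weight *Tensor }

// AttentionBlock declares one field per tensor actually present in the GGUF
// file (attn_q, attn_k, attn_v). With a single fused QKV field, the loader
// finds no matching "attn_qkv" tensor, leaves the field nil, and Forward()
// panics on the nil pointer.
type AttentionBlock struct {
	Query *Linear `gguf:"attn_q"`
	Key   *Linear `gguf:"attn_k"`
	Value *Linear `gguf:"attn_v"`
}

// MLPBlock follows the same pattern for the expert feed-forward weights
// (ffn_gate_exps and ffn_up_exps instead of a fused ffn_gate_up_exps).
type MLPBlock struct {
	Gate *Linear `gguf:"ffn_gate_exps"`
	Up   *Linear `gguf:"ffn_up_exps"`
}

func main() {
	blk := AttentionBlock{
		Query: &Linear{Weight: &Tensor{name: "blk.0.attn_q.weight"}},
		Key:   &Linear{Weight: &Tensor{name: "blk.0.attn_k.weight"}},
		Value: &Linear{Weight: &Tensor{name: "blk.0.attn_v.weight"}},
	}
	// Forward() now computes the query, key, and value projections from these
	// three weights separately instead of splitting a fused QKV tensor.
	fmt.Println(blk.Query.Weight.name, blk.Key.Weight.name, blk.Value.Weight.name)
}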
ml/backend/ggml/ggml/include/ggml.h (vendored), 7 changed lines
@@ -353,7 +353,7 @@ extern "C" {
         GGML_TYPE_F16 = 1,
         GGML_TYPE_Q4_0 = 2,
         GGML_TYPE_Q4_1 = 3,
-        GGML_TYPE_MXFP4 = 4, // Formerly removed type GGML_TYPE_Q4_2
+        // GGML_TYPE_Q4_2 = 4, support has been removed
         // GGML_TYPE_Q4_3 = 5, support has been removed
         GGML_TYPE_Q5_0 = 6,
         GGML_TYPE_Q5_1 = 7,
@@ -385,10 +385,11 @@ extern "C" {
         // GGML_TYPE_Q4_0_8_8 = 33,
         GGML_TYPE_TQ1_0 = 34,
         GGML_TYPE_TQ2_0 = 35,
-        // GGML_TYPE_IQ4_NL_4_4 = 36,
+        // GGML_TYPE_IQ4_NL_4_4 = 36, support has been removed from gguf files
         // GGML_TYPE_IQ4_NL_4_8 = 37,
         // GGML_TYPE_IQ4_NL_8_8 = 38,
-        GGML_TYPE_COUNT = 39,
+        GGML_TYPE_MXFP4 = 39,
+        GGML_TYPE_COUNT = 40,
     };

     // precision
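The enum reorder above matters because GGUF records each tensor's quantization type as a plain integer that must line up with ggml's enum values. A hypothetical Go sketch of that lookup follows; the constant values come from the diff above, but the function and names are illustrative, not ollama's fs/ggml API.

package main

import "fmt"

// Type ids as they appear on disk in GGUF files (values from the enum above).
const (
	typeQ4_1  uint32 = 3
	typeMXFP4 uint32 = 39 // MXFP4 lives at the end of the enum, not at 4
)

// typeName is an illustrative lookup. A loader built with MXFP4 mapped to 4
// would label real MXFP4 tensors (stored with id 39) as unknown.
func typeName(id uint32) string {
	switch id {
	case typeQ4_1:
		return "q4_1"
	case typeMXFP4:
		return "mxfp4"
	default:
		return fmt.Sprintf("unknown(%d)", id)
	}
}

func main() {
	fmt.Println(typeName(39)) // mxfp4
	fmt.Println(typeName(4))  // unknown(4): Q4_2 was removed, nothing maps here
}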
ml/backend/ggml/ggml/src/ggml.c (vendored), 20 changed lines
@@ -589,13 +589,11 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .to_float = (ggml_to_float_t) dequantize_row_q4_1,
         .from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
     },
-    [GGML_TYPE_MXFP4] = { // formerly deprecated GGML_TYPE_Q4_2
-        .type_name = "mxfp4",
-        .blck_size = MXFP4,
-        .type_size = sizeof(block_mxfp4),
-        .is_quantized = true,
-        .to_float = (ggml_to_float_t) dequantize_row_mxfp4,
-        .from_float_ref = (ggml_from_float_t) quantize_row_mxfp4_ref,
+    [4] = { // GGML_TYPE_Q4_2
+        .type_name = "DEPRECATED",
+        .blck_size = 0,
+        .type_size = 0,
+        .is_quantized = false,
     },
     [5] = { // GGML_TYPE_Q4_3
         .type_name = "DEPRECATED",
@@ -812,6 +810,14 @@ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
         .type_size = 0,
         .is_quantized = false,
     },
+    [GGML_TYPE_MXFP4] = {
+        .type_name = "mxfp4",
+        .blck_size = MXFP4,
+        .type_size = sizeof(block_mxfp4),
+        .is_quantized = true,
+        .to_float = (ggml_to_float_t) dequantize_row_mxfp4,
+        .from_float_ref = (ggml_from_float_t) quantize_row_mxfp4_ref,
+    },
 };

 const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
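The type_traits table above is indexed by the enum value, so the mxfp4 entry has to move to slot 39 along with the enum, while slot 4 reverts to a DEPRECATED placeholder. Below is a rough Go analogy of that indexing; ggml itself is C, and these names are illustrative, not the real structures.

package main

import "fmt"

// A Go analogy of ggml's type_traits array: traits are looked up by the
// tensor's type id, so a usable entry must sit at the index the file uses.
type traits struct {
	name    string
	toFloat func(src []byte, dst []float32) // nil for deprecated slots
}

func main() {
	table := map[uint32]traits{
		3:  {name: "q4_1", toFloat: func([]byte, []float32) {}},
		39: {name: "mxfp4", toFloat: func([]byte, []float32) {}},
		// index 4 stays a deprecated placeholder (the old GGML_TYPE_Q4_2)
	}

	if tr, ok := table[39]; ok && tr.toFloat != nil {
		fmt.Println("dequantize with", tr.name)
	}
	if _, ok := table[4]; !ok {
		fmt.Println("id 4 has no usable traits; treating it as MXFP4 would fail")
	}
}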