package ggml

import (
	"fmt"
	"log/slog"
	"strings"
)

// FileType is the Go equivalent to llama_ftype used for gguf file typing.
type FileType uint32

// The order of these constants is significant: the values come from iota and
// must line up with the upstream enum, so the unexported placeholders may not
// be removed or reordered.
const (
	FileTypeF32 FileType = iota
	FileTypeF16
	FileTypeQ4_0
	FileTypeQ4_1
	fileTypeQ4_1_F16 // unused by GGML
	fileTypeQ4_2     // unused by GGML
	fileTypeQ4_3     // unused by GGML
	FileTypeQ8_0
	FileTypeQ5_0
	FileTypeQ5_1
	FileTypeQ2_K
	FileTypeQ3_K_S
	FileTypeQ3_K_M
	FileTypeQ3_K_L
	FileTypeQ4_K_S
	FileTypeQ4_K_M
	FileTypeQ5_K_S
	FileTypeQ5_K_M
	FileTypeQ6_K
	fileTypeIQ2_XXS // not supported by ollama
	fileTypeIQ2_XS  // not supported by ollama
	FileTypeQ2_K_S
	fileTypeIQ3_XS  // not supported by ollama
	fileTypeIQ3_XXS // not supported by ollama
	fileTypeIQ1_S   // not supported by ollama
	fileTypeIQ4_NL  // not supported by ollama
	fileTypeIQ3_S   // not supported by ollama
	fileTypeIQ3_M   // not supported by ollama
	fileTypeIQ2_S   // not supported by ollama
	fileTypeIQ2_M   // not supported by ollama
	fileTypeIQ4_XS  // not supported by ollama
	fileTypeIQ1_M   // not supported by ollama
	FileTypeBF16
	fileTypeQ4_0_4_4 // unused by GGML
	fileTypeQ4_0_4_8 // unused by GGML
	fileTypeQ4_0_8_8 // unused by GGML
	fileTypeTQ1_0    // not supported by ollama
	fileTypeTQ2_0    // not supported by ollama

	// FileTypeUnknown is declared with an explicit value and no type, so it is
	// an untyped integer constant (1024) that converts implicitly to FileType
	// wherever one is expected.
	FileTypeUnknown = 1024
)

// ParseFileType parses the provided GGUF file type name.
// Only Ollama supported types are considered valid.
//
// "Q4_K" and "Q5_K" are accepted as aliases for "Q4_K_M" and "Q5_K_M". For any
// other unrecognized name it returns FileTypeUnknown and an error. Note that
// the list of types printed in that error is narrower than the set of names
// this function accepts.
func ParseFileType(s string) (FileType, error) {
	switch s {
	case "F32":
		return FileTypeF32, nil
	case "F16":
		return FileTypeF16, nil
	case "Q4_0":
		return FileTypeQ4_0, nil
	case "Q4_1":
		return FileTypeQ4_1, nil
	case "Q8_0":
		return FileTypeQ8_0, nil
	case "Q5_0":
		return FileTypeQ5_0, nil
	case "Q5_1":
		return FileTypeQ5_1, nil
	case "Q2_K":
		return FileTypeQ2_K, nil
	case "Q3_K_S":
		return FileTypeQ3_K_S, nil
	case "Q3_K_M":
		return FileTypeQ3_K_M, nil
	case "Q3_K_L":
		return FileTypeQ3_K_L, nil
	case "Q4_K_S":
		return FileTypeQ4_K_S, nil
	case "Q4_K_M", "Q4_K":
		return FileTypeQ4_K_M, nil
	case "Q5_K_S":
		return FileTypeQ5_K_S, nil
	case "Q5_K_M", "Q5_K":
		return FileTypeQ5_K_M, nil
	case "Q6_K":
		return FileTypeQ6_K, nil
	case "Q2_K_S":
		return FileTypeQ2_K_S, nil
	case "BF16":
		return FileTypeBF16, nil
	default:
		// Types advertised to the user in the error message.
		supportedFileTypes := []FileType{
			FileTypeF32,
			FileTypeF16,
			FileTypeQ4_K_S,
			FileTypeQ4_K_M,
			FileTypeQ8_0,
			// fsggml.FileTypeBF16, // TODO
		}
		strs := make([]string, len(supportedFileTypes))
		for i := range supportedFileTypes {
			strs[i] = supportedFileTypes[i].String()
		}

		return FileTypeUnknown, fmt.Errorf("unsupported quantization type %s - supported types are %s", s, strings.Join(strs, ", "))
	}
}

// String returns the name of the file type as accepted by ParseFileType, or
// "unknown" for any value without a supported name.
func (t FileType) String() string {
	switch t {
	case FileTypeF32:
		return "F32"
	case FileTypeF16:
		return "F16"
	case FileTypeQ4_0:
		return "Q4_0"
	case FileTypeQ4_1:
		return "Q4_1"
	case FileTypeQ8_0:
		return "Q8_0"
	case FileTypeQ5_0:
		return "Q5_0"
	case FileTypeQ5_1:
		return "Q5_1"
	case FileTypeQ2_K:
		return "Q2_K"
	case FileTypeQ3_K_S:
		return "Q3_K_S"
	case FileTypeQ3_K_M:
		return "Q3_K_M"
	case FileTypeQ3_K_L:
		return "Q3_K_L"
	case FileTypeQ4_K_S:
		return "Q4_K_S"
	case FileTypeQ4_K_M:
		return "Q4_K_M"
	case FileTypeQ5_K_S:
		return "Q5_K_S"
	case FileTypeQ5_K_M:
		return "Q5_K_M"
	case FileTypeQ6_K:
		return "Q6_K"
	case FileTypeQ2_K_S:
		return "Q2_K_S"
	case FileTypeBF16:
		return "BF16"
	default:
		return "unknown"
	}
}

// Value returns the file type as its underlying uint32.
func (t FileType) Value() uint32 {
	return uint32(t)
}

// ToTensorType maps a file type to the tensor type it is mapped to below.
// The S/M/L variants of a K-quant all map to the same tensor type. Unsupported
// file types log a warning and fall back to TensorTypeF32.
func (t FileType) ToTensorType() TensorType {
	switch t {
	case FileTypeF32:
		return TensorTypeF32
	case FileTypeF16:
		return TensorTypeF16
	case FileTypeQ4_0:
		return TensorTypeQ4_0
	case FileTypeQ4_1:
		return TensorTypeQ4_1
	case FileTypeQ8_0:
		return TensorTypeQ8_0
	case FileTypeQ5_0:
		return TensorTypeQ5_0
	case FileTypeQ5_1:
		return TensorTypeQ5_1
	case FileTypeQ2_K:
		return TensorTypeQ2_K
	case FileTypeQ3_K_S:
		return TensorTypeQ3_K
	case FileTypeQ3_K_M:
		return TensorTypeQ3_K
	case FileTypeQ3_K_L:
		return TensorTypeQ3_K
	case FileTypeQ4_K_S:
		return TensorTypeQ4_K
	case FileTypeQ4_K_M:
		return TensorTypeQ4_K
	case FileTypeQ5_K_S:
		return TensorTypeQ5_K
	case FileTypeQ5_K_M:
		return TensorTypeQ5_K
	case FileTypeQ6_K:
		return TensorTypeQ6_K
	case FileTypeQ2_K_S:
		return TensorTypeQ2_K
	case FileTypeBF16:
		return TensorTypeBF16
	default:
		slog.Warn("unsupported file type", "type", t)
		return TensorTypeF32
	}
}

// TensorType is equivalent to ggml_type for individual tensor types.
// Note: these are not the same as FileType.
type TensorType uint32

// As with FileType, the values come from iota, so the unexported placeholders
// keep the exported constants at their expected positions.
const (
	TensorTypeF32 TensorType = iota
	TensorTypeF16
	TensorTypeQ4_0
	TensorTypeQ4_1
	tensorTypeQ4_2 // unused by GGML
	tensorTypeQ4_3 // unused by GGML
	TensorTypeQ5_0
	TensorTypeQ5_1
	TensorTypeQ8_0
	TensorTypeQ8_1
	TensorTypeQ2_K
	TensorTypeQ3_K
	TensorTypeQ4_K
	TensorTypeQ5_K
	TensorTypeQ6_K
	TensorTypeQ8_K
	tensorTypeIQ2_XXS // not supported by ollama
	tensorTypeIQ2_XS  // not supported by ollama
	tensorTypeIQ3_XXS // not supported by ollama
	tensorTypeIQ1_S   // not supported by ollama
	tensorTypeIQ4_NL  // not supported by ollama
	tensorTypeIQ3_S   // not supported by ollama
	tensorTypeIQ2_S   // not supported by ollama
	tensorTypeIQ4_XS  // not supported by ollama
	TensorTypeI8
	TensorTypeI16
	TensorTypeI32
	TensorTypeI64
	TensorTypeF64
	tensorTypeIQ1_M // not supported by ollama
	TensorTypeBF16
	tensorTypeQ4_0_4_4   // unused by GGML
	tensorTypeQ4_0_4_8   // unused by GGML
	tensorTypeQ4_0_8_8   // unused by GGML
	tensorTypeTQ1_0      // not supported by ollama
	tensorTypeTQ2_0      // not supported by ollama
	tensorTypeIQ4_NL_4_4 // unused by GGML
	tensorTypeIQ4_NL_4_8 // unused by GGML
	tensorTypeIQ4_NL_8_8 // unused by GGML
)

// ParseTensorType parses the provided GGUF tensor type name.
// Only Ollama supported types are considered valid; any other name yields an
// error together with the zero TensorType, which callers must not use.
func ParseTensorType(s string) (TensorType, error) {
	switch s {
	case "F32":
		return TensorTypeF32, nil
	case "F16":
		return TensorTypeF16, nil
	case "Q4_0":
		return TensorTypeQ4_0, nil
	case "Q4_1":
		return TensorTypeQ4_1, nil
	case "Q5_0":
		return TensorTypeQ5_0, nil
	case "Q5_1":
		return TensorTypeQ5_1, nil
	case "Q8_0":
		return TensorTypeQ8_0, nil
	case "Q8_1":
		return TensorTypeQ8_1, nil
	case "Q2_K":
		return TensorTypeQ2_K, nil
	case "Q3_K":
		return TensorTypeQ3_K, nil
	case "Q4_K":
		return TensorTypeQ4_K, nil
	case "Q5_K":
		return TensorTypeQ5_K, nil
	case "Q6_K":
		return TensorTypeQ6_K, nil
	case "Q8_K":
		return TensorTypeQ8_K, nil
	case "F64":
		return TensorTypeF64, nil
	case "BF16":
		return TensorTypeBF16, nil
	default:
		return 0, fmt.Errorf("unsupported quantization type %s", s)
	}
}

// IsQuantized reports whether t is a quantized tensor type. The plain
// floating-point types (F32, F16, BF16, F64) and the plain integer types
// (I8, I16, I32, I64) are not quantized; every other value is reported as
// quantized.
func (t TensorType) IsQuantized() bool {
	switch t {
	case TensorTypeF32, TensorTypeF16, TensorTypeBF16, TensorTypeF64,
		TensorTypeI8, TensorTypeI16, TensorTypeI32, TensorTypeI64:
		return false
	default:
		return true
	}
}

// RowSize returns TypeSize() * ne / BlockSize() for this tensor type.
// TypeSize and BlockSize are defined elsewhere in the package; presumably they
// are the bytes per block and elements per block, making the result the byte
// size of ne elements (TODO confirm). The multiplication is performed before
// the integer division.
func (t TensorType) RowSize(ne uint64) uint64 {
	return t.TypeSize() * ne / t.BlockSize()
}

// String returns the name of the tensor type as accepted by ParseTensorType,
// or "unknown" for any value without a supported name.
func (t TensorType) String() string {
	switch t {
	case TensorTypeF32:
		return "F32"
	case TensorTypeF16:
		return "F16"
	case TensorTypeQ4_0:
		return "Q4_0"
	case TensorTypeQ4_1:
		return "Q4_1"
	case TensorTypeQ5_0:
		return "Q5_0"
	case TensorTypeQ5_1:
		return "Q5_1"
	case TensorTypeQ8_0:
		return "Q8_0"
	case TensorTypeQ8_1:
		return "Q8_1"
	case TensorTypeQ2_K:
		return "Q2_K"
	case TensorTypeQ3_K:
		return "Q3_K"
	case TensorTypeQ4_K:
		return "Q4_K"
	case TensorTypeQ5_K:
		return "Q5_K"
	case TensorTypeQ6_K:
		return "Q6_K"
	case TensorTypeQ8_K:
		return "Q8_K"
	case TensorTypeF64:
		return "F64"
	case TensorTypeBF16:
		return "BF16"
	default:
		return "unknown"
	}
}