mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-09 23:37:06 +00:00
This commit represents a complete rework after pulling the latest changes from official ollama/ollama repository and re-applying Tesla K80 compatibility patches. ## Key Changes ### CUDA Compute Capability 3.7 Support (Tesla K80) - Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt - Updated CMakePresets.json to include compute 3.7 in "CUDA 11" preset - Using 37-virtual (PTX with JIT compilation) for maximum compatibility ### Legacy Toolchain Compatibility - **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80) - **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7) - **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h) ### CPU Architecture Trade-offs Due to GCC 10.5 limitation, sacrificed newer CPU optimizations: - Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+) - Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA - Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility) ### Build System Updates - Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7 - Added -Wno-deprecated-gpu-targets flag to suppress warnings - Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI ### Upstream Sync Merged latest llama.cpp changes including: - Enhanced KV cache management with ISWA and hybrid memory support - Improved multi-modal support (mtmd framework) - New model architectures (Gemma3, Llama4, Qwen3, etc.) - GPU backend improvements for CUDA, Metal, and ROCm - Updated quantization support and GGUF format handling ### Documentation - Updated CLAUDE.md with comprehensive build instructions - Documented toolchain constraints and CPU architecture trade-offs - Removed outdated CI/CD workflows (tesla-k80-*.yml) - Cleaned up temporary development artifacts ## Rationale This fork maintains Tesla K80 GPU support (compute 3.7) which was dropped in official Ollama due to legacy driver/CUDA requirements. The toolchain constraint creates a deadlock: - K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
288 lines
6.7 KiB
Go
288 lines
6.7 KiB
Go
package thinking
|
|
|
|
import (
|
|
"testing"
|
|
)
|
|
|
|
func TestExtractThinking(t *testing.T) {
|
|
tests := []struct {
|
|
in, wantContent, wantThink string
|
|
}{
|
|
{
|
|
in: "<think> internal </think> world",
|
|
wantThink: "internal ",
|
|
wantContent: "world",
|
|
},
|
|
{
|
|
in: "<think>a</think><think>b</think>c",
|
|
wantThink: "a",
|
|
wantContent: "<think>b</think>c",
|
|
},
|
|
{
|
|
in: "no think",
|
|
wantThink: "",
|
|
wantContent: "no think",
|
|
},
|
|
}
|
|
for i, tt := range tests {
|
|
parser := Parser{
|
|
OpeningTag: "<think>",
|
|
ClosingTag: "</think>",
|
|
}
|
|
gotThinking, gotContent := parser.AddContent(tt.in)
|
|
if gotContent != tt.wantContent || gotThinking != tt.wantThink {
|
|
t.Errorf("case %d: got (%q,%q), want (%q,%q)", i, gotThinking, gotContent, tt.wantThink, tt.wantContent)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestThinkingStreaming(t *testing.T) {
|
|
type step struct {
|
|
input string
|
|
wantThinking string
|
|
wantContent string
|
|
wantStateAfter thinkingState
|
|
}
|
|
|
|
cases := []struct {
|
|
desc string
|
|
skip bool
|
|
steps []step
|
|
}{
|
|
{
|
|
desc: "content without a thinking tag",
|
|
steps: []step{
|
|
{
|
|
input: " abc",
|
|
wantThinking: "",
|
|
wantContent: " abc",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
// regression test for a bug where we were transitioning directly to
|
|
// ThinkingDone without clearing the buffer. This would cuase the first
|
|
// step to be outputted twice
|
|
{
|
|
input: "def",
|
|
wantThinking: "",
|
|
wantContent: "def",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "content before a thinking tag nerfs the thinking tag",
|
|
steps: []step{
|
|
{
|
|
input: " abc <think>def</think> ghi",
|
|
wantThinking: "",
|
|
wantContent: " abc <think>def</think> ghi",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "building up a thinking tag partially",
|
|
steps: []step{
|
|
{
|
|
input: " <th",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_LookingForOpening,
|
|
},
|
|
{
|
|
input: "in",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_LookingForOpening,
|
|
},
|
|
{
|
|
input: "k>a",
|
|
wantThinking: "a",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_Thinking,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "partial closing tag",
|
|
steps: []step{
|
|
{
|
|
input: "<think>abc</th",
|
|
wantThinking: "abc",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_Thinking,
|
|
},
|
|
{
|
|
input: "ink>def",
|
|
wantThinking: "",
|
|
wantContent: "def",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "partial closing tag fakeout",
|
|
steps: []step{
|
|
{
|
|
input: "<think>abc</th",
|
|
wantThinking: "abc",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_Thinking,
|
|
},
|
|
{
|
|
input: "ing>def",
|
|
wantThinking: "</thing>def",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_Thinking,
|
|
},
|
|
{
|
|
input: "ghi</thi",
|
|
wantThinking: "ghi",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_Thinking,
|
|
},
|
|
{
|
|
input: "nk>jkl",
|
|
wantThinking: "",
|
|
wantContent: "jkl",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "whitespace after thinking tag",
|
|
steps: []step{
|
|
{
|
|
input: " <think>abc</think>\n\ndef",
|
|
wantThinking: "abc",
|
|
wantContent: "def",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "whitespace after thinking tag (incremental)",
|
|
steps: []step{
|
|
{
|
|
input: " <think>abc</think>",
|
|
wantThinking: "abc",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingDoneEatingWhitespace,
|
|
},
|
|
{
|
|
input: "\n\ndef",
|
|
wantThinking: "",
|
|
wantContent: "def",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "whitespace after thinking tag with content and more whitespace",
|
|
steps: []step{
|
|
{
|
|
input: " <think>abc</think>\n\ndef ",
|
|
wantThinking: "abc",
|
|
wantContent: "def ",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
{
|
|
input: " ghi",
|
|
wantThinking: "",
|
|
wantContent: " ghi",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "token by token",
|
|
steps: []step{
|
|
{
|
|
input: "<think>",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingStartedEatingWhitespace,
|
|
},
|
|
{
|
|
input: "\n",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingStartedEatingWhitespace,
|
|
},
|
|
{
|
|
input: "</think>",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingDoneEatingWhitespace,
|
|
},
|
|
{
|
|
input: "\n\n",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingDoneEatingWhitespace,
|
|
},
|
|
{
|
|
input: "Hi",
|
|
wantThinking: "",
|
|
wantContent: "Hi",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
{
|
|
input: " there",
|
|
wantThinking: "",
|
|
wantContent: " there",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
desc: "leading thinking whitespace",
|
|
steps: []step{
|
|
{
|
|
input: " <think> \t ",
|
|
wantThinking: "",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingStartedEatingWhitespace,
|
|
},
|
|
{
|
|
input: " these are some ",
|
|
wantThinking: "these are some ",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_Thinking,
|
|
},
|
|
{
|
|
input: "thoughts </think> ",
|
|
wantThinking: "thoughts ",
|
|
wantContent: "",
|
|
wantStateAfter: thinkingState_ThinkingDoneEatingWhitespace,
|
|
},
|
|
{
|
|
input: " more content",
|
|
wantThinking: "",
|
|
wantContent: "more content",
|
|
wantStateAfter: thinkingState_ThinkingDone,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, c := range cases {
|
|
parser := Parser{
|
|
OpeningTag: "<think>",
|
|
ClosingTag: "</think>",
|
|
}
|
|
if c.skip {
|
|
continue
|
|
}
|
|
for i, step := range c.steps {
|
|
thinking, content := parser.AddContent(step.input)
|
|
if content != step.wantContent || thinking != step.wantThinking {
|
|
t.Errorf("case %q (step %d): got (%q,%q), want (%q,%q)", c.desc, i, content, thinking, step.wantContent, step.wantThinking)
|
|
}
|
|
if parser.state != step.wantStateAfter {
|
|
t.Errorf("case %q (step %d): got state %s, want %s", c.desc, i, parser.state, step.wantStateAfter)
|
|
}
|
|
}
|
|
}
|
|
}
|