Sync with upstream ollama/ollama and restore Tesla K80 (compute 3.7) support

This commit represents a complete rework after pulling the latest changes from
the official ollama/ollama repository and re-applying the Tesla K80 compatibility patches.

## Key Changes

### CUDA Compute Capability 3.7 Support (Tesla K80)
- Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt
- Updated CMakePresets.json to include compute 3.7 in "CUDA 11" preset
- Using 37-virtual (PTX with JIT compilation) for maximum compatibility

### Legacy Toolchain Compatibility
- **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80)
- **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7)
- **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h)

### CPU Architecture Trade-offs
Because the build is limited to GCC 10.5, some newer CPU optimizations are sacrificed:
- Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+)
- Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA
- Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility)

### Build System Updates
- Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7
- Added -Wno-deprecated-gpu-targets flag to suppress warnings
- Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI

### Upstream Sync
Merged latest llama.cpp changes including:
- Enhanced KV cache management with ISWA and hybrid memory support
- Improved multi-modal support (mtmd framework)
- New model architectures (Gemma3, Llama4, Qwen3, etc.)
- GPU backend improvements for CUDA, Metal, and ROCm
- Updated quantization support and GGUF format handling

### Documentation
- Updated CLAUDE.md with comprehensive build instructions
- Documented toolchain constraints and CPU architecture trade-offs
- Removed outdated CI/CD workflows (tesla-k80-*.yml)
- Cleaned up temporary development artifacts

## Rationale

This fork maintains Tesla K80 GPU support (compute 3.7), which was dropped in
official Ollama due to legacy driver/CUDA requirements. The toolchain constraints
form a rigid dependency chain:
- K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI

We accept the loss of cutting-edge CPU optimizations to enable running modern
LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Shang Chieh Tseng
2025-11-05 14:03:05 +08:00
parent fabe2c5cb7
commit ef14fb5b26
817 changed files with 241634 additions and 70888 deletions

76
model/parsers/parsers.go Normal file
View File

@@ -0,0 +1,76 @@
package parsers
import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/harmony"
)
// Parser converts streamed model output into plain content, thinking text, and
// tool calls. Implementations in this package include PassthroughParser,
// Qwen3CoderParser, and Qwen3VLParser.
type Parser interface {
// Init initializes the parser with tools and optional last message for chat prefill
// Returns processed tools if the parser needs to modify them (e.g., harmony renames them)
Init(tools []api.Tool, lastMessage *api.Message) []api.Tool
// Add processes streamed content and returns parsed content, thinking, and tool calls
// The done flag indicates if this is the last chunk (used for draining accumulators)
Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error)
// HasToolSupport reports whether this parser extracts tool calls from the stream.
HasToolSupport() bool
// HasThinkingSupport reports whether this parser separates thinking text from content.
HasThinkingSupport() bool
}
// ParserConstructor builds a new Parser instance each time it is called.
type ParserConstructor func() Parser

// ParserRegistry maps parser names to the constructors that create them.
// The zero value is ready to use.
type ParserRegistry struct {
	constructors map[string]ParserConstructor
}

// Register stores constructor under name, replacing any constructor previously
// registered under the same name. No locking is performed here, so callers
// must serialize registration with lookups themselves.
func (r *ParserRegistry) Register(name string, constructor ParserConstructor) {
	// Allocate lazily so a zero-value ParserRegistry does not panic on the
	// first write to a nil map.
	if r.constructors == nil {
		r.constructors = make(map[string]ParserConstructor)
	}
	r.constructors[name] = constructor
}

// registry is the package-level registry consulted by ParserForName.
var registry = ParserRegistry{
	constructors: make(map[string]ParserConstructor),
}

// Register adds constructor to the package-level registry under name.
// ParserForName checks the registry before its built-in names, so registering
// a built-in name overrides it.
func Register(name string, constructor ParserConstructor) {
	registry.Register(name, constructor)
}
// ParserForName returns a new parser for name. Constructors added through
// Register are consulted first; otherwise the built-in names are tried. It
// returns nil when name matches neither.
func ParserForName(name string) Parser {
	if construct, registered := registry.constructors[name]; registered {
		return construct()
	}

	switch name {
	case "qwen3-coder":
		return &Qwen3CoderParser{}
	case "qwen3-vl-instruct":
		return &Qwen3VLParser{hasThinkingSupport: false}
	case "qwen3-vl-thinking":
		return &Qwen3VLParser{hasThinkingSupport: true}
	case "passthrough":
		return &PassthroughParser{}
	case "harmony":
		return harmony.NewHarmonyMessageHandler()
	}
	return nil
}
// PassthroughParser is a Parser that forwards streamed text unchanged and
// never produces thinking output or tool calls.
type PassthroughParser struct{}

// Init returns tools as given; the passthrough parser keeps no state.
func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	// passthrough doesn't modify tools
	return tools
}

// Add returns s as content, with empty thinking, no tool calls, and a nil error.
func (p *PassthroughParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
	content = s
	return content, "", nil, nil
}

// HasToolSupport always reports false.
func (p *PassthroughParser) HasToolSupport() bool { return false }

// HasThinkingSupport always reports false.
func (p *PassthroughParser) HasThinkingSupport() bool { return false }

View File

@@ -0,0 +1,97 @@
package parsers
import (
"testing"
"github.com/ollama/ollama/api"
)
// mockParser is a minimal Parser used by the registry tests. Its Add method
// prefixes the input with "mock:" so tests can tell it apart from the
// built-in parsers.
type mockParser struct {
	name string
}

// Init returns tools unchanged.
func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	return tools
}

// Add returns "mock:" followed by s as content and nothing else.
func (m *mockParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
	content = "mock:" + s
	return content, "", nil, nil
}

// HasToolSupport always reports false.
func (m *mockParser) HasToolSupport() bool { return false }

// HasThinkingSupport always reports false.
func (m *mockParser) HasThinkingSupport() bool { return false }
// TestRegisterCustomParser checks that a constructor added with Register is
// returned by ParserForName and that the resulting parser is the one supplied.
func TestRegisterCustomParser(t *testing.T) {
	const name = "custom-parser"

	// Register a custom parser
	Register(name, func() Parser {
		return &mockParser{name: "custom"}
	})
	// The registry is package-global; remove the entry so it does not leak
	// into other tests.
	t.Cleanup(func() { delete(registry.constructors, name) })

	// Retrieve it
	parser := ParserForName(name)
	if parser == nil {
		t.Fatal("expected parser to be registered")
	}

	// Test it works
	content, _, _, err := parser.Add("test", false)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if content != "mock:test" {
		t.Errorf("expected 'mock:test', got %q", content)
	}
}
// TestBuiltInParsersStillWork checks that every built-in name handled by
// ParserForName still resolves to a non-nil parser.
func TestBuiltInParsersStillWork(t *testing.T) {
	tests := []struct {
		name string
	}{
		{"passthrough"},
		{"qwen3-coder"},
		{"qwen3-vl-instruct"},
		{"qwen3-vl-thinking"},
		{"harmony"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			parser := ParserForName(tt.name)
			if parser == nil {
				t.Fatalf("expected built-in parser %q to exist", tt.name)
			}
		})
	}
}
// TestOverrideBuiltInParser checks that registering a constructor under a
// built-in name takes precedence over the built-in parser.
func TestOverrideBuiltInParser(t *testing.T) {
	const name = "passthrough"

	// Override a built-in parser
	Register(name, func() Parser {
		return &mockParser{name: "override"}
	})
	// The registry is package-global; drop the override afterwards so the
	// built-in passthrough parser is visible to other tests again.
	t.Cleanup(func() { delete(registry.constructors, name) })

	// Should get the override
	parser := ParserForName(name)
	if parser == nil {
		t.Fatal("expected parser to exist")
	}

	// Test it's the override
	content, _, _, err := parser.Add("test", false)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if content != "mock:test" {
		t.Errorf("expected 'mock:test' from override, got %q", content)
	}
}
// TestUnknownParserReturnsNil checks that a name that is neither registered
// nor built in yields a nil parser.
func TestUnknownParserReturnsNil(t *testing.T) {
	if got := ParserForName("nonexistent-parser"); got != nil {
		t.Error("expected nil for unknown parser")
	}
}

472
model/parsers/qwen3coder.go Normal file
View File

@@ -0,0 +1,472 @@
package parsers
import (
"context"
"encoding/json"
"encoding/xml"
"fmt"
"log/slog"
"math"
"regexp"
"strconv"
"strings"
"unicode"
"unicode/utf8"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
)
// qwenParserState identifies the current state of a qwen streaming parser's
// state machine.
type qwenParserState int

// States used by Qwen3CoderParser.
const (
	// qwenParserState_LookingForToolStart: emitting content while watching
	// for toolOpenTag.
	qwenParserState_LookingForToolStart qwenParserState = iota
	// qwenParserState_CollectingToolContent: buffering a tool call body until
	// toolCloseTag is seen.
	qwenParserState_CollectingToolContent
)

// Tags that delimit a tool call in the model output.
const (
	toolOpenTag  = "<tool_call>"
	toolCloseTag = "</tool_call>"
)
// Qwen3CoderParser is a streaming Parser for qwen3-coder output. It splits the
// stream into plain content and XML-like tool calls wrapped in
// <tool_call>...</tool_call>.
type Qwen3CoderParser struct {
	// state is the current state-machine state.
	state qwenParserState
	// acc holds streamed text that has not been emitted yet.
	acc strings.Builder
	// tools are the declared tools, used to look up parameter types.
	tools []api.Tool
}

// HasToolSupport always reports true.
func (p *Qwen3CoderParser) HasToolSupport() bool { return true }

// HasThinkingSupport always reports false.
func (p *Qwen3CoderParser) HasThinkingSupport() bool { return false }

// Init remembers tools for later parameter-type lookups and returns them
// unmodified. lastMessage is not used.
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	// Qwen doesn't modify tools
	p.tools = tools
	return tools
}
// Add appends s to the internal buffer and returns whatever content and tool
// calls can be emitted unambiguously so far. The thinking result is always
// empty. If a completed tool call cannot be parsed, the error is logged and
// returned with all other results empty. The done flag is not consulted.
func (p *Qwen3CoderParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
	p.acc.WriteString(s)

	var (
		text   strings.Builder
		parsed []api.ToolCall
	)
	for _, ev := range p.parseEvents() {
		switch ev := ev.(type) {
		case qwenEventContent:
			// TODO(drifkin): if the same turn contains multiple interleaved
			// content events, we naively append them together here. See the
			// note on `qwenEvent` for more details
			text.WriteString(ev.content)
		case qwenEventRawToolCall:
			call, parseErr := parseToolCall(ev, p.tools)
			if parseErr != nil {
				slog.Warn("qwen tool call parsing failed", "error", parseErr)
				return "", "", nil, parseErr
			}
			parsed = append(parsed, call)
		}
	}

	return text.String(), "", parsed, nil
}
// parseEvents calls eat repeatedly until it reports that nothing more can be
// produced from the current buffer, and returns every event gathered along the
// way. A trace-level log line is written when at least one event was produced.
func (p *Qwen3CoderParser) parseEvents() []qwenEvent {
	var collected []qwenEvent
	for {
		batch, more := eat(p)
		collected = append(collected, batch...)
		if !more {
			break
		}
	}

	if len(collected) > 0 {
		slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", collected, "state", p.state, "acc", p.acc.String())
	}
	return collected
}
// qwenEvent is the internal message type passed from `eat` to `Add`. Using
// events lets the parser represent interleaved content and parallel tool
// calls, even though qwen3-coder isn't supposed to produce them. Our API
// doesn't currently support models outputting multiple messages in a turn, so
// we couldn't surface that yet, but there's no reason to prevent the parser
// from supporting it, especially for future models that use a similar format.
type qwenEvent interface {
	isQwenEvent()
}

// qwenEventContent carries a run of plain content.
type qwenEventContent struct {
	content string
}

// qwenEventRawToolCall carries the unparsed text found between the tool call
// open and close tags.
type qwenEventRawToolCall struct {
	raw string
}

func (qwenEventRawToolCall) isQwenEvent() {}
func (qwenEventContent) isQwenEvent()     {}
// eat consumes as much of the parser's buffer as can be interpreted without
// ambiguity in the current state and returns the resulting events. The boolean
// result is true when the parser changed state, meaning another call may yield
// further events from what is left in the buffer.
func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
	var events []qwenEvent
	buf := p.acc.String()

	switch p.state {
	case qwenParserState_LookingForToolStart:
		if before, after, found := strings.Cut(buf, toolOpenTag); found {
			// A complete open tag: emit the content in front of it (minus
			// trailing whitespace) and start collecting the tool call body.
			if text := strings.TrimRightFunc(before, unicode.IsSpace); len(text) > 0 {
				events = append(events, qwenEventContent{content: text})
			}
			p.acc.Reset()
			p.acc.WriteString(after)
			p.state = qwenParserState_CollectingToolContent
			return events, true
		}

		// No complete open tag. Hold back a partial tag at the end of the
		// buffer (if any) along with the whitespace directly before it, since
		// that whitespace is dropped if a tool call follows or the content
		// ends here. Everything earlier is unambiguous content.
		partial := overlap(buf, toolOpenTag)
		settled := buf[:len(buf)-partial]
		cut := len(settled) - trailingWhitespaceLen(settled)
		p.acc.Reset()
		p.acc.WriteString(buf[cut:])
		if cut > 0 {
			events = append(events, qwenEventContent{content: buf[:cut]})
		}
		return events, false

	case qwenParserState_CollectingToolContent:
		// Nothing is streamed back while inside a tool call, so there is no
		// need to look for a partial close tag: wait for the complete one.
		raw, rest, found := strings.Cut(buf, toolCloseTag)
		if !found {
			return events, false
		}
		if len(raw) == 0 {
			slog.Warn("qwen tool call closing tag found but no content before it")
		}
		// remove any whitespace between the tool call and any content after it
		p.acc.Reset()
		p.acc.WriteString(strings.TrimLeftFunc(rest, unicode.IsSpace))
		events = append(events, qwenEventRawToolCall{raw: raw})
		p.state = qwenParserState_LookingForToolStart
		return events, true

	default:
		panic("unreachable")
	}
}
// TODO(drifkin): move this to a shared location

// overlap returns the length of the longest suffix of s that is also a prefix
// of delim, or 0 when there is none.
func overlap(s, delim string) int {
	for n := min(len(s), len(delim)); n >= 1; n-- {
		if prefix := delim[:n]; strings.HasSuffix(s, prefix) {
			return n
		}
	}
	return 0
}
// trailingWhitespaceLen returns the number of bytes of Unicode whitespace at
// the end of s. Scanning stops at the first non-space rune; an invalid UTF-8
// byte is treated as non-space.
func trailingWhitespaceLen(s string) int {
	end := len(s)
	for end > 0 {
		r, size := utf8.DecodeLastRuneInString(s[:end])
		invalid := r == utf8.RuneError && size == 1
		if invalid || !unicode.IsSpace(r) {
			break
		}
		end -= size
	}
	return len(s) - end
}
// XMLParameter is one <parameter name="..."> element of a tool call: the
// attribute gives the parameter name and the character data its raw value.
type XMLParameter struct {
	Name  string `xml:"name,attr"`
	Value string `xml:",chardata"`
}

// XMLFunctionCall is the decoded form of a <function name="..."> element and
// the <parameter> elements nested inside it.
type XMLFunctionCall struct {
	XMLName    xml.Name       `xml:"function"`
	Name       string         `xml:"name,attr"`
	Parameters []XMLParameter `xml:"parameter"`
}
// parseToolCall converts the raw text of a tool call into an api.ToolCall.
// The raw text uses an xml-like format, for example:
//
//	<function=get_current_temperature>
//	<parameter=location>
//	San Francisco
//	</parameter>
//	<parameter=unit>
//	celsius
//	</parameter>
//	</function>
//
// When tools contains a tool with the same function name, its declared
// parameter types (including the members of an anyOf union) are passed to
// parseValue to pick the type of each argument; otherwise no type is passed.
// An error is returned if the text cannot be decoded as XML after
// transformToXML.
func parseToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
	var fn XMLFunctionCall
	if err := xml.Unmarshal([]byte(transformToXML(raw.raw)), &fn); err != nil {
		return api.ToolCall{}, err
	}

	// Find the declaration of the called tool, if there is one.
	var declared *api.Tool
	for i := range tools {
		if tools[i].Function.Name != fn.Name {
			continue
		}
		declared = &tools[i]
		break
	}

	args := make(api.ToolCallFunctionArguments)
	for _, param := range fn.Parameters {
		var paramType api.PropertyType
		if declared != nil && declared.Function.Parameters.Properties != nil {
			if prop, ok := declared.Function.Parameters.Properties[param.Name]; ok {
				if len(prop.AnyOf) == 0 {
					paramType = prop.Type
				} else {
					// anyOf: the candidate types are the union of every alternative
					for _, alt := range prop.AnyOf {
						paramType = append(paramType, alt.Type...)
					}
				}
			}
		}
		args[param.Name] = parseValue(param.Value, paramType)
	}

	return api.ToolCall{
		Function: api.ToolCallFunction{
			Name:      fn.Name,
			Arguments: args,
		},
	}, nil
}
// parseValue converts a raw string value to the appropriate type based on the
// parameter type specification.
//
// For union types (multiple types in PropertyType, which we support but doesn't
// seem as though the reference parser does type coercion with those types in
// mind) we use a type precedence approach:
//  1. null - checked first regardless of declared types (matches reference implementation)
//  2. boolean - only "true"/"false" are valid booleans
//  3. integer - must parse as a whole number
//  4. number - must parse as numeric (returns an int if there is no decimal
//     part and the value fits in an int64; otherwise a float64)
//  5. array - must parse as valid JSON array
//  6. object - must parse as valid JSON object
//  7. string - always succeeds (least specific type)
//
// This precedence ensures we return the most specific type that successfully parses,
// following the principle of least surprise. For example, with PropertyType{"string", "number"},
// "123" becomes 123 (number), while "hello" becomes "hello" (string).
//
// When exactly one type is declared and the value does not parse as it, the
// raw string is returned, except for "boolean", which yields false (matching
// the reference implementation). With no declared type the raw string is
// returned unchanged.
func parseValue(raw string, paramType api.PropertyType) any {
	// first remove a single leading newline and a single trailing newline (if
	// they exist). This follows the reference implementation
	raw = strings.TrimPrefix(raw, "\n")
	raw = strings.TrimSuffix(raw, "\n")

	// Check for null first (case-insensitive) - this takes precedence over any type
	if strings.EqualFold(raw, "null") {
		return nil
	}

	// If no type is specified, default to string
	if len(paramType) == 0 {
		return raw
	}

	// Check if any of the specified types match, using type precedence
	// Order: boolean -> integer -> number -> array -> object -> string
	typeSet := make(map[string]bool, len(paramType))
	for _, t := range paramType {
		typeSet[t] = true
	}
	onlyType := len(paramType) == 1

	// Try boolean first (most restrictive)
	if typeSet["boolean"] {
		switch strings.ToLower(raw) {
		case "true":
			return true
		case "false":
			return false
		}
		// If not a valid boolean but boolean is the only type, return false (matching reference)
		if onlyType {
			return false
		}
		// Otherwise try other types
	}

	// Try integer
	if typeSet["integer"] {
		if i, err := strconv.ParseInt(raw, 10, 64); err == nil {
			// Return as int if it fits in int32, otherwise int64
			if i >= math.MinInt32 && i <= math.MaxInt32 {
				return int(i)
			}
			return i
		}
		// If integer is the only type and parsing failed, fall back to string
		if onlyType {
			return raw
		}
	}

	// Try number (float)
	if typeSet["number"] {
		if f, err := strconv.ParseFloat(raw, 64); err == nil {
			// If the number has no decimal part, return as int (matching
			// reference). The range guard matters: converting a float64 that
			// does not fit in an int64 (e.g. "1e30" or "inf") gives an
			// implementation-dependent result, so such values stay float64.
			if f == math.Trunc(f) && f >= -(1<<63) && f < (1<<63) {
				i := int64(f)
				if i >= math.MinInt32 && i <= math.MaxInt32 {
					return int(i)
				}
				return i
			}
			return f
		}
		// If number is the only type and parsing failed, fall back to string
		if onlyType {
			return raw
		}
	}

	// Try array
	if typeSet["array"] {
		var arr []any
		if err := json.Unmarshal([]byte(raw), &arr); err == nil {
			return arr
		}
		// If array is the only type and parsing failed, fall back to string
		if onlyType {
			return raw
		}
	}

	// Try object
	if typeSet["object"] {
		var obj map[string]any
		if err := json.Unmarshal([]byte(raw), &obj); err == nil {
			return obj
		}
		// If object is the only type and parsing failed, fall back to string
		if onlyType {
			return raw
		}
	}

	// Either "string" is one of the declared types, or none of the declared
	// types matched. Both cases return the raw string. The reference
	// implementation will attempt to parse the value as a python literal in
	// the latter case, but we purposefully don't support that
	return raw
}
var (
	// qwenTagRegex matches the model's `<function=NAME>` and
	// `<parameter=NAME>` opening tags. It is deliberately limited to these two
	// tag names so that `<word=value>` text appearing inside a parameter value
	// is left alone.
	qwenTagRegex = regexp.MustCompile(`<(function|parameter)=([^>]+)>`)
	// qwenXMLTagRegex matches the opening and closing function/parameter tags
	// as they appear after the rewrite performed by transformToXML.
	qwenXMLTagRegex = regexp.MustCompile(`</?(?:function|parameter)(?:\s+name="[^"]*")?>`)
)

// transformToXML transforms a raw qwen tool call with xml-like tags into valid
// xml so that it can be parsed by any xml parser. Tags of the form `<tag=abc>`
// become `<tag name="abc">`, and all text between function/parameter tags has
// its `&`, `<` and `>` characters escaped.
func transformToXML(raw string) string {
	// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
	// care to properly escape the string that becomes the attribute value
	transformed := qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
		groups := qwenTagRegex.FindStringSubmatch(match)
		tag := groups[1]
		var escapedValue strings.Builder
		// strings.Builder writes never fail, so the error can be dropped
		_ = xml.EscapeText(&escapedValue, []byte(groups[2]))
		return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
	})

	// Walk the resulting string, escaping any character data that sits between
	// the xml tags we just emitted
	var out strings.Builder
	lastIdx := 0
	for _, loc := range qwenXMLTagRegex.FindAllStringIndex(transformed, -1) {
		if loc[0] > lastIdx {
			escapeTextNode(&out, transformed[lastIdx:loc[0]])
		}
		out.WriteString(transformed[loc[0]:loc[1]])
		lastIdx = loc[1]
	}
	if lastIdx < len(transformed) {
		escapeTextNode(&out, transformed[lastIdx:])
	}

	return out.String()
}

// escapeTextNode escapes XML character data without altering other characters
// like newlines or tabs (which is why we don't use xml.EscapeText for this)
func escapeTextNode(sb *strings.Builder, s string) {
	for _, r := range s {
		switch r {
		case '&':
			sb.WriteString("&amp;")
		case '<':
			sb.WriteString("&lt;")
		case '>':
			sb.WriteString("&gt;")
		default:
			sb.WriteRune(r)
		}
	}
}

File diff suppressed because it is too large Load Diff

253
model/parsers/qwen3vl.go Normal file
View File

@@ -0,0 +1,253 @@
package parsers
import (
"context"
"encoding/json"
"log/slog"
"strings"
"unicode"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
)
// TODO: call the init function

// States used by Qwen3VLParser. They share the qwenParserState type with the
// Qwen3CoderParser states and also start at zero, so a numeric state value is
// only meaningful together with the parser it belongs to.
const (
	// CollectingThinkingContent: emitting thinking text until thinkingCloseTag.
	CollectingThinkingContent qwenParserState = iota
	// CollectingContent: emitting content while watching for toolOpenTag.
	CollectingContent
	// CollectingToolContent: buffering a tool call body until toolCloseTag.
	CollectingToolContent
	// ThinkingDoneEatingWhitespace: skipping whitespace after the thinking block.
	ThinkingDoneEatingWhitespace
	// ToolCallDoneEatingWhitespace: skipping whitespace after a tool call.
	ToolCallDoneEatingWhitespace
)

// thinkingCloseTag marks the end of the thinking section in the model output.
const thinkingCloseTag = "</think>"
// Qwen3VLParser is a streaming Parser for qwen3-vl output. It separates
// thinking text (when enabled), plain content, and JSON tool calls wrapped in
// <tool_call>...</tool_call>.
type Qwen3VLParser struct {
	// state is the current state-machine state.
	state qwenParserState
	// buffer holds streamed text that has not been emitted yet.
	buffer strings.Builder
	// tools are the tools handed to Init.
	tools []api.Tool
	// hasThinkingSupport selects the thinking variant of the parser.
	hasThinkingSupport bool
}

// HasToolSupport always reports true.
func (p *Qwen3VLParser) HasToolSupport() bool { return true }

// HasThinkingSupport reports whether the parser was created as the thinking
// variant.
func (p *Qwen3VLParser) HasThinkingSupport() bool { return p.hasThinkingSupport }
// setInitialState picks the state the parser starts in. Parsing begins in
// CollectingContent when thinking is not supported, or when the last message
// is a non-empty assistant message (a prefill). Otherwise it begins in
// CollectingThinkingContent.
func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
	switch {
	case !p.HasThinkingSupport():
		p.state = CollectingContent
	case lastMessage != nil && lastMessage.Role == "assistant" && lastMessage.Content != "":
		p.state = CollectingContent
	default:
		p.state = CollectingThinkingContent
	}
}

// Init records tools, selects the starting state from lastMessage, and returns
// tools unmodified.
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	p.setInitialState(lastMessage)
	p.tools = tools
	return tools
}
// qwenEventThinkingContent carries a run of thinking text.
type qwenEventThinkingContent struct {
	content string
}

// isQwenEvent marks qwenEventThinkingContent as a qwenEvent.
func (qwenEventThinkingContent) isQwenEvent() {}
// Add appends s to the internal buffer and returns the content, thinking text,
// and tool calls that can be emitted unambiguously so far. If a completed tool
// call is not valid JSON, the error is logged and returned with all other
// results empty. The done flag is not consulted.
func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
	p.buffer.WriteString(s)

	var (
		text, thoughts strings.Builder
		parsed         []api.ToolCall
	)
	for _, ev := range p.parseEvents() {
		switch ev := ev.(type) {
		case qwenEventContent:
			// TODO(drifkin): if the same turn contains multiple interleaved
			// content events, we naively append them together here.
			text.WriteString(ev.content)
		case qwenEventThinkingContent:
			thoughts.WriteString(ev.content)
		case qwenEventRawToolCall:
			call, parseErr := parseJSONToolCall(ev, p.tools)
			if parseErr != nil {
				slog.Warn("qwen tool call parsing failed", "error", parseErr)
				return "", "", nil, parseErr
			}
			parsed = append(parsed, call)
		}
	}

	return text.String(), thoughts.String(), parsed, nil
}
// parseEvents calls eat repeatedly until it reports that nothing more can be
// produced from the current buffer, and returns every event gathered along the
// way. A trace-level log line is written when at least one event was produced.
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
	var collected []qwenEvent
	for {
		batch, more := p.eat()
		collected = append(collected, batch...)
		if !more {
			break
		}
	}

	if len(collected) > 0 {
		slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", collected, "state", p.state, "buffer", p.buffer.String())
	}
	return collected
}
// splitAtTag splits the parser's buffer at the first occurrence of tag. It
// returns the text before the tag with trailing whitespace removed, and the
// text after the tag (with leading whitespace removed when trimAfter is set).
// The buffer is replaced by the returned "after" text. The caller must make
// sure tag is present in the buffer; otherwise this panics.
func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) {
	buf := p.buffer.String()
	at := strings.Index(buf, tag)

	head := strings.TrimRightFunc(buf[:at], unicode.IsSpace)
	tail := buf[at+len(tag):]
	if trimAfter {
		tail = strings.TrimLeftFunc(tail, unicode.IsSpace)
	}

	p.buffer.Reset()
	p.buffer.WriteString(tail)
	return head, tail
}
// eatLeadingWhitespaceAndTransitionTo drops leading whitespace from the
// buffer. If anything remains, the parser moves to nextState and true is
// returned so the caller keeps parsing. If the buffer held only whitespace, it
// is left empty, the state is unchanged, and false is returned. No events are
// ever produced.
func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) {
	rest := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
	p.buffer.Reset()
	if rest != "" {
		p.buffer.WriteString(rest)
		p.state = nextState
		return nil, true
	}
	return nil, false
}
// eat consumes as much of the buffer as can be interpreted without ambiguity
// in the current state and returns the resulting events. The boolean result is
// true when another call may yield further events from what is left in the
// buffer.
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
	var events []qwenEvent
	buf := p.buffer.String()

	// takeUnambiguous is used when tag does not occur in full. It keeps back a
	// partial tag at the end of the buffer (if any) together with the
	// whitespace directly before it, and returns everything earlier.
	takeUnambiguous := func(tag string) string {
		partial := overlap(buf, tag)
		settled := buf[:len(buf)-partial]
		cut := len(settled) - trailingWhitespaceLen(settled)
		p.buffer.Reset()
		p.buffer.WriteString(buf[cut:])
		return buf[:cut]
	}

	switch p.state {
	case CollectingContent:
		if strings.Contains(buf, toolOpenTag) {
			before, _ := splitAtTag(p, toolOpenTag, false)
			if len(before) > 0 {
				events = append(events, qwenEventContent{content: before})
			}
			p.state = CollectingToolContent
			return events, true
		}
		if text := takeUnambiguous(toolOpenTag); len(text) > 0 {
			events = append(events, qwenEventContent{content: text})
		}
		return events, false

	case CollectingToolContent:
		// The tool call body is only emitted once the full close tag is seen.
		raw, rest, found := strings.Cut(buf, toolCloseTag)
		if !found {
			return events, false
		}
		if len(raw) == 0 {
			slog.Warn("qwen tool call closing tag found but no content before it")
		}
		events = append(events, qwenEventRawToolCall{raw: raw})
		p.buffer.Reset()
		p.buffer.WriteString(rest)
		p.state = ToolCallDoneEatingWhitespace
		return events, true

	case CollectingThinkingContent:
		if strings.Contains(buf, thinkingCloseTag) {
			thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
			if len(thinking) > 0 {
				events = append(events, qwenEventThinkingContent{content: thinking})
			}
			// With nothing left after the tag, later chunks may still begin
			// with whitespace that has to be skipped.
			p.state = CollectingContent
			if remaining == "" {
				p.state = ThinkingDoneEatingWhitespace
			}
			return events, true
		}
		if text := takeUnambiguous(thinkingCloseTag); len(text) > 0 {
			events = append(events, qwenEventThinkingContent{content: text})
		}
		return events, false

	case ThinkingDoneEatingWhitespace, ToolCallDoneEatingWhitespace:
		return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)

	default:
		panic("unreachable")
	}
}
// parseJSONToolCall decodes the raw text of a tool call as JSON into an
// api.ToolCallFunction and wraps it in an api.ToolCall. The tools argument is
// currently unused. An error is returned when the text is not valid JSON for
// that type.
func parseJSONToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
	var fn api.ToolCallFunction
	err := json.Unmarshal([]byte(raw.raw), &fn)
	if err != nil {
		return api.ToolCall{}, err
	}
	return api.ToolCall{Function: fn}, nil
}

View File

@@ -0,0 +1,841 @@
package parsers
import (
"reflect"
"testing"
"github.com/ollama/ollama/api"
)
// TestQwen3VLNonThinkingParserStreaming feeds each case's inputs, one step at
// a time, into a Qwen3VLParser created without thinking support and compares
// the events returned by parseEvents after every step. In this mode think tags
// are expected to come back as ordinary content. Setting `only` on one or more
// cases restricts the run to those cases.
func TestQwen3VLNonThinkingParserStreaming(t *testing.T) {
type step struct {
input string
wantEvents []qwenEvent
}
cases := []struct {
desc string
steps []step
only bool
}{
{
desc: "simple thinking",
steps: []step{
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
},
},
{
desc: "simple trip thinking",
steps: []step{
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "<think>abc</think>"}}},
},
},
{
desc: "thinking with split tags",
steps: []step{
{input: "abc", wantEvents: []qwenEvent{qwenEventContent{content: "abc"}}},
{input: "</think>", wantEvents: []qwenEvent{qwenEventContent{content: "</think>"}}},
},
},
{
desc: "multiple think tags",
steps: []step{
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>actually, is not thinking</think>"}}},
},
},
{
desc: "thinking and tool call",
steps: []step{
{
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
wantEvents: []qwenEvent{
qwenEventContent{content: "I'm thinking</think>"},
qwenEventRawToolCall{raw: "I'm tool calling"},
},
},
},
},
{
desc: "nested thinking (outside thinking, inside thinking)",
steps: []step{
{
input: "I'm thinking<think>I'm nested thinking</think></think>",
wantEvents: []qwenEvent{
qwenEventContent{content: "I'm thinking<think>I'm nested thinking</think></think>"},
},
},
},
},
{
desc: "interleaved thinking",
steps: []step{
{
input: "<think>I'm thinking</think>I'm actually content</think>",
wantEvents: []qwenEvent{
qwenEventContent{content: "<think>I'm thinking</think>I'm actually content</think>"},
},
},
},
},
{
desc: "nested thinking and tool call (outside thinking, inside tool call)",
steps: []step{
{
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
wantEvents: []qwenEvent{
qwenEventContent{content: "I'm thinking"},
qwenEventRawToolCall{raw: "I'm nested tool call"},
qwenEventContent{content: "</think>"},
},
},
},
},
{
desc: "nested thinking and tool call (outside tool call, inside thinking)",
steps: []step{
{
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
wantEvents: []qwenEvent{
qwenEventRawToolCall{raw: "I'm nested tool call<think>I'm thinking</think>"},
},
},
},
},
{
desc: "interleaved thinking and tool call",
steps: []step{
{
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
wantEvents: []qwenEvent{
qwenEventContent{content: "I'm thinking"},
qwenEventRawToolCall{raw: "I'm NOT a nested tool call</think>"},
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
qwenEventContent{content: "</think>"},
},
},
},
},
{
desc: "emit unambiguous before partial tool open (trailing ws)",
steps: []step{
{
input: "abc\u00a0\n<tool_call",
wantEvents: []qwenEvent{qwenEventContent{content: "abc"}},
},
{
input: " fakeout",
wantEvents: []qwenEvent{qwenEventContent{content: "\u00a0\n<tool_call fakeout"}},
},
},
},
{
desc: "unambiguous empty: partial tool open at buffer start",
steps: []step{
{
input: "<tool_ca",
wantEvents: []qwenEvent{},
},
{
input: "ll>abc</tool_call>",
wantEvents: []qwenEvent{
qwenEventRawToolCall{raw: "abc"},
},
},
},
},
{
desc: "partial thinking tag fakeout",
steps: []step{
{
input: "abc</think",
wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}},
},
{
input: " fakeout",
wantEvents: []qwenEvent{qwenEventContent{content: " fakeout"}},
},
},
},
{
desc: "partial thinking incomplete",
steps: []step{
{
input: "abc<think>unfinished<", // the trailing "<" is ambiguous (it could start a tool call), so it is withheld
wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>unfinished"}},
},
},
},
{
desc: "test with split tool and content",
steps: []step{
{
input: "abc<tool_call>unfinished</", // the tool call body is withheld until the full closing tag arrives
wantEvents: []qwenEvent{
qwenEventContent{content: "abc"},
},
},
{
input: "tool_call> def",
wantEvents: []qwenEvent{
qwenEventRawToolCall{raw: "unfinished"},
qwenEventContent{content: "def"},
},
},
},
},
}
// If any case is marked `only`, run just the marked cases.
anyOnlies := false
for _, tc := range cases {
if tc.only {
anyOnlies = true
}
}
for _, tc := range cases {
if anyOnlies && !tc.only {
continue
}
t.Run(tc.desc, func(t *testing.T) {
parser := Qwen3VLParser{hasThinkingSupport: false}
parser.Init([]api.Tool{}, nil)
for i, step := range tc.steps {
// Write to the buffer directly and call parseEvents so the raw
// events can be inspected, rather than going through Add.
parser.buffer.WriteString(step.input)
gotEvents := parser.parseEvents()
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
// avoid deep equal on empty vs. nil slices
continue
}
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
}
}
})
}
}
// TestQwenOldParserStreaming streams each case's inputs, chunk by chunk, into a
// Qwen3VLParser created without thinking support and compares the events
// returned by parseEvents after every chunk with the expected ones.
func TestQwenOldParserStreaming(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "simple message streamed word by word",
			steps: []step{
				{input: "hi", wantEvents: []qwenEvent{qwenEventContent{content: "hi"}}},
				{input: " there", wantEvents: []qwenEvent{qwenEventContent{content: " there"}}},
			},
		},
		{
			desc: "content before tool call",
			steps: []step{
				{input: "hi there<tool_call>", wantEvents: []qwenEvent{qwenEventContent{content: "hi there"}}},
			},
		},
		{
			desc: "multiple tool calls in one message",
			steps: []step{
				{
					input: "before1<tool_call>in tool call</tool_call>after1<tool_call>in tool call 2</tool_call>after2",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before1"},
						qwenEventRawToolCall{raw: "in tool call"},
						qwenEventContent{content: "after1"},
						qwenEventRawToolCall{raw: "in tool call 2"},
						qwenEventContent{content: "after2"},
					},
				},
			},
		},
		{
			desc: "tool calls with split tags",
			steps: []step{
				{input: "before<tool", wantEvents: []qwenEvent{qwenEventContent{content: "before"}}},
				{input: "_call>in tool call</tool", wantEvents: []qwenEvent{}},
				{
					input: "_call>af",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "in tool call"},
						qwenEventContent{content: "af"},
					},
				},
				{input: "ter", wantEvents: []qwenEvent{qwenEventContent{content: "ter"}}},
			},
		},
		{
			desc: "trailing whitespace between content and tool call",
			steps: []step{
				{
					input: "abc\n<tool_call>def</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "abc"},
						qwenEventRawToolCall{raw: "def"},
					},
				},
			},
		},
		{
			desc: "trailing whitespace between tool call and content",
			steps: []step{
				{
					input: "<tool_call>abc</tool_call>\ndef",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "abc"},
						qwenEventContent{content: "def"},
					},
				},
			},
		},
		{
			desc: "empty content before tool call",
			steps: []step{
				{input: "\n<tool_call>abc</tool_call>", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "abc"}}},
			},
		},
		{
			desc: "partial tool open tag fakeout",
			steps: []step{
				{
					input: "abc\n<tool_call",
					wantEvents: []qwenEvent{
						// The "\n" must be held back for now: `<tool_call` could still
						// turn out to be a tool open tag, and in that case the
						// whitespace in front of it gets trimmed.
						qwenEventContent{content: "abc"},
					},
				},
				{input: " fakeout", wantEvents: []qwenEvent{qwenEventContent{content: "\n<tool_call fakeout"}}},
			},
		},
		{
			desc: "token-by-token whitespace handling",
			steps: []step{
				{input: "a", wantEvents: []qwenEvent{qwenEventContent{content: "a"}}},
				{input: "\n", wantEvents: []qwenEvent{}},
				{input: "b", wantEvents: []qwenEvent{qwenEventContent{content: "\nb"}}},
			},
		},
		{
			desc: "unicode content",
			steps: []step{
				{
					input: "你好 🌍<tool_call>test</tool_call>مرحبا",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "你好 🌍"},
						qwenEventRawToolCall{raw: "test"},
						qwenEventContent{content: "مرحبا"},
					},
				},
			},
		},
		{
			desc: "arabic text handling",
			steps: []step{
				{input: "مرحبا بالعالم", wantEvents: []qwenEvent{qwenEventContent{content: "مرحبا بالعالم"}}},
			},
		},
		{
			desc: "emoji passthrough",
			steps: []step{
				{input: "✅", wantEvents: []qwenEvent{qwenEventContent{content: "✅"}}},
			},
		},
		{
			desc: "emoji after tool call",
			steps: []step{
				{
					input: "<tool_call>test</tool_call>完成 ✅",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "test"},
						qwenEventContent{content: "完成 ✅"},
					},
				},
			},
		},
		{
			desc: "unicode streaming with whitespace handling",
			steps: []step{
				{input: "مرحبا", wantEvents: []qwenEvent{qwenEventContent{content: "مرحبا"}}},
				{input: " \n", wantEvents: []qwenEvent{}},
				{input: "世界", wantEvents: []qwenEvent{qwenEventContent{content: " \n世界"}}},
			},
		},
		{
			desc: "non-breaking space withheld across chunks",
			steps: []step{
				{input: "Hello\u00a0", wantEvents: []qwenEvent{qwenEventContent{content: "Hello"}}},
				{input: "world", wantEvents: []qwenEvent{qwenEventContent{content: "\u00a0world"}}},
			},
		},
		{
			desc: "ideographic space before partial tool",
			steps: []step{
				{input: "Hello\u3000<tool", wantEvents: []qwenEvent{qwenEventContent{content: "Hello"}}},
				{input: "_call>abc", wantEvents: []qwenEvent{}},
				{
					input: "</tool_call>def",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "abc"},
						qwenEventContent{content: "def"},
					},
				},
			},
		},
		{
			desc: "ideographic space before partial tool fakeout",
			steps: []step{
				{input: "Hello\u3000<tool", wantEvents: []qwenEvent{qwenEventContent{content: "Hello"}}},
				{input: "fakeout>abc", wantEvents: []qwenEvent{qwenEventContent{content: "\u3000<toolfakeout>abc"}}},
			},
		},
		{
			desc: "unicode with partial tool tag",
			steps: []step{
				{input: "测试🎯 <to", wantEvents: []qwenEvent{qwenEventContent{content: "测试🎯"}}},
			},
		},
	}

	// Flagging any case with `only` restricts the run to the flagged cases.
	focused := false
	for i := range cases {
		focused = focused || cases[i].only
	}

	for _, tc := range cases {
		if focused && !tc.only {
			continue
		}
		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: false}
			parser.Init([]api.Tool{}, nil)
			for idx, st := range tc.steps {
				parser.buffer.WriteString(st.input)
				got := parser.parseEvents()
				// reflect.DeepEqual tells nil and empty slices apart, so treat
				// "nothing emitted, nothing expected" as a match up front.
				if len(got) == 0 && len(st.wantEvents) == 0 {
					continue
				}
				if reflect.DeepEqual(got, st.wantEvents) {
					continue
				}
				t.Errorf("step %d: input %q: got events %#v, want %#v", idx, st.input, got, st.wantEvents)
			}
		})
	}
}
// TestQwen3VLNonThinkingToolParser checks that parseJSONToolCall converts the
// raw JSON payload of a tool call into the expected api.ToolCall. Every entry
// runs as its own subtest so a single payload can be selected with -run and
// failures are reported under the entry's name.
func TestQwen3VLNonThinkingToolParser(t *testing.T) {
	type step struct {
		name         string
		rawToolCall  string
		tools        []api.Tool
		wantToolCall api.ToolCall
	}

	steps := []step{
		{
			name:        "simple tool call",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "get-current-weather",
					Arguments: map[string]any{
						"location": "San Francisco, CA",
						"unit":     "fahrenheit",
					},
				},
			},
		},
		{
			name:        "names with spaces",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "get current temperature",
					Arguments: map[string]any{
						"location with spaces": "San Francisco",
						"unit with spaces":     "celsius",
					},
				},
			},
		},
		{
			name:        "names with quotes",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "\"get current temperature\"",
					Arguments: map[string]any{
						"\"location with spaces\"": "San Francisco",
						"\"unit with spaces\"":     "\"celsius\"",
					},
				},
			},
		},
		{
			name:        "tool call with typed parameters (json types)",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "calculate",
					Arguments: map[string]any{
						"x":       3.14,
						"y":       float64(42),
						"enabled": true,
						"items":   []any{"a", "b", "c"},
					},
				},
			},
		},
		{
			name:        "ampersands in parameter values",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "exec",
					Arguments: map[string]any{
						"command": "ls && echo \"done\"",
					},
				},
			},
		},
		{
			name:        "angle brackets in parameter values",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "exec",
					Arguments: map[string]any{
						"command": "ls && echo \"a > b and a < b\"",
					},
				},
			},
		},
		{
			name:        "unicode in function names and parameters",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "获取天气",
					Arguments: map[string]any{
						"城市":      "北京",
						"message": "Hello! 你好! 🌟 مرحبا",
					},
				},
			},
		},
	}

	for i, step := range steps {
		t.Run(step.name, func(t *testing.T) {
			gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
			if err != nil {
				// The returned tool call carries no meaning once parsing failed;
				// stop here rather than also reporting a guaranteed mismatch.
				t.Fatalf("step %d (%s): %v", i, step.name, err)
			}
			if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
				t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
			}
		})
	}
}
// TestQwen3VLNonThinkingToolCallWhitespaceHandling pins how a Qwen3VLParser
// without thinking support treats whitespace around and inside tool calls: the
// expectations below show whitespace inside <tool_call>…</tool_call> kept
// verbatim, and whitespace directly before the open tag or directly after the
// close tag dropped.
//
// Case descriptions are unique so each subtest can be addressed with -run.
func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "whitespace inside tool call preserves trailing space",
			steps: []step{
				{
					input: "before<tool_call> tool content </tool_call>after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "leading content whitespace kept, whitespace around tool call trimmed",
			steps: []step{
				{
					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> tool content </tool_call> \n\n\n\n\n\n\n after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "whitespace after tool call trimmed across chunks",
			steps: []step{
				{
					input: "<tool_call> tool content </tool_call> ",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: " tool content "},
					},
				},
				{
					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> anotha one </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " anotha one "},
						qwenEventContent{content: "after \n\n\n\n\n\n blep"},
					},
				},
			},
		},
		{
			desc: "whitespace between content and tool call",
			steps: []step{
				{
					input: "content \n <tool_call>tool</tool_call> \n more content",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
						qwenEventRawToolCall{raw: "tool"},
						qwenEventContent{content: "more content"},
					},
				},
			},
		},
		{
			desc: "consecutive tool calls with whitespace",
			steps: []step{
				{
					input: "<tool_call>first</tool_call> \n <tool_call>second</tool_call> \n <tool_call>third</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "first"},
						qwenEventRawToolCall{raw: "second"},
						qwenEventRawToolCall{raw: "third"},
					},
				},
			},
		},
		{
			desc: "whitespace before and after tool open tag",
			steps: []step{
				{
					input: "text \n <tool_call>content</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "text"},
						qwenEventRawToolCall{raw: "content"},
					},
				},
			},
		},
		{
			desc: "unicode whitespace around tool calls",
			steps: []step{
				{
					input: "text\u00a0\u3000<tool_call>content</tool_call>\u00a0\u3000text",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "text"},
						qwenEventRawToolCall{raw: "content"},
						qwenEventContent{content: "text"},
					},
				},
			},
		},
		{
			desc: "empty tool call with surrounding whitespace",
			steps: []step{
				{
					input: "before <tool_call></tool_call> after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: ""},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "whitespace in tool call split across chunks",
			steps: []step{
				{
					input:      "before<tool_call> ",
					wantEvents: []qwenEvent{qwenEventContent{content: "before"}},
				},
				{
					input:      "tool",
					wantEvents: []qwenEvent{},
				},
				{
					input: " </tool_call>after",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: " tool "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "mixed whitespace types between tool calls",
			steps: []step{
				{
					input: "<tool_call>first</tool_call> \t\n\r <tool_call>second</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "first"},
						qwenEventRawToolCall{raw: "second"},
					},
				},
			},
		},
	}

	// Setting `only` on any case restricts the run to the flagged cases.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: false}
			parser.Init([]api.Tool{}, nil)

			for i, step := range tc.steps {
				parser.buffer.WriteString(step.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
				}
			}
		})
	}
}

View File

@@ -0,0 +1,878 @@
package parsers
import (
"reflect"
"testing"
"github.com/ollama/ollama/api"
)
// TestQwen3VLThinkingParserStreaming streams chunked model output into a
// Qwen3VLParser created with thinking support and no previous message, and
// checks the events returned by parseEvents after each chunk. In this setup the
// parser starts out collecting thinking content, so text before the first
// "</think>" is reported as thinking and a literal "<think>" is not treated as
// a tag.
func TestQwen3VLThinkingParserStreaming(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "simple thinking",
			steps: []step{
				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
			},
		},
		{
			desc: "simple trip thinking",
			steps: []step{
				{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "<think>abc"}}},
			},
		},
		{
			desc: "thinking with split tags",
			steps: []step{
				{input: "abc", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
				{input: "</think>", wantEvents: []qwenEvent{}},
			},
		},
		{
			desc: "multiple think tags",
			steps: []step{
				{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>actually, is not thinking"}}},
			},
		},
		{
			desc: "thinking and tool call",
			steps: []step{
				{
					input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "I'm thinking"},
						qwenEventRawToolCall{raw: "I'm tool calling"},
					},
				},
			},
		},
		{
			desc: "thinking and content",
			steps: []step{
				{
					input: "I'm thinking</think>I'm content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "I'm thinking"},
						qwenEventContent{content: "I'm content"},
					},
				},
			},
		},
		{
			// Combines the two cases above: thinking, then a tool call, then
			// trailing content, all in one chunk.
			desc: "thinking and tool call and content",
			steps: []step{
				{
					input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>I'm content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "I'm thinking"},
						qwenEventRawToolCall{raw: "I'm tool calling"},
						qwenEventContent{content: "I'm content"},
					},
				},
			},
		},
		{
			desc: "nested thinking (outside thinking, inside thinking)",
			steps: []step{
				{
					input: "I'm thinking<think>I'm nested thinking</think></think>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "I'm thinking<think>I'm nested thinking"},
						qwenEventContent{content: "</think>"},
					},
				},
			},
		},
		{
			desc: "interleaved thinking",
			steps: []step{
				{
					input: "<think>I'm thinking</think>I'm actually content</think>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "<think>I'm thinking"},
						qwenEventContent{content: "I'm actually content</think>"},
					},
				},
			},
		},
		{
			desc: "nested thinking and tool call (outside thinking, inside tool call)",
			steps: []step{
				{
					input:      "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
				},
			},
		},
		{
			desc: "nested thinking and tool call (outside tool call, inside thinking)",
			steps: []step{
				{
					input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
						qwenEventContent{content: "</tool_call>"},
					},
				},
			},
		},
		{
			desc: "interleaved thinking and tool call",
			steps: []step{
				{
					input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
						qwenEventContent{content: "</tool_call>"},
						qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
						qwenEventContent{content: "</think>"},
					},
				},
			},
		},
		{
			desc: "partial thinking tag fakeout",
			steps: []step{
				{
					input:      "abc</think",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}},
				},
				{
					input:      " fakeout",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}},
				},
			},
		},
		{
			desc: "partial thinking incomplete",
			steps: []step{
				{
					// The trailing "</think" is ambiguous (it may become a close
					// tag), so only the text in front of it is emitted.
					input:      "abc<think>unfinished</think",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
				},
			},
		},
		{
			desc: "test with split thinking and content",
			steps: []step{
				{
					// The trailing "</th" is ambiguous (it may become a close
					// tag), so only the text in front of it is emitted.
					input:      "abc<think>unfinished</th",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
				},
				{
					input: "ink> def",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "def"},
					},
				},
			},
		},
		{
			desc: "thinking with no tags",
			steps: []step{
				{
					input: "Hello I am thinking",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "Hello I am thinking"},
					},
				},
				{
					input: "Hello I am thinking some more",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "Hello I am thinking some more"},
					},
				},
				{
					input: "Hello I am think</think> NOT",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "Hello I am think"},
						qwenEventContent{content: "NOT"},
					},
				},
			},
		},
	}

	// Setting `only` on any case restricts the run to the flagged cases.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, nil)

			for i, step := range tc.steps {
				parser.buffer.WriteString(step.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
				}
			}
		})
	}
}
// TestQwen3VLThinkingToolParser checks that parseJSONToolCall converts the raw
// JSON payload of a tool call into the expected api.ToolCall. Every entry runs
// as its own subtest so a single payload can be selected with -run and failures
// are reported under the entry's name.
func TestQwen3VLThinkingToolParser(t *testing.T) {
	type step struct {
		name         string
		rawToolCall  string
		tools        []api.Tool
		wantToolCall api.ToolCall
	}

	steps := []step{
		{
			name:        "simple tool call",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "get-current-weather",
					Arguments: map[string]any{
						"location": "San Francisco, CA",
						"unit":     "fahrenheit",
					},
				},
			},
		},
		{
			name:        "names with spaces",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "get current temperature",
					Arguments: map[string]any{
						"location with spaces": "San Francisco",
						"unit with spaces":     "celsius",
					},
				},
			},
		},
		{
			name:        "names with quotes",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "\"get current temperature\"",
					Arguments: map[string]any{
						"\"location with spaces\"": "San Francisco",
						"\"unit with spaces\"":     "\"celsius\"",
					},
				},
			},
		},
		{
			name:        "tool call with typed parameters (json types)",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "calculate",
					Arguments: map[string]any{
						"x":       3.14,
						"y":       float64(42),
						"enabled": true,
						"items":   []any{"a", "b", "c"},
					},
				},
			},
		},
		{
			name:        "ampersands in parameter values",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "exec",
					Arguments: map[string]any{
						"command": "ls && echo \"done\"",
					},
				},
			},
		},
		{
			name:        "angle brackets in parameter values",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "exec",
					Arguments: map[string]any{
						"command": "ls && echo \"a > b and a < b\"",
					},
				},
			},
		},
		{
			name:        "unicode in function names and parameters",
			tools:       []api.Tool{},
			rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
			wantToolCall: api.ToolCall{
				Function: api.ToolCallFunction{
					Name: "获取天气",
					Arguments: map[string]any{
						"城市":      "北京",
						"message": "Hello! 你好! 🌟 مرحبا",
					},
				},
			},
		},
	}

	for i, step := range steps {
		t.Run(step.name, func(t *testing.T) {
			gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
			if err != nil {
				// The returned tool call carries no meaning once parsing failed;
				// stop here rather than also reporting a guaranteed mismatch.
				t.Fatalf("step %d (%s): %v", i, step.name, err)
			}
			if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
				t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
			}
		})
	}
}
// TestQwen3VLParserState verifies the state a Qwen3VLParser is left in right
// after Init, for several combinations of thinking support and last message.
func TestQwen3VLParserState(t *testing.T) {
	type scenario struct {
		desc        string
		hasThinking bool
		last        *api.Message
		wantState   qwenParserState
	}

	scenarios := []scenario{
		{desc: "no thinking support => CollectingContent", hasThinking: false, last: nil, wantState: CollectingContent},
		{desc: "thinking support, no last message => CollectingThinkingContent", hasThinking: true, last: nil, wantState: CollectingThinkingContent},
		{desc: "thinking support, last assistant with empty content => CollectingThinkingContent", hasThinking: true, last: &api.Message{Role: "assistant", Content: ""}, wantState: CollectingThinkingContent},
		{desc: "thinking support, last assistant with content => CollectingContent", hasThinking: true, last: &api.Message{Role: "assistant", Content: "hello"}, wantState: CollectingContent},
		{desc: "thinking support, last is user => CollectingThinkingContent", hasThinking: true, last: &api.Message{Role: "user", Content: "hi"}, wantState: CollectingThinkingContent},
	}

	for _, sc := range scenarios {
		p := Qwen3VLParser{hasThinkingSupport: sc.hasThinking}
		p.Init(nil, sc.last)
		if got := p.state; got != sc.wantState {
			t.Errorf("%s: got state %v, want %v", sc.desc, got, sc.wantState)
		}
	}
}
// TestQwen3VLThinkingParserWithThinkingPrefill streams chunks into a
// thinking-capable Qwen3VLParser that was initialised with an assistant prefill
// carrying thinking text but no content. The expectations show the parser still
// collecting thinking content until "</think>" is seen.
func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "thinking prefill",
			steps: []step{
				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
			},
		},
		{
			desc: "thinking prefill with content",
			steps: []step{
				{input: "abc</th", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
				{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "def"}}},
			},
		},
		{
			desc: "thinking prefill with fakeout",
			steps: []step{
				{input: "abc</think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
				{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}}},
				{input: ">", wantEvents: []qwenEvent{}},
			},
		},
		{
			desc: "thinking prefill with spaces",
			steps: []step{
				{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: "starting content"}}},
			},
		},
	}

	// The prefill has thinking but no content, so the assistant turn is treated
	// as still being in its thinking phase.
	last := &api.Message{Role: "assistant", Thinking: "i am thinking"}

	// Setting `only` on any case restricts the run to the flagged cases, as in
	// the sibling streaming tests.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, last)

			for i, step := range tc.steps {
				parser.buffer.WriteString(step.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
				}
			}
		})
	}
}
// TestQwen3VLThinkingParserWithNonThinkingPrefill streams chunks into a
// thinking-capable Qwen3VLParser that was initialised with an assistant prefill
// that already has content. The expectations show everything reported as plain
// content, with "</think>" passed through verbatim rather than treated as a
// tag.
func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "thinking prefill",
			steps: []step{
				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
			},
		},
		{
			desc: "thinking prefill with content",
			steps: []step{
				{input: "abc</th", wantEvents: []qwenEvent{qwenEventContent{content: "abc</th"}}},
				{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "ink> def"}}},
			},
		},
		{
			desc: "thinking prefill with fakeout",
			steps: []step{
				{input: "abc</think", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}}},
				{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventContent{content: " fakeout </think"}}},
				{input: ">", wantEvents: []qwenEvent{qwenEventContent{content: ">"}}},
			},
		},
		{
			desc: "thinking prefill with spaces",
			steps: []step{
				{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: " </think> starting content"}}},
			},
		},
	}

	// The prefill already carries content alongside its thinking, so the
	// assistant turn is past its thinking phase.
	last := &api.Message{Role: "assistant", Thinking: "i am thinking", Content: "i am content"}

	// Setting `only` on any case restricts the run to the flagged cases, as in
	// the sibling streaming tests.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, last)

			for i, step := range tc.steps {
				parser.buffer.WriteString(step.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
				}
			}
		})
	}
}
func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
// last message is assistant with content ⇒ start in CollectingContent
last := &api.Message{Role: "assistant", Content: "has content"}
parser := Qwen3VLParser{hasThinkingSupport: true}
parser.Init([]api.Tool{}, last)
type step struct {
input string
wantEvents []qwenEvent
}
steps := []step{
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
{input: "<tool_call>{\"name\": \"x\", \"arguments\": {}}</tool_call>", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "{\"name\": \"x\", \"arguments\": {}}"}}},
}
for i, s := range steps {
parser.buffer.WriteString(s.input)
gotEvents := parser.parseEvents()
if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
continue
}
if !reflect.DeepEqual(gotEvents, s.wantEvents) {
t.Fatalf("step %d: input %q: got %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
}
}
}
// TestQwen3VLThinkingWhitespaceHandling pins how a thinking-capable
// Qwen3VLParser treats whitespace around the "</think>" tag and around tool
// calls. The expectations below show whitespace on both sides of "</think>"
// dropped, whitespace inside <tool_call>…</tool_call> kept verbatim, and
// trailing content whitespace held back until more text arrives.
//
// Case descriptions are unique and match what each case asserts, so every
// subtest can be addressed with -run.
func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) {
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	cases := []struct {
		desc  string
		steps []step
		only  bool
	}{
		{
			desc: "whitespace after thinking tag is trimmed",
			steps: []step{
				{
					input: "thinking content</think> \n\t content starts here",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking content"},
						qwenEventContent{content: "content starts here"},
					},
				},
			},
		},
		{
			desc: "whitespace after thinking tag split across chunks",
			steps: []step{
				{
					input:      "thinking content</think> ",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
				},
				{
					input:      " \n\t",
					wantEvents: []qwenEvent{},
				},
				{
					input: "content",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "only whitespace after thinking tag",
			steps: []step{
				{
					input:      "thinking content</think> \n\t ",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
				},
			},
		},
		{
			desc: "multiple spaces and tabs after thinking",
			steps: []step{
				{
					input: "think</think> \t\t\n\n text",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "think"},
						qwenEventContent{content: "text"},
					},
				},
			},
		},
		{
			desc: "trailing whitespace before thinking close tag is trimmed",
			steps: []step{
				{
					input: "thinking with spaces </think>text",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking with spaces"},
						qwenEventContent{content: "text"},
					},
				},
			},
		},
		{
			desc: "whitespace between thinking and tool call",
			steps: []step{
				{
					input: "thinking</think> \n <tool_call>{\"name\":\"test\"}</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventRawToolCall{raw: "{\"name\":\"test\"}"},
					},
				},
			},
		},
		{
			desc: "no whitespace after thinking tag",
			steps: []step{
				{
					input: "thinking</think>content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "unicode whitespace after thinking tag",
			steps: []step{
				{
					input: "thinking</think>\u00a0\u3000content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "whitespace split with partial thinking tag",
			steps: []step{
				{
					input:      "thinking</th",
					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking"}},
				},
				{
					input:      "ink> \n",
					wantEvents: []qwenEvent{},
				},
				{
					input: " content",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "empty thinking tag with whitespace after",
			steps: []step{
				{
					input: "</think> \ncontent",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "content"},
					},
				},
			},
		},
		{
			desc: "whitespace inside tool call preserves trailing space",
			steps: []step{
				{
					input: "bruh</think> \n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> tool content </tool_call> \n\n\n\n\n\n\n after",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "bruh"},
						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc: "content and tool call whitespace across chunks",
			steps: []step{
				{
					input: "bruh</think> shdjfhksdhfj ",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "bruh"},
						qwenEventContent{content: "shdjfhksdhfj"},
					},
				},
				{
					// The space held back from the previous chunk is released in
					// front of the new text.
					input: "another word ",
					wantEvents: []qwenEvent{
						qwenEventContent{content: " another word"},
					},
				},
				{
					input: "<tool_call> tool content </tool_call> ",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: " tool content "},
					},
				},
				{
					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> anotha one </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
						qwenEventRawToolCall{raw: " anotha one "},
						qwenEventContent{content: "after \n\n\n\n\n\n blep"},
					},
				},
			},
		},
	}

	// Setting `only` on any case restricts the run to the flagged cases.
	anyOnlies := false
	for _, tc := range cases {
		if tc.only {
			anyOnlies = true
		}
	}

	for _, tc := range cases {
		if anyOnlies && !tc.only {
			continue
		}

		t.Run(tc.desc, func(t *testing.T) {
			parser := Qwen3VLParser{hasThinkingSupport: true}
			parser.Init([]api.Tool{}, nil)

			for i, step := range tc.steps {
				parser.buffer.WriteString(step.input)
				gotEvents := parser.parseEvents()

				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
					// avoid deep equal on empty vs. nil slices
					continue
				}

				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
				}
			}
		})
	}
}
// TestQwen3VLToolCallWhitespaceHandling feeds chunks of model output into a
// Qwen3VLParser and compares the events emitted after each chunk against the
// expected list. The cases focus on whitespace in and around <tool_call> tags,
// both with and without an assistant prefill message.
func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) {
	// step is one chunk appended to the parser buffer plus the events that
	// parseEvents is expected to return right after it.
	type step struct {
		input      string
		wantEvents []qwenEvent
	}

	type testCase struct {
		desc       string
		steps      []step
		only       bool         // when set on any case, only flagged cases run
		prefillMsg *api.Message // allows starting in content mode instead of thinking mode
	}

	// contentPrefill builds a new assistant prefill message on every call so
	// that no two cases share the same pointer.
	contentPrefill := func() *api.Message {
		return &api.Message{Role: "assistant", Content: "prefill"}
	}

	cases := []testCase{
		{
			desc:       "whitespace inside tool call is fully preserved (with content prefill)",
			prefillMsg: contentPrefill(),
			steps: []step{
				{
					input: "before<tool_call> tool content </tool_call> \n after",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: " tool content "},
						qwenEventContent{content: "after"},
					},
				},
			},
		},
		{
			desc:       "whitespace after tool call trimmed across chunks (with content prefill)",
			prefillMsg: contentPrefill(),
			steps: []step{
				{
					input: "before<tool_call>tool</tool_call> ",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "before"},
						qwenEventRawToolCall{raw: "tool"},
					},
				},
				{
					// Whitespace-only chunk: no events are expected.
					input:      "\n\t",
					wantEvents: []qwenEvent{},
				},
				{
					input: "after \n this is a song",
					wantEvents: []qwenEvent{
						qwenEventContent{content: "after \n this is a song"},
					},
				},
			},
		},
		{
			desc:       "multiple tool calls with whitespace between (with content prefill)",
			prefillMsg: contentPrefill(),
			steps: []step{
				{
					input: "<tool_call>first</tool_call> \n <tool_call>second</tool_call>",
					wantEvents: []qwenEvent{
						qwenEventRawToolCall{raw: "first"},
						qwenEventRawToolCall{raw: "second"},
					},
				},
			},
		},
		{
			desc: "thinking with whitespace then tool call",
			steps: []step{
				{
					input: "thinking</think> \n <tool_call>tool</tool_call> \n content",
					wantEvents: []qwenEvent{
						qwenEventThinkingContent{content: "thinking"},
						qwenEventRawToolCall{raw: "tool"},
						qwenEventContent{content: "content"},
					},
				},
			},
		},
	}

	// runCase drives a fresh parser through every step of one case.
	runCase := func(t *testing.T, c testCase) {
		p := Qwen3VLParser{hasThinkingSupport: true}
		p.Init([]api.Tool{}, c.prefillMsg)

		for idx, s := range c.steps {
			p.buffer.WriteString(s.input)
			got := p.parseEvents()

			// An empty result matches an empty expectation regardless of
			// whether either slice is nil, so skip DeepEqual in that case.
			bothEmpty := len(got) == 0 && len(s.wantEvents) == 0
			if bothEmpty || reflect.DeepEqual(got, s.wantEvents) {
				continue
			}
			t.Errorf("step %d: input %q: got events %#v, want %#v", idx, s.input, got, s.wantEvents)
		}
	}

	// If any case is marked "only", restrict the run to the marked cases.
	focused := false
	for _, c := range cases {
		if c.only {
			focused = true
			break
		}
	}

	for _, c := range cases {
		if !focused || c.only {
			t.Run(c.desc, func(t *testing.T) {
				runCase(t, c)
			})
		}
	}
}