mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-18 11:47:07 +00:00
Sync with upstream ollama/ollama and restore Tesla K80 (compute 3.7) support
This commit represents a complete rework after pulling the latest changes from official ollama/ollama repository and re-applying Tesla K80 compatibility patches. ## Key Changes ### CUDA Compute Capability 3.7 Support (Tesla K80) - Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt - Updated CMakePresets.json to include compute 3.7 in "CUDA 11" preset - Using 37-virtual (PTX with JIT compilation) for maximum compatibility ### Legacy Toolchain Compatibility - **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80) - **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7) - **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h) ### CPU Architecture Trade-offs Due to GCC 10.5 limitation, sacrificed newer CPU optimizations: - Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+) - Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA - Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility) ### Build System Updates - Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7 - Added -Wno-deprecated-gpu-targets flag to suppress warnings - Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI ### Upstream Sync Merged latest llama.cpp changes including: - Enhanced KV cache management with ISWA and hybrid memory support - Improved multi-modal support (mtmd framework) - New model architectures (Gemma3, Llama4, Qwen3, etc.) - GPU backend improvements for CUDA, Metal, and ROCm - Updated quantization support and GGUF format handling ### Documentation - Updated CLAUDE.md with comprehensive build instructions - Documented toolchain constraints and CPU architecture trade-offs - Removed outdated CI/CD workflows (tesla-k80-*.yml) - Cleaned up temporary development artifacts ## Rationale This fork maintains Tesla K80 GPU support (compute 3.7) which was dropped in official Ollama due to legacy driver/CUDA requirements. 
The toolchain constraints form a fixed dependency chain: - K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
76
model/parsers/parsers.go
Normal file
76
model/parsers/parsers.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/harmony"
|
||||
)
|
||||
|
||||
// Parser is the interface implemented by the output parsers in this package
// (for example PassthroughParser) and by every value returned from
// ParserForName.
type Parser interface {
	// Init initializes the parser with tools and optional last message for chat prefill
	// Returns processed tools if the parser needs to modify them (e.g., harmony renames them)
	Init(tools []api.Tool, lastMessage *api.Message) []api.Tool
	// Add processes streamed content and returns parsed content, thinking, and tool calls
	// The done flag indicates if this is the last chunk (used for draining accumulators)
	Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error)
	// HasToolSupport presumably reports whether the parser can produce tool
	// calls — NOTE(review): confirm against the callers of this interface.
	HasToolSupport() bool
	// HasThinkingSupport presumably reports whether the parser can produce
	// thinking output — NOTE(review): confirm against the callers.
	HasThinkingSupport() bool
}
|
||||
|
||||
// ParserConstructor is a factory for Parser values. ParserForName invokes the
// registered constructor on every lookup and returns its result.
type ParserConstructor func() Parser
|
||||
|
||||
// ParserRegistry maps parser names to the constructors that build them.
type ParserRegistry struct {
	// constructors is keyed by the name passed to Register.
	constructors map[string]ParserConstructor
}
|
||||
|
||||
func (r *ParserRegistry) Register(name string, constructor ParserConstructor) {
|
||||
r.constructors[name] = constructor
|
||||
}
|
||||
|
||||
// registry is the package-level ParserRegistry used by Register and consulted
// first by ParserForName. Its map is allocated up front.
var registry = ParserRegistry{
	constructors: make(map[string]ParserConstructor),
}
|
||||
|
||||
// Register stores constructor under name in the package-level registry.
// Because ParserForName checks the registry before its built-in names, a
// registration under a built-in name takes precedence over the built-in.
func Register(name string, constructor ParserConstructor) {
	registry.Register(name, constructor)
}
|
||||
|
||||
func ParserForName(name string) Parser {
|
||||
if parser, ok := registry.constructors[name]; ok {
|
||||
return parser()
|
||||
}
|
||||
switch name {
|
||||
case "qwen3-coder":
|
||||
parser := &Qwen3CoderParser{}
|
||||
return parser
|
||||
case "qwen3-vl-instruct":
|
||||
parser := &Qwen3VLParser{hasThinkingSupport: false}
|
||||
return parser
|
||||
case "qwen3-vl-thinking":
|
||||
parser := &Qwen3VLParser{hasThinkingSupport: true}
|
||||
return parser
|
||||
case "passthrough":
|
||||
return &PassthroughParser{}
|
||||
case "harmony":
|
||||
return harmony.NewHarmonyMessageHandler()
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
type PassthroughParser struct{}
|
||||
|
||||
func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
|
||||
return tools // passthrough doesn't modify tools
|
||||
}
|
||||
|
||||
func (p *PassthroughParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
||||
return s, "", nil, nil
|
||||
}
|
||||
|
||||
func (p *PassthroughParser) HasToolSupport() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (p *PassthroughParser) HasThinkingSupport() bool {
|
||||
return false
|
||||
}
|
||||
97
model/parsers/parsers_test.go
Normal file
97
model/parsers/parsers_test.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
// mockParser is a minimal Parser used by the registry tests. Its Add output is
// prefixed with "mock:" so tests can tell it apart from the built-in parsers.
type mockParser struct {
	// name is set by the tests when constructing the mock; no method reads it.
	name string
}

// Init returns tools unchanged.
func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	return tools
}

// Add returns "mock:" followed by s as content, and nothing else.
func (m *mockParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
	return "mock:" + s, "", nil, nil
}

// HasToolSupport always returns false.
func (m *mockParser) HasToolSupport() bool {
	return false
}

// HasThinkingSupport always returns false.
func (m *mockParser) HasThinkingSupport() bool {
	return false
}
|
||||
|
||||
func TestRegisterCustomParser(t *testing.T) {
|
||||
// Register a custom parser
|
||||
Register("custom-parser", func() Parser {
|
||||
return &mockParser{name: "custom"}
|
||||
})
|
||||
|
||||
// Retrieve it
|
||||
parser := ParserForName("custom-parser")
|
||||
if parser == nil {
|
||||
t.Fatal("expected parser to be registered")
|
||||
}
|
||||
|
||||
// Test it works
|
||||
content, _, _, err := parser.Add("test", false)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if content != "mock:test" {
|
||||
t.Errorf("expected 'mock:test', got %q", content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuiltInParsersStillWork(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
}{
|
||||
{"passthrough"},
|
||||
{"qwen3-coder"},
|
||||
{"harmony"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
parser := ParserForName(tt.name)
|
||||
if parser == nil {
|
||||
t.Fatalf("expected built-in parser %q to exist", tt.name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOverrideBuiltInParser(t *testing.T) {
|
||||
// Override a built-in parser
|
||||
Register("passthrough", func() Parser {
|
||||
return &mockParser{name: "override"}
|
||||
})
|
||||
|
||||
// Should get the override
|
||||
parser := ParserForName("passthrough")
|
||||
if parser == nil {
|
||||
t.Fatal("expected parser to exist")
|
||||
}
|
||||
|
||||
// Test it's the override
|
||||
content, _, _, err := parser.Add("test", false)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if content != "mock:test" {
|
||||
t.Errorf("expected 'mock:test' from override, got %q", content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnknownParserReturnsNil(t *testing.T) {
|
||||
parser := ParserForName("nonexistent-parser")
|
||||
if parser != nil {
|
||||
t.Error("expected nil for unknown parser")
|
||||
}
|
||||
}
|
||||
472
model/parsers/qwen3coder.go
Normal file
472
model/parsers/qwen3coder.go
Normal file
@@ -0,0 +1,472 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
)
|
||||
|
||||
// qwenParserState identifies which state a qwen streaming parser is in.
type qwenParserState int
|
||||
|
||||
// Literal tags that delimit a tool call in the model output.
const (
	toolOpenTag  = "<tool_call>"
	toolCloseTag = "</tool_call>"
)
|
||||
|
||||
// States used by Qwen3CoderParser. The zero value is
// qwenParserState_LookingForToolStart, so a zero-value parser starts there.
const (
	// qwenParserState_LookingForToolStart: emitting content while watching for
	// toolOpenTag.
	qwenParserState_LookingForToolStart qwenParserState = iota
	// qwenParserState_CollectingToolContent: buffering tool call text until
	// toolCloseTag is seen.
	qwenParserState_CollectingToolContent
)
|
||||
|
||||
// Qwen3CoderParser is a streaming parser that separates plain content from
// tool calls wrapped in <tool_call>…</tool_call> tags.
type Qwen3CoderParser struct {
	// state is the current parser state; see the qwenParserState_* constants.
	state qwenParserState
	// acc holds streamed text that has not yet been emitted as an event.
	acc strings.Builder
	// tools is the tool list passed to Init; it is handed to parseToolCall to
	// look up declared parameter types.
	tools []api.Tool
}

// HasToolSupport always returns true.
func (p *Qwen3CoderParser) HasToolSupport() bool {
	return true
}

// HasThinkingSupport always returns false.
func (p *Qwen3CoderParser) HasThinkingSupport() bool {
	return false
}

// Init records tools for later tool call parsing and returns them unchanged.
// lastMessage is ignored.
func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	p.tools = tools
	return tools // Qwen doesn't modify tools
}
|
||||
|
||||
func (p *Qwen3CoderParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
||||
p.acc.WriteString(s)
|
||||
|
||||
events := p.parseEvents()
|
||||
|
||||
var toolCalls []api.ToolCall
|
||||
var sb strings.Builder
|
||||
for _, event := range events {
|
||||
switch event := event.(type) {
|
||||
case qwenEventRawToolCall:
|
||||
toolCall, err := parseToolCall(event, p.tools)
|
||||
if err != nil {
|
||||
slog.Warn("qwen tool call parsing failed", "error", err)
|
||||
return "", "", nil, err
|
||||
}
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
case qwenEventContent:
|
||||
// TODO(drifkin): if the same turn contains multiple interleaved content
|
||||
// events, we naively append them together here. See the note below about
|
||||
// `qwenEvent`s for more details
|
||||
sb.WriteString(event.content)
|
||||
}
|
||||
}
|
||||
|
||||
return sb.String(), "", toolCalls, nil
|
||||
}
|
||||
|
||||
func (p *Qwen3CoderParser) parseEvents() []qwenEvent {
|
||||
var all []qwenEvent
|
||||
|
||||
keepLooping := true
|
||||
for keepLooping {
|
||||
var events []qwenEvent
|
||||
events, keepLooping = eat(p)
|
||||
if len(events) > 0 {
|
||||
all = append(all, events...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(all) > 0 {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "acc", p.acc.String())
|
||||
}
|
||||
|
||||
return all
|
||||
}
|
||||
|
||||
// we use some internal event types in order to communicate between `Add` and
|
||||
// `eat`. We do this to support interleaving content and parallel tool calls in
|
||||
// the parser, even though qwen3-coder isn't supposed to do this. Our API
|
||||
// doesn't currently support models outputting multiple messages in a turn, so
|
||||
// we wouldn't be able to represent it yet, but there's no reason to prevent the
|
||||
// parser from supporting it, especially for future models if they end up using
|
||||
// a similar format.
|
||||
// qwenEvent is a closed set of event types; isQwenEvent is an unexported
// marker method so only types in this package can satisfy it.
type qwenEvent interface {
	isQwenEvent()
}

// qwenEventRawToolCall carries the unparsed text found between the tool open
// and close tags.
type qwenEventRawToolCall struct {
	raw string
}

// qwenEventContent carries plain content text.
type qwenEventContent struct {
	content string
}

// Marker methods that make the event types satisfy qwenEvent.
func (qwenEventContent) isQwenEvent() {}

func (qwenEventRawToolCall) isQwenEvent() {}
|
||||
|
||||
// eat consumes the parser's buffer, and returns a list of any unambiguous
|
||||
// events from the current parser state. If the parser transitions to another
|
||||
// state, it may have additional events to emit on the next call, which is what
|
||||
// the second return value indicates
|
||||
func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
|
||||
var events []qwenEvent
|
||||
|
||||
switch p.state {
|
||||
case qwenParserState_LookingForToolStart:
|
||||
if strings.Contains(p.acc.String(), toolOpenTag) {
|
||||
// we found a full tool open tag, so we can emit the content before the
|
||||
// tag, being sure to trim any trailing whitespace
|
||||
split := strings.SplitN(p.acc.String(), toolOpenTag, 2)
|
||||
before := split[0]
|
||||
before = strings.TrimRightFunc(before, unicode.IsSpace)
|
||||
if len(before) > 0 {
|
||||
events = append(events, qwenEventContent{content: before})
|
||||
}
|
||||
after := split[1]
|
||||
p.acc.Reset()
|
||||
p.acc.WriteString(after)
|
||||
p.state = qwenParserState_CollectingToolContent
|
||||
return events, true
|
||||
} else if overlap := overlap(p.acc.String(), toolOpenTag); overlap > 0 {
|
||||
// we found a partial tool open tag, so we can emit the unambiguous part,
|
||||
// which is the (trailing-whitespace trimmed) content before the partial
|
||||
// tool open tag
|
||||
beforePartialTag := p.acc.String()[:len(p.acc.String())-overlap]
|
||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||
unambiguous := p.acc.String()[:ambiguousStart]
|
||||
ambiguous := p.acc.String()[ambiguousStart:]
|
||||
p.acc.Reset()
|
||||
p.acc.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
} else {
|
||||
// we found content that is entirely not a tool call. We should withhold
|
||||
// any trailing whitespace in case this is the end of the content
|
||||
whitespaceLen := trailingWhitespaceLen(p.acc.String())
|
||||
ambiguousStart := len(p.acc.String()) - whitespaceLen
|
||||
unambiguous := p.acc.String()[:ambiguousStart]
|
||||
ambiguous := p.acc.String()[ambiguousStart:]
|
||||
p.acc.Reset()
|
||||
p.acc.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
}
|
||||
case qwenParserState_CollectingToolContent:
|
||||
if strings.Contains(p.acc.String(), toolCloseTag) {
|
||||
split := strings.SplitN(p.acc.String(), toolCloseTag, 2)
|
||||
before := split[0]
|
||||
if len(before) == 0 {
|
||||
slog.Warn("qwen tool call closing tag found but no content before it")
|
||||
}
|
||||
// remove any whitespace between the tool call and any content after it
|
||||
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
|
||||
p.acc.Reset()
|
||||
p.acc.WriteString(after)
|
||||
events = append(events, qwenEventRawToolCall{raw: before})
|
||||
p.state = qwenParserState_LookingForToolStart
|
||||
return events, true
|
||||
} else {
|
||||
// note that we don't need to check the overlap here because we only plan
|
||||
// on parsing the tool call once we see the full closing tag. We don't
|
||||
// stream back the unparsed tool content, so there's no need to be eager
|
||||
// here
|
||||
return events, false
|
||||
}
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(drifkin): move this to a shared location
//
// overlap returns the length in bytes of the longest suffix of s that is also
// a prefix of delim, or 0 when there is none.
func overlap(s, delim string) int {
	longest := min(len(s), len(delim))
	for n := longest; n >= 1; n-- {
		if prefix := delim[:n]; strings.HasSuffix(s, prefix) {
			return n
		}
	}
	return 0
}
|
||||
|
||||
// trailingWhitespaceLen returns the number of bytes of whitespace (as defined
// by unicode.IsSpace) at the end of s. Scanning stops at the first
// non-whitespace rune or at a byte that is not valid UTF-8.
func trailingWhitespaceLen(s string) int {
	end := len(s)
	for end > 0 {
		r, width := utf8.DecodeLastRuneInString(s[:end])
		// an invalid utf8 byte decodes as RuneError with width 1; treat it as
		// non-whitespace
		invalid := r == utf8.RuneError && width == 1
		if invalid || !unicode.IsSpace(r) {
			break
		}
		end -= width
	}
	return len(s) - end
}
|
||||
|
||||
// XMLFunctionCall is the encoding/xml target for a <function name="…"> element
// produced by transformToXML.
type XMLFunctionCall struct {
	XMLName xml.Name `xml:"function"`
	// Name is the value of the element's name attribute.
	Name string `xml:"name,attr"`
	// Parameters holds one entry per <parameter> child element.
	Parameters []XMLParameter `xml:"parameter"`
}
|
||||
|
||||
// XMLParameter is the encoding/xml target for a <parameter name="…"> element.
type XMLParameter struct {
	// Name is the value of the element's name attribute.
	Name string `xml:"name,attr"`
	// Value is the element's character data.
	Value string `xml:",chardata"`
}
|
||||
|
||||
// parseToolCall parses a raw tool call string into an api.ToolCall.
|
||||
// The raw string follows an xml-like format, here's an example:
|
||||
//
|
||||
// <function=get_current_temperature>
|
||||
// <parameter=location>
|
||||
// San Francisco
|
||||
// </parameter>
|
||||
// <parameter=unit>
|
||||
// celsius
|
||||
// </parameter>
|
||||
// </function>
|
||||
func parseToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
||||
toolCall := api.ToolCall{}
|
||||
|
||||
xmlString := transformToXML(raw.raw)
|
||||
|
||||
var functionCall XMLFunctionCall
|
||||
err := xml.Unmarshal([]byte(xmlString), &functionCall)
|
||||
if err != nil {
|
||||
return api.ToolCall{}, err
|
||||
}
|
||||
|
||||
toolCall.Function = api.ToolCallFunction{
|
||||
Name: functionCall.Name,
|
||||
}
|
||||
|
||||
// Find the matching tool to get parameter types
|
||||
var matchedTool *api.Tool
|
||||
for i := range tools {
|
||||
if tools[i].Function.Name == functionCall.Name {
|
||||
matchedTool = &tools[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
toolCall.Function.Arguments = make(api.ToolCallFunctionArguments)
|
||||
for _, parameter := range functionCall.Parameters {
|
||||
// Look up the parameter type if we found the tool
|
||||
var paramType api.PropertyType
|
||||
if matchedTool != nil && matchedTool.Function.Parameters.Properties != nil {
|
||||
if prop, ok := matchedTool.Function.Parameters.Properties[parameter.Name]; ok {
|
||||
// Handle anyOf by collecting all types from the union
|
||||
if len(prop.AnyOf) > 0 {
|
||||
for _, anyOfProp := range prop.AnyOf {
|
||||
paramType = append(paramType, anyOfProp.Type...)
|
||||
}
|
||||
} else {
|
||||
paramType = prop.Type
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
toolCall.Function.Arguments[parameter.Name] = parseValue(parameter.Value, paramType)
|
||||
}
|
||||
|
||||
return toolCall, nil
|
||||
}
|
||||
|
||||
// parseValue converts a raw string value to the appropriate type based on the parameter type specification.
|
||||
//
|
||||
// For union types (multiple types in PropertyType, which we support but doesn't
|
||||
// seem as though the reference parser does type coercion with those types in
|
||||
// mind) we use a type precedence approach:
|
||||
// 1. null - checked first regardless of declared types (matches reference implementation)
|
||||
// 2. boolean - only "true"/"false" are valid booleans
|
||||
// 3. integer - must parse as a whole number
|
||||
// 4. number - must parse as numeric (returns int if no decimal part)
|
||||
// 5. array - must parse as valid JSON array
|
||||
// 6. object - must parse as valid JSON object
|
||||
// 7. string - always succeeds (least specific type)
|
||||
//
|
||||
// This precedence ensures we return the most specific type that successfully parses,
|
||||
// following the principle of least surprise. For example, with PropertyType{"string", "number"},
|
||||
// "123" becomes 123 (number), while "hello" becomes "hello" (string).
|
||||
func parseValue(raw string, paramType api.PropertyType) any {
|
||||
// first remove a single leading newlines, and a single trailing newline (if
|
||||
// they exist). This follows the reference implementation
|
||||
raw = strings.TrimPrefix(raw, "\n")
|
||||
raw = strings.TrimSuffix(raw, "\n")
|
||||
|
||||
// Check for null first (case-insensitive) - this takes precedence over any type
|
||||
if strings.ToLower(raw) == "null" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If no type is specified, default to string
|
||||
if len(paramType) == 0 {
|
||||
return raw
|
||||
}
|
||||
|
||||
// Check if any of the specified types match, using type precedence
|
||||
// Order: boolean -> integer -> number -> array -> object -> string
|
||||
typeSet := make(map[string]bool)
|
||||
for _, t := range paramType {
|
||||
typeSet[t] = true
|
||||
}
|
||||
|
||||
// Try boolean first (most restrictive)
|
||||
if typeSet["boolean"] {
|
||||
lower := strings.ToLower(raw)
|
||||
switch lower {
|
||||
case "true":
|
||||
return true
|
||||
case "false":
|
||||
return false
|
||||
}
|
||||
// If not a valid boolean but boolean is the only type, return false (matching reference)
|
||||
if len(paramType) == 1 {
|
||||
return false
|
||||
}
|
||||
// Otherwise try other types
|
||||
}
|
||||
|
||||
// Try integer
|
||||
if typeSet["integer"] {
|
||||
if i, err := strconv.ParseInt(raw, 10, 64); err == nil {
|
||||
// Return as int if it fits in int32, otherwise int64
|
||||
if i >= math.MinInt32 && i <= math.MaxInt32 {
|
||||
return int(i)
|
||||
}
|
||||
return i
|
||||
}
|
||||
// If integer is the only type and parsing failed, fall back to string
|
||||
if len(paramType) == 1 {
|
||||
return raw
|
||||
}
|
||||
}
|
||||
|
||||
// Try number (float)
|
||||
if typeSet["number"] {
|
||||
if f, err := strconv.ParseFloat(raw, 64); err == nil {
|
||||
// If the number has no decimal part, return as int (matching reference)
|
||||
if f == math.Trunc(f) {
|
||||
i := int64(f)
|
||||
if i >= math.MinInt32 && i <= math.MaxInt32 {
|
||||
return int(i)
|
||||
}
|
||||
return i
|
||||
}
|
||||
return f
|
||||
}
|
||||
// If number is the only type and parsing failed, fall back to string
|
||||
if len(paramType) == 1 {
|
||||
return raw
|
||||
}
|
||||
}
|
||||
|
||||
// Try array
|
||||
if typeSet["array"] {
|
||||
var arr []any
|
||||
if err := json.Unmarshal([]byte(raw), &arr); err == nil {
|
||||
return arr
|
||||
}
|
||||
// If array is the only type and parsing failed, fall back to string
|
||||
if len(paramType) == 1 {
|
||||
return raw
|
||||
}
|
||||
}
|
||||
|
||||
// Try object
|
||||
if typeSet["object"] {
|
||||
var obj map[string]any
|
||||
if err := json.Unmarshal([]byte(raw), &obj); err == nil {
|
||||
return obj
|
||||
}
|
||||
// If object is the only type and parsing failed, fall back to string
|
||||
if len(paramType) == 1 {
|
||||
return raw
|
||||
}
|
||||
}
|
||||
|
||||
// String always succeeds (or if "string" is in the type set)
|
||||
if typeSet["string"] {
|
||||
return raw
|
||||
}
|
||||
|
||||
// If we get here, none of the types matched and string wasn't an option
|
||||
// We return string as a fallback. The reference implementation will attempt
|
||||
// to parse the value as a python literal, but we purposefully don't support
|
||||
// that
|
||||
return raw
|
||||
}
|
||||
|
||||
var (
	// qwenTagRegex matches opening tags of the form `<tag=value>`, capturing
	// the tag name and the value.
	qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`)
	// qwenXMLTagRegex matches opening or closing `function` / `parameter`
	// tags, optionally carrying a `name="…"` attribute.
	qwenXMLTagRegex = regexp.MustCompile(`</?(?:function|parameter)(?:\s+name="[^"]*")?>`)
)
|
||||
|
||||
// transformToXML transforms a raw qwen tool call with xml-like tags into valid
|
||||
// xml so that it can be parsed by any xml parser
|
||||
func transformToXML(raw string) string {
|
||||
// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
|
||||
// care to properly escape the string that becomes the attribute value
|
||||
transformed := qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
|
||||
groups := qwenTagRegex.FindStringSubmatch(match)
|
||||
tag := groups[1]
|
||||
var escapedValue strings.Builder
|
||||
xml.EscapeText(&escapedValue, []byte(groups[2]))
|
||||
return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
|
||||
})
|
||||
|
||||
// Walk the resulting string, escaping any character data that sits between the
|
||||
// xml tags we just emitted
|
||||
var out strings.Builder
|
||||
lastIdx := 0
|
||||
for _, loc := range qwenXMLTagRegex.FindAllStringIndex(transformed, -1) {
|
||||
if loc[0] > lastIdx {
|
||||
escapeTextNode(&out, transformed[lastIdx:loc[0]])
|
||||
}
|
||||
out.WriteString(transformed[loc[0]:loc[1]])
|
||||
lastIdx = loc[1]
|
||||
}
|
||||
if lastIdx < len(transformed) {
|
||||
escapeTextNode(&out, transformed[lastIdx:])
|
||||
}
|
||||
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// escapeTextNode writes s to sb with the characters `&`, `<` and `>` replaced
// by the XML entities `&amp;`, `&lt;` and `&gt;`. Every other character,
// including newlines and tabs, is written unchanged (which is why we don't use
// xml.EscapeText for this).
func escapeTextNode(sb *strings.Builder, s string) {
	for _, r := range s {
		switch r {
		case '&':
			sb.WriteString("&amp;")
		case '<':
			sb.WriteString("&lt;")
		case '>':
			sb.WriteString("&gt;")
		default:
			sb.WriteRune(r)
		}
	}
}
|
||||
1125
model/parsers/qwen3coder_test.go
Normal file
1125
model/parsers/qwen3coder_test.go
Normal file
File diff suppressed because it is too large
Load Diff
253
model/parsers/qwen3vl.go
Normal file
253
model/parsers/qwen3vl.go
Normal file
@@ -0,0 +1,253 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/logutil"
|
||||
)
|
||||
|
||||
// TODO: call the init function
|
||||
// States used by Qwen3VLParser. They share the qwenParserState type with the
// Qwen3CoderParser states and, being a separate iota block, start again at 0.
const (
	// CollectingThinkingContent: emitting thinking text until thinkingCloseTag.
	CollectingThinkingContent qwenParserState = iota
	// CollectingContent: emitting content while watching for toolOpenTag.
	CollectingContent
	// CollectingToolContent: buffering tool call text until toolCloseTag.
	CollectingToolContent
	// ThinkingDoneEatingWhitespace: discarding leading whitespace before
	// moving to CollectingContent.
	ThinkingDoneEatingWhitespace
	// ToolCallDoneEatingWhitespace: discarding leading whitespace before
	// moving to CollectingContent.
	ToolCallDoneEatingWhitespace
)
|
||||
|
||||
const (
	// thinkingCloseTag is the literal tag that ends the thinking section.
	thinkingCloseTag = "</think>"
)
|
||||
|
||||
// Qwen3VLParser is a streaming parser that separates thinking text, plain
// content, and JSON tool calls wrapped in <tool_call>…</tool_call> tags.
type Qwen3VLParser struct {
	// state is the current parser state; it is set by Init via setInitialState.
	state qwenParserState
	// buffer holds streamed text that has not yet been emitted as an event.
	buffer strings.Builder
	// tools is the tool list passed to Init.
	tools []api.Tool
	// hasThinkingSupport is fixed at construction; ParserForName sets it to
	// true for "qwen3-vl-thinking" and false for "qwen3-vl-instruct".
	hasThinkingSupport bool
}

// HasToolSupport always returns true.
func (p *Qwen3VLParser) HasToolSupport() bool {
	return true
}

// HasThinkingSupport returns the hasThinkingSupport field.
func (p *Qwen3VLParser) HasThinkingSupport() bool {
	return p.hasThinkingSupport
}
|
||||
|
||||
func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
|
||||
prefill := lastMessage != nil && lastMessage.Role == "assistant"
|
||||
if !p.HasThinkingSupport() {
|
||||
p.state = CollectingContent
|
||||
return
|
||||
}
|
||||
|
||||
if prefill && lastMessage.Content != "" {
|
||||
p.state = CollectingContent
|
||||
return
|
||||
}
|
||||
|
||||
p.state = CollectingThinkingContent
|
||||
}
|
||||
|
||||
// Init records tools, selects the starting state from lastMessage via
// setInitialState, and returns tools unchanged.
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
	p.tools = tools
	p.setInitialState(lastMessage)
	return tools
}
|
||||
|
||||
// qwenEventThinkingContent carries thinking text.
type qwenEventThinkingContent struct {
	content string
}

// isQwenEvent marks qwenEventThinkingContent as a qwenEvent.
func (qwenEventThinkingContent) isQwenEvent() {}
|
||||
|
||||
func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
|
||||
p.buffer.WriteString(s)
|
||||
events := p.parseEvents()
|
||||
|
||||
var toolCalls []api.ToolCall
|
||||
var contentSb strings.Builder
|
||||
var thinkingSb strings.Builder
|
||||
for _, event := range events {
|
||||
switch event := event.(type) {
|
||||
case qwenEventRawToolCall:
|
||||
toolCall, err := parseJSONToolCall(event, p.tools)
|
||||
if err != nil {
|
||||
slog.Warn("qwen tool call parsing failed", "error", err)
|
||||
return "", "", nil, err
|
||||
}
|
||||
toolCalls = append(toolCalls, toolCall)
|
||||
case qwenEventThinkingContent:
|
||||
thinkingSb.WriteString(event.content)
|
||||
case qwenEventContent:
|
||||
// TODO(drifkin): if the same turn contains multiple interleaved content
|
||||
// events, we naively append them together here.
|
||||
contentSb.WriteString(event.content)
|
||||
}
|
||||
}
|
||||
|
||||
return contentSb.String(), thinkingSb.String(), toolCalls, nil
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) parseEvents() []qwenEvent {
|
||||
var all []qwenEvent
|
||||
|
||||
keepLooping := true
|
||||
for keepLooping {
|
||||
var events []qwenEvent
|
||||
events, keepLooping = p.eat()
|
||||
if len(events) > 0 {
|
||||
all = append(all, events...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(all) > 0 {
|
||||
slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
|
||||
}
|
||||
|
||||
return all
|
||||
}
|
||||
|
||||
func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) {
|
||||
split := strings.SplitN(p.buffer.String(), tag, 2)
|
||||
before := split[0]
|
||||
before = strings.TrimRightFunc(before, unicode.IsSpace)
|
||||
after := split[1]
|
||||
if trimAfter {
|
||||
after = strings.TrimLeftFunc(after, unicode.IsSpace)
|
||||
}
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(after)
|
||||
return before, after // return events
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) {
|
||||
trimmed := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
|
||||
p.buffer.Reset()
|
||||
if trimmed == "" {
|
||||
return nil, false
|
||||
}
|
||||
p.state = nextState
|
||||
p.buffer.WriteString(trimmed)
|
||||
return nil, true
|
||||
}
|
||||
|
||||
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
|
||||
var events []qwenEvent
|
||||
|
||||
switch p.state {
|
||||
case CollectingContent:
|
||||
if strings.Contains(p.buffer.String(), toolOpenTag) {
|
||||
// events = emitContentBeforeTag(p, events, toolOpenTag)
|
||||
before, _ := splitAtTag(p, toolOpenTag, false)
|
||||
if len(before) > 0 {
|
||||
events = append(events, qwenEventContent{content: before})
|
||||
}
|
||||
p.state = CollectingToolContent
|
||||
return events, true
|
||||
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
|
||||
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
} else {
|
||||
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
||||
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
}
|
||||
case CollectingToolContent:
|
||||
if strings.Contains(p.buffer.String(), toolCloseTag) {
|
||||
split := strings.SplitN(p.buffer.String(), toolCloseTag, 2)
|
||||
before := split[0] // do we also need to do it to tool calls?
|
||||
if len(before) == 0 {
|
||||
slog.Warn("qwen tool call closing tag found but no content before it")
|
||||
}
|
||||
|
||||
after := split[1]
|
||||
events = append(events, qwenEventRawToolCall{raw: before})
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(after)
|
||||
p.state = ToolCallDoneEatingWhitespace
|
||||
return events, true
|
||||
} else {
|
||||
return events, false
|
||||
}
|
||||
case CollectingThinkingContent:
|
||||
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
|
||||
thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
|
||||
if len(thinking) > 0 {
|
||||
events = append(events, qwenEventThinkingContent{content: thinking})
|
||||
}
|
||||
if remaining == "" {
|
||||
p.state = ThinkingDoneEatingWhitespace
|
||||
} else {
|
||||
p.state = CollectingContent
|
||||
}
|
||||
return events, true
|
||||
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 {
|
||||
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
|
||||
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
||||
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventThinkingContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
} else {
|
||||
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
|
||||
ambiguousStart := len(p.buffer.String()) - whitespaceLen
|
||||
|
||||
unambiguous := p.buffer.String()[:ambiguousStart]
|
||||
ambiguous := p.buffer.String()[ambiguousStart:]
|
||||
p.buffer.Reset()
|
||||
p.buffer.WriteString(ambiguous)
|
||||
if len(unambiguous) > 0 {
|
||||
events = append(events, qwenEventThinkingContent{content: unambiguous})
|
||||
}
|
||||
return events, false
|
||||
}
|
||||
case ThinkingDoneEatingWhitespace:
|
||||
return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
|
||||
case ToolCallDoneEatingWhitespace:
|
||||
return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
|
||||
default:
|
||||
panic("unreachable")
|
||||
}
|
||||
}
|
||||
|
||||
func parseJSONToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
||||
var toolCallFunction api.ToolCallFunction
|
||||
if err := json.Unmarshal([]byte(raw.raw), &toolCallFunction); err != nil {
|
||||
return api.ToolCall{}, err
|
||||
}
|
||||
|
||||
toolCall := api.ToolCall{}
|
||||
toolCall.Function = toolCallFunction
|
||||
|
||||
return toolCall, nil
|
||||
}
|
||||
841
model/parsers/qwen3vl_nonthinking_test.go
Normal file
841
model/parsers/qwen3vl_nonthinking_test.go
Normal file
@@ -0,0 +1,841 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQwen3VLNonThinkingParserStreaming(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "simple thinking",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "simple trip thinking",
|
||||
steps: []step{
|
||||
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "<think>abc</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with split tags",
|
||||
steps: []step{
|
||||
{input: "abc", wantEvents: []qwenEvent{qwenEventContent{content: "abc"}}},
|
||||
{input: "</think>", wantEvents: []qwenEvent{qwenEventContent{content: "</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple think tags",
|
||||
steps: []step{
|
||||
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>actually, is not thinking</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking</think>"},
|
||||
qwenEventRawToolCall{raw: "I'm tool calling"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking (outside thinking, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<think>I'm nested thinking</think></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking<think>I'm nested thinking</think></think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<think>I'm thinking</think>I'm actually content</think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "<think>I'm thinking</think>I'm actually content</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside thinking, inside tool call)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking"},
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside tool call, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call<think>I'm thinking</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "I'm thinking"},
|
||||
qwenEventRawToolCall{raw: "I'm NOT a nested tool call</think>"},
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "emit unambiguous before partial tool open (trailing ws)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc\u00a0\n<tool_call",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "abc"}},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "\u00a0\n<tool_call fakeout"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unambiguous empty: partial tool open at buffer start",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_ca",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "ll>abc</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking tag fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc</think",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: " fakeout"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking incomplete",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<think>unfinished<", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>unfinished"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "test with split tool and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<tool_call>unfinished</", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "abc"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "tool_call> def",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "unfinished"},
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: false}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwenOldParserStreaming(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "simple message streamed word by word",
|
||||
steps: []step{
|
||||
{
|
||||
input: "hi",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "hi"}},
|
||||
},
|
||||
{
|
||||
input: " there",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: " there"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "content before tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "hi there<tool_call>",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "hi there"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple tool calls in one message",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before1<tool_call>in tool call</tool_call>after1<tool_call>in tool call 2</tool_call>after2",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before1"},
|
||||
qwenEventRawToolCall{raw: "in tool call"},
|
||||
qwenEventContent{content: "after1"},
|
||||
qwenEventRawToolCall{raw: "in tool call 2"},
|
||||
qwenEventContent{content: "after2"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "tool calls with split tags",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before<tool",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "_call>in tool call</tool",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "_call>af",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "in tool call"},
|
||||
qwenEventContent{content: "af"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "ter",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "ter"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trailing whitespace between content and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc\n<tool_call>def</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "abc"},
|
||||
qwenEventRawToolCall{raw: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trailing whitespace between tool call and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>abc</tool_call>\ndef",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "empty content before tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "\n<tool_call>abc</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial tool open tag fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc\n<tool_call",
|
||||
wantEvents: []qwenEvent{
|
||||
// \n should not be emitted yet because `<tool_call` might be a tool
|
||||
// open tag, in which case the whitespace should be trimmed
|
||||
qwenEventContent{content: "abc"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\n<tool_call fakeout"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "token-by-token whitespace handling",
|
||||
steps: []step{
|
||||
{
|
||||
input: "a",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "a"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "\n",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "b",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\nb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "你好 🌍<tool_call>test</tool_call>مرحبا",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "你好 🌍"},
|
||||
qwenEventRawToolCall{raw: "test"},
|
||||
qwenEventContent{content: "مرحبا"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "arabic text handling",
|
||||
steps: []step{
|
||||
{
|
||||
input: "مرحبا بالعالم",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "مرحبا بالعالم"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "emoji passthrough",
|
||||
steps: []step{
|
||||
{
|
||||
input: "✅",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "✅"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "emoji after tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>test</tool_call>完成 ✅",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "test"},
|
||||
qwenEventContent{content: "完成 ✅"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode streaming with whitespace handling",
|
||||
steps: []step{
|
||||
{
|
||||
input: "مرحبا",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "مرحبا"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: " \n",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "世界",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: " \n世界"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "non-breaking space withheld across chunks",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello\u00a0",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "Hello"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "world",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\u00a0world"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "ideographic space before partial tool",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello\u3000<tool",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "Hello"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "_call>abc",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "</tool_call>def",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "abc"},
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "ideographic space before partial tool fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello\u3000<tool",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "Hello"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "fakeout>abc",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\u3000<toolfakeout>abc"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode with partial tool tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "测试🎯 <to",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "测试🎯"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: false}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLNonThinkingToolParser(t *testing.T) {
|
||||
type step struct {
|
||||
name string
|
||||
rawToolCall string
|
||||
tools []api.Tool
|
||||
wantToolCall api.ToolCall
|
||||
}
|
||||
|
||||
steps := []step{
|
||||
{
|
||||
name: "simple tool call",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-current-weather",
|
||||
Arguments: map[string]any{
|
||||
"location": "San Francisco, CA",
|
||||
"unit": "fahrenheit",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with spaces",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get current temperature",
|
||||
Arguments: map[string]any{
|
||||
"location with spaces": "San Francisco",
|
||||
"unit with spaces": "celsius",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with quotes",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "\"get current temperature\"",
|
||||
Arguments: map[string]any{
|
||||
"\"location with spaces\"": "San Francisco",
|
||||
"\"unit with spaces\"": "\"celsius\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool call with typed parameters (json types)",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "calculate",
|
||||
Arguments: map[string]any{
|
||||
"x": 3.14,
|
||||
"y": float64(42),
|
||||
"enabled": true,
|
||||
"items": []any{"a", "b", "c"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ampersands in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"done\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "angle brackets in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"a > b and a < b\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unicode in function names and parameters",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "获取天气",
|
||||
Arguments: map[string]any{
|
||||
"城市": "北京",
|
||||
"message": "Hello! 你好! 🌟 مرحبا",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, step := range steps {
|
||||
gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
|
||||
if err != nil {
|
||||
t.Errorf("step %d (%s): %v", i, step.name, err)
|
||||
}
|
||||
if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
|
||||
t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "whitespace inside tool call preserves trailing space",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before<tool_call> tool content </tool_call>after",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before"},
|
||||
qwenEventRawToolCall{raw: " tool content "},
|
||||
qwenEventContent{content: "after"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace inside tool call preserves trailing space",
|
||||
steps: []step{
|
||||
{
|
||||
input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> tool content </tool_call> \n\n\n\n\n\n\n after",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh"},
|
||||
qwenEventRawToolCall{raw: " tool content "},
|
||||
qwenEventContent{content: "after"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace inside tool call preserves trailing space",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call> tool content </tool_call> ",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: " tool content "},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> anotha one </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
|
||||
qwenEventRawToolCall{raw: " anotha one "},
|
||||
qwenEventContent{content: "after \n\n\n\n\n\n blep"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace between content and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "content \n <tool_call>tool</tool_call> \n more content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "content"},
|
||||
qwenEventRawToolCall{raw: "tool"},
|
||||
qwenEventContent{content: "more content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "consecutive tool calls with whitespace",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>first</tool_call> \n <tool_call>second</tool_call> \n <tool_call>third</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "first"},
|
||||
qwenEventRawToolCall{raw: "second"},
|
||||
qwenEventRawToolCall{raw: "third"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace before and after tool open tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "text \n <tool_call>content</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "text"},
|
||||
qwenEventRawToolCall{raw: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode whitespace around tool calls",
|
||||
steps: []step{
|
||||
{
|
||||
input: "text\u00a0\u3000<tool_call>content</tool_call>\u00a0\u3000text",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "text"},
|
||||
qwenEventRawToolCall{raw: "content"},
|
||||
qwenEventContent{content: "text"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "empty tool call with surrounding whitespace",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before <tool_call></tool_call> after",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before"},
|
||||
qwenEventRawToolCall{raw: ""},
|
||||
qwenEventContent{content: "after"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace in tool call split across chunks",
|
||||
steps: []step{
|
||||
{
|
||||
input: "before<tool_call> ",
|
||||
wantEvents: []qwenEvent{qwenEventContent{content: "before"}},
|
||||
},
|
||||
{
|
||||
input: "tool",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: " </tool_call>after",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: " tool "},
|
||||
qwenEventContent{content: "after"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "mixed whitespace types between tool calls",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>first</tool_call> \t\n\r <tool_call>second</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "first"},
|
||||
qwenEventRawToolCall{raw: "second"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: false}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
878
model/parsers/qwen3vl_thinking_test.go
Normal file
878
model/parsers/qwen3vl_thinking_test.go
Normal file
@@ -0,0 +1,878 @@
|
||||
package parsers
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
)
|
||||
|
||||
func TestQwen3VLThinkingParserStreaming(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "simple thinking",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "simple trip thinking",
|
||||
steps: []step{
|
||||
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "<think>abc"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with split tags",
|
||||
steps: []step{
|
||||
{input: "abc", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
{input: "</think>", wantEvents: []qwenEvent{}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple think tags",
|
||||
steps: []step{
|
||||
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>actually, is not thinking"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking"},
|
||||
qwenEventRawToolCall{raw: "I'm tool calling"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking</think>I'm content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking"},
|
||||
qwenEventContent{content: "I'm content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking and tool call and content",
|
||||
},
|
||||
{
|
||||
desc: "nested thinking (outside thinking, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<think>I'm nested thinking</think></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking<think>I'm nested thinking"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<think>I'm thinking</think>I'm actually content</think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "<think>I'm thinking"},
|
||||
qwenEventContent{content: "I'm actually content</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside thinking, inside tool call)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "nested thinking and tool call (outside tool call, inside thinking)",
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
|
||||
qwenEventContent{content: "</tool_call>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "interleaved thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
|
||||
qwenEventContent{content: "</tool_call>"},
|
||||
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
|
||||
qwenEventContent{content: "</think>"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking tag fakeout",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc</think",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}},
|
||||
},
|
||||
{
|
||||
input: " fakeout",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "partial thinking incomplete",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<think>unfinished</think", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "test with split thinking and content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "abc<think>unfinished</th", // when something is ambiguious, we dont emit anything
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
|
||||
},
|
||||
{
|
||||
input: "ink> def",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "def"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with no tags",
|
||||
steps: []step{
|
||||
{
|
||||
input: "Hello I am thinking",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "Hello I am thinking"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "Hello I am thinking some more",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "Hello I am thinking some more"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "Hello I am think</think> NOT",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "Hello I am think"},
|
||||
qwenEventContent{content: "NOT"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
// parser.state = CollectingThinkingContent
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingToolParser(t *testing.T) {
|
||||
type step struct {
|
||||
name string
|
||||
rawToolCall string
|
||||
tools []api.Tool
|
||||
wantToolCall api.ToolCall
|
||||
}
|
||||
|
||||
steps := []step{
|
||||
{
|
||||
name: "simple tool call",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-current-weather",
|
||||
Arguments: map[string]any{
|
||||
"location": "San Francisco, CA",
|
||||
"unit": "fahrenheit",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with spaces",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get current temperature",
|
||||
Arguments: map[string]any{
|
||||
"location with spaces": "San Francisco",
|
||||
"unit with spaces": "celsius",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "names with quotes",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "\"get current temperature\"",
|
||||
Arguments: map[string]any{
|
||||
"\"location with spaces\"": "San Francisco",
|
||||
"\"unit with spaces\"": "\"celsius\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "tool call with typed parameters (json types)",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "calculate",
|
||||
Arguments: map[string]any{
|
||||
"x": 3.14,
|
||||
"y": float64(42),
|
||||
"enabled": true,
|
||||
"items": []any{"a", "b", "c"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ampersands in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"done\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "angle brackets in parameter values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "exec",
|
||||
Arguments: map[string]any{
|
||||
"command": "ls && echo \"a > b and a < b\"",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unicode in function names and parameters",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "获取天气",
|
||||
Arguments: map[string]any{
|
||||
"城市": "北京",
|
||||
"message": "Hello! 你好! 🌟 مرحبا",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, step := range steps {
|
||||
gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
|
||||
if err != nil {
|
||||
t.Errorf("step %d (%s): %v", i, step.name, err)
|
||||
}
|
||||
if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
|
||||
t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLParserState(t *testing.T) {
|
||||
cases := []struct {
|
||||
desc string
|
||||
hasThinking bool
|
||||
last *api.Message
|
||||
wantState qwenParserState
|
||||
}{
|
||||
{
|
||||
desc: "no thinking support => CollectingContent",
|
||||
hasThinking: false,
|
||||
last: nil,
|
||||
wantState: CollectingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, no last message => CollectingThinkingContent",
|
||||
hasThinking: true,
|
||||
last: nil,
|
||||
wantState: CollectingThinkingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, last assistant with empty content => CollectingThinkingContent",
|
||||
hasThinking: true,
|
||||
last: &api.Message{Role: "assistant", Content: ""},
|
||||
wantState: CollectingThinkingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, last assistant with content => CollectingContent",
|
||||
hasThinking: true,
|
||||
last: &api.Message{Role: "assistant", Content: "hello"},
|
||||
wantState: CollectingContent,
|
||||
},
|
||||
{
|
||||
desc: "thinking support, last is user => CollectingThinkingContent",
|
||||
hasThinking: true,
|
||||
last: &api.Message{Role: "user", Content: "hi"},
|
||||
wantState: CollectingThinkingContent,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: tc.hasThinking}
|
||||
parser.Init(nil, tc.last)
|
||||
if parser.state != tc.wantState {
|
||||
t.Errorf("%s: got state %v, want %v", tc.desc, parser.state, tc.wantState)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "thinking prefill",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with content",
|
||||
steps: []step{
|
||||
{input: "abc</th", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "def"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with fakeout",
|
||||
steps: []step{
|
||||
{input: "abc</think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
|
||||
{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}}},
|
||||
{input: ">", wantEvents: []qwenEvent{}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with spaces",
|
||||
steps: []step{
|
||||
{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: "starting content"}}},
|
||||
},
|
||||
},
|
||||
}
|
||||
last := &api.Message{Role: "assistant", Thinking: "i am thinking"} // so if there is thinking the test is still thinking
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, last)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "thinking prefill",
|
||||
steps: []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with content",
|
||||
steps: []step{
|
||||
{input: "abc</th", wantEvents: []qwenEvent{qwenEventContent{content: "abc</th"}}},
|
||||
{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "ink> def"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with fakeout",
|
||||
steps: []step{
|
||||
{input: "abc</think", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}}},
|
||||
{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventContent{content: " fakeout </think"}}},
|
||||
{input: ">", wantEvents: []qwenEvent{qwenEventContent{content: ">"}}},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking prefill with spaces",
|
||||
steps: []step{
|
||||
{input: " </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: " </think> starting content"}}},
|
||||
},
|
||||
},
|
||||
}
|
||||
last := &api.Message{Role: "assistant", Thinking: "i am thinking", Content: "i am content"} // so if there is thinking the test is still thinking
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, last)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
// avoid deep equal on empty vs. nil slices
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
|
||||
// last message is assistant with content ⇒ start in CollectingContent
|
||||
last := &api.Message{Role: "assistant", Content: "has content"}
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, last)
|
||||
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
steps := []step{
|
||||
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
|
||||
{input: "<tool_call>{\"name\": \"x\", \"arguments\": {}}</tool_call>", wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "{\"name\": \"x\", \"arguments\": {}}"}}},
|
||||
}
|
||||
|
||||
for i, s := range steps {
|
||||
parser.buffer.WriteString(s.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
if len(gotEvents) == 0 && len(s.wantEvents) == 0 {
|
||||
continue
|
||||
}
|
||||
if !reflect.DeepEqual(gotEvents, s.wantEvents) {
|
||||
t.Fatalf("step %d: input %q: got %#v, want %#v", i, s.input, gotEvents, s.wantEvents)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
}{
|
||||
{
|
||||
desc: "whitespace after thinking tag is trimmed",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking content</think> \n\t content starts here",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "thinking content"},
|
||||
qwenEventContent{content: "content starts here"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace after thinking tag split across chunks",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking content</think> ",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
|
||||
},
|
||||
{
|
||||
input: " \n\t",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "only whitespace after thinking tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking content</think> \n\t ",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple spaces and tabs after thinking",
|
||||
steps: []step{
|
||||
{
|
||||
input: "think</think> \t\t\n\n text",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "think"},
|
||||
qwenEventContent{content: "text"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "trailing whitespace before thinking tag is preserved in content",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking with spaces </think>text",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "thinking with spaces"},
|
||||
qwenEventContent{content: "text"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace between thinking and tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking</think> \n <tool_call>{\"name\":\"test\"}</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "thinking"},
|
||||
qwenEventRawToolCall{raw: "{\"name\":\"test\"}"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "no whitespace after thinking tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking</think>content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "thinking"},
|
||||
qwenEventContent{content: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "unicode whitespace after thinking tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking</think>\u00a0\u3000content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "thinking"},
|
||||
qwenEventContent{content: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace split with partial thinking tag",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking</th",
|
||||
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking"}},
|
||||
},
|
||||
{
|
||||
input: "ink> \n",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: " content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "empty thinking tag with whitespace after",
|
||||
steps: []step{
|
||||
{
|
||||
input: "</think> \ncontent",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace inside tool call preserves trailing space",
|
||||
steps: []step{
|
||||
{
|
||||
input: "bruh</think> \n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> tool content </tool_call> \n\n\n\n\n\n\n after",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "bruh"},
|
||||
qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
|
||||
qwenEventRawToolCall{raw: " tool content "},
|
||||
qwenEventContent{content: "after"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace inside tool call preserves trailing space",
|
||||
steps: []step{
|
||||
{
|
||||
input: "bruh</think> shdjfhksdhfj ",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "bruh"},
|
||||
qwenEventContent{content: "shdjfhksdhfj"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "another word ",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: " another word"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "<tool_call> tool content </tool_call> ",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: " tool content "},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t <tool_call> anotha one </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
|
||||
qwenEventRawToolCall{raw: " anotha one "},
|
||||
qwenEventContent{content: "after \n\n\n\n\n\n blep"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, nil)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) {
|
||||
type step struct {
|
||||
input string
|
||||
wantEvents []qwenEvent
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
desc string
|
||||
steps []step
|
||||
only bool
|
||||
prefillMsg *api.Message // allows starting in content mode instead of thinking mode
|
||||
}{
|
||||
{
|
||||
desc: "whitespace inside tool call is fully preserved (with content prefill)",
|
||||
prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
|
||||
steps: []step{
|
||||
{
|
||||
input: "before<tool_call> tool content </tool_call> \n after",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before"},
|
||||
qwenEventRawToolCall{raw: " tool content "},
|
||||
qwenEventContent{content: "after"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "whitespace after tool call trimmed across chunks (with content prefill)",
|
||||
prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
|
||||
steps: []step{
|
||||
{
|
||||
input: "before<tool_call>tool</tool_call> ",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "before"},
|
||||
qwenEventRawToolCall{raw: "tool"},
|
||||
},
|
||||
},
|
||||
{
|
||||
input: "\n\t",
|
||||
wantEvents: []qwenEvent{},
|
||||
},
|
||||
{
|
||||
input: "after \n this is a song",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventContent{content: "after \n this is a song"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "multiple tool calls with whitespace between (with content prefill)",
|
||||
prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
|
||||
steps: []step{
|
||||
{
|
||||
input: "<tool_call>first</tool_call> \n <tool_call>second</tool_call>",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventRawToolCall{raw: "first"},
|
||||
qwenEventRawToolCall{raw: "second"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "thinking with whitespace then tool call",
|
||||
steps: []step{
|
||||
{
|
||||
input: "thinking</think> \n <tool_call>tool</tool_call> \n content",
|
||||
wantEvents: []qwenEvent{
|
||||
qwenEventThinkingContent{content: "thinking"},
|
||||
qwenEventRawToolCall{raw: "tool"},
|
||||
qwenEventContent{content: "content"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
anyOnlies := false
|
||||
for _, tc := range cases {
|
||||
if tc.only {
|
||||
anyOnlies = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
if anyOnlies && !tc.only {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
parser := Qwen3VLParser{hasThinkingSupport: true}
|
||||
parser.Init([]api.Tool{}, tc.prefillMsg)
|
||||
|
||||
for i, step := range tc.steps {
|
||||
parser.buffer.WriteString(step.input)
|
||||
gotEvents := parser.parseEvents()
|
||||
|
||||
if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
|
||||
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user