Sync with upstream ollama/ollama and restore Tesla K80 (compute 3.7) support

This commit represents a complete rework after pulling the latest changes from the official ollama/ollama repository and re-applying the Tesla K80 compatibility patches.

## Key Changes

### CUDA Compute Capability 3.7 Support (Tesla K80)
- Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt
- Updated CMakePresets.json to include compute 3.7 in the "CUDA 11" preset
- Using 37-virtual (PTX with JIT compilation) for maximum compatibility

### Legacy Toolchain Compatibility
- **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80)
- **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7)
- **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h)

### CPU Architecture Trade-offs
Due to the GCC 10.5 limitation, newer CPU optimizations were sacrificed:
- Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+)
- Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA
- Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility)

### Build System Updates
- Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7
- Added -Wno-deprecated-gpu-targets flag to suppress warnings
- Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI

### Upstream Sync
Merged latest llama.cpp changes including:
- Enhanced KV cache management with ISWA and hybrid memory support
- Improved multi-modal support (mtmd framework)
- New model architectures (Gemma3, Llama4, Qwen3, etc.)
- GPU backend improvements for CUDA, Metal, and ROCm
- Updated quantization support and GGUF format handling

### Documentation
- Updated CLAUDE.md with comprehensive build instructions
- Documented toolchain constraints and CPU architecture trade-offs
- Removed outdated CI/CD workflows (tesla-k80-*.yml)
- Cleaned up temporary development artifacts

## Rationale

This fork maintains Tesla K80 GPU support (compute 3.7), which was dropped in official Ollama due to legacy driver/CUDA requirements.

The toolchain constraints form a fixed dependency chain:
- K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI

We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-18 11:47:07 +00:00 · 2025-11-05 14:03:05 +08:00
parent fabe2c5cb7
commit ef14fb5b26
817 changed files with 241634 additions and 70888 deletions
--- a/model/parsers/parsers.go
+++ b/model/parsers/parsers.go
@@ -0,0 +1,76 @@
+package parsers
+
+import (
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/harmony"
+)
+
+// Parser incrementally turns streamed model output into plain content,
+// thinking text, and tool calls.
+type Parser interface {
+	// Init initializes the parser with tools and optional last message for chat prefill
+	// Returns processed tools if the parser needs to modify them (e.g., harmony renames them)
+	Init(tools []api.Tool, lastMessage *api.Message) []api.Tool
+	// Add processes streamed content and returns parsed content, thinking, and tool calls
+	// The done flag indicates if this is the last chunk (used for draining accumulators)
+	Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error)
+	// HasToolSupport reports whether the parser can produce tool calls.
+	HasToolSupport() bool
+	// HasThinkingSupport reports whether the parser can produce thinking output.
+	HasThinkingSupport() bool
+}
+
+// ParserConstructor builds a new Parser instance.
+type ParserConstructor func() Parser
+
+// ParserRegistry maps parser names to the constructors that create them.
+// The zero value is ready to use.
+type ParserRegistry struct {
+	constructors map[string]ParserConstructor
+}
+
+// Register associates name with constructor, replacing any constructor that
+// was previously registered under the same name.
+func (r *ParserRegistry) Register(name string, constructor ParserConstructor) {
+	// Allocate lazily so that a zero-value ParserRegistry does not panic on a
+	// write to a nil map.
+	if r.constructors == nil {
+		r.constructors = make(map[string]ParserConstructor)
+	}
+	r.constructors[name] = constructor
+}
+
+// registry is the package-level registry consulted by ParserForName.
+var registry = ParserRegistry{
+	constructors: map[string]ParserConstructor{},
+}
+
+// Register adds constructor to the package-level registry under name.
+func Register(name string, constructor ParserConstructor) {
+	registry.Register(name, constructor)
+}
+
+// ParserForName returns a new parser for name, or nil when the name is
+// unknown. Constructors added through Register are consulted first, so a
+// registered name takes precedence over the built-in parser of the same name.
+func ParserForName(name string) Parser {
+	if constructor, ok := registry.constructors[name]; ok {
+		return constructor()
+	}
+	switch name {
+	case "qwen3-coder":
+		return &Qwen3CoderParser{}
+	case "qwen3-vl-instruct":
+		// The zero qwenParserState is CollectingThinkingContent, so start the
+		// non-thinking variant in CollectingContent explicitly; otherwise any
+		// output added before Init would be reported as thinking. Init still
+		// recomputes the state when it is called.
+		return &Qwen3VLParser{hasThinkingSupport: false, state: CollectingContent}
+	case "qwen3-vl-thinking":
+		return &Qwen3VLParser{hasThinkingSupport: true}
+	case "passthrough":
+		return &PassthroughParser{}
+	case "harmony":
+		return harmony.NewHarmonyMessageHandler()
+	default:
+		return nil
+	}
+}
+
+// PassthroughParser is a Parser that forwards streamed text unchanged as
+// content and never produces thinking output or tool calls.
+type PassthroughParser struct{}
+
+// Init returns tools unmodified; the last message is not used.
+func (p *PassthroughParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
+	return tools // passthrough doesn't modify tools
+}
+
+// Add returns s as content, with empty thinking, no tool calls and no error.
+// The done flag is not used.
+func (p *PassthroughParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	return s, "", nil, nil
+}
+
+// HasToolSupport always reports false.
+func (p *PassthroughParser) HasToolSupport() bool {
+	return false
+}
+
+// HasThinkingSupport always reports false.
+func (p *PassthroughParser) HasThinkingSupport() bool {
+	return false
+}
--- a/model/parsers/parsers_test.go
+++ b/model/parsers/parsers_test.go
@@ -0,0 +1,97 @@
+package parsers
+
+import (
+	"testing"
+
+	"github.com/ollama/ollama/api"
+)
+
+// mockParser is a test double whose Add prefixes its input with "mock:" so
+// tests can tell it apart from the built-in parsers.
+type mockParser struct {
+	name string
+}
+
+// Init returns tools unmodified.
+func (m *mockParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
+	return tools
+}
+
+// Add returns "mock:" followed by s as content and nothing else.
+func (m *mockParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	return "mock:" + s, "", nil, nil
+}
+
+// HasToolSupport always reports false.
+func (m *mockParser) HasToolSupport() bool {
+	return false
+}
+
+// HasThinkingSupport always reports false.
+func (m *mockParser) HasThinkingSupport() bool {
+	return false
+}
+
+// TestRegisterCustomParser checks that a constructor added through Register
+// is returned by ParserForName and that the resulting parser is usable.
+func TestRegisterCustomParser(t *testing.T) {
+	// Register a custom parser
+	Register("custom-parser", func() Parser {
+		return &mockParser{name: "custom"}
+	})
+	// The registry is package-level state; remove the entry again so this test
+	// does not leak into other tests or repeated runs.
+	t.Cleanup(func() { delete(registry.constructors, "custom-parser") })
+
+	// Retrieve it
+	parser := ParserForName("custom-parser")
+	if parser == nil {
+		t.Fatal("expected parser to be registered")
+	}
+
+	// Test it works
+	content, _, _, err := parser.Add("test", false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if content != "mock:test" {
+		t.Errorf("expected 'mock:test', got %q", content)
+	}
+}
+
+// TestBuiltInParsersStillWork checks that the built-in parser names still
+// resolve to a non-nil parser.
+func TestBuiltInParsersStillWork(t *testing.T) {
+	for _, name := range []string{"passthrough", "qwen3-coder", "harmony"} {
+		t.Run(name, func(t *testing.T) {
+			if ParserForName(name) == nil {
+				t.Fatalf("expected built-in parser %q to exist", name)
+			}
+		})
+	}
+}
+
+// TestOverrideBuiltInParser checks that a constructor registered under a
+// built-in name takes precedence over the built-in parser.
+func TestOverrideBuiltInParser(t *testing.T) {
+	// Override a built-in parser
+	Register("passthrough", func() Parser {
+		return &mockParser{name: "override"}
+	})
+	// The registry is package-level state; drop the override afterwards so that
+	// "passthrough" resolves to the real built-in parser in other tests and in
+	// repeated or shuffled runs.
+	t.Cleanup(func() { delete(registry.constructors, "passthrough") })
+
+	// Should get the override
+	parser := ParserForName("passthrough")
+	if parser == nil {
+		t.Fatal("expected parser to exist")
+	}
+
+	// Test it's the override
+	content, _, _, err := parser.Add("test", false)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if content != "mock:test" {
+		t.Errorf("expected 'mock:test' from override, got %q", content)
+	}
+}
+
+// TestUnknownParserReturnsNil checks that an unregistered, non-built-in name
+// yields no parser.
+func TestUnknownParserReturnsNil(t *testing.T) {
+	if got := ParserForName("nonexistent-parser"); got != nil {
+		t.Error("expected nil for unknown parser")
+	}
+}
--- a/model/parsers/qwen3coder.go
+++ b/model/parsers/qwen3coder.go
@@ -0,0 +1,472 @@
+package parsers
+
+import (
+	"context"
+	"encoding/json"
+	"encoding/xml"
+	"fmt"
+	"log/slog"
+	"math"
+	"regexp"
+	"strconv"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/logutil"
+)
+
+// qwenParserState identifies the current state of a qwen streaming parser.
+type qwenParserState int
+
+// Tags that delimit a tool call in the model output.
+const (
+	toolOpenTag  = "<tool_call>"
+	toolCloseTag = "</tool_call>"
+)
+
+// States used by Qwen3CoderParser.
+const (
+	// Scanning plain content for the start of a tool call.
+	qwenParserState_LookingForToolStart qwenParserState = iota
+	// Inside a tool call, buffering until the closing tag arrives.
+	qwenParserState_CollectingToolContent
+)
+
+// Qwen3CoderParser is a streaming parser that separates plain content from
+// tool calls wrapped in <tool_call> tags.
+type Qwen3CoderParser struct {
+	state qwenParserState
+	acc   strings.Builder // output received but not yet emitted
+	tools []api.Tool      // tools passed to Init, used to type tool call arguments
+}
+
+// HasToolSupport always reports true.
+func (p *Qwen3CoderParser) HasToolSupport() bool {
+	return true
+}
+
+// HasThinkingSupport always reports false.
+func (p *Qwen3CoderParser) HasThinkingSupport() bool {
+	return false
+}
+
+// Init stores tools for later argument type lookup and returns them
+// unmodified; the last message is not used.
+func (p *Qwen3CoderParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
+	p.tools = tools
+	return tools // Qwen doesn't modify tools
+}
+
+// Add appends s to the internal buffer and returns whatever can be emitted
+// unambiguously so far: plain content and any completed tool calls. The
+// thinking result is always empty and the done flag is not consulted. A tool
+// call that fails to parse is logged and ends the call with that error.
+func (p *Qwen3CoderParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.acc.WriteString(s)
+
+	var (
+		text   strings.Builder
+		parsed []api.ToolCall
+	)
+	for _, ev := range p.parseEvents() {
+		switch ev := ev.(type) {
+		case qwenEventContent:
+			// TODO(drifkin): content events that are interleaved with tool calls
+			// within one turn are simply concatenated here. See the note on
+			// `qwenEvent` for more details
+			text.WriteString(ev.content)
+		case qwenEventRawToolCall:
+			call, parseErr := parseToolCall(ev, p.tools)
+			if parseErr != nil {
+				slog.Warn("qwen tool call parsing failed", "error", parseErr)
+				return "", "", nil, parseErr
+			}
+			parsed = append(parsed, call)
+		}
+	}
+
+	return text.String(), "", parsed, nil
+}
+
+// parseEvents calls eat repeatedly until it reports that no further progress
+// can be made on the buffered input, and returns every event produced along
+// the way.
+func (p *Qwen3CoderParser) parseEvents() []qwenEvent {
+	var all []qwenEvent
+
+	for {
+		events, more := eat(p)
+		all = append(all, events...)
+		if !more {
+			break
+		}
+	}
+
+	if len(all) > 0 {
+		slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "acc", p.acc.String())
+	}
+
+	return all
+}
+
+// we use some internal event types in order to communicate between `Add` and
+// `eat`. We do this to support interleaving content and parallel tool calls in
+// the parser, even though qwen3-coder isn't supposed to do this. Our API
+// doesn't currently support models outputting multiple messages in a turn, so
+// we wouldn't be able to represent it yet, but there's no reason to prevent the
+// parser from supporting it, especially for future models if they end up using
+// a similar format.
+type qwenEvent interface {
+	isQwenEvent()
+}
+
+// qwenEventRawToolCall carries the unparsed text found between a tool call's
+// opening and closing tags.
+type qwenEventRawToolCall struct {
+	raw string
+}
+
+// qwenEventContent carries a piece of plain content.
+type qwenEventContent struct {
+	content string
+}
+
+// Marker methods that make the event types satisfy qwenEvent.
+func (qwenEventContent) isQwenEvent()     {}
+func (qwenEventRawToolCall) isQwenEvent() {}
+
+// eat makes one pass over the parser's buffer and returns the events that are
+// unambiguous in the current state. The boolean result is true when the
+// parser changed state, meaning another call may produce further events from
+// what is left in the buffer.
+func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
+	var events []qwenEvent
+	buffered := p.acc.String()
+
+	switch p.state {
+	case qwenParserState_LookingForToolStart:
+		if before, after, found := strings.Cut(buffered, toolOpenTag); found {
+			// a complete opening tag: everything before it is content, minus any
+			// whitespace that directly precedes the tag
+			if text := strings.TrimRightFunc(before, unicode.IsSpace); len(text) > 0 {
+				events = append(events, qwenEventContent{content: text})
+			}
+			p.acc.Reset()
+			p.acc.WriteString(after)
+			p.state = qwenParserState_CollectingToolContent
+			return events, true
+		}
+
+		// No complete opening tag yet. Hold back a trailing partial tag (if any)
+		// together with the whitespace in front of it; when there is no partial
+		// tag this holds back only trailing whitespace, in case the content ends
+		// here. Everything before that point is safe to emit.
+		partialLen := overlap(buffered, toolOpenTag)
+		stable := buffered[:len(buffered)-partialLen]
+		cut := len(stable) - trailingWhitespaceLen(stable)
+		p.acc.Reset()
+		p.acc.WriteString(buffered[cut:])
+		if cut > 0 {
+			events = append(events, qwenEventContent{content: buffered[:cut]})
+		}
+		return events, false
+	case qwenParserState_CollectingToolContent:
+		// Tool content is never streamed back unparsed, so there is nothing to
+		// do until the full closing tag has arrived (and no need to look for a
+		// partial one).
+		rawCall, rest, found := strings.Cut(buffered, toolCloseTag)
+		if !found {
+			return events, false
+		}
+		if len(rawCall) == 0 {
+			slog.Warn("qwen tool call closing tag found but no content before it")
+		}
+		// drop whitespace between the tool call and whatever follows it
+		p.acc.Reset()
+		p.acc.WriteString(strings.TrimLeftFunc(rest, unicode.IsSpace))
+		events = append(events, qwenEventRawToolCall{raw: rawCall})
+		p.state = qwenParserState_LookingForToolStart
+		return events, true
+	default:
+		panic("unreachable")
+	}
+}
+
+// TODO(drifkin): move this to a shared location
+// overlap returns the length of the longest suffix of s that is also a prefix
+// of delim, or 0 when there is none.
+func overlap(s, delim string) int {
+	// try the longest candidate first so the first hit is the longest overlap
+	for n := min(len(delim), len(s)); n > 0; n-- {
+		if strings.HasSuffix(s, delim[:n]) {
+			return n
+		}
+	}
+	return 0
+}
+
+// trailingWhitespaceLen returns the number of bytes of whitespace (as defined
+// by unicode.IsSpace) at the end of s. An invalid UTF-8 byte is treated as
+// non-whitespace and stops the scan.
+func trailingWhitespaceLen(s string) int {
+	end := len(s)
+	for end > 0 {
+		r, size := utf8.DecodeLastRuneInString(s[:end])
+		invalid := r == utf8.RuneError && size == 1
+		if invalid || !unicode.IsSpace(r) {
+			break
+		}
+		end -= size
+	}
+	return len(s) - end
+}
+
+// XMLFunctionCall is the decoded form of a <function name="..."> element and
+// its <parameter> children.
+type XMLFunctionCall struct {
+	XMLName    xml.Name       `xml:"function"`
+	Name       string         `xml:"name,attr"`
+	Parameters []XMLParameter `xml:"parameter"`
+}
+
+// XMLParameter is the decoded form of a <parameter name="..."> element; Value
+// holds the element's character data.
+type XMLParameter struct {
+	Name  string `xml:"name,attr"`
+	Value string `xml:",chardata"`
+}
+
+// parseToolCall converts the raw text of a tool call into an api.ToolCall.
+// The raw text uses an xml-like format, for example:
+//
+// <function=get_current_temperature>
+// <parameter=location>
+// San Francisco
+// </parameter>
+// <parameter=unit>
+// celsius
+// </parameter>
+// </function>
+//
+// When tools contains a tool with the same name as the function, the declared
+// types of its parameters are used to convert argument values; arguments
+// without a known type are converted as if no type was declared. An error is
+// returned when the transformed text is not valid XML.
+func parseToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
+	var parsed XMLFunctionCall
+	if err := xml.Unmarshal([]byte(transformToXML(raw.raw)), &parsed); err != nil {
+		return api.ToolCall{}, err
+	}
+
+	// Locate the tool definition that declares this function's parameter types
+	var tool *api.Tool
+	for i := range tools {
+		if tools[i].Function.Name == parsed.Name {
+			tool = &tools[i]
+			break
+		}
+	}
+
+	args := make(api.ToolCallFunctionArguments)
+	for _, param := range parsed.Parameters {
+		var declared api.PropertyType
+		if tool != nil && tool.Function.Parameters.Properties != nil {
+			if prop, ok := tool.Function.Parameters.Properties[param.Name]; ok {
+				if len(prop.AnyOf) == 0 {
+					declared = prop.Type
+				} else {
+					// anyOf: the candidate types are the union of every alternative
+					for _, alternative := range prop.AnyOf {
+						declared = append(declared, alternative.Type...)
+					}
+				}
+			}
+		}
+
+		args[param.Name] = parseValue(param.Value, declared)
+	}
+
+	return api.ToolCall{
+		Function: api.ToolCallFunction{
+			Name:      parsed.Name,
+			Arguments: args,
+		},
+	}, nil
+}
+
+// parseValue converts a raw string value to the appropriate type based on the parameter type specification.
+//
+// For union types (multiple types in PropertyType, which we support but doesn't
+// seem as though the reference parser does type coercion with those types in
+// mind) we use a type precedence approach:
+// 1. null - checked first regardless of declared types (matches reference implementation)
+// 2. boolean - only "true"/"false" are valid booleans
+// 3. integer - must parse as a whole number
+// 4. number - must parse as a finite number (returns int if it has no decimal part and fits in int64)
+// 5. array - must parse as valid JSON array
+// 6. object - must parse as valid JSON object
+// 7. string - always succeeds (least specific type)
+//
+// This precedence ensures we return the most specific type that successfully parses,
+// following the principle of least surprise. For example, with PropertyType{"string", "number"},
+// "123" becomes 123 (number), while "hello" becomes "hello" (string).
+//
+// The result is nil, bool, int, int64, float64, []any, map[string]any, or
+// string. Whenever a value cannot be converted to any declared type, the raw
+// string is returned (except for a lone "boolean" type, which yields false).
+func parseValue(raw string, paramType api.PropertyType) any {
+	// float64 bounds of the int64 range: -2^63 is the smallest int64, and 2^63
+	// is the first value above the largest one (both are exact in float64).
+	const (
+		minInt64AsFloat        = -(1 << 63)
+		maxInt64AsFloatExclude = 1 << 63
+	)
+
+	// first remove a single leading newline and a single trailing newline (if
+	// they exist). This follows the reference implementation
+	raw = strings.TrimPrefix(raw, "\n")
+	raw = strings.TrimSuffix(raw, "\n")
+
+	// Check for null first (case-insensitive) - this takes precedence over any type
+	if strings.ToLower(raw) == "null" {
+		return nil
+	}
+
+	// If no type is specified, default to string
+	if len(paramType) == 0 {
+		return raw
+	}
+
+	// Check if any of the specified types match, using type precedence
+	// Order: boolean -> integer -> number -> array -> object -> string
+	typeSet := make(map[string]bool)
+	for _, t := range paramType {
+		typeSet[t] = true
+	}
+
+	// Try boolean first (most restrictive)
+	if typeSet["boolean"] {
+		lower := strings.ToLower(raw)
+		switch lower {
+		case "true":
+			return true
+		case "false":
+			return false
+		}
+		// If not a valid boolean but boolean is the only type, return false (matching reference)
+		if len(paramType) == 1 {
+			return false
+		}
+		// Otherwise try other types
+	}
+
+	// Try integer
+	if typeSet["integer"] {
+		if i, err := strconv.ParseInt(raw, 10, 64); err == nil {
+			// Return as int if it fits in int32, otherwise int64
+			if i >= math.MinInt32 && i <= math.MaxInt32 {
+				return int(i)
+			}
+			return i
+		}
+		// If integer is the only type and parsing failed, fall back to string
+		if len(paramType) == 1 {
+			return raw
+		}
+	}
+
+	// Try number (float)
+	if typeSet["number"] {
+		// strconv.ParseFloat accepts "inf", "infinity" and "nan". Those are not
+		// usable numeric arguments (they have no JSON representation, and the
+		// reference implementation also ends up with the string for them), so
+		// treat them as a failed parse.
+		if f, err := strconv.ParseFloat(raw, 64); err == nil && !math.IsInf(f, 0) && !math.IsNaN(f) {
+			// If the number has no decimal part, return as int (matching
+			// reference). Only convert when the value fits in int64: converting
+			// an out-of-range float to an integer gives an unspecified result.
+			if f == math.Trunc(f) && f >= minInt64AsFloat && f < maxInt64AsFloatExclude {
+				i := int64(f)
+				if i >= math.MinInt32 && i <= math.MaxInt32 {
+					return int(i)
+				}
+				return i
+			}
+			return f
+		}
+		// If number is the only type and parsing failed, fall back to string
+		if len(paramType) == 1 {
+			return raw
+		}
+	}
+
+	// Try array
+	if typeSet["array"] {
+		var arr []any
+		if err := json.Unmarshal([]byte(raw), &arr); err == nil {
+			return arr
+		}
+		// If array is the only type and parsing failed, fall back to string
+		if len(paramType) == 1 {
+			return raw
+		}
+	}
+
+	// Try object
+	if typeSet["object"] {
+		var obj map[string]any
+		if err := json.Unmarshal([]byte(raw), &obj); err == nil {
+			return obj
+		}
+		// If object is the only type and parsing failed, fall back to string
+		if len(paramType) == 1 {
+			return raw
+		}
+	}
+
+	// String always succeeds (or if "string" is in the type set)
+	if typeSet["string"] {
+		return raw
+	}
+
+	// If we get here, none of the types matched and string wasn't an option
+	// We return string as a fallback. The reference implementation will attempt
+	// to parse the value as a python literal, but we purposefully don't support
+	// that
+	return raw
+}
+
+var (
+	// qwenTagRegex matches the model's `<function=abc>` / `<parameter=abc>`
+	// opening tags. It is deliberately limited to these two tag names so that
+	// text of the form `<word=value>` inside a parameter value (for example the
+	// code snippet `a<b=c>`) is left alone instead of being rewritten.
+	qwenTagRegex = regexp.MustCompile(`<(function|parameter)=([^>]+)>`)
+	// qwenXMLTagRegex matches the opening and closing function/parameter tags
+	// once they are in `<tag name="...">` form.
+	qwenXMLTagRegex = regexp.MustCompile(`</?(?:function|parameter)(?:\s+name="[^"]*")?>`)
+)
+
+// transformToXML transforms a raw qwen tool call with xml-like tags into valid
+// xml so that it can be parsed by any xml parser. Function and parameter tags
+// are rewritten to carry their name as an attribute, and all text outside of
+// those tags is escaped as character data.
+func transformToXML(raw string) string {
+	// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
+	// care to properly escape the string that becomes the attribute value
+	transformed := qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
+		groups := qwenTagRegex.FindStringSubmatch(match)
+		tag := groups[1]
+		var escapedValue strings.Builder
+		// EscapeText only fails when the writer fails, and strings.Builder
+		// writes never return an error
+		_ = xml.EscapeText(&escapedValue, []byte(groups[2]))
+		return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
+	})
+
+	// Walk the resulting string, escaping any character data that sits between the
+	// xml tags we just emitted
+	var out strings.Builder
+	lastIdx := 0
+	for _, loc := range qwenXMLTagRegex.FindAllStringIndex(transformed, -1) {
+		if loc[0] > lastIdx {
+			escapeTextNode(&out, transformed[lastIdx:loc[0]])
+		}
+		out.WriteString(transformed[loc[0]:loc[1]])
+		lastIdx = loc[1]
+	}
+	if lastIdx < len(transformed) {
+		escapeTextNode(&out, transformed[lastIdx:])
+	}
+
+	return out.String()
+}
+
+// escapeTextNode writes s to sb with `&`, `<` and `>` replaced by their XML
+// entities. Every other character, including newlines and tabs, is written
+// unchanged (which is why we don't use xml.EscapeText for this).
+func escapeTextNode(sb *strings.Builder, s string) {
+	for _, r := range s {
+		switch r {
+		case '&':
+			sb.WriteString("&amp;")
+		case '<':
+			sb.WriteString("&lt;")
+		case '>':
+			sb.WriteString("&gt;")
+		default:
+			sb.WriteRune(r)
+		}
+	}
+}
--- a/model/parsers/qwen3coder_test.go
+++ b/model/parsers/qwen3coder_test.go
--- a/model/parsers/qwen3vl.go
+++ b/model/parsers/qwen3vl.go
@@ -0,0 +1,253 @@
+package parsers
+
+import (
+	"context"
+	"encoding/json"
+	"log/slog"
+	"strings"
+	"unicode"
+
+	"github.com/ollama/ollama/api"
+	"github.com/ollama/ollama/logutil"
+)
+
+// TODO: call the init function
+// States used by Qwen3VLParser. CollectingThinkingContent is the zero value,
+// so a parser whose state has not been set starts there.
+const (
+	// Buffered text is emitted as thinking until the closing think tag is seen.
+	CollectingThinkingContent qwenParserState = iota
+	// Buffered text is emitted as content until a tool call opening tag is seen.
+	CollectingContent
+	// Inside a tool call, buffering until the closing tag arrives.
+	CollectingToolContent
+	// Skipping whitespace that follows the closing think tag.
+	ThinkingDoneEatingWhitespace
+	// Skipping whitespace that follows a tool call's closing tag.
+	ToolCallDoneEatingWhitespace
+)
+
+// Tag that ends the thinking section of the output.
+const (
+	thinkingCloseTag = "</think>"
+)
+
+// Qwen3VLParser is a streaming parser that separates thinking text, plain
+// content, and JSON tool calls wrapped in <tool_call> tags.
+type Qwen3VLParser struct {
+	state              qwenParserState
+	buffer             strings.Builder // output received but not yet emitted
+	tools              []api.Tool      // tools passed to Init
+	hasThinkingSupport bool            // whether output may begin with a thinking section
+}
+
+// HasToolSupport always reports true.
+func (p *Qwen3VLParser) HasToolSupport() bool {
+	return true
+}
+
+// HasThinkingSupport reports whether this parser was created with thinking
+// support enabled.
+func (p *Qwen3VLParser) HasThinkingSupport() bool {
+	return p.hasThinkingSupport
+}
+
+// setInitialState picks the state the parser starts in. Parsing starts in
+// CollectingContent when thinking is not supported, or when lastMessage is an
+// assistant message with non-empty content (a prefill); otherwise it starts
+// in CollectingThinkingContent.
+func (p *Qwen3VLParser) setInitialState(lastMessage *api.Message) {
+	switch {
+	case !p.HasThinkingSupport():
+		p.state = CollectingContent
+	case lastMessage != nil && lastMessage.Role == "assistant" && lastMessage.Content != "":
+		p.state = CollectingContent
+	default:
+		p.state = CollectingThinkingContent
+	}
+}
+
+// Init stores tools, selects the initial parser state based on lastMessage,
+// and returns tools unmodified.
+func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
+	p.tools = tools
+	p.setInitialState(lastMessage)
+	return tools
+}
+
+// qwenEventThinkingContent carries a piece of thinking text.
+type qwenEventThinkingContent struct {
+	content string
+}
+
+// Marker method that makes qwenEventThinkingContent satisfy qwenEvent.
+func (qwenEventThinkingContent) isQwenEvent() {}
+
+// Add appends s to the internal buffer and returns whatever can be emitted
+// unambiguously so far: plain content, thinking text, and any completed tool
+// calls. The done flag is not consulted. A tool call that fails to parse is
+// logged and ends the call with that error.
+func (p *Qwen3VLParser) Add(s string, done bool) (content string, thinking string, calls []api.ToolCall, err error) {
+	p.buffer.WriteString(s)
+
+	var (
+		text      strings.Builder
+		reasoning strings.Builder
+		parsed    []api.ToolCall
+	)
+	for _, ev := range p.parseEvents() {
+		switch ev := ev.(type) {
+		case qwenEventContent:
+			// TODO(drifkin): content events that are interleaved within one turn
+			// are simply concatenated here.
+			text.WriteString(ev.content)
+		case qwenEventThinkingContent:
+			reasoning.WriteString(ev.content)
+		case qwenEventRawToolCall:
+			call, parseErr := parseJSONToolCall(ev, p.tools)
+			if parseErr != nil {
+				slog.Warn("qwen tool call parsing failed", "error", parseErr)
+				return "", "", nil, parseErr
+			}
+			parsed = append(parsed, call)
+		}
+	}
+
+	return text.String(), reasoning.String(), parsed, nil
+}
+
+// parseEvents calls eat repeatedly until it reports that no further progress
+// can be made on the buffered input, and returns every event produced along
+// the way.
+func (p *Qwen3VLParser) parseEvents() []qwenEvent {
+	var all []qwenEvent
+
+	for {
+		events, more := p.eat()
+		all = append(all, events...)
+		if !more {
+			break
+		}
+	}
+
+	if len(all) > 0 {
+		slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "buffer", p.buffer.String())
+	}
+
+	return all
+}
+
+// splitAtTag splits the parser's buffer at the first occurrence of tag. It
+// returns the text before the tag with trailing whitespace removed, and the
+// text after the tag (with leading whitespace removed when trimAfter is set).
+// The buffer is replaced by the returned "after" text. The tag must be present
+// in the buffer; callers check this before calling.
+func splitAtTag(p *Qwen3VLParser, tag string, trimAfter bool) (string, string) {
+	parts := strings.SplitN(p.buffer.String(), tag, 2)
+	head, tail := parts[0], parts[1]
+	if trimAfter {
+		tail = strings.TrimLeftFunc(tail, unicode.IsSpace)
+	}
+	p.buffer.Reset()
+	p.buffer.WriteString(tail)
+	return strings.TrimRightFunc(head, unicode.IsSpace), tail
+}
+
+// eatLeadingWhitespaceAndTransitionTo drops leading whitespace from the
+// buffer. If anything remains, the parser moves to nextState and true is
+// returned so the remainder gets processed; if the buffer held only
+// whitespace (or nothing), it is left empty, the state is unchanged, and
+// false is returned. No events are ever produced.
+func (p *Qwen3VLParser) eatLeadingWhitespaceAndTransitionTo(nextState qwenParserState) ([]qwenEvent, bool) {
+	rest := strings.TrimLeftFunc(p.buffer.String(), unicode.IsSpace)
+	p.buffer.Reset()
+	if len(rest) > 0 {
+		p.buffer.WriteString(rest)
+		p.state = nextState
+		return nil, true
+	}
+	return nil, false
+}
+
+// eat makes one pass over the parser's buffer and returns the events that are
+// unambiguous in the current state. The boolean result is true when another
+// call may produce further events from what is left in the buffer.
+func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
+	var events []qwenEvent
+	buffered := p.buffer.String()
+
+	switch p.state {
+	case CollectingContent:
+		if strings.Contains(buffered, toolOpenTag) {
+			// a complete opening tag: emit the content in front of it
+			if before, _ := splitAtTag(p, toolOpenTag, false); len(before) > 0 {
+				events = append(events, qwenEventContent{content: before})
+			}
+			p.state = CollectingToolContent
+			return events, true
+		}
+
+		// Hold back a trailing partial opening tag (if any) and the whitespace in
+		// front of it; with no partial tag only trailing whitespace is held back.
+		// Everything before that point is emitted as content.
+		partialLen := overlap(buffered, toolOpenTag)
+		stable := buffered[:len(buffered)-partialLen]
+		cut := len(stable) - trailingWhitespaceLen(stable)
+		p.buffer.Reset()
+		p.buffer.WriteString(buffered[cut:])
+		if cut > 0 {
+			events = append(events, qwenEventContent{content: buffered[:cut]})
+		}
+		return events, false
+	case CollectingToolContent:
+		// nothing is emitted until the full closing tag has arrived
+		rawCall, rest, found := strings.Cut(buffered, toolCloseTag)
+		if !found {
+			return events, false
+		}
+		if len(rawCall) == 0 {
+			slog.Warn("qwen tool call closing tag found but no content before it")
+		}
+		events = append(events, qwenEventRawToolCall{raw: rawCall})
+		p.buffer.Reset()
+		p.buffer.WriteString(rest)
+		p.state = ToolCallDoneEatingWhitespace
+		return events, true
+	case CollectingThinkingContent:
+		if strings.Contains(buffered, thinkingCloseTag) {
+			thinking, remaining := splitAtTag(p, thinkingCloseTag, true)
+			if len(thinking) > 0 {
+				events = append(events, qwenEventThinkingContent{content: thinking})
+			}
+			// with nothing left after the tag, whitespace arriving later still
+			// has to be skipped before content starts
+			if remaining == "" {
+				p.state = ThinkingDoneEatingWhitespace
+			} else {
+				p.state = CollectingContent
+			}
+			return events, true
+		}
+
+		// Same hold-back rule as for content, but relative to the closing think
+		// tag, and the emitted text is thinking.
+		partialLen := overlap(buffered, thinkingCloseTag)
+		stable := buffered[:len(buffered)-partialLen]
+		cut := len(stable) - trailingWhitespaceLen(stable)
+		p.buffer.Reset()
+		p.buffer.WriteString(buffered[cut:])
+		if cut > 0 {
+			events = append(events, qwenEventThinkingContent{content: buffered[:cut]})
+		}
+		return events, false
+	case ThinkingDoneEatingWhitespace, ToolCallDoneEatingWhitespace:
+		return p.eatLeadingWhitespaceAndTransitionTo(CollectingContent)
+	default:
+		panic("unreachable")
+	}
+}
+
+// parseJSONToolCall decodes the raw text of a tool call as a JSON-encoded
+// api.ToolCallFunction and wraps it in an api.ToolCall. The tools argument is
+// currently not used. An error is returned when the text is not valid JSON
+// for that type.
+func parseJSONToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
+	var fn api.ToolCallFunction
+	if err := json.Unmarshal([]byte(raw.raw), &fn); err != nil {
+		return api.ToolCall{}, err
+	}
+
+	return api.ToolCall{Function: fn}, nil
+}
--- a/model/parsers/qwen3vl_nonthinking_test.go
+++ b/model/parsers/qwen3vl_nonthinking_test.go
@@ -0,0 +1,841 @@
+package parsers
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/ollama/ollama/api"
+)
+
+// TestQwen3VLNonThinkingParserStreaming feeds each case's inputs chunk by chunk
+// into a Qwen3VLParser created with hasThinkingSupport=false and compares the
+// events returned by parseEvents after every chunk. The cases expect <think>
+// tags to pass through as ordinary content while <tool_call> blocks are split out.
+func TestQwen3VLNonThinkingParserStreaming(t *testing.T) {
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+		only  bool
+	}{
+		{
+			desc: "simple thinking",
+			steps: []step{
+				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
+			},
+		},
+		{
+			desc: "simple trip thinking",
+			steps: []step{
+				{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "<think>abc</think>"}}},
+			},
+		},
+		{
+			desc: "thinking with split tags",
+			steps: []step{
+				{input: "abc", wantEvents: []qwenEvent{qwenEventContent{content: "abc"}}},
+				{input: "</think>", wantEvents: []qwenEvent{qwenEventContent{content: "</think>"}}},
+			},
+		},
+		{
+			desc: "multiple think tags",
+			steps: []step{
+				{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>actually, is not thinking</think>"}}},
+			},
+		},
+		{
+			desc: "thinking and tool call",
+			steps: []step{
+				{
+					input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "I'm thinking</think>"},
+						qwenEventRawToolCall{raw: "I'm tool calling"},
+					},
+				},
+			},
+		},
+		{
+			desc: "nested thinking (outside thinking, inside thinking)",
+			steps: []step{
+				{
+					input: "I'm thinking<think>I'm nested thinking</think></think>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "I'm thinking<think>I'm nested thinking</think></think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "interleaved thinking",
+			steps: []step{
+				{
+					input: "<think>I'm thinking</think>I'm actually content</think>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "<think>I'm thinking</think>I'm actually content</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "nested thinking and tool call (outside thinking, inside tool call)",
+			steps: []step{
+				{
+					input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "I'm thinking"},
+						qwenEventRawToolCall{raw: "I'm nested tool call"},
+						qwenEventContent{content: "</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "nested thinking and tool call (outside tool call, inside thinking)",
+			steps: []step{
+				{
+					input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "I'm nested tool call<think>I'm thinking</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "interleaved thinking and tool call",
+			steps: []step{
+				{
+					input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "I'm thinking"},
+						qwenEventRawToolCall{raw: "I'm NOT a nested tool call</think>"},
+						qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
+						qwenEventContent{content: "</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "emit unambiguous before partial tool open (trailing ws)",
+			steps: []step{
+				{
+					input:      "abc\u00a0\n<tool_call",
+					wantEvents: []qwenEvent{qwenEventContent{content: "abc"}},
+				},
+				{
+					input:      " fakeout",
+					wantEvents: []qwenEvent{qwenEventContent{content: "\u00a0\n<tool_call fakeout"}},
+				},
+			},
+		},
+		{
+			desc: "unambiguous empty: partial tool open at buffer start",
+			steps: []step{
+				{
+					input:      "<tool_ca",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "ll>abc</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "abc"},
+					},
+				},
+			},
+		},
+		{
+			desc: "partial thinking tag fakeout",
+			steps: []step{
+				{
+					input:      "abc</think",
+					wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}},
+				},
+				{
+					input:      " fakeout",
+					wantEvents: []qwenEvent{qwenEventContent{content: " fakeout"}},
+				},
+			},
+		},
+		{
+			desc: "partial thinking incomplete",
+			steps: []step{
+				{
+					input:      "abc<think>unfinished<", // the trailing "<" is ambiguous (it may begin a tag), so only the text before it is expected
+					wantEvents: []qwenEvent{qwenEventContent{content: "abc<think>unfinished"}},
+				},
+			},
+		},
+		{
+			desc: "test with split tool and content",
+			steps: []step{
+				{
+					input: "abc<tool_call>unfinished</", // the tool call body is not expected until its close tag is complete
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "abc"},
+					},
+				},
+				{
+					input: "tool_call> def",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "unfinished"},
+						qwenEventContent{content: "def"},
+					},
+				},
+			},
+		},
+	}
+	// When any case sets `only`, every case without it is skipped.
+	focused := false
+	for _, c := range cases {
+		focused = focused || c.only
+	}
+
+	for _, c := range cases {
+		if focused && !c.only {
+			continue
+		}
+		t.Run(c.desc, func(t *testing.T) {
+			p := Qwen3VLParser{hasThinkingSupport: false}
+			p.Init([]api.Tool{}, nil)
+
+			for idx, st := range c.steps {
+				p.buffer.WriteString(st.input)
+				got := p.parseEvents()
+				// reflect.DeepEqual distinguishes nil from empty, so compare "no events" by length
+				if len(got) == 0 && len(st.wantEvents) == 0 {
+					continue
+				}
+				if !reflect.DeepEqual(got, st.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", idx, st.input, got, st.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+// TestQwenOldParserStreaming drives a Qwen3VLParser created with
+// hasThinkingSupport=false using chunked input and checks the events returned by
+// parseEvents after each chunk. The cases cover tool-call tags split across
+// chunks, trailing whitespace that is held back until a later chunk resolves it,
+// and multi-byte content (CJK, Arabic, emoji, non-ASCII spaces).
+func TestQwenOldParserStreaming(t *testing.T) {
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+		only  bool
+	}{
+		{
+			desc: "simple message streamed word by word",
+			steps: []step{
+				{
+					input:      "hi",
+					wantEvents: []qwenEvent{qwenEventContent{content: "hi"}},
+				},
+				{
+					input:      " there",
+					wantEvents: []qwenEvent{qwenEventContent{content: " there"}},
+				},
+			},
+		},
+		{
+			desc: "content before tool call",
+			steps: []step{
+				{
+					input:      "hi there<tool_call>",
+					wantEvents: []qwenEvent{qwenEventContent{content: "hi there"}},
+				},
+			},
+		},
+		{
+			desc: "multiple tool calls in one message",
+			steps: []step{
+				{
+					input: "before1<tool_call>in tool call</tool_call>after1<tool_call>in tool call 2</tool_call>after2",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "before1"},
+						qwenEventRawToolCall{raw: "in tool call"},
+						qwenEventContent{content: "after1"},
+						qwenEventRawToolCall{raw: "in tool call 2"},
+						qwenEventContent{content: "after2"},
+					},
+				},
+			},
+		},
+		{
+			desc: "tool calls with split tags",
+			steps: []step{
+				{
+					input: "before<tool",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "before"},
+					},
+				},
+				{
+					input:      "_call>in tool call</tool",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "_call>af",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "in tool call"},
+						qwenEventContent{content: "af"},
+					},
+				},
+				{
+					input: "ter",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "ter"},
+					},
+				},
+			},
+		},
+		{
+			desc: "trailing whitespace between content and tool call",
+			steps: []step{
+				{
+					input: "abc\n<tool_call>def</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "abc"},
+						qwenEventRawToolCall{raw: "def"},
+					},
+				},
+			},
+		},
+		{
+			desc: "trailing whitespace between tool call and content",
+			steps: []step{
+				{
+					input: "<tool_call>abc</tool_call>\ndef",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "abc"},
+						qwenEventContent{content: "def"},
+					},
+				},
+			},
+		},
+		{
+			desc: "empty content before tool call",
+			steps: []step{
+				{
+					input: "\n<tool_call>abc</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "abc"},
+					},
+				},
+			},
+		},
+		{
+			desc: "partial tool open tag fakeout",
+			steps: []step{
+				{
+					input: "abc\n<tool_call",
+					wantEvents: []qwenEvent{
+						// the "\n" is held back: `<tool_call` may still turn into a tool
+						// open tag, and whitespace in front of a tool call gets trimmed
+						qwenEventContent{content: "abc"},
+					},
+				},
+				{
+					input: " fakeout",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "\n<tool_call fakeout"},
+					},
+				},
+			},
+		},
+		{
+			desc: "token-by-token whitespace handling",
+			steps: []step{
+				{
+					input: "a",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "a"},
+					},
+				},
+				{
+					input:      "\n",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "b",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "\nb"},
+					},
+				},
+			},
+		},
+		{
+			desc: "unicode content",
+			steps: []step{
+				{
+					input: "你好 🌍<tool_call>test</tool_call>مرحبا",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "你好 🌍"},
+						qwenEventRawToolCall{raw: "test"},
+						qwenEventContent{content: "مرحبا"},
+					},
+				},
+			},
+		},
+		{
+			desc: "arabic text handling",
+			steps: []step{
+				{
+					input:      "مرحبا بالعالم",
+					wantEvents: []qwenEvent{qwenEventContent{content: "مرحبا بالعالم"}},
+				},
+			},
+		},
+		{
+			desc: "emoji passthrough",
+			steps: []step{
+				{
+					input:      "✅",
+					wantEvents: []qwenEvent{qwenEventContent{content: "✅"}},
+				},
+			},
+		},
+		{
+			desc: "emoji after tool call",
+			steps: []step{
+				{
+					input: "<tool_call>test</tool_call>完成 ✅",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "test"},
+						qwenEventContent{content: "完成 ✅"},
+					},
+				},
+			},
+		},
+		{
+			desc: "unicode streaming with whitespace handling",
+			steps: []step{
+				{
+					input: "مرحبا",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "مرحبا"},
+					},
+				},
+				{
+					input:      " \n",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "世界",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: " \n世界"},
+					},
+				},
+			},
+		},
+		{
+			desc: "non-breaking space withheld across chunks",
+			steps: []step{
+				{
+					input: "Hello\u00a0",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "Hello"},
+					},
+				},
+				{
+					input: "world",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "\u00a0world"},
+					},
+				},
+			},
+		},
+		{
+			desc: "ideographic space before partial tool",
+			steps: []step{
+				{
+					input: "Hello\u3000<tool",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "Hello"},
+					},
+				},
+				{
+					input:      "_call>abc",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "</tool_call>def",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "abc"},
+						qwenEventContent{content: "def"},
+					},
+				},
+			},
+		},
+		{
+			desc: "ideographic space before partial tool fakeout",
+			steps: []step{
+				{
+					input: "Hello\u3000<tool",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "Hello"},
+					},
+				},
+				{
+					input: "fakeout>abc",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "\u3000<toolfakeout>abc"},
+					},
+				},
+			},
+		},
+		{
+			desc: "unicode with partial tool tag",
+			steps: []step{
+				{
+					input: "测试🎯 <to",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "测试🎯"},
+					},
+				},
+			},
+		},
+	}
+
+	// Cases marked `only` take over the run when at least one is present.
+	var focused bool
+	for _, c := range cases {
+		focused = focused || c.only
+	}
+
+	for _, c := range cases {
+		if focused && !c.only {
+			continue
+		}
+		t.Run(c.desc, func(t *testing.T) {
+			p := Qwen3VLParser{hasThinkingSupport: false}
+			p.Init([]api.Tool{}, nil)
+
+			for n, chunk := range c.steps {
+				p.buffer.WriteString(chunk.input)
+				got := p.parseEvents()
+				switch {
+				case len(got) == 0 && len(chunk.wantEvents) == 0:
+					// nothing produced and nothing expected: nil vs. empty slice does not matter
+				case !reflect.DeepEqual(got, chunk.wantEvents):
+					t.Errorf("step %d: input %q: got events %#v, want %#v", n, chunk.input, got, chunk.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+func TestQwen3VLNonThinkingToolParser(t *testing.T) {
+	type step struct {
+		name         string
+		rawToolCall  string
+		tools        []api.Tool
+		wantToolCall api.ToolCall
+	}
+
+	steps := []step{
+		{
+			name:        "simple tool call",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "get-current-weather",
+					Arguments: map[string]any{
+						"location": "San Francisco, CA",
+						"unit":     "fahrenheit",
+					},
+				},
+			},
+		},
+		{
+			name:        "names with spaces",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "get current temperature",
+					Arguments: map[string]any{
+						"location with spaces": "San Francisco",
+						"unit with spaces":     "celsius",
+					},
+				},
+			},
+		},
+		{
+			name:        "names with quotes",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "\"get current temperature\"",
+					Arguments: map[string]any{
+						"\"location with spaces\"": "San Francisco",
+						"\"unit with spaces\"":     "\"celsius\"",
+					},
+				},
+			},
+		},
+		{
+			name:        "tool call with typed parameters (json types)",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "calculate",
+					Arguments: map[string]any{
+						"x":       3.14,
+						"y":       float64(42),
+						"enabled": true,
+						"items":   []any{"a", "b", "c"},
+					},
+				},
+			},
+		},
+		{
+			name:        "ampersands in parameter values",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "exec",
+					Arguments: map[string]any{
+						"command": "ls && echo \"done\"",
+					},
+				},
+			},
+		},
+		{
+			name:        "angle brackets in parameter values",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "exec",
+					Arguments: map[string]any{
+						"command": "ls && echo \"a > b and a < b\"",
+					},
+				},
+			},
+		},
+		{
+			name:        "unicode in function names and parameters",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "获取天气",
+					Arguments: map[string]any{
+						"城市":      "北京",
+						"message": "Hello! 你好! 🌟 مرحبا",
+					},
+				},
+			},
+		},
+	}
+
+	for i, step := range steps {
+		gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
+		if err != nil {
+			t.Errorf("step %d (%s): %v", i, step.name, err)
+		}
+		if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
+			t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
+		}
+	}
+}
+
+// TestQwen3VLNonThinkingToolCallWhitespaceHandling checks that whitespace inside a
+// tool call is kept verbatim while whitespace adjacent to the tool call is dropped.
+func TestQwen3VLNonThinkingToolCallWhitespaceHandling(t *testing.T) {
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+		only  bool
+	}{
+		{
+			desc: "whitespace inside tool call preserves trailing space",
+			steps: []step{
+				{
+					input: "before<tool_call>   tool content   </tool_call>after",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "before"},
+						qwenEventRawToolCall{raw: "   tool content   "},
+						qwenEventContent{content: "after"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace inside tool call preserved, whitespace around tool call trimmed",
+			steps: []step{
+				{
+					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t     <tool_call>   tool content   </tool_call> \n\n\n\n\n\n\n after",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh"},
+						qwenEventRawToolCall{raw: "   tool content   "},
+						qwenEventContent{content: "after"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace after tool call eaten across chunks",
+			steps: []step{
+				{
+					input: "<tool_call>   tool content   </tool_call>            ",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "   tool content   "},
+					},
+				},
+				{
+					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t     <tool_call>   anotha one   </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
+						qwenEventRawToolCall{raw: "   anotha one   "},
+						qwenEventContent{content: "after \n\n\n\n\n\n blep"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace between content and tool call",
+			steps: []step{
+				{
+					input: "content   \n  <tool_call>tool</tool_call>  \n  more content",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "content"},
+						qwenEventRawToolCall{raw: "tool"},
+						qwenEventContent{content: "more content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "consecutive tool calls with whitespace",
+			steps: []step{
+				{
+					input: "<tool_call>first</tool_call>  \n  <tool_call>second</tool_call>  \n  <tool_call>third</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "first"},
+						qwenEventRawToolCall{raw: "second"},
+						qwenEventRawToolCall{raw: "third"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace before and after tool open tag",
+			steps: []step{
+				{
+					input: "text   \n   <tool_call>content</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "text"},
+						qwenEventRawToolCall{raw: "content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "unicode whitespace around tool calls",
+			steps: []step{
+				{
+					input: "text\u00a0\u3000<tool_call>content</tool_call>\u00a0\u3000text",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "text"},
+						qwenEventRawToolCall{raw: "content"},
+						qwenEventContent{content: "text"},
+					},
+				},
+			},
+		},
+		{
+			desc: "empty tool call with surrounding whitespace",
+			steps: []step{
+				{
+					input: "before  <tool_call></tool_call>  after",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "before"},
+						qwenEventRawToolCall{raw: ""},
+						qwenEventContent{content: "after"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace in tool call split across chunks",
+			steps: []step{
+				{
+					input:      "before<tool_call>  ",
+					wantEvents: []qwenEvent{qwenEventContent{content: "before"}},
+				},
+				{
+					input:      "tool",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "  </tool_call>after",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "  tool  "},
+						qwenEventContent{content: "after"},
+					},
+				},
+			},
+		},
+		{
+			desc: "mixed whitespace types between tool calls",
+			steps: []step{
+				{
+					input: "<tool_call>first</tool_call> \t\n\r <tool_call>second</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "first"},
+						qwenEventRawToolCall{raw: "second"},
+					},
+				},
+			},
+		},
+	}
+
+	anyOnlies := false
+	for _, tc := range cases {
+		anyOnlies = anyOnlies || tc.only
+	}
+
+	for _, tc := range cases {
+		if anyOnlies && !tc.only {
+			continue
+		}
+
+		t.Run(tc.desc, func(t *testing.T) {
+			parser := Qwen3VLParser{hasThinkingSupport: false}
+			parser.Init([]api.Tool{}, nil)
+
+			for i, step := range tc.steps {
+				parser.buffer.WriteString(step.input)
+				gotEvents := parser.parseEvents()
+				// avoid deep equal on empty vs. nil slices
+				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
+					continue
+				}
+
+				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+				}
+			}
+		})
+	}
+}
--- a/model/parsers/qwen3vl_thinking_test.go
+++ b/model/parsers/qwen3vl_thinking_test.go
@@ -0,0 +1,878 @@
+package parsers
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/ollama/ollama/api"
+)
+
+// TestQwen3VLThinkingParserStreaming feeds chunked input to a Qwen3VLParser created
+// with hasThinkingSupport=true and checks the events returned after each chunk.
+// The cases expect output to start as thinking content and to switch to regular
+// content and tool-call parsing only after the first </think>.
+func TestQwen3VLThinkingParserStreaming(t *testing.T) {
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+		only  bool
+	}{
+		{
+			desc: "simple thinking",
+			steps: []step{
+				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
+			},
+		},
+		{
+			desc: "simple trip thinking",
+			steps: []step{
+				{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "<think>abc"}}},
+			},
+		},
+		{
+			desc: "thinking with split tags",
+			steps: []step{
+				{input: "abc", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
+				{input: "</think>", wantEvents: []qwenEvent{}},
+			},
+		},
+		{
+			desc: "multiple think tags",
+			steps: []step{
+				{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>actually, is not thinking"}}},
+			},
+		},
+		{
+			desc: "thinking and tool call",
+			steps: []step{
+				{
+					input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "I'm thinking"},
+						qwenEventRawToolCall{raw: "I'm tool calling"},
+					},
+				},
+			},
+		},
+		{
+			desc: "thinking and content",
+			steps: []step{
+				{
+					input: "I'm thinking</think>I'm content",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "I'm thinking"},
+						qwenEventContent{content: "I'm content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "thinking and tool call and content",
+			steps: []step{
+				{input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>I'm content", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking"}, qwenEventRawToolCall{raw: "I'm tool calling"}, qwenEventContent{content: "I'm content"}}},
+			},
+		},
+		{
+			desc: "nested thinking (outside thinking, inside thinking)",
+			steps: []step{
+				{
+					input: "I'm thinking<think>I'm nested thinking</think></think>",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "I'm thinking<think>I'm nested thinking"},
+						qwenEventContent{content: "</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "interleaved thinking",
+			steps: []step{
+				{
+					input: "<think>I'm thinking</think>I'm actually content</think>",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "<think>I'm thinking"},
+						qwenEventContent{content: "I'm actually content</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "nested thinking and tool call (outside thinking, inside tool call)",
+			steps: []step{
+				{
+					input:      "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
+				},
+			},
+		},
+		{
+			desc: "nested thinking and tool call (outside tool call, inside thinking)",
+			steps: []step{
+				{
+					input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
+						qwenEventContent{content: "</tool_call>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "interleaved thinking and tool call",
+			steps: []step{
+				{
+					input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
+						qwenEventContent{content: "</tool_call>"},
+						qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
+						qwenEventContent{content: "</think>"},
+					},
+				},
+			},
+		},
+		{
+			desc: "partial thinking tag fakeout",
+			steps: []step{
+				{
+					input:      "abc</think",
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}},
+				},
+				{
+					input:      " fakeout",
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}},
+				},
+			},
+		},
+		{
+			desc: "partial thinking incomplete",
+			steps: []step{
+				{
+					input:      "abc<think>unfinished</think", // the partial close tag is ambiguous, so only the text before it is expected
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
+				},
+			},
+		},
+		{
+			desc: "test with split thinking and content",
+			steps: []step{
+				{
+					input:      "abc<think>unfinished</th", // the partial close tag is ambiguous, so only the text before it is expected
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
+				},
+				{
+					input: "ink> def",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "def"},
+					},
+				},
+			},
+		},
+		{
+			desc: "thinking with no tags",
+			steps: []step{
+				{
+					input: "Hello I am thinking",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "Hello I am thinking"},
+					},
+				},
+				{
+					input: "Hello I am thinking some more",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "Hello I am thinking some more"},
+					},
+				},
+				{
+					input: "Hello I am think</think>     NOT",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "Hello I am think"},
+						qwenEventContent{content: "NOT"},
+					},
+				},
+			},
+		},
+	}
+	anyOnlies := false
+	for _, tc := range cases {
+		anyOnlies = anyOnlies || tc.only
+	}
+
+	for _, tc := range cases {
+		if anyOnlies && !tc.only {
+			continue
+		}
+		t.Run(tc.desc, func(t *testing.T) {
+			parser := Qwen3VLParser{hasThinkingSupport: true}
+			parser.Init([]api.Tool{}, nil) // no last message; TestQwen3VLParserState expects CollectingThinkingContent here
+			for i, step := range tc.steps {
+				parser.buffer.WriteString(step.input)
+				gotEvents := parser.parseEvents()
+				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
+					// avoid deep equal on empty vs. nil slices
+					continue
+				}
+				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+func TestQwen3VLThinkingToolParser(t *testing.T) {
+	type step struct {
+		name         string
+		rawToolCall  string
+		tools        []api.Tool
+		wantToolCall api.ToolCall
+	}
+
+	steps := []step{
+		{
+			name:        "simple tool call",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "get-current-weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "get-current-weather",
+					Arguments: map[string]any{
+						"location": "San Francisco, CA",
+						"unit":     "fahrenheit",
+					},
+				},
+			},
+		},
+		{
+			name:        "names with spaces",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "get current temperature", "arguments": {"location with spaces": "San Francisco", "unit with spaces": "celsius"}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "get current temperature",
+					Arguments: map[string]any{
+						"location with spaces": "San Francisco",
+						"unit with spaces":     "celsius",
+					},
+				},
+			},
+		},
+		{
+			name:        "names with quotes",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "\"get current temperature\"", "arguments": {"\"location with spaces\"": "San Francisco", "\"unit with spaces\"": "\"celsius\""}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "\"get current temperature\"",
+					Arguments: map[string]any{
+						"\"location with spaces\"": "San Francisco",
+						"\"unit with spaces\"":     "\"celsius\"",
+					},
+				},
+			},
+		},
+		{
+			name:        "tool call with typed parameters (json types)",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "calculate", "arguments": {"x": 3.14, "y": 42, "enabled": true, "items": ["a", "b", "c"]}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "calculate",
+					Arguments: map[string]any{
+						"x":       3.14,
+						"y":       float64(42),
+						"enabled": true,
+						"items":   []any{"a", "b", "c"},
+					},
+				},
+			},
+		},
+		{
+			name:        "ampersands in parameter values",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"done\""}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "exec",
+					Arguments: map[string]any{
+						"command": "ls && echo \"done\"",
+					},
+				},
+			},
+		},
+		{
+			name:        "angle brackets in parameter values",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "exec", "arguments": {"command": "ls && echo \"a > b and a < b\""}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "exec",
+					Arguments: map[string]any{
+						"command": "ls && echo \"a > b and a < b\"",
+					},
+				},
+			},
+		},
+		{
+			name:        "unicode in function names and parameters",
+			tools:       []api.Tool{},
+			rawToolCall: `{"name": "获取天气", "arguments": {"城市": "北京", "message": "Hello! 你好! 🌟 مرحبا"}}`,
+			wantToolCall: api.ToolCall{
+				Function: api.ToolCallFunction{
+					Name: "获取天气",
+					Arguments: map[string]any{
+						"城市":      "北京",
+						"message": "Hello! 你好! 🌟 مرحبا",
+					},
+				},
+			},
+		},
+	}
+
+	for i, step := range steps {
+		gotToolCall, err := parseJSONToolCall(qwenEventRawToolCall{raw: step.rawToolCall}, step.tools)
+		if err != nil {
+			t.Errorf("step %d (%s): %v", i, step.name, err)
+		}
+		if !reflect.DeepEqual(gotToolCall, step.wantToolCall) {
+			t.Errorf("step %d (%s): got tool call %#v, want %#v", i, step.name, gotToolCall, step.wantToolCall)
+		}
+	}
+}
+
+// TestQwen3VLParserState checks the state Init leaves the parser in for each
+// combination of hasThinkingSupport and last message (an unset last is nil).
+func TestQwen3VLParserState(t *testing.T) {
+	tests := []struct {
+		desc        string
+		hasThinking bool
+		last        *api.Message
+		wantState   qwenParserState
+	}{
+		{
+			desc:        "no thinking support => CollectingContent",
+			hasThinking: false, // last left unset (nil)
+			wantState:   CollectingContent,
+		},
+		{
+			desc:        "thinking support, no last message => CollectingThinkingContent",
+			hasThinking: true, // last left unset (nil)
+			wantState:   CollectingThinkingContent,
+		},
+		{
+			desc:        "thinking support, last assistant with empty content => CollectingThinkingContent",
+			hasThinking: true,
+			last:        &api.Message{Role: "assistant", Content: ""},
+			wantState:   CollectingThinkingContent,
+		},
+		{
+			desc:        "thinking support, last assistant with content => CollectingContent",
+			hasThinking: true,
+			last:        &api.Message{Role: "assistant", Content: "hello"},
+			wantState:   CollectingContent,
+		},
+		{
+			desc:        "thinking support, last is user => CollectingThinkingContent",
+			hasThinking: true,
+			last:        &api.Message{Role: "user", Content: "hi"},
+			wantState:   CollectingThinkingContent,
+		},
+	}
+
+	for _, tt := range tests {
+		p := Qwen3VLParser{hasThinkingSupport: tt.hasThinking}
+		p.Init(nil, tt.last)
+		if got := p.state; got != tt.wantState {
+			t.Errorf("%s: got state %v, want %v", tt.desc, got, tt.wantState)
+		}
+	}
+}
+
+// TestQwen3VLThinkingParserWithThinkingPrefill streams chunks through a
+// thinking-capable parser initialized with an assistant message that has
+// thinking but no content. The expectations treat text before "</think>" as
+// thinking content and text after it as regular content.
+func TestQwen3VLThinkingParserWithThinkingPrefill(t *testing.T) {
+	// step is one chunk written to the parser buffer plus the events that
+	// parseEvents is expected to return right after it.
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+	}{
+		{
+			desc: "thinking prefill",
+			steps: []step{
+				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
+			},
+		},
+		{
+			desc: "thinking prefill with content",
+			steps: []step{
+				{input: "abc</th", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
+				{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "def"}}},
+			},
+		},
+		{
+			desc: "thinking prefill with fakeout",
+			steps: []step{
+				{input: "abc</think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
+				{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}}},
+				{input: ">", wantEvents: []qwenEvent{}},
+			},
+		},
+		{
+			desc: "thinking prefill with spaces",
+			steps: []step{
+				{input: "        </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: "starting content"}}},
+			},
+		},
+	}
+
+	// The prefilled assistant message has thinking but no content, so the
+	// expectations above treat streamed output as a continuation of thinking.
+	last := &api.Message{Role: "assistant", Thinking: "i am thinking"}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			// A fresh parser per case keeps buffered input from leaking between cases.
+			parser := Qwen3VLParser{hasThinkingSupport: true}
+			parser.Init([]api.Tool{}, last)
+
+			for i, step := range tc.steps {
+				parser.buffer.WriteString(step.input)
+				gotEvents := parser.parseEvents()
+
+				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
+					// avoid deep equal on empty vs. nil slices
+					continue
+				}
+
+				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+// TestQwen3VLThinkingParserWithNonThinkingPrefill streams chunks through a
+// thinking-capable parser initialized with an assistant message that already
+// has content. The expectations treat every chunk, including "</think>" and
+// its fragments, as plain content that is emitted unchanged.
+func TestQwen3VLThinkingParserWithNonThinkingPrefill(t *testing.T) {
+	// step is one chunk written to the parser buffer plus the events that
+	// parseEvents is expected to return right after it.
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+	}{
+		{
+			desc: "thinking prefill",
+			steps: []step{
+				{input: "abc</think>", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}}},
+			},
+		},
+		{
+			desc: "thinking prefill with content",
+			steps: []step{
+				{input: "abc</th", wantEvents: []qwenEvent{qwenEventContent{content: "abc</th"}}},
+				{input: "ink> def", wantEvents: []qwenEvent{qwenEventContent{content: "ink> def"}}},
+			},
+		},
+		{
+			desc: "thinking prefill with fakeout",
+			steps: []step{
+				{input: "abc</think", wantEvents: []qwenEvent{qwenEventContent{content: "abc</think"}}},
+				{input: " fakeout </think", wantEvents: []qwenEvent{qwenEventContent{content: " fakeout </think"}}},
+				{input: ">", wantEvents: []qwenEvent{qwenEventContent{content: ">"}}},
+			},
+		},
+		{
+			desc: "thinking prefill with spaces",
+			steps: []step{
+				{input: "        </think> starting content", wantEvents: []qwenEvent{qwenEventContent{content: "        </think> starting content"}}},
+			},
+		},
+	}
+
+	// The prefilled assistant message already has content (in addition to
+	// thinking), so the expectations above treat all streamed output as content.
+	last := &api.Message{Role: "assistant", Thinking: "i am thinking", Content: "i am content"}
+
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			// A fresh parser per case keeps buffered input from leaking between cases.
+			parser := Qwen3VLParser{hasThinkingSupport: true}
+			parser.Init([]api.Tool{}, last)
+
+			for i, step := range tc.steps {
+				parser.buffer.WriteString(step.input)
+				gotEvents := parser.parseEvents()
+
+				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
+					// avoid deep equal on empty vs. nil slices
+					continue
+				}
+
+				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+// TestQwen3VLThinkingParserStreamingAssistantPrefillContent feeds two chunks to
+// a thinking-capable parser whose last message is an assistant message with
+// content. It expects "</think>" to come through as plain content, followed by
+// a raw tool call event for the <tool_call> chunk.
+func TestQwen3VLThinkingParserStreamingAssistantPrefillContent(t *testing.T) {
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	// last message is assistant with content ⇒ start in CollectingContent
+	prefill := &api.Message{Role: "assistant", Content: "has content"}
+	p := Qwen3VLParser{hasThinkingSupport: true}
+	p.Init([]api.Tool{}, prefill)
+
+	steps := []step{
+		{
+			input:      "abc</think>",
+			wantEvents: []qwenEvent{qwenEventContent{content: "abc</think>"}},
+		},
+		{
+			input:      "<tool_call>{\"name\": \"x\", \"arguments\": {}}</tool_call>",
+			wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "{\"name\": \"x\", \"arguments\": {}}"}},
+		},
+	}
+
+	for idx, st := range steps {
+		p.buffer.WriteString(st.input)
+		got := p.parseEvents()
+
+		// Nil and empty slices count as equal here; DeepEqual alone would not agree.
+		bothEmpty := len(got) == 0 && len(st.wantEvents) == 0
+		if !bothEmpty && !reflect.DeepEqual(got, st.wantEvents) {
+			t.Fatalf("step %d: input %q: got %#v, want %#v", idx, st.input, got, st.wantEvents)
+		}
+	}
+}
+
+// TestQwen3VLThinkingWhitespaceHandling streams chunks through a
+// thinking-capable parser initialized with no previous message and checks how
+// whitespace around "</think>" and <tool_call> blocks shows up in the emitted
+// events. Setting `only` on a case restricts the run to the flagged cases.
+func TestQwen3VLThinkingWhitespaceHandling(t *testing.T) {
+	// step is one chunk written to the parser buffer plus the events that
+	// parseEvents is expected to return right after it.
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	cases := []struct {
+		desc  string
+		steps []step
+		only  bool
+	}{
+		{
+			desc: "whitespace after thinking tag is trimmed",
+			steps: []step{
+				{
+					input: "thinking content</think>   \n\t  content starts here",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "thinking content"},
+						qwenEventContent{content: "content starts here"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace after thinking tag split across chunks",
+			steps: []step{
+				{
+					input:      "thinking content</think>   ",
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
+				},
+				{
+					input:      "  \n\t",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "content",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "only whitespace after thinking tag",
+			steps: []step{
+				{
+					input:      "thinking content</think>   \n\t  ",
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking content"}},
+				},
+			},
+		},
+		{
+			desc: "multiple spaces and tabs after thinking",
+			steps: []step{
+				{
+					input: "think</think>     \t\t\n\n   text",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "think"},
+						qwenEventContent{content: "text"},
+					},
+				},
+			},
+		},
+		{
+			// The expected thinking content has no trailing spaces, so the
+			// whitespace before the closing tag is dropped, not preserved.
+			desc: "trailing whitespace before thinking tag is trimmed",
+			steps: []step{
+				{
+					input: "thinking with spaces   </think>text",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "thinking with spaces"},
+						qwenEventContent{content: "text"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace between thinking and tool call",
+			steps: []step{
+				{
+					input: "thinking</think>  \n  <tool_call>{\"name\":\"test\"}</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "thinking"},
+						qwenEventRawToolCall{raw: "{\"name\":\"test\"}"},
+					},
+				},
+			},
+		},
+		{
+			desc: "no whitespace after thinking tag",
+			steps: []step{
+				{
+					input: "thinking</think>content",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "thinking"},
+						qwenEventContent{content: "content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "unicode whitespace after thinking tag",
+			steps: []step{
+				{
+					input: "thinking</think>\u00a0\u3000content",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "thinking"},
+						qwenEventContent{content: "content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace split with partial thinking tag",
+			steps: []step{
+				{
+					input:      "thinking</th",
+					wantEvents: []qwenEvent{qwenEventThinkingContent{content: "thinking"}},
+				},
+				{
+					input:      "ink>  \n",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "  content",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "empty thinking tag with whitespace after",
+			steps: []step{
+				{
+					input: "</think>   \ncontent",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "content"},
+					},
+				},
+			},
+		},
+		{
+			desc: "whitespace inside tool call preserves trailing space",
+			steps: []step{
+				{
+					input: "bruh</think> \n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t     <tool_call>   tool content   </tool_call> \n\n\n\n\n\n\n after",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "bruh"},
+						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
+						qwenEventRawToolCall{raw: "   tool content   "},
+						qwenEventContent{content: "after"},
+					},
+				},
+			},
+		},
+		{
+			// Distinct name: a desc shared with the previous case would make
+			// the two subtests differ only by an automatic "#01" suffix.
+			desc: "whitespace handling across multiple chunks with tool calls",
+			steps: []step{
+				{
+					input: "bruh</think>          shdjfhksdhfj  ",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "bruh"},
+						qwenEventContent{content: "shdjfhksdhfj"},
+					},
+				},
+				{
+					input: "another word  ",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "  another word"},
+					},
+				},
+				{
+					input: "<tool_call>   tool content   </tool_call>            ",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "   tool content   "},
+					},
+				},
+				{
+					input: "\n \n \n \n \n \n blahhhhhhhhhh blahhhh blahhhh \n\n\n\t\t     <tool_call>   anotha one   </tool_call> \n\n\n\n\n\n\n after \n\n\n\n\n\n blep",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "blahhhhhhhhhh blahhhh blahhhh"},
+						qwenEventRawToolCall{raw: "   anotha one   "},
+						qwenEventContent{content: "after \n\n\n\n\n\n blep"},
+					},
+				},
+			},
+		},
+	}
+
+	// If any case is flagged `only`, run just the flagged cases.
+	anyOnlies := false
+	for _, tc := range cases {
+		if tc.only {
+			anyOnlies = true
+		}
+	}
+
+	for _, tc := range cases {
+		if anyOnlies && !tc.only {
+			continue
+		}
+
+		t.Run(tc.desc, func(t *testing.T) {
+			// A fresh parser per case keeps buffered input from leaking between cases.
+			parser := Qwen3VLParser{hasThinkingSupport: true}
+			parser.Init([]api.Tool{}, nil)
+
+			for i, step := range tc.steps {
+				parser.buffer.WriteString(step.input)
+				gotEvents := parser.parseEvents()
+
+				if len(gotEvents) == 0 && len(step.wantEvents) == 0 {
+					// avoid deep equal on empty vs. nil slices
+					continue
+				}
+
+				if !reflect.DeepEqual(gotEvents, step.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
+				}
+			}
+		})
+	}
+}
+
+// TestQwen3VLToolCallWhitespaceHandling streams chunks through a
+// thinking-capable parser and checks the events emitted around <tool_call>
+// blocks: the raw tool call text is expected verbatim, while the content
+// expectations carry no whitespace adjacent to the tool call tags.
+// Setting `only` on a case restricts the run to the flagged cases.
+func TestQwen3VLToolCallWhitespaceHandling(t *testing.T) {
+	type step struct {
+		input      string
+		wantEvents []qwenEvent
+	}
+
+	type testCase struct {
+		desc       string
+		steps      []step
+		only       bool
+		prefillMsg *api.Message // allows starting in content mode instead of thinking mode
+	}
+
+	cases := []testCase{
+		{
+			desc:       "whitespace inside tool call is fully preserved (with content prefill)",
+			prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
+			steps: []step{
+				{
+					input: "before<tool_call>   tool content   </tool_call>  \n  after",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "before"},
+						qwenEventRawToolCall{raw: "   tool content   "},
+						qwenEventContent{content: "after"},
+					},
+				},
+			},
+		},
+		{
+			desc:       "whitespace after tool call trimmed across chunks (with content prefill)",
+			prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
+			steps: []step{
+				{
+					input: "before<tool_call>tool</tool_call>   ",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "before"},
+						qwenEventRawToolCall{raw: "tool"},
+					},
+				},
+				{
+					input:      "\n\t",
+					wantEvents: []qwenEvent{},
+				},
+				{
+					input: "after \n this is a song",
+					wantEvents: []qwenEvent{
+						qwenEventContent{content: "after \n this is a song"},
+					},
+				},
+			},
+		},
+		{
+			desc:       "multiple tool calls with whitespace between (with content prefill)",
+			prefillMsg: &api.Message{Role: "assistant", Content: "prefill"},
+			steps: []step{
+				{
+					input: "<tool_call>first</tool_call>  \n  <tool_call>second</tool_call>",
+					wantEvents: []qwenEvent{
+						qwenEventRawToolCall{raw: "first"},
+						qwenEventRawToolCall{raw: "second"},
+					},
+				},
+			},
+		},
+		{
+			desc: "thinking with whitespace then tool call",
+			steps: []step{
+				{
+					input: "thinking</think>   \n   <tool_call>tool</tool_call>   \n   content",
+					wantEvents: []qwenEvent{
+						qwenEventThinkingContent{content: "thinking"},
+						qwenEventRawToolCall{raw: "tool"},
+						qwenEventContent{content: "content"},
+					},
+				},
+			},
+		},
+	}
+
+	// focused becomes true as soon as one case is flagged `only`.
+	var focused bool
+	for i := range cases {
+		focused = focused || cases[i].only
+	}
+
+	for _, tc := range cases {
+		if focused && !tc.only {
+			continue
+		}
+
+		t.Run(tc.desc, func(t *testing.T) {
+			p := Qwen3VLParser{hasThinkingSupport: true}
+			p.Init([]api.Tool{}, tc.prefillMsg)
+
+			for idx, st := range tc.steps {
+				p.buffer.WriteString(st.input)
+				got := p.parseEvents()
+
+				// Nil and empty slices count as equal here; DeepEqual alone would not agree.
+				bothEmpty := len(got) == 0 && len(st.wantEvents) == 0
+				if !bothEmpty && !reflect.DeepEqual(got, st.wantEvents) {
+					t.Errorf("step %d: input %q: got events %#v, want %#v", idx, st.input, got, st.wantEvents)
+				}
+			}
+		})
+	}
+}