Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-18 03:37:09 +00:00
Sync with upstream ollama/ollama and restore Tesla K80 (compute 3.7) support
This commit represents a complete rework after pulling the latest changes from the official ollama/ollama repository and re-applying Tesla K80 compatibility patches.

## Key Changes

### CUDA Compute Capability 3.7 Support (Tesla K80)
- Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt
- Updated CMakePresets.json to include compute 3.7 in the "CUDA 11" preset
- Using 37-virtual (PTX with JIT compilation) for maximum compatibility

### Legacy Toolchain Compatibility
- **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80)
- **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7)
- **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h)

### CPU Architecture Trade-offs
Due to the GCC 10.5 limitation, newer CPU optimizations were sacrificed:
- Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+)
- Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA
- Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility)

### Build System Updates
- Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7
- Added -Wno-deprecated-gpu-targets flag to suppress warnings
- Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI

### Upstream Sync
Merged the latest llama.cpp changes, including:
- Enhanced KV cache management with ISWA and hybrid memory support
- Improved multi-modal support (mtmd framework)
- New model architectures (Gemma3, Llama4, Qwen3, etc.)
- GPU backend improvements for CUDA, Metal, and ROCm
- Updated quantization support and GGUF format handling

### Documentation
- Updated CLAUDE.md with comprehensive build instructions
- Documented toolchain constraints and CPU architecture trade-offs
- Removed outdated CI/CD workflows (tesla-k80-*.yml)
- Cleaned up temporary development artifacts

## Rationale

This fork maintains Tesla K80 GPU support (compute 3.7), which was dropped in official Ollama because of its legacy driver/CUDA requirements. The toolchain constraint creates a chain of dependencies:
- K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI

We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
docs/capabilities/tool-calling.mdx (new file, 777 lines added)
@@ -0,0 +1,777 @@
---
title: Tool calling
---

Ollama supports tool calling (also known as function calling), which allows a model to invoke tools and incorporate their results into its replies.

## Calling a single tool

Invoke a single tool and include its response in a follow-up request.

Also known as "single-shot" tool calling.

<Tabs>
<Tab title="cURL">

```shell
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
  "model": "qwen3",
  "messages": [{"role": "user", "content": "What is the temperature in New York?"}],
  "stream": false,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_temperature",
        "description": "Get the current temperature for a city",
        "parameters": {
          "type": "object",
          "required": ["city"],
          "properties": {
            "city": {"type": "string", "description": "The name of the city"}
          }
        }
      }
    }
  ]
}'
```

**Generate a response with a single tool result**
```shell
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
  "model": "qwen3",
  "messages": [
    {"role": "user", "content": "What is the temperature in New York?"},
    {
      "role": "assistant",
      "tool_calls": [
        {
          "type": "function",
          "function": {
            "index": 0,
            "name": "get_temperature",
            "arguments": {"city": "New York"}
          }
        }
      ]
    },
    {"role": "tool", "tool_name": "get_temperature", "content": "22°C"}
  ],
  "stream": false
}'
```
</Tab>
<Tab title="Python">
Install the Ollama Python SDK:
```bash
# with pip
pip install ollama -U

# with uv
uv add ollama
```

```python
from ollama import chat

def get_temperature(city: str) -> str:
    """Get the current temperature for a city

    Args:
        city: The name of the city

    Returns:
        The current temperature for the city
    """
    temperatures = {
        "New York": "22°C",
        "London": "15°C",
        "Tokyo": "18°C",
    }
    return temperatures.get(city, "Unknown")

messages = [{"role": "user", "content": "What's the temperature in New York?"}]

# pass functions directly as tools in the tools list or as a JSON schema
response = chat(model="qwen3", messages=messages, tools=[get_temperature], think=True)

messages.append(response.message)
if response.message.tool_calls:
    # recommended only for models that return a single tool call
    call = response.message.tool_calls[0]
    result = get_temperature(**call.function.arguments)
    # add the tool result to the messages
    messages.append({"role": "tool", "tool_name": call.function.name, "content": str(result)})

final_response = chat(model="qwen3", messages=messages, tools=[get_temperature], think=True)
print(final_response.message.content)
```
</Tab>
<Tab title="JavaScript">
Install the Ollama JavaScript library:
```bash
# with npm
npm i ollama

# with bun
bun i ollama
```

```typescript
import ollama from 'ollama'

function getTemperature(city: string): string {
  const temperatures: Record<string, string> = {
    'New York': '22°C',
    'London': '15°C',
    'Tokyo': '18°C',
  }
  return temperatures[city] ?? 'Unknown'
}

const tools = [
  {
    type: 'function',
    function: {
      name: 'get_temperature',
      description: 'Get the current temperature for a city',
      parameters: {
        type: 'object',
        required: ['city'],
        properties: {
          city: { type: 'string', description: 'The name of the city' },
        },
      },
    },
  },
]

const messages = [{ role: 'user', content: "What's the temperature in New York?" }]

const response = await ollama.chat({
  model: 'qwen3',
  messages,
  tools,
  think: true,
})

messages.push(response.message)
if (response.message.tool_calls?.length) {
  // recommended only for models that return a single tool call
  const call = response.message.tool_calls[0]
  const args = call.function.arguments as { city: string }
  const result = getTemperature(args.city)
  // add the tool result to the messages
  messages.push({ role: 'tool', tool_name: call.function.name, content: result })

  // generate the final response
  const finalResponse = await ollama.chat({ model: 'qwen3', messages, tools, think: true })
  console.log(finalResponse.message.content)
}
```
</Tab>
</Tabs>

## Parallel tool calling

<Tabs>
<Tab title="cURL">
Request multiple tool calls in parallel, then send all tool responses back to the model.

```shell
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
  "model": "qwen3",
  "messages": [{"role": "user", "content": "What are the current weather conditions and temperature in New York and London?"}],
  "stream": false,
  "tools": [
    {
      "type": "function",
      "function": {
        "name": "get_temperature",
        "description": "Get the current temperature for a city",
        "parameters": {
          "type": "object",
          "required": ["city"],
          "properties": {
            "city": {"type": "string", "description": "The name of the city"}
          }
        }
      }
    },
    {
      "type": "function",
      "function": {
        "name": "get_conditions",
        "description": "Get the current weather conditions for a city",
        "parameters": {
          "type": "object",
          "required": ["city"],
          "properties": {
            "city": {"type": "string", "description": "The name of the city"}
          }
        }
      }
    }
  ]
}'
```

**Generate a response with multiple tool results**
```shell
curl -s http://localhost:11434/api/chat -H "Content-Type: application/json" -d '{
  "model": "qwen3",
  "messages": [
    {"role": "user", "content": "What are the current weather conditions and temperature in New York and London?"},
    {
      "role": "assistant",
      "tool_calls": [
        {
          "type": "function",
          "function": {
            "index": 0,
            "name": "get_temperature",
            "arguments": {"city": "New York"}
          }
        },
        {
          "type": "function",
          "function": {
            "index": 1,
            "name": "get_conditions",
            "arguments": {"city": "New York"}
          }
        },
        {
          "type": "function",
          "function": {
            "index": 2,
            "name": "get_temperature",
            "arguments": {"city": "London"}
          }
        },
        {
          "type": "function",
          "function": {
            "index": 3,
            "name": "get_conditions",
            "arguments": {"city": "London"}
          }
        }
      ]
    },
    {"role": "tool", "tool_name": "get_temperature", "content": "22°C"},
    {"role": "tool", "tool_name": "get_conditions", "content": "Partly cloudy"},
    {"role": "tool", "tool_name": "get_temperature", "content": "15°C"},
    {"role": "tool", "tool_name": "get_conditions", "content": "Rainy"}
  ],
  "stream": false
}'
```
</Tab>
<Tab title="Python">
```python
from ollama import chat

def get_temperature(city: str) -> str:
    """Get the current temperature for a city

    Args:
        city: The name of the city

    Returns:
        The current temperature for the city
    """
    temperatures = {
        "New York": "22°C",
        "London": "15°C",
        "Tokyo": "18°C"
    }
    return temperatures.get(city, "Unknown")

def get_conditions(city: str) -> str:
    """Get the current weather conditions for a city

    Args:
        city: The name of the city

    Returns:
        The current weather conditions for the city
    """
    conditions = {
        "New York": "Partly cloudy",
        "London": "Rainy",
        "Tokyo": "Sunny"
    }
    return conditions.get(city, "Unknown")


messages = [{'role': 'user', 'content': 'What are the current weather conditions and temperature in New York and London?'}]

# The Python client automatically parses functions as a tool schema so we can pass them directly
# Schemas can be passed directly in the tools list as well
response = chat(model='qwen3', messages=messages, tools=[get_temperature, get_conditions], think=True)

# add the assistant message to the messages
messages.append(response.message)
if response.message.tool_calls:
    # process each tool call
    for call in response.message.tool_calls:
        # execute the appropriate tool
        if call.function.name == 'get_temperature':
            result = get_temperature(**call.function.arguments)
        elif call.function.name == 'get_conditions':
            result = get_conditions(**call.function.arguments)
        else:
            result = 'Unknown tool'
        # add the tool result to the messages
        messages.append({'role': 'tool', 'tool_name': call.function.name, 'content': str(result)})

# generate the final response
final_response = chat(model='qwen3', messages=messages, tools=[get_temperature, get_conditions], think=True)
print(final_response.message.content)
```
</Tab>
<Tab title="JavaScript">
```typescript
import ollama from 'ollama'

function getTemperature(city: string): string {
  const temperatures: { [key: string]: string } = {
    "New York": "22°C",
    "London": "15°C",
    "Tokyo": "18°C"
  }
  return temperatures[city] || "Unknown"
}

function getConditions(city: string): string {
  const conditions: { [key: string]: string } = {
    "New York": "Partly cloudy",
    "London": "Rainy",
    "Tokyo": "Sunny"
  }
  return conditions[city] || "Unknown"
}

const tools = [
  {
    type: 'function',
    function: {
      name: 'get_temperature',
      description: 'Get the current temperature for a city',
      parameters: {
        type: 'object',
        required: ['city'],
        properties: {
          city: { type: 'string', description: 'The name of the city' },
        },
      },
    },
  },
  {
    type: 'function',
    function: {
      name: 'get_conditions',
      description: 'Get the current weather conditions for a city',
      parameters: {
        type: 'object',
        required: ['city'],
        properties: {
          city: { type: 'string', description: 'The name of the city' },
        },
      },
    },
  }
]

const messages = [{ role: 'user', content: 'What are the current weather conditions and temperature in New York and London?' }]

const response = await ollama.chat({
  model: 'qwen3',
  messages,
  tools,
  think: true
})

// add the assistant message to the messages
messages.push(response.message)
if (response.message.tool_calls) {
  // process each tool call
  for (const call of response.message.tool_calls) {
    // execute the appropriate tool
    let result: string
    if (call.function.name === 'get_temperature') {
      const args = call.function.arguments as { city: string }
      result = getTemperature(args.city)
    } else if (call.function.name === 'get_conditions') {
      const args = call.function.arguments as { city: string }
      result = getConditions(args.city)
    } else {
      result = 'Unknown tool'
    }
    // add the tool result to the messages
    messages.push({ role: 'tool', tool_name: call.function.name, content: result })
  }

  // generate the final response
  const finalResponse = await ollama.chat({ model: 'qwen3', messages, tools, think: true })
  console.log(finalResponse.message.content)
}
```
</Tab>
</Tabs>

## Multi-turn tool calling (Agent loop)

An agent loop allows the model to decide when to invoke tools and incorporate their results into its replies.

It can also help to tell the model that it is in a loop and can make multiple tool calls.
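
One way to do this is to prepend a system message before starting the loop. This is a minimal sketch, not part of the examples below; the exact wording of the prompt is up to you:

```python
# Hypothetical system prompt telling the model it is running inside an agent loop.
# The agent-loop examples below work without it; adjust the wording to your use case.
messages = [
    {
        'role': 'system',
        'content': 'You are running in an agent loop. You may call tools as many times as needed before giving your final answer.',
    },
    {'role': 'user', 'content': 'What is (11434+12341)*412?'},
]
```
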
<Tabs>
<Tab title="Python">
```python
from ollama import chat, ChatResponse


def add(a: int, b: int) -> int:
    """Add two numbers

    Args:
        a: The first number
        b: The second number

    Returns:
        The sum of the two numbers
    """
    return a + b


def multiply(a: int, b: int) -> int:
    """Multiply two numbers

    Args:
        a: The first number
        b: The second number

    Returns:
        The product of the two numbers
    """
    return a * b


available_functions = {
    'add': add,
    'multiply': multiply,
}

messages = [{'role': 'user', 'content': 'What is (11434+12341)*412?'}]
while True:
    response: ChatResponse = chat(
        model='qwen3',
        messages=messages,
        tools=[add, multiply],
        think=True,
    )
    messages.append(response.message)
    print("Thinking: ", response.message.thinking)
    print("Content: ", response.message.content)
    if response.message.tool_calls:
        for tc in response.message.tool_calls:
            if tc.function.name in available_functions:
                print(f"Calling {tc.function.name} with arguments {tc.function.arguments}")
                result = available_functions[tc.function.name](**tc.function.arguments)
                print(f"Result: {result}")
                # add the tool result to the messages
                messages.append({'role': 'tool', 'tool_name': tc.function.name, 'content': str(result)})
    else:
        # end the loop when there are no more tool calls
        break
    # continue the loop with the updated messages
```
</Tab>
<Tab title="JavaScript">
```typescript
import ollama from 'ollama'

type ToolName = 'add' | 'multiply'

function add(a: number, b: number): number {
  return a + b
}

function multiply(a: number, b: number): number {
  return a * b
}

const availableFunctions: Record<ToolName, (a: number, b: number) => number> = {
  add,
  multiply,
}

const tools = [
  {
    type: 'function',
    function: {
      name: 'add',
      description: 'Add two numbers',
      parameters: {
        type: 'object',
        required: ['a', 'b'],
        properties: {
          a: { type: 'integer', description: 'The first number' },
          b: { type: 'integer', description: 'The second number' },
        },
      },
    },
  },
  {
    type: 'function',
    function: {
      name: 'multiply',
      description: 'Multiply two numbers',
      parameters: {
        type: 'object',
        required: ['a', 'b'],
        properties: {
          a: { type: 'integer', description: 'The first number' },
          b: { type: 'integer', description: 'The second number' },
        },
      },
    },
  },
]

async function agentLoop() {
  const messages = [{ role: 'user', content: 'What is (11434+12341)*412?' }]

  while (true) {
    const response = await ollama.chat({
      model: 'qwen3',
      messages,
      tools,
      think: true,
    })

    messages.push(response.message)
    console.log('Thinking:', response.message.thinking)
    console.log('Content:', response.message.content)

    const toolCalls = response.message.tool_calls ?? []
    if (toolCalls.length) {
      for (const call of toolCalls) {
        const fn = availableFunctions[call.function.name as ToolName]
        if (!fn) {
          continue
        }

        const args = call.function.arguments as { a: number; b: number }
        console.log(`Calling ${call.function.name} with arguments`, args)
        const result = fn(args.a, args.b)
        console.log(`Result: ${result}`)
        messages.push({ role: 'tool', tool_name: call.function.name, content: String(result) })
      }
    } else {
      break
    }
  }
}

agentLoop().catch(console.error)
```
</Tab>
</Tabs>

## Tool calling with streaming

When streaming, gather every chunk of `thinking`, `content`, and `tool_calls`, then return those fields together with any tool results in the follow-up request.

<Tabs>
<Tab title="Python">
```python
from ollama import chat


def get_temperature(city: str) -> str:
    """Get the current temperature for a city

    Args:
        city: The name of the city

    Returns:
        The current temperature for the city
    """
    temperatures = {
        'New York': '22°C',
        'London': '15°C',
    }
    return temperatures.get(city, 'Unknown')


messages = [{'role': 'user', 'content': "What's the temperature in New York?"}]

while True:
    stream = chat(
        model='qwen3',
        messages=messages,
        tools=[get_temperature],
        stream=True,
        think=True,
    )

    thinking = ''
    content = ''
    tool_calls = []

    done_thinking = False
    # accumulate the partial fields
    for chunk in stream:
        if chunk.message.thinking:
            thinking += chunk.message.thinking
            print(chunk.message.thinking, end='', flush=True)
        if chunk.message.content:
            if not done_thinking:
                done_thinking = True
                print('\n')
            content += chunk.message.content
            print(chunk.message.content, end='', flush=True)
        if chunk.message.tool_calls:
            tool_calls.extend(chunk.message.tool_calls)
            print(chunk.message.tool_calls)

    # append accumulated fields to the messages
    if thinking or content or tool_calls:
        messages.append({'role': 'assistant', 'thinking': thinking, 'content': content, 'tool_calls': tool_calls})

    if not tool_calls:
        break

    for call in tool_calls:
        if call.function.name == 'get_temperature':
            result = get_temperature(**call.function.arguments)
        else:
            result = 'Unknown tool'
        messages.append({'role': 'tool', 'tool_name': call.function.name, 'content': result})
```

</Tab>
<Tab title="JavaScript">
```typescript
import ollama from 'ollama'

function getTemperature(city: string): string {
  const temperatures: Record<string, string> = {
    'New York': '22°C',
    'London': '15°C',
  }
  return temperatures[city] ?? 'Unknown'
}

const getTemperatureTool = {
  type: 'function',
  function: {
    name: 'get_temperature',
    description: 'Get the current temperature for a city',
    parameters: {
      type: 'object',
      required: ['city'],
      properties: {
        city: { type: 'string', description: 'The name of the city' },
      },
    },
  },
}

async function agentLoop() {
  const messages = [{ role: 'user', content: "What's the temperature in New York?" }]

  while (true) {
    const stream = await ollama.chat({
      model: 'qwen3',
      messages,
      tools: [getTemperatureTool],
      stream: true,
      think: true,
    })

    let thinking = ''
    let content = ''
    const toolCalls: any[] = []
    let doneThinking = false

    for await (const chunk of stream) {
      if (chunk.message.thinking) {
        thinking += chunk.message.thinking
        process.stdout.write(chunk.message.thinking)
      }
      if (chunk.message.content) {
        if (!doneThinking) {
          doneThinking = true
          process.stdout.write('\n')
        }
        content += chunk.message.content
        process.stdout.write(chunk.message.content)
      }
      if (chunk.message.tool_calls?.length) {
        toolCalls.push(...chunk.message.tool_calls)
        console.log(chunk.message.tool_calls)
      }
    }

    if (thinking || content || toolCalls.length) {
      messages.push({ role: 'assistant', thinking, content, tool_calls: toolCalls } as any)
    }

    if (!toolCalls.length) {
      break
    }

    for (const call of toolCalls) {
      if (call.function.name === 'get_temperature') {
        const args = call.function.arguments as { city: string }
        const result = getTemperature(args.city)
        messages.push({ role: 'tool', tool_name: call.function.name, content: result })
      } else {
        messages.push({ role: 'tool', tool_name: call.function.name, content: 'Unknown tool' })
      }
    }
  }
}

agentLoop().catch(console.error)
```
</Tab>
</Tabs>

This loop streams the assistant response, accumulates partial fields, passes them back together, and appends the tool results so the model can complete its answer.

## Using functions as tools with Ollama Python SDK

The Python SDK automatically parses functions as a tool schema, so we can pass them directly.
Schemas can still be passed if needed, as shown in the sketch after the example below.

```python
from ollama import chat

def get_temperature(city: str) -> str:
    """Get the current temperature for a city

    Args:
        city: The name of the city

    Returns:
        The current temperature for the city
    """
    temperatures = {
        'New York': '22°C',
        'London': '15°C',
    }
    return temperatures.get(city, 'Unknown')

messages = [{'role': 'user', 'content': "What's the temperature in New York?"}]

available_functions = {
    'get_temperature': get_temperature,
}
# directly pass the function as part of the tools list
response = chat(model='qwen3', messages=messages, tools=available_functions.values(), think=True)
```
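
If you prefer to pass a schema rather than a function, a minimal sketch looks like the following. The `get_temperature_tool` dictionary is not part of the original example; it simply mirrors the JSON schema used in the cURL examples above.

```python
# Passing a JSON-schema style tool definition instead of a Python function.
# The model still returns a tool call; you execute get_temperature yourself.
get_temperature_tool = {
    'type': 'function',
    'function': {
        'name': 'get_temperature',
        'description': 'Get the current temperature for a city',
        'parameters': {
            'type': 'object',
            'required': ['city'],
            'properties': {
                'city': {'type': 'string', 'description': 'The name of the city'},
            },
        },
    },
}

response = chat(model='qwen3', messages=messages, tools=[get_temperature_tool], think=True)
```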