mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-18 03:37:09 +00:00
Sync with upstream ollama/ollama and restore Tesla K80 (compute 3.7) support
This commit represents a complete rework after pulling the latest changes from official ollama/ollama repository and re-applying Tesla K80 compatibility patches. ## Key Changes ### CUDA Compute Capability 3.7 Support (Tesla K80) - Added sm_37 (compute 3.7) to CMAKE_CUDA_ARCHITECTURES in CMakeLists.txt - Updated CMakePresets.json to include compute 3.7 in "CUDA 11" preset - Using 37-virtual (PTX with JIT compilation) for maximum compatibility ### Legacy Toolchain Compatibility - **NVIDIA Driver**: 470.256.02 (last version supporting Kepler/K80) - **CUDA Version**: 11.4.4 (last CUDA 11.x supporting compute 3.7) - **GCC Version**: 10.5.0 (required by CUDA 11.4 host_config.h) ### CPU Architecture Trade-offs Due to GCC 10.5 limitation, sacrificed newer CPU optimizations: - Alderlake CPU variant enabled WITHOUT AVX_VNNI (requires GCC 11+) - Still supports: SSE4.2, AVX, F16C, AVX2, BMI2, FMA - Performance impact: ~3-7% on newer CPUs (acceptable for K80 compatibility) ### Build System Updates - Modified ml/backend/ggml/ggml/src/ggml-cuda/CMakeLists.txt for compute 3.7 - Added -Wno-deprecated-gpu-targets flag to suppress warnings - Updated ml/backend/ggml/ggml/src/CMakeLists.txt for Alderlake without AVX_VNNI ### Upstream Sync Merged latest llama.cpp changes including: - Enhanced KV cache management with ISWA and hybrid memory support - Improved multi-modal support (mtmd framework) - New model architectures (Gemma3, Llama4, Qwen3, etc.) - GPU backend improvements for CUDA, Metal, and ROCm - Updated quantization support and GGUF format handling ### Documentation - Updated CLAUDE.md with comprehensive build instructions - Documented toolchain constraints and CPU architecture trade-offs - Removed outdated CI/CD workflows (tesla-k80-*.yml) - Cleaned up temporary development artifacts ## Rationale This fork maintains Tesla K80 GPU support (compute 3.7) which was dropped in official Ollama due to legacy driver/CUDA requirements. The toolchain constraint creates a deadlock: - K80 → Driver 470 → CUDA 11.4 → GCC 10 → No AVX_VNNI We accept the loss of cutting-edge CPU optimizations to enable running modern LLMs on legacy but still capable Tesla K80 hardware (12GB VRAM per GPU). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
203
api/types.go
203
api/types.go
@@ -11,6 +11,8 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
|
||||
"github.com/ollama/ollama/envconfig"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
@@ -36,6 +38,19 @@ func (e StatusError) Error() string {
|
||||
}
|
||||
}
|
||||
|
||||
type AuthorizationError struct {
|
||||
StatusCode int
|
||||
Status string
|
||||
SigninURL string `json:"signin_url"`
|
||||
}
|
||||
|
||||
func (e AuthorizationError) Error() string {
|
||||
if e.Status != "" {
|
||||
return e.Status
|
||||
}
|
||||
return "something went wrong, please see the ollama server logs for details"
|
||||
}
|
||||
|
||||
// ImageData represents the raw binary data of an image file.
|
||||
type ImageData []byte
|
||||
|
||||
@@ -90,6 +105,18 @@ type GenerateRequest struct {
|
||||
// (request that thinking _not_ be used) and unset (use the old behavior
|
||||
// before this option was introduced)
|
||||
Think *ThinkValue `json:"think,omitempty"`
|
||||
|
||||
// Truncate is a boolean that, when set to true, truncates the chat history messages
|
||||
// if the rendered prompt exceeds the context length limit.
|
||||
Truncate *bool `json:"truncate,omitempty"`
|
||||
|
||||
// Shift is a boolean that, when set to true, shifts the chat history
|
||||
// when hitting the context length limit instead of erroring.
|
||||
Shift *bool `json:"shift,omitempty"`
|
||||
|
||||
// DebugRenderOnly is a debug option that, when set to true, returns the rendered
|
||||
// template instead of calling the model.
|
||||
DebugRenderOnly bool `json:"_debug_render_only,omitempty"`
|
||||
}
|
||||
|
||||
// ChatRequest describes a request sent by [Client.Chat].
|
||||
@@ -120,6 +147,18 @@ type ChatRequest struct {
|
||||
// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
|
||||
// for supported models.
|
||||
Think *ThinkValue `json:"think,omitempty"`
|
||||
|
||||
// Truncate is a boolean that, when set to true, truncates the chat history messages
|
||||
// if the rendered prompt exceeds the context length limit.
|
||||
Truncate *bool `json:"truncate,omitempty"`
|
||||
|
||||
// Shift is a boolean that, when set to true, shifts the chat history
|
||||
// when hitting the context length limit instead of erroring.
|
||||
Shift *bool `json:"shift,omitempty"`
|
||||
|
||||
// DebugRenderOnly is a debug option that, when set to true, returns the rendered
|
||||
// template instead of calling the model.
|
||||
DebugRenderOnly bool `json:"_debug_render_only,omitempty"`
|
||||
}
|
||||
|
||||
type Tools []Tool
|
||||
@@ -165,7 +204,7 @@ type ToolCall struct {
|
||||
}
|
||||
|
||||
type ToolCallFunction struct {
|
||||
Index int `json:"index,omitempty"`
|
||||
Index int `json:"index"`
|
||||
Name string `json:"name"`
|
||||
Arguments ToolCallFunctionArguments `json:"arguments"`
|
||||
}
|
||||
@@ -227,9 +266,9 @@ func (pt PropertyType) String() string {
|
||||
|
||||
type ToolProperty struct {
|
||||
AnyOf []ToolProperty `json:"anyOf,omitempty"`
|
||||
Type PropertyType `json:"type"`
|
||||
Type PropertyType `json:"type,omitempty"`
|
||||
Items any `json:"items,omitempty"`
|
||||
Description string `json:"description"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Enum []any `json:"enum,omitempty"`
|
||||
}
|
||||
|
||||
@@ -278,16 +317,23 @@ func mapToTypeScriptType(jsonType string) string {
|
||||
}
|
||||
}
|
||||
|
||||
type ToolFunctionParameters struct {
|
||||
Type string `json:"type"`
|
||||
Defs any `json:"$defs,omitempty"`
|
||||
Items any `json:"items,omitempty"`
|
||||
Required []string `json:"required"`
|
||||
Properties map[string]ToolProperty `json:"properties"`
|
||||
}
|
||||
|
||||
func (t *ToolFunctionParameters) String() string {
|
||||
bts, _ := json.Marshal(t)
|
||||
return string(bts)
|
||||
}
|
||||
|
||||
type ToolFunction struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
Parameters struct {
|
||||
Type string `json:"type"`
|
||||
Defs any `json:"$defs,omitempty"`
|
||||
Items any `json:"items,omitempty"`
|
||||
Required []string `json:"required"`
|
||||
Properties map[string]ToolProperty `json:"properties"`
|
||||
} `json:"parameters"`
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Parameters ToolFunctionParameters `json:"parameters"`
|
||||
}
|
||||
|
||||
func (t *ToolFunction) String() string {
|
||||
@@ -298,16 +344,38 @@ func (t *ToolFunction) String() string {
|
||||
// ChatResponse is the response returned by [Client.Chat]. Its fields are
|
||||
// similar to [GenerateResponse].
|
||||
type ChatResponse struct {
|
||||
Model string `json:"model"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
Message Message `json:"message"`
|
||||
DoneReason string `json:"done_reason,omitempty"`
|
||||
// Model is the model name that generated the response.
|
||||
Model string `json:"model"`
|
||||
|
||||
// RemoteModel is the name of the upstream model that generated the response.
|
||||
RemoteModel string `json:"remote_model,omitempty"`
|
||||
|
||||
// RemoteHost is the URL of the upstream Ollama host that generated the response.
|
||||
RemoteHost string `json:"remote_host,omitempty"`
|
||||
|
||||
// CreatedAt is the timestamp of the response.
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
|
||||
// Message contains the message or part of a message from the model.
|
||||
Message Message `json:"message"`
|
||||
|
||||
// Done specifies if the response is complete.
|
||||
Done bool `json:"done"`
|
||||
|
||||
// DoneReason is the reason the model stopped generating text.
|
||||
DoneReason string `json:"done_reason,omitempty"`
|
||||
|
||||
DebugInfo *DebugInfo `json:"_debug_info,omitempty"`
|
||||
|
||||
Metrics
|
||||
}
|
||||
|
||||
// DebugInfo contains debug information for template rendering
|
||||
type DebugInfo struct {
|
||||
RenderedTemplate string `json:"rendered_template"`
|
||||
ImageCount int `json:"image_count,omitempty"`
|
||||
}
|
||||
|
||||
type Metrics struct {
|
||||
TotalDuration time.Duration `json:"total_duration,omitempty"`
|
||||
LoadDuration time.Duration `json:"load_duration,omitempty"`
|
||||
@@ -360,8 +428,12 @@ type EmbedRequest struct {
|
||||
// this request.
|
||||
KeepAlive *Duration `json:"keep_alive,omitempty"`
|
||||
|
||||
// Truncate truncates the input to fit the model's max sequence length.
|
||||
Truncate *bool `json:"truncate,omitempty"`
|
||||
|
||||
// Dimensions truncates the output embedding to the specified dimension.
|
||||
Dimensions int `json:"dimensions,omitempty"`
|
||||
|
||||
// Options lists model-specific options.
|
||||
Options map[string]any `json:"options"`
|
||||
}
|
||||
@@ -399,18 +471,47 @@ type EmbeddingResponse struct {
|
||||
|
||||
// CreateRequest is the request passed to [Client.Create].
|
||||
type CreateRequest struct {
|
||||
Model string `json:"model"`
|
||||
Stream *bool `json:"stream,omitempty"`
|
||||
// Model is the model name to create.
|
||||
Model string `json:"model"`
|
||||
|
||||
// Stream specifies whether the response is streaming; it is true by default.
|
||||
Stream *bool `json:"stream,omitempty"`
|
||||
|
||||
// Quantize is the quantization format for the model; leave blank to not change the quantization level.
|
||||
Quantize string `json:"quantize,omitempty"`
|
||||
|
||||
From string `json:"from,omitempty"`
|
||||
Files map[string]string `json:"files,omitempty"`
|
||||
Adapters map[string]string `json:"adapters,omitempty"`
|
||||
Template string `json:"template,omitempty"`
|
||||
License any `json:"license,omitempty"`
|
||||
System string `json:"system,omitempty"`
|
||||
Parameters map[string]any `json:"parameters,omitempty"`
|
||||
Messages []Message `json:"messages,omitempty"`
|
||||
// From is the name of the model or file to use as the source.
|
||||
From string `json:"from,omitempty"`
|
||||
|
||||
// RemoteHost is the URL of the upstream ollama API for the model (if any).
|
||||
RemoteHost string `json:"remote_host,omitempty"`
|
||||
|
||||
// Files is a map of files include when creating the model.
|
||||
Files map[string]string `json:"files,omitempty"`
|
||||
|
||||
// Adapters is a map of LoRA adapters to include when creating the model.
|
||||
Adapters map[string]string `json:"adapters,omitempty"`
|
||||
|
||||
// Template is the template used when constructing a request to the model.
|
||||
Template string `json:"template,omitempty"`
|
||||
|
||||
// License is a string or list of strings for licenses.
|
||||
License any `json:"license,omitempty"`
|
||||
|
||||
// System is the system prompt for the model.
|
||||
System string `json:"system,omitempty"`
|
||||
|
||||
// Parameters is a map of hyper-parameters which are applied to the model.
|
||||
Parameters map[string]any `json:"parameters,omitempty"`
|
||||
|
||||
// Messages is a list of messages added to the model before chat and generation requests.
|
||||
Messages []Message `json:"messages,omitempty"`
|
||||
|
||||
Renderer string `json:"renderer,omitempty"`
|
||||
Parser string `json:"parser,omitempty"`
|
||||
|
||||
// Info is a map of additional information for the model
|
||||
Info map[string]any `json:"info,omitempty"`
|
||||
|
||||
// Deprecated: set the model name with Model instead
|
||||
Name string `json:"name"`
|
||||
@@ -448,8 +549,12 @@ type ShowResponse struct {
|
||||
Parameters string `json:"parameters,omitempty"`
|
||||
Template string `json:"template,omitempty"`
|
||||
System string `json:"system,omitempty"`
|
||||
Renderer string `json:"renderer,omitempty"`
|
||||
Parser string `json:"parser,omitempty"`
|
||||
Details ModelDetails `json:"details,omitempty"`
|
||||
Messages []Message `json:"messages,omitempty"`
|
||||
RemoteModel string `json:"remote_model,omitempty"`
|
||||
RemoteHost string `json:"remote_host,omitempty"`
|
||||
ModelInfo map[string]any `json:"model_info,omitempty"`
|
||||
ProjectorInfo map[string]any `json:"projector_info,omitempty"`
|
||||
Tensors []Tensor `json:"tensors,omitempty"`
|
||||
@@ -508,12 +613,14 @@ type ProcessResponse struct {
|
||||
|
||||
// ListModelResponse is a single model description in [ListResponse].
|
||||
type ListModelResponse struct {
|
||||
Name string `json:"name"`
|
||||
Model string `json:"model"`
|
||||
ModifiedAt time.Time `json:"modified_at"`
|
||||
Size int64 `json:"size"`
|
||||
Digest string `json:"digest"`
|
||||
Details ModelDetails `json:"details,omitempty"`
|
||||
Name string `json:"name"`
|
||||
Model string `json:"model"`
|
||||
RemoteModel string `json:"remote_model,omitempty"`
|
||||
RemoteHost string `json:"remote_host,omitempty"`
|
||||
ModifiedAt time.Time `json:"modified_at"`
|
||||
Size int64 `json:"size"`
|
||||
Digest string `json:"digest"`
|
||||
Details ModelDetails `json:"details,omitempty"`
|
||||
}
|
||||
|
||||
// ProcessModelResponse is a single model description in [ProcessResponse].
|
||||
@@ -537,6 +644,12 @@ type GenerateResponse struct {
|
||||
// Model is the model name that generated the response.
|
||||
Model string `json:"model"`
|
||||
|
||||
// RemoteModel is the name of the upstream model that generated the response.
|
||||
RemoteModel string `json:"remote_model,omitempty"`
|
||||
|
||||
// RemoteHost is the URL of the upstream Ollama host that generated the response.
|
||||
RemoteHost string `json:"remote_host,omitempty"`
|
||||
|
||||
// CreatedAt is the timestamp of the response.
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
|
||||
@@ -560,6 +673,8 @@ type GenerateResponse struct {
|
||||
Metrics
|
||||
|
||||
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
|
||||
|
||||
DebugInfo *DebugInfo `json:"_debug_info,omitempty"`
|
||||
}
|
||||
|
||||
// ModelDetails provides details about a model.
|
||||
@@ -572,6 +687,18 @@ type ModelDetails struct {
|
||||
QuantizationLevel string `json:"quantization_level"`
|
||||
}
|
||||
|
||||
// UserResponse provides information about a user.
|
||||
type UserResponse struct {
|
||||
ID uuid.UUID `json:"id"`
|
||||
Email string `json:"email"`
|
||||
Name string `json:"name"`
|
||||
Bio string `json:"bio,omitempty"`
|
||||
AvatarURL string `json:"avatarurl,omitempty"`
|
||||
FirstName string `json:"firstname,omitempty"`
|
||||
LastName string `json:"lastname,omitempty"`
|
||||
Plan string `json:"plan,omitempty"`
|
||||
}
|
||||
|
||||
// Tensor describes the metadata for a given tensor.
|
||||
type Tensor struct {
|
||||
Name string `json:"name"`
|
||||
@@ -769,8 +896,8 @@ func (t *ThinkValue) IsString() bool {
|
||||
return ok
|
||||
}
|
||||
|
||||
// AsBool returns the value as a bool (true if enabled in any way)
|
||||
func (t *ThinkValue) AsBool() bool {
|
||||
// Bool returns the value as a bool (true if enabled in any way)
|
||||
func (t *ThinkValue) Bool() bool {
|
||||
if t == nil || t.Value == nil {
|
||||
return false
|
||||
}
|
||||
@@ -786,8 +913,8 @@ func (t *ThinkValue) AsBool() bool {
|
||||
}
|
||||
}
|
||||
|
||||
// AsString returns the value as a string
|
||||
func (t *ThinkValue) AsString() string {
|
||||
// String returns the value as a string
|
||||
func (t *ThinkValue) String() string {
|
||||
if t == nil || t.Value == nil {
|
||||
return ""
|
||||
}
|
||||
@@ -825,7 +952,7 @@ func (t *ThinkValue) UnmarshalJSON(data []byte) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\")")
|
||||
return fmt.Errorf("think must be a boolean or string (\"high\", \"medium\", \"low\", true, or false)")
|
||||
}
|
||||
|
||||
// MarshalJSON implements json.Marshaler
|
||||
@@ -860,7 +987,7 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
|
||||
if t < 0 {
|
||||
d.Duration = time.Duration(math.MaxInt64)
|
||||
} else {
|
||||
d.Duration = time.Duration(int(t) * int(time.Second))
|
||||
d.Duration = time.Duration(t * float64(time.Second))
|
||||
}
|
||||
case string:
|
||||
d.Duration, err = time.ParseDuration(t)
|
||||
|
||||
Reference in New Issue
Block a user