Merge pull request #1552 from jmorganca/mxyng/lint-test
add lint and test on pull_request
@@ -98,9 +98,9 @@ func (c *containerLORA) Name() string {
     return "ggla"
 }
 
-func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
+func (c *containerLORA) Decode(rso *readSeekOffset) (model, error) {
     var version uint32
-    binary.Read(ro, binary.LittleEndian, &version)
+    binary.Read(rso, binary.LittleEndian, &version)
 
     switch version {
     case 1:
@@ -111,7 +111,7 @@ func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
     c.version = version
 
     // remaining file contents aren't decoded
-    ro.Seek(0, io.SeekEnd)
+    rso.Seek(0, io.SeekEnd)
 
     return nil, nil
 }
llm/llama.go (49 lines changed)
@@ -1,17 +1,11 @@
 package llm
 
 import (
-    "bytes"
-    "context"
     _ "embed"
-    "errors"
     "fmt"
-    "os"
-    "os/exec"
-    "time"
 
     "github.com/jmorganca/ollama/api"
     "github.com/jmorganca/ollama/format"
 )
 
 const jsonGrammar = `
@@ -42,51 +36,12 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 ws ::= ([ \t\n] ws)?
 `
 
-type Running struct {
-    Port   int
-    Cmd    *exec.Cmd
-    Cancel context.CancelFunc
-    *StatusWriter // captures error messages from the llama runner process
-}
-
 type ImageData struct {
     Data []byte `json:"data"`
     ID   int    `json:"id"`
 }
 
-var (
-    errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
-    errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
-    payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
-)
-
-// StatusWriter is a writer that captures error messages from the llama runner process
-type StatusWriter struct {
-    ErrCh      chan error
-    LastErrMsg string
-}
-
-func NewStatusWriter() *StatusWriter {
-    return &StatusWriter{
-        ErrCh: make(chan error, 1),
-    }
-}
-
-func (w *StatusWriter) Write(b []byte) (int, error) {
-    var errMsg string
-    if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-        errMsg = string(bytes.TrimSpace(after))
-    } else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
-        errMsg = string(bytes.TrimSpace(after))
-    }
-
-    if errMsg != "" {
-        w.LastErrMsg = errMsg
-        w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
-    }
-
-    return os.Stderr.Write(b)
-}
+var payloadMissing = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
 
 type prediction struct {
     Content string `json:"content"`
@@ -102,9 +57,7 @@ type prediction struct {
     }
 }
 
 const maxBufferSize = 512 * format.KiloByte
-const maxRetries = 3
-const retryDelay = 1 * time.Second
 
 type PredictOpts struct {
     Prompt string
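For context on the llama.go hunks above: the removed StatusWriter wrapped the llama runner's stderr, cut each write on "error:" or "CUDA error", recorded the trimmed message, and pushed a wrapped error onto ErrCh while still echoing the raw output. A minimal standalone sketch of that behavior, trimmed from the removed code and given a tiny driver (illustration only, not part of this diff):

package main

import (
    "bytes"
    "fmt"
    "os"
)

// StatusWriter mirrors the type removed in the hunk above: it scans writes
// from the runner's stderr for error text and forwards it on a channel.
type StatusWriter struct {
    ErrCh      chan error
    LastErrMsg string
}

func (w *StatusWriter) Write(b []byte) (int, error) {
    var errMsg string
    if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
        errMsg = string(bytes.TrimSpace(after))
    } else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
        errMsg = string(bytes.TrimSpace(after))
    }

    if errMsg != "" {
        w.LastErrMsg = errMsg
        w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
    }

    return os.Stderr.Write(b)
}

func main() {
    w := &StatusWriter{ErrCh: make(chan error, 1)}
    // Simulated runner output; the first Cut on "error:" matches inside "CUDA error:".
    fmt.Fprintln(w, "CUDA error: out of memory")
    fmt.Println(<-w.ErrCh) // prints "llama runner: out of memory"
}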
@@ -47,7 +47,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
     kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.NumLayers()) * int64(ggml.NumEmbed()) * int64(ggml.NumHeadKv()) / int64(ggml.NumHead())
 
     // this amount is the overhead + tensors in memory
-    // TODO: get this from the llama.cpp's graph calcluations instead of
+    // TODO: get this from the llama.cpp's graph calculations instead of
     // estimating it's 1/6 * kv_cache_size * num_gqa
     graph := int64(ggml.NumGQA()) * kv / 6
 
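The hunk above only fixes a comment typo, but the estimate it sits in is worth a quick worked example: kv is presumably 2 bytes per f16 element times 2 tensors (K and V) times context length, layer count, and the per-KV-head slice of the embedding, and graph overhead is then guessed as 1/6 * kv_cache_size * num_gqa. A standalone sketch with illustrative Llama-2-7B-style numbers (the values and the reading of the 2 * 2 factor are assumptions, not taken from the diff):

package main

import "fmt"

func main() {
    // Assumed, illustrative model shape (roughly Llama-2-7B); not from the diff.
    var (
        numCtx    int64 = 4096 // context length
        numLayers int64 = 32   // transformer layers
        numEmbed  int64 = 4096 // embedding width
        numHead   int64 = 32   // attention heads
        numHeadKv int64 = 32   // KV heads (no grouped-query attention here)
    )

    // Same shape as the diff: 2 (assumed f16 bytes) * 2 (K and V) * ctx * layers * embed * n_head_kv / n_head.
    kv := 2 * 2 * numCtx * numLayers * numEmbed * numHeadKv / numHead

    // num_gqa is taken here as n_head / n_head_kv (assumption); the diff's comment
    // estimates graph overhead as 1/6 * kv_cache_size * num_gqa.
    numGQA := numHead / numHeadKv
    graph := numGQA * kv / 6

    fmt.Printf("kv cache ≈ %d MiB, graph overhead ≈ %d MiB\n", kv/(1<<20), graph/(1<<20))
    // With these numbers: kv cache ≈ 2048 MiB, graph overhead ≈ 341 MiB.
}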