Merge pull request #1552 from jmorganca/mxyng/lint-test

add lint and test on pull_request
Michael Yang authored on 2024-01-11 09:37:45 -08:00; committed by GitHub
17 changed files with 141 additions and 82 deletions

View File

@@ -98,9 +98,9 @@ func (c *containerLORA) Name() string {
return "ggla"
}
func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
func (c *containerLORA) Decode(rso *readSeekOffset) (model, error) {
var version uint32
binary.Read(ro, binary.LittleEndian, &version)
binary.Read(rso, binary.LittleEndian, &version)
switch version {
case 1:
@@ -111,7 +111,7 @@ func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
 	c.version = version
 
 	// remaining file contents aren't decoded
-	ro.Seek(0, io.SeekEnd)
+	rso.Seek(0, io.SeekEnd)
 
 	return nil, nil
 }

View File

@@ -1,17 +1,11 @@
 package llm
 
 import (
-	"bytes"
-	"context"
 	_ "embed"
-	"errors"
 	"fmt"
-	"os"
-	"os/exec"
-	"time"
 
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/format"
 )
 
 const jsonGrammar = `
@@ -42,51 +36,12 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 ws ::= ([ \t\n] ws)?
 `
 
-type Running struct {
-	Port   int
-	Cmd    *exec.Cmd
-	Cancel context.CancelFunc
-	*StatusWriter // captures error messages from the llama runner process
-}
-
-type ImageData struct {
-	Data []byte `json:"data"`
-	ID   int    `json:"id"`
-}
-
-var (
-	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
-	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
-	payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
-)
-
-// StatusWriter is a writer that captures error messages from the llama runner process
-type StatusWriter struct {
-	ErrCh      chan error
-	LastErrMsg string
-}
-
-func NewStatusWriter() *StatusWriter {
-	return &StatusWriter{
-		ErrCh: make(chan error, 1),
-	}
-}
-
-func (w *StatusWriter) Write(b []byte) (int, error) {
-	var errMsg string
-	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	}
-
-	if errMsg != "" {
-		w.LastErrMsg = errMsg
-		w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
-	}
-
-	return os.Stderr.Write(b)
-}
+var payloadMissing = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
 
 type prediction struct {
 	Content string `json:"content"`
@@ -102,9 +57,7 @@ type prediction struct {
 	}
 }
 
 const maxBufferSize = 512 * format.KiloByte
-const maxRetries = 3
-const retryDelay = 1 * time.Second
 
 type PredictOpts struct {
 	Prompt string
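
The StatusWriter removed above was the hook that surfaced runner failures: anything the llama runner wrote to stderr containing "error:" or "CUDA error" was trimmed and forwarded on ErrCh. As a minimal, self-contained sketch of that pattern (the shell command and wiring below are hypothetical stand-ins, not how ollama actually launches the runner):

package main

import (
	"bytes"
	"fmt"
	"os"
	"os/exec"
)

// statusWriter mirrors the removed StatusWriter: it scans every stderr write
// from the subprocess and forwards recognized error lines on ErrCh.
type statusWriter struct {
	ErrCh      chan error
	LastErrMsg string
}

func (w *statusWriter) Write(b []byte) (int, error) {
	var errMsg string
	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
		errMsg = string(bytes.TrimSpace(after))
	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
		errMsg = string(bytes.TrimSpace(after))
	}
	if errMsg != "" {
		w.LastErrMsg = errMsg
		select {
		case w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg): // forward to whoever is watching
		default: // a previous error is still unread; keep the latest in LastErrMsg
		}
	}
	return os.Stderr.Write(b) // still echo everything so logs stay intact
}

func main() {
	w := &statusWriter{ErrCh: make(chan error, 1)}
	cmd := exec.Command("sh", "-c", `echo "error: out of memory" 1>&2`) // stand-in for the runner binary
	cmd.Stderr = w                                                      // route the subprocess's stderr through the writer
	_ = cmd.Run()
	select {
	case err := <-w.ErrCh:
		fmt.Println("captured:", err)
	default:
		fmt.Println("no error captured")
	}
}

The select/default in the sketch keeps a slow reader from blocking the subprocess's stderr; the original Write sent unconditionally and relied on the single-slot channel buffer.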

View File

@@ -47,7 +47,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.NumLayers()) * int64(ggml.NumEmbed()) * int64(ggml.NumHeadKv()) / int64(ggml.NumHead())
 
 	// this amount is the overhead + tensors in memory
-	// TODO: get this from the llama.cpp's graph calcluations instead of
+	// TODO: get this from the llama.cpp's graph calculations instead of
 	// estimating it's 1/6 * kv_cache_size * num_gqa
 	graph := int64(ggml.NumGQA()) * kv / 6
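
For a sense of scale, the kv estimate above is 2 bytes per f16 element times 2 tensors (K and V) per layer per context position, scaled by the KV-head ratio, and the graph overhead is then guessed as 1/6 * kv_cache_size * num_gqa. A rough worked example with assumed 7B-class numbers (none of these figures come from this diff, and NumGQA is taken here to be the head/kv-head ratio):

package main

import "fmt"

func main() {
	// Assumed model shape for illustration only.
	var (
		numCtx    int64 = 2048
		numLayers int64 = 32
		numEmbed  int64 = 4096
		numHead   int64 = 32
		numHeadKV int64 = 32
	)

	// 2 bytes per f16 element * 2 tensors (K and V) * ctx * layers * embed, scaled by the kv-head ratio.
	kv := 2 * 2 * numCtx * numLayers * numEmbed * numHeadKV / numHead

	// Graph overhead estimated as 1/6 * kv_cache_size * num_gqa, mirroring the diff's heuristic.
	numGQA := numHead / numHeadKV
	graph := numGQA * kv / 6

	fmt.Printf("kv cache ≈ %d MiB, graph estimate ≈ %d MiB\n", kv>>20, graph>>20)
	// Prints: kv cache ≈ 1024 MiB, graph estimate ≈ 170 MiB
}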