Merge pull request #1552 from jmorganca/mxyng/lint-test

add lint and test on pull_request
Michael Yang authored on 2024-01-11 09:37:45 -08:00; committed by GitHub
17 changed files with 141 additions and 82 deletions

View File

@@ -98,9 +98,9 @@ func (c *containerLORA) Name() string {
return "ggla"
}
func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
func (c *containerLORA) Decode(rso *readSeekOffset) (model, error) {
var version uint32
binary.Read(ro, binary.LittleEndian, &version)
binary.Read(rso, binary.LittleEndian, &version)
switch version {
case 1:
@@ -111,7 +111,7 @@ func (c *containerLORA) Decode(ro *readSeekOffset) (model, error) {
 	c.version = version
 
 	// remaining file contents aren't decoded
-	ro.Seek(0, io.SeekEnd)
+	rso.Seek(0, io.SeekEnd)
 
 	return nil, nil
 }

View File

@@ -1,17 +1,11 @@
 package llm
 
 import (
-	"bytes"
-	"context"
 	_ "embed"
-	"errors"
 	"fmt"
-	"os"
-	"os/exec"
-	"time"
 
 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/format"
 )
 
 const jsonGrammar = `
@@ -42,51 +36,12 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
 ws ::= ([ \t\n] ws)?
 `
 
-type Running struct {
-	Port   int
-	Cmd    *exec.Cmd
-	Cancel context.CancelFunc
-	*StatusWriter // captures error messages from the llama runner process
-}
-
-type ImageData struct {
-	Data []byte `json:"data"`
-	ID   int    `json:"id"`
-}
-
-var (
-	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
-	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
-	payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
-)
-
-// StatusWriter is a writer that captures error messages from the llama runner process
-type StatusWriter struct {
-	ErrCh      chan error
-	LastErrMsg string
-}
-
-func NewStatusWriter() *StatusWriter {
-	return &StatusWriter{
-		ErrCh: make(chan error, 1),
-	}
-}
-
-func (w *StatusWriter) Write(b []byte) (int, error) {
-	var errMsg string
-	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
-		errMsg = string(bytes.TrimSpace(after))
-	}
-
-	if errMsg != "" {
-		w.LastErrMsg = errMsg
-		w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
-	}
-
-	return os.Stderr.Write(b)
-}
+var payloadMissing = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
 
 type prediction struct {
 	Content string `json:"content"`
@@ -102,9 +57,7 @@ type prediction struct {
 	}
 }
 
 const maxBufferSize = 512 * format.KiloByte
-const maxRetries = 3
-const retryDelay = 1 * time.Second
 
 type PredictOpts struct {
 	Prompt string
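
The StatusWriter removed above was the hook that surfaced runner failures: anything the llama runner wrote to stderr containing "error:" or "CUDA error" was trimmed and forwarded on ErrCh. As a minimal, self-contained sketch of that pattern (the shell command and wiring below are hypothetical stand-ins, not how ollama actually launches the runner):

package main

import (
	"bytes"
	"fmt"
	"os"
	"os/exec"
)

// statusWriter mirrors the removed StatusWriter: it scans every stderr write
// from the subprocess and forwards recognized error lines on ErrCh.
type statusWriter struct {
	ErrCh      chan error
	LastErrMsg string
}

func (w *statusWriter) Write(b []byte) (int, error) {
	var errMsg string
	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
		errMsg = string(bytes.TrimSpace(after))
	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
		errMsg = string(bytes.TrimSpace(after))
	}
	if errMsg != "" {
		w.LastErrMsg = errMsg
		select {
		case w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg): // forward to whoever is watching
		default: // a previous error is still unread; keep the latest in LastErrMsg
		}
	}
	return os.Stderr.Write(b) // still echo everything so logs stay intact
}

func main() {
	w := &statusWriter{ErrCh: make(chan error, 1)}
	cmd := exec.Command("sh", "-c", `echo "error: out of memory" 1>&2`) // stand-in for the runner binary
	cmd.Stderr = w                                                      // route the subprocess's stderr through the writer
	_ = cmd.Run()
	select {
	case err := <-w.ErrCh:
		fmt.Println("captured:", err)
	default:
		fmt.Println("no error captured")
	}
}

The select/default in the sketch keeps a slow reader from blocking the subprocess's stderr; the original Write sent unconditionally and relied on the single-slot channel buffer.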

View File

@@ -47,7 +47,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	kv := 2 * 2 * int64(opts.NumCtx) * int64(ggml.NumLayers()) * int64(ggml.NumEmbed()) * int64(ggml.NumHeadKv()) / int64(ggml.NumHead())
 
 	// this amount is the overhead + tensors in memory
-	// TODO: get this from the llama.cpp's graph calcluations instead of
+	// TODO: get this from the llama.cpp's graph calculations instead of
 	// estimating it's 1/6 * kv_cache_size * num_gqa
 	graph := int64(ggml.NumGQA()) * kv / 6
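
For a sense of scale, the kv estimate above is 2 bytes per f16 element times 2 tensors (K and V) per layer per context position, scaled by the KV-head ratio, and the graph overhead is then guessed as 1/6 * kv_cache_size * num_gqa. A rough worked example with assumed 7B-class numbers (none of these figures come from this diff, and NumGQA is taken here to be the head/kv-head ratio):

package main

import "fmt"

func main() {
	// Assumed model shape for illustration only.
	var (
		numCtx    int64 = 2048
		numLayers int64 = 32
		numEmbed  int64 = 4096
		numHead   int64 = 32
		numHeadKV int64 = 32
	)

	// 2 bytes per f16 element * 2 tensors (K and V) * ctx * layers * embed, scaled by the kv-head ratio.
	kv := 2 * 2 * numCtx * numLayers * numEmbed * numHeadKV / numHead

	// Graph overhead estimated as 1/6 * kv_cache_size * num_gqa, mirroring the diff's heuristic.
	numGQA := numHead / numHeadKV
	graph := numGQA * kv / 6

	fmt.Printf("kv cache ≈ %d MiB, graph estimate ≈ %d MiB\n", kv>>20, graph>>20)
	// Prints: kv cache ≈ 1024 MiB, graph estimate ≈ 170 MiB
}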