This commit is contained in:
Michael Yang
2024-05-21 22:21:04 -07:00
parent c895a7d13f
commit e40145a39d
31 changed files with 127 additions and 136 deletions

View File

@@ -987,7 +987,7 @@ func getTokenSubject(token string) string {
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *registryOptions) (*http.Response, error) {
anonymous := true // access will default to anonymous if no user is found associated with the public key
for i := 0; i < 2; i++ {
for range 2 {
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
if err != nil {
if !errors.Is(err, context.Canceled) {

View File

@@ -72,7 +72,6 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
default:
layers = append(layers, &layerWithGGML{layer, nil})
}
}
return layers, nil

View File

@@ -6,12 +6,13 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestGetBlobsPath(t *testing.T) {
// GetBlobsPath expects an actual directory to exist
dir, err := os.MkdirTemp("", "ollama-test")
assert.Nil(t, err)
require.NoError(t, err)
defer os.RemoveAll(dir)
tests := []struct {
@@ -63,7 +64,7 @@ func TestGetBlobsPath(t *testing.T) {
got, err := GetBlobsPath(tc.digest)
assert.ErrorIs(t, tc.err, err, tc.name)
require.ErrorIs(t, tc.err, err, tc.name)
assert.Equal(t, tc.expected, got, tc.name)
})
}

View File

@@ -77,7 +77,6 @@ func isSupportedImageType(image []byte) bool {
}
func (s *Server) GenerateHandler(c *gin.Context) {
checkpointStart := time.Now()
var req api.GenerateRequest
err := c.ShouldBindJSON(&req)
@@ -942,7 +941,7 @@ func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
}
if allowedHost(host) {
if c.Request.Method == "OPTIONS" {
if c.Request.Method == http.MethodOptions {
c.AbortWithStatus(http.StatusNoContent)
return
}
@@ -1306,7 +1305,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
defer close(ch)
fn := func(r llm.CompletionResponse) {
resp := api.ChatResponse{
Model: req.Model,
CreatedAt: time.Now().UTC(),

View File

@@ -15,6 +15,7 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/parser"
@@ -25,20 +26,20 @@ func createTestFile(t *testing.T, name string) string {
t.Helper()
f, err := os.CreateTemp(t.TempDir(), name)
assert.Nil(t, err)
assert.NoError(t, err)
defer f.Close()
err = binary.Write(f, binary.LittleEndian, []byte("GGUF"))
assert.Nil(t, err)
assert.NoError(t, err)
err = binary.Write(f, binary.LittleEndian, uint32(3))
assert.Nil(t, err)
assert.NoError(t, err)
err = binary.Write(f, binary.LittleEndian, uint64(0))
assert.Nil(t, err)
assert.NoError(t, err)
err = binary.Write(f, binary.LittleEndian, uint64(0))
assert.Nil(t, err)
assert.NoError(t, err)
return f.Name()
}
@@ -57,12 +58,12 @@ func Test_Routes(t *testing.T) {
r := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
modelfile, err := parser.ParseFile(r)
assert.Nil(t, err)
require.NoError(t, err)
fn := func(resp api.ProgressResponse) {
t.Logf("Status: %s", resp.Status)
}
err = CreateModel(context.TODO(), name, "", "", modelfile, fn)
assert.Nil(t, err)
require.NoError(t, err)
}
testCases := []testCase{
@@ -74,9 +75,9 @@ func Test_Routes(t *testing.T) {
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
assert.Equal(t, contentType, "application/json; charset=utf-8")
assert.Equal(t, "application/json; charset=utf-8", contentType)
body, err := io.ReadAll(resp.Body)
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, fmt.Sprintf(`{"version":"%s"}`, version.Version), string(body))
},
},
@@ -86,17 +87,17 @@ func Test_Routes(t *testing.T) {
Path: "/api/tags",
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
assert.Equal(t, contentType, "application/json; charset=utf-8")
assert.Equal(t, "application/json; charset=utf-8", contentType)
body, err := io.ReadAll(resp.Body)
assert.Nil(t, err)
require.NoError(t, err)
var modelList api.ListResponse
err = json.Unmarshal(body, &modelList)
assert.Nil(t, err)
require.NoError(t, err)
assert.NotNil(t, modelList.Models)
assert.Equal(t, 0, len(modelList.Models))
assert.Empty(t, len(modelList.Models))
},
},
{
@@ -108,16 +109,16 @@ func Test_Routes(t *testing.T) {
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
assert.Equal(t, contentType, "application/json; charset=utf-8")
assert.Equal(t, "application/json; charset=utf-8", contentType)
body, err := io.ReadAll(resp.Body)
assert.Nil(t, err)
require.NoError(t, err)
var modelList api.ListResponse
err = json.Unmarshal(body, &modelList)
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, 1, len(modelList.Models))
assert.Equal(t, modelList.Models[0].Name, "test-model:latest")
assert.Len(t, modelList.Models, 1)
assert.Equal(t, "test-model:latest", modelList.Models[0].Name)
},
},
{
@@ -134,7 +135,7 @@ func Test_Routes(t *testing.T) {
Stream: &stream,
}
jsonData, err := json.Marshal(createReq)
assert.Nil(t, err)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
@@ -142,11 +143,11 @@ func Test_Routes(t *testing.T) {
contentType := resp.Header.Get("Content-Type")
assert.Equal(t, "application/json", contentType)
_, err := io.ReadAll(resp.Body)
assert.Nil(t, err)
assert.Equal(t, resp.StatusCode, 200)
require.NoError(t, err)
assert.Equal(t, 200, resp.StatusCode)
model, err := GetModel("t-bone")
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, "t-bone:latest", model.ShortName)
},
},
@@ -161,13 +162,13 @@ func Test_Routes(t *testing.T) {
Destination: "beefsteak",
}
jsonData, err := json.Marshal(copyReq)
assert.Nil(t, err)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
Expected: func(t *testing.T, resp *http.Response) {
model, err := GetModel("beefsteak")
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, "beefsteak:latest", model.ShortName)
},
},
@@ -179,18 +180,18 @@ func Test_Routes(t *testing.T) {
createTestModel(t, "show-model")
showReq := api.ShowRequest{Model: "show-model"}
jsonData, err := json.Marshal(showReq)
assert.Nil(t, err)
require.NoError(t, err)
req.Body = io.NopCloser(bytes.NewReader(jsonData))
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
assert.Equal(t, contentType, "application/json; charset=utf-8")
assert.Equal(t, "application/json; charset=utf-8", contentType)
body, err := io.ReadAll(resp.Body)
assert.Nil(t, err)
require.NoError(t, err)
var showResp api.ShowResponse
err = json.Unmarshal(body, &showResp)
assert.Nil(t, err)
require.NoError(t, err)
var params []string
paramsSplit := strings.Split(showResp.Parameters, "\n")
@@ -221,14 +222,14 @@ func Test_Routes(t *testing.T) {
t.Run(tc.Name, func(t *testing.T) {
u := httpSrv.URL + tc.Path
req, err := http.NewRequestWithContext(context.TODO(), tc.Method, u, nil)
assert.Nil(t, err)
require.NoError(t, err)
if tc.Setup != nil {
tc.Setup(t, req)
}
resp, err := httpSrv.Client().Do(req)
assert.Nil(t, err)
require.NoError(t, err)
defer resp.Body.Close()
if tc.Expected != nil {

View File

@@ -370,7 +370,6 @@ func (s *Scheduler) updateFreeSpace(allGpus gpu.GpuInfoList) {
r.refMu.Lock()
gpuIDs := make([]string, 0, len(r.gpus))
if r.llama != nil {
// TODO this should be broken down by GPU instead of assuming uniform spread
estimatedVRAMPerGPU := r.llama.EstimatedVRAM() / uint64(len(r.gpus))
for _, gpu := range r.gpus {
@@ -529,7 +528,6 @@ func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
}
}()
return finished
}
type ByDuration []*runnerRef

View File

@@ -12,11 +12,10 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/app/lifecycle"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/envconfig"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@@ -53,10 +52,10 @@ func TestLoad(t *testing.T) {
}
gpus := gpu.GpuInfoList{}
s.load(req, ggml, gpus)
require.Len(t, req.successCh, 0)
require.Empty(t, req.successCh)
require.Len(t, req.errCh, 1)
s.loadedMu.Lock()
require.Len(t, s.loaded, 0)
require.Empty(t, s.loaded)
s.loadedMu.Unlock()
err := <-req.errCh
require.Contains(t, err.Error(), "this model may be incompatible")
@@ -113,7 +112,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
t.Helper()
f, err := os.CreateTemp(t.TempDir(), modelName)
assert.Nil(t, err)
require.NoError(t, err)
defer f.Close()
gguf := llm.NewGGUFV3(binary.LittleEndian)
@@ -131,7 +130,7 @@ func newScenario(t *testing.T, ctx context.Context, modelName string, estimatedV
}, []llm.Tensor{
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: &bytes.Reader{}},
})
assert.Nil(t, err)
require.NoError(t, err)
fname := f.Name()
model := &Model{Name: modelName, ModelPath: fname}
@@ -190,8 +189,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario1a.req.successCh:
require.Equal(t, resp.llama, scenario1a.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario1a.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario1a.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -203,8 +202,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario1b.req.successCh:
require.Equal(t, resp.llama, scenario1a.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario1b.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario1b.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -221,8 +220,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario2a.req.successCh:
require.Equal(t, resp.llama, scenario2a.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario2a.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario2a.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -237,8 +236,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario3a.req.successCh:
require.Equal(t, resp.llama, scenario3a.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario3a.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario3a.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -253,8 +252,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario3b.req.successCh:
require.Equal(t, resp.llama, scenario3b.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario3b.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario3b.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -269,8 +268,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario3c.req.successCh:
require.Equal(t, resp.llama, scenario3c.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario3c.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario3c.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -296,8 +295,8 @@ func TestRequests(t *testing.T) {
select {
case resp := <-scenario3d.req.successCh:
require.Equal(t, resp.llama, scenario3d.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, scenario3d.req.errCh, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, scenario3d.req.errCh)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -332,7 +331,7 @@ func TestGetRunner(t *testing.T) {
slog.Info("scenario1b")
successCh1b, errCh1b := s.GetRunner(scenario1b.ctx, scenario1b.req.model, scenario1b.req.opts, scenario1b.req.sessionDuration)
require.Len(t, s.pendingReqCh, 1)
require.Len(t, successCh1b, 0)
require.Empty(t, successCh1b)
require.Len(t, errCh1b, 1)
err := <-errCh1b
require.Contains(t, err.Error(), "server busy")
@@ -340,8 +339,8 @@ func TestGetRunner(t *testing.T) {
select {
case resp := <-successCh1a:
require.Equal(t, resp.llama, scenario1a.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, errCh1a, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, errCh1a)
case <-ctx.Done():
t.Errorf("timeout")
}
@@ -355,9 +354,9 @@ func TestGetRunner(t *testing.T) {
successCh1c, errCh1c := s.GetRunner(scenario1c.ctx, scenario1c.req.model, scenario1c.req.opts, scenario1c.req.sessionDuration)
// Starts in pending channel, then should be quickly processsed to return an error
time.Sleep(5 * time.Millisecond)
require.Len(t, successCh1c, 0)
require.Empty(t, successCh1c)
s.loadedMu.Lock()
require.Len(t, s.loaded, 0)
require.Empty(t, s.loaded)
s.loadedMu.Unlock()
require.Len(t, errCh1c, 1)
err = <-errCh1c
@@ -386,8 +385,8 @@ func TestPrematureExpired(t *testing.T) {
select {
case resp := <-successCh1a:
require.Equal(t, resp.llama, scenario1a.srv)
require.Len(t, s.pendingReqCh, 0)
require.Len(t, errCh1a, 0)
require.Empty(t, s.pendingReqCh)
require.Empty(t, errCh1a)
s.loadedMu.Lock()
require.Len(t, s.loaded, 1)
s.loadedMu.Unlock()
@@ -401,9 +400,9 @@ func TestPrematureExpired(t *testing.T) {
time.Sleep(20 * time.Millisecond)
require.LessOrEqual(t, len(s.finishedReqCh), 1)
time.Sleep(10 * time.Millisecond)
require.Len(t, s.finishedReqCh, 0)
require.Empty(t, s.finishedReqCh)
s.loadedMu.Lock()
require.Len(t, s.loaded, 0)
require.Empty(t, s.loaded)
s.loadedMu.Unlock()
// also shouldn't happen in real life
@@ -487,7 +486,6 @@ func TestFindRunnerToUnload(t *testing.T) {
r2.refCount = 1
resp = s.findRunnerToUnload()
require.Equal(t, r1, resp)
}
func TestNeedsReload(t *testing.T) {

View File

@@ -146,7 +146,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
case requestURL := <-b.nextURL:
g.Go(func() error {
var err error
for try := 0; try < maxRetries; try++ {
for try := range maxRetries {
err = b.uploadPart(inner, http.MethodPatch, requestURL, part, opts)
switch {
case errors.Is(err, context.Canceled):
@@ -190,7 +190,7 @@ func (b *blobUpload) Run(ctx context.Context, opts *registryOptions) {
headers.Set("Content-Type", "application/octet-stream")
headers.Set("Content-Length", "0")
for try := 0; try < maxRetries; try++ {
for try := range maxRetries {
var resp *http.Response
resp, err = makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
if errors.Is(err, context.Canceled) {
@@ -253,7 +253,7 @@ func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *
}
// retry uploading to the redirect URL
for try := 0; try < maxRetries; try++ {
for try := range maxRetries {
err = b.uploadPart(ctx, http.MethodPut, redirectURL, part, nil)
switch {
case errors.Is(err, context.Canceled):