mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-09 23:37:06 +00:00
Improve multi-gpu handling at the limit
Still not complete, needs some refinement to our prediction to understand the discrete GPUs available space so we can see how many layers fit in each one since we can't split one layer across multiple GPUs we can't treat free space as one logical block
This commit is contained in:
@@ -38,7 +38,7 @@ func TestMultiModelConcurrency(t *testing.T) {
|
||||
}
|
||||
resp = [2][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
}
|
||||
)
|
||||
var wg sync.WaitGroup
|
||||
@@ -229,5 +229,23 @@ func TestMultiModelStress(t *testing.T) {
|
||||
}
|
||||
}(i)
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(2 * time.Second)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
models, err := client.ListRunning(ctx)
|
||||
if err != nil {
|
||||
slog.Warn("failed to list running models", "error", err)
|
||||
continue
|
||||
}
|
||||
for _, m := range models.Models {
|
||||
slog.Info("loaded model snapshot", "model", m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
)
|
||||
|
||||
func TestContextExhaustion(t *testing.T) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) // TODO maybe shorter?
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 4*time.Minute) // Longer needed for small footprint GPUs
|
||||
defer cancel()
|
||||
// Set up the test data
|
||||
req := api.GenerateRequest{
|
||||
|
||||
@@ -331,7 +331,7 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
|
||||
[][]string{
|
||||
[]string{"sunlight"},
|
||||
[]string{"soil", "organic", "earth", "black", "tan"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims"},
|
||||
[]string{"england", "english", "massachusetts", "pilgrims", "british"},
|
||||
[]string{"fourth", "july", "declaration", "independence"},
|
||||
[]string{"nitrogen", "oxygen", "carbon", "dioxide"},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user