Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-09 23:37:06 +00:00)
Fix embeddings memory corruption (#6467)
* Fix embeddings memory corruption

The patch was leading to a buffer overrun corruption. Once removed, though, parallelism in server.cpp led to hitting an assert due to slot/seq IDs being >= token count. To work around this, only use slot 0 for embeddings.

* Fix embed integration test assumption

The token eval count has changed with recent llama.cpp bumps (0.3.5+).
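As a rough illustration of the slot 0 workaround described above, the sketch below pins embedding requests to slot 0 while other requests take the first free slot. This is a minimal, self-contained C++ example and not the actual llama.cpp server.cpp code; Slot, pick_slot, and is_embedding are hypothetical names used only for illustration.

    #include <cstdio>
    #include <vector>

    // Illustrative sketch only; NOT the actual llama.cpp server.cpp code.
    struct Slot {
        int  id;
        bool busy = false;
    };

    // Pick a slot for a new request. Embedding requests are pinned to slot 0,
    // so their sequence ID stays below the token count that the batching code
    // asserts on; other requests take the first free slot (-1 if none is free).
    int pick_slot(const std::vector<Slot> &slots, bool is_embedding) {
        if (is_embedding) {
            return 0; // embeddings always run on slot 0 (the workaround)
        }
        for (const Slot &s : slots) {
            if (!s.busy) {
                return s.id;
            }
        }
        return -1; // no free slot; the caller would queue the request
    }

    int main() {
        std::vector<Slot> slots = {{0, true}, {1, false}, {2, false}};
        std::printf("embedding request -> slot %d\n", pick_slot(slots, true));   // 0
        std::printf("completion request -> slot %d\n", pick_slot(slots, false)); // 1
        return 0;
    }

Pinning embeddings to a single fixed slot keeps their seq ID at 0, which avoids the assert at the cost of effectively serializing concurrent embedding requests on that slot.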
@@ -70,8 +70,8 @@ func TestAllMiniLMEmbed(t *testing.T) {
 		t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
 	}
 
-	if res.PromptEvalCount != 8 {
-		t.Fatalf("expected 8 prompt tokens, got %d", res.PromptEvalCount)
+	if res.PromptEvalCount != 6 {
+		t.Fatalf("expected 6 prompt tokens, got %d", res.PromptEvalCount)
 	}
 }
 
@@ -102,8 +102,8 @@ func TestAllMiniLMBatchEmbed(t *testing.T) {
 		t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
 	}
 
-	if res.PromptEvalCount != 16 {
-		t.Fatalf("expected 16 prompt tokens, got %d", res.PromptEvalCount)
+	if res.PromptEvalCount != 12 {
+		t.Fatalf("expected 12 prompt tokens, got %d", res.PromptEvalCount)
 	}
 }