mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
kvcache: Add check for values that fall out of sliding window cache
The sliding window cache trims entries that are outside the window for the latest token. This works when we are extending the cache, such as when the conversation continues. However, if we have a partial overlap in conversation (including the BOS tokens), then we resume from a past point in the conversation and the needed tokens are no longer stored in memory. This verifies that the new window overlaps with the old one before reusing the cache. Co-authored-by: Jesse Gross <jesse@ollama.com>
This commit is contained in:
@@ -118,6 +118,10 @@ func (c *InputCache) LoadCacheSlot(prompt []input.Input) (*InputCacheSlot, []inp
|
||||
}
|
||||
|
||||
if c.cache != nil {
|
||||
if numPast > 0 && !c.cache.CanResume(slot.Id, numPast) {
|
||||
numPast = 0
|
||||
}
|
||||
|
||||
err = c.cache.Remove(slot.Id, numPast, math.MaxInt32)
|
||||
if err != nil {
|
||||
// Some models don't support partial erasure
|
||||
|
||||
@@ -451,6 +451,7 @@ func (m *mockCache) Close()
|
||||
func (m *mockCache) StartForward(ctx ml.Context, batch input.Batch) error { return nil }
|
||||
func (m *mockCache) CopyPrefix(srcSeq, dstSeq int, len int32) {}
|
||||
func (m *mockCache) SetConfig(ml.CacheConfig) {}
|
||||
func (m *mockCache) CanResume(seq int, pos int32) bool { return true }
|
||||
|
||||
func TestShiftCacheSlot(t *testing.T) {
|
||||
tests := []struct {
|
||||
|
||||
Reference in New Issue
Block a user