add stablelm graph calculation

This commit is contained in:
Michael Yang
2024-04-17 13:57:19 -07:00
parent c8afe7168c
commit 3cf483fe48
2 changed files with 6 additions and 1 deletion

View File

@@ -381,6 +381,12 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
)
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
case "stablelm":
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
partialOffload = max(
4*batch*(vocab+2*embedding),
fullOffload,
)
}
return