client updates

2025-12-12 08:47:01 +00:00 · 2023-07-04 00:47:00 -04:00
parent 6292f4b64c
commit fd962a36e5
21 changed files with 198 additions and 3137 deletions
--- a/llama/binding/binding.h
+++ b/llama/binding/binding.h
@@ -30,22 +30,13 @@ extern "C" {

 extern unsigned char tokenCallback(void *, char *);

-int load_state(void *ctx, char *statefile, char *modes);
-
-int eval(void *params_ptr, void *ctx, char *text);
-
-void save_state(void *ctx, char *dst, char *modes);
+int eval(void *p, void *c, char *text);

 void *load_model(const char *fname, int n_ctx, int n_seed, bool memory_f16,
                 bool mlock, bool embeddings, bool mmap, bool low_vram,
                 bool vocab_only, int n_gpu, int n_batch, const char *maingpu,
                 const char *tensorsplit, bool numa);

-int get_embeddings(void *params_ptr, void *state_pr, float *res_embeddings);
-
-int get_token_embeddings(void *params_ptr, void *state_pr, int *tokens,
-                         int tokenSize, float *res_embeddings);
-
 void *llama_allocate_params(
    const char *prompt, int seed, int threads, int tokens, int top_k,
    float top_p, float temp, float repeat_penalty, int repeat_last_n,
@@ -59,13 +50,11 @@ void *llama_allocate_params(

 void llama_free_params(void *params_ptr);

-void llama_binding_free_model(void *state);
+void llama_binding_free_model(void *ctx);

 int llama_predict(void *params_ptr, void *state_pr, char *result, bool debug);

 #ifdef __cplusplus
 }

-std::vector<std::string> create_vector(const char **strings, int count);
-void delete_vector(std::vector<std::string> *vec);
 #endif