reorganize directories

Jeffrey Morgan
2023-06-25 13:08:03 -04:00
parent d3709f85b5
commit b361fa72ec
27 changed files with 59 additions and 49 deletions

server/.gitignore vendored

@@ -1,5 +0,0 @@
.env
.venv
*.spec
build
dist


@@ -1,39 +0,0 @@
# Server
🙊
## Installation
If using Apple silicon, you need a Python version that supports arm64:
```bash
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
bash Miniforge3-MacOSX-arm64.sh
```
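To confirm that the installed interpreter is an arm64 build (a quick sanity check, not part of the original steps), you can run:
```bash
# should print "arm64" on Apple silicon when Python is a native build
python3 -c 'import platform; print(platform.machine())'
```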
Get the dependencies:
```bash
pip install -r requirements.txt
```
## Building
```bash
python3 build.py
```
## Running
Put your model in `models/` and run:
```bash
python server.py
```
## API
### `POST /generate`
- `model`: `string` - The name of the model to use in the `models` folder.
- `prompt`: `string` - The prompt to use.
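
A rough sketch of a request to this endpoint (the model name `llama-7b` is a placeholder for whichever `.bin` file you put in `models/`; the server listens on port 5001 by default):
```bash
# "llama-7b" is a placeholder; use the name of a .bin file in your models/ folder
curl -X POST http://localhost:5001/generate \
  -H 'Content-Type: application/json' \
  -d '{"model": "llama-7b", "prompt": "Q: What is the capital of France? A:"}'
```
The response is streamed back as a sequence of JSON chunks rather than a single body.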


@@ -1,20 +0,0 @@
import site
import os

from PyInstaller.__main__ import run as pyi_run

# the llama_cpp directory is not included if not explicitly added
site_packages_dir = site.getsitepackages()[0]
llama_cpp_dir = os.path.join(site_packages_dir, "llama_cpp")

args = [
    "server.py",
    "--paths",
    site_packages_dir,
    "--add-data",
    f"{llama_cpp_dir}{os.pathsep}llama_cpp",
    "--onefile",
]

# generate the .spec file and run PyInstaller
pyi_run(args)


@@ -1,5 +0,0 @@
Flask==2.3.2
flask_cors==3.0.10
llama-cpp-python==0.1.65
pyinstaller==5.13.0
pyinstaller-hooks-contrib==2023.3


@@ -1,79 +0,0 @@
import json
import os

from llama_cpp import Llama
from flask import Flask, Response, stream_with_context, request
from flask_cors import CORS

app = Flask(__name__)
CORS(app)  # enable CORS for all routes

# llms tracks which models are loaded
llms = {}


@app.route("/load", methods=["POST"])
def load():
    data = request.get_json()
    model = data.get("model")

    if not model:
        return Response("Model is required", status=400)
    if not os.path.exists(f"../models/{model}.bin"):
        return {"error": "The model does not exist."}, 400

    if model not in llms:
        llms[model] = Llama(model_path=f"../models/{model}.bin")

    return Response(status=204)


@app.route("/unload", methods=["POST"])
def unload():
    data = request.get_json()
    model = data.get("model")

    if not model:
        return Response("Model is required", status=400)
    if not os.path.exists(f"../models/{model}.bin"):
        return {"error": "The model does not exist."}, 400

    llms.pop(model, None)

    return Response(status=204)


@app.route("/generate", methods=["POST"])
def generate():
    data = request.get_json()
    model = data.get("model")
    prompt = data.get("prompt")

    if not model:
        return Response("Model is required", status=400)
    if not prompt:
        return Response("Prompt is required", status=400)
    if not os.path.exists(f"../models/{model}.bin"):
        return {"error": "The model does not exist."}, 400

    if model not in llms:
        # auto load
        llms[model] = Llama(model_path=f"../models/{model}.bin")

    def stream_response():
        stream = llms[model](
            str(prompt),  # TODO: optimize prompt based on model
            max_tokens=4096,
            stop=["Q:", "\n"],
            echo=True,
            stream=True,
        )
        for output in stream:
            yield json.dumps(output)

    return Response(
        stream_with_context(stream_response()), mimetype="text/event-stream"
    )


if __name__ == "__main__":
    app.run(debug=True, threaded=True, port=5001)