mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
reorganize directories
This commit is contained in:
5
server/.gitignore
vendored
5
server/.gitignore
vendored
@@ -1,5 +0,0 @@
|
||||
.env
|
||||
.venv
|
||||
*.spec
|
||||
build
|
||||
dist
|
||||
@@ -1,39 +0,0 @@
|
||||
# Server
|
||||
|
||||
🙊
|
||||
|
||||
## Installation
|
||||
|
||||
If using Apple silicon, you need a Python version that supports arm64:
|
||||
|
||||
```bash
|
||||
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
|
||||
bash Miniforge3-MacOSX-arm64.sh
|
||||
```
|
||||
|
||||
Get the dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Building
|
||||
|
||||
```bash
|
||||
python3 build.py
|
||||
```
|
||||
|
||||
## Running
|
||||
|
||||
Put your model in `models/` and run:
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
## API
|
||||
|
||||
### `POST /generate`
|
||||
|
||||
model: `string` - The name of the model to use in the `models` folder.
|
||||
prompt: `string` - The prompt to use.
|
||||
@@ -1,20 +0,0 @@
|
||||
"""Build a single-file executable of the server with PyInstaller."""
import os
import site

from PyInstaller.__main__ import run as pyi_run

# PyInstaller does not pick up the llama_cpp package on its own, so its
# directory inside site-packages must be bundled explicitly.
pkg_root = site.getsitepackages()[0]
llama_pkg = os.path.join(pkg_root, "llama_cpp")

pyi_args = [
    "server.py",
    "--paths",
    pkg_root,
    "--add-data",
    f"{llama_pkg}{os.pathsep}llama_cpp",
    "--onefile",
]

# Generates the .spec file and performs the build.
pyi_run(pyi_args)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
Flask==2.3.2
|
||||
flask_cors==3.0.10
|
||||
llama-cpp-python==0.1.65
|
||||
pyinstaller==5.13.0
|
||||
pyinstaller-hooks-contrib==2023.3
|
||||
@@ -1,79 +0,0 @@
|
||||
"""Flask HTTP server that loads llama.cpp models and streams completions."""
import json
import os
from llama_cpp import Llama
from flask import Flask, Response, stream_with_context, request
from flask_cors import CORS

app = Flask(__name__)
CORS(app) # enable CORS for all routes

# llms tracks which models are loaded
# NOTE(review): maps model name -> Llama instance. Access is not guarded by
# a lock, so concurrent requests may race on load/unload — confirm whether
# the server is expected to run single-threaded.
llms = {}
|
||||
|
||||
|
||||
@app.route("/load", methods=["POST"])
def load():
    """Load the named model into memory ahead of time.

    Expects a JSON body ``{"model": "<name>"}`` where ``<name>`` refers to
    the file ``../models/<name>.bin``.  Returns 204 on success; 400 (text
    or JSON error, matching the original responses) when the name is
    missing, invalid, or no such model file exists.
    """
    data = request.get_json()
    model = data.get("model")

    if not model:
        return Response("Model is required", status=400)
    # Path-traversal hardening: the raw request value is interpolated into a
    # filesystem path below, so reject any name containing path components
    # (e.g. "../../etc/x") instead of letting it escape ../models/.
    if os.path.basename(model) != model or model in (".", ".."):
        return {"error": "The model does not exist."}, 400
    if not os.path.exists(f"../models/{model}.bin"):
        return {"error": "The model does not exist."}, 400

    # Idempotent: an already-loaded model is kept rather than reloaded.
    if model not in llms:
        llms[model] = Llama(model_path=f"../models/{model}.bin")

    return Response(status=204)
|
||||
|
||||
|
||||
@app.route("/unload", methods=["POST"])
def unload():
    """Evict the named model from the in-memory cache.

    Expects a JSON body ``{"model": "<name>"}``.  Returns 204 whether or
    not the model was actually loaded; 400 when the name is missing,
    invalid, or the model file does not exist on disk.
    """
    data = request.get_json()
    model = data.get("model")

    if not model:
        return Response("Model is required", status=400)
    # Path-traversal hardening (consistent with /load): reject names with
    # path components before they reach the os.path.exists probe, which
    # would otherwise let callers test for arbitrary files on disk.
    if os.path.basename(model) != model or model in (".", ".."):
        return {"error": "The model does not exist."}, 400
    if not os.path.exists(f"../models/{model}.bin"):
        return {"error": "The model does not exist."}, 400

    # pop with a default so unloading a model that was never loaded is a no-op.
    llms.pop(model, None)

    return Response(status=204)
|
||||
|
||||
|
||||
@app.route("/generate", methods=["POST"])
def generate():
    """Stream a completion for a prompt as an event stream of JSON chunks.

    Expects a JSON body ``{"model": "<name>", "prompt": "<text>"}``.
    Auto-loads the model if it is not already cached, then yields each
    llama.cpp output chunk as a JSON object over ``text/event-stream``.
    Returns 400 when the model or prompt is missing or the model name is
    invalid / not on disk.
    """
    data = request.get_json()
    model = data.get("model")
    prompt = data.get("prompt")

    if not model:
        return Response("Model is required", status=400)
    if not prompt:
        return Response("Prompt is required", status=400)
    # Path-traversal hardening (consistent with /load): never interpolate a
    # name containing path components into the ../models/ path.
    if os.path.basename(model) != model or model in (".", ".."):
        return {"error": "The model does not exist."}, 400
    if not os.path.exists(f"../models/{model}.bin"):
        return {"error": "The model does not exist."}, 400

    if model not in llms:
        # auto load
        llms[model] = Llama(model_path=f"../models/{model}.bin")

    def stream_response():
        # llama-cpp-python yields incremental completion chunks when
        # stream=True; each chunk is serialized to JSON for the client.
        stream = llms[model](
            str(prompt),  # TODO: optimize prompt based on model
            max_tokens=4096,
            stop=["Q:", "\n"],
            echo=True,
            stream=True,
        )
        for output in stream:
            yield json.dumps(output)

    # stream_with_context keeps the request context alive for the generator.
    return Response(
        stream_with_context(stream_response()), mimetype="text/event-stream"
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Development entry point: Flask's built-in server with the debugger and
    # reloader enabled — not suitable for production.  threaded=True allows
    # overlapping requests; note that the llms dict is not lock-protected.
    app.run(debug=True, threaded=True, port=5001)
|
||||
Reference in New Issue
Block a user