wip go engine

Co-authored-by: Patrick Devine <pdevine@sonic.net>

Author: Jeffrey Morgan
Date: 2023-07-03 15:22:44 -04:00
commit 76cb60d496 (parent 172274b809)
39 changed files with 1377 additions and 1 deletions


@@ -0,0 +1,9 @@
from ollama.model import models
from ollama.engine import generate, load, unload

__all__ = [
    'models',
    'generate',
    'load',
    'unload',
]
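
For orientation, a minimal sketch of how the package-level API re-exported above could be used once the engine and model modules below are in place; the model name "orca" and the prompt are illustrative assumptions, not part of this commit.

import ollama

# list models already cached under ~/.ollama/models
for name in ollama.models():
    print(name)

# stream a completion; each output dict carries choices[0]["text"]
for output in ollama.generate("orca", "Why is the sky blue?"):
    print(output["choices"][0]["text"], end="", flush=True)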


@@ -0,0 +1,4 @@
from ollama.cmd import cli

if __name__ == '__main__':
    cli.main()


python/ollama/cmd/cli.py Normal file

@@ -0,0 +1,192 @@
import os
import sys
from argparse import ArgumentParser, HelpFormatter, PARSER

from yaspin import yaspin

from ollama import model, engine
from ollama.cmd import server


class CustomHelpFormatter(HelpFormatter):
    """
    This class is used to customize the way the argparse help text is displayed.
    We specifically override the _format_action method to exclude the line that
    shows all the subparser command options in the help text. This line is typically
    in the form "{serve,models,pull,run}".
    """

    def _format_action(self, action):
        # get the original help text
        parts = super()._format_action(action)
        if action.nargs == PARSER:
            # remove the unwanted first line
            parts = "\n".join(parts.split("\n")[1:])
        return parts
def main():
    parser = ArgumentParser(
        description='Ollama: Run any large language model on any machine.',
        formatter_class=CustomHelpFormatter,
    )

    # create the models home if it doesn't exist
    os.makedirs(model.MODELS_CACHE_PATH, exist_ok=True)

    subparsers = parser.add_subparsers(
        title='commands',
    )

    list_parser = subparsers.add_parser(
        "models",
        description="List all available models stored locally.",
        help="List all available models stored locally.",
    )
    list_parser.set_defaults(fn=list_models)

    search_parser = subparsers.add_parser(
        "search",
        description="Search for compatible models that Ollama can run.",
        help="Search for compatible models that Ollama can run. Usage: search [model]",
    )
    search_parser.add_argument(
        "query",
        nargs="?",
        help="Optional name of the model to search for.",
    )
    search_parser.set_defaults(fn=search)

    pull_parser = subparsers.add_parser(
        "pull",
        description="Download a specified model from a remote source.",
        help="Download a specified model from a remote source. Usage: pull [model]",
    )
    pull_parser.add_argument("model", help="Name of the model to download.")
    pull_parser.set_defaults(fn=pull)

    run_parser = subparsers.add_parser(
        "run",
        description="Run a model and submit prompts.",
        help="Run a model and submit prompts. Usage: run [model] [prompt]",
    )
    run_parser.add_argument("model", help="Name of the model to run.")
    run_parser.add_argument(
        "prompt",
        nargs="?",
        help="Optional prompt for the model; interactive mode is enabled when not specified.",
    )
    run_parser.set_defaults(fn=run)

    server.set_parser(
        subparsers.add_parser(
            "serve",
            description="Start a persistent server to interact with models via the API.",
            help="Start a persistent server to interact with models via the API.",
        )
    )

    args = parser.parse_args()
    args = vars(args)

    try:
        fn = args.pop("fn")
        fn(**args)
    except KeyboardInterrupt:
        pass
    except KeyError:
        # no subcommand was provided
        parser.print_help()
    except Exception as e:
        print(e)
def list_models(*args, **kwargs):
    for m in model.models(*args, **kwargs):
        print(m)


def generate(*args, **kwargs):
    if prompt := kwargs.get("prompt"):
        print(">>>", prompt, flush=True)
        generate_oneshot(*args, **kwargs)
        return

    if sys.stdin.isatty():
        return generate_interactive(*args, **kwargs)

    return generate_batch(*args, **kwargs)


def generate_oneshot(*args, **kwargs):
    print(flush=True)

    spinner = yaspin()
    spinner.start()
    spinner_running = True
    try:
        for output in engine.generate(model_name=kwargs.pop('model'), *args, **kwargs):
            choices = output.get("choices", [])
            if len(choices) > 0:
                if spinner_running:
                    spinner.stop()
                    spinner_running = False
                    print("\r", end="")  # move cursor back to beginning of line again
                print(choices[0].get("text", ""), end="", flush=True)
    except Exception:
        spinner.stop()
        raise

    # end with a new line
    print(flush=True)
    print(flush=True)
def generate_interactive(*args, **kwargs):
    while True:
        print(">>> ", end="", flush=True)
        line = sys.stdin.readline()
        if not line:
            # EOF (e.g. Ctrl-D): exit the prompt loop cleanly
            return

        kwargs.update({"prompt": line})
        generate_oneshot(*args, **kwargs)


def generate_batch(*args, **kwargs):
    for line in sys.stdin:
        print(">>> ", line, end="", flush=True)
        kwargs.update({"prompt": line})
        generate_oneshot(*args, **kwargs)
def search(*args, **kwargs):
    try:
        model_names = model.search_directory(*args, **kwargs)
        if len(model_names) == 0:
            print("No models found.")
            return
        elif len(model_names) == 1:
            print(f"Found {len(model_names)} available model:")
        else:
            print(f"Found {len(model_names)} available models:")

        for model_name in model_names:
            print(model_name.lower())
    except Exception:
        print("Failed to fetch available models, check your network connection")


def pull(*args, **kwargs):
    try:
        model.pull(model_name=kwargs.pop('model'), *args, **kwargs)
        print("Up to date.")
    except Exception as e:
        print(f"An error occurred: {e}")


def run(*args, **kwargs):
    try:
        name = model.pull(model_name=kwargs.pop('model'), *args, **kwargs)
        kwargs.update({"model": name})
        print(f"Running {name}...")
        generate(*args, **kwargs)
    except Exception as e:
        print(f"An error occurred: {e}")


@@ -0,0 +1,94 @@
import json

import aiohttp_cors
from aiohttp import web

from ollama import engine


def set_parser(parser):
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", default=7734)
    parser.set_defaults(fn=serve)


def serve(*args, **kwargs):
    app = web.Application()

    cors = aiohttp_cors.setup(
        app,
        defaults={
            "*": aiohttp_cors.ResourceOptions(
                allow_credentials=True,
                expose_headers="*",
                allow_headers="*",
            )
        },
    )

    app.add_routes(
        [
            web.post("/load", load),
            web.post("/unload", unload),
            web.post("/generate", generate),
        ]
    )

    for route in app.router.routes():
        cors.add(route)

    app.update(
        {
            "models": {},
        }
    )

    web.run_app(app, **kwargs)


async def load(request):
    body = await request.json()

    name = body.get("model")
    if not name:
        raise web.HTTPBadRequest()

    kwargs = {
        "models": request.app.get("models"),
    }

    engine.load(name, **kwargs)
    return web.Response()


async def unload(request):
    body = await request.json()

    name = body.get("model")
    if not name:
        raise web.HTTPBadRequest()

    engine.unload(name, models=request.app.get("models"))
    return web.Response()


async def generate(request):
    body = await request.json()

    name = body.get("model")
    if not name:
        raise web.HTTPBadRequest()

    prompt = body.get("prompt")
    if not prompt:
        raise web.HTTPBadRequest()

    response = web.StreamResponse()
    await response.prepare(request)

    kwargs = {
        "models": request.app.get("models"),
    }

    for output in engine.generate(name, prompt, **kwargs):
        output = json.dumps(output).encode('utf-8')
        await response.write(output)
        await response.write(b"\n")

    return response
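
To illustrate the API this server exposes, a minimal client sketch under stated assumptions: the server is running with the default host and port above, "orca" resolves to a locally cached model, and the requests library (already used by the model module below) is available.

import json

import requests

# /generate streams newline-delimited JSON objects, one per generated chunk
response = requests.post(
    "http://127.0.0.1:7734/generate",
    json={"model": "orca", "prompt": "Why is the sky blue?"},
    stream=True,
)
for line in response.iter_lines():
    if line:
        output = json.loads(line)
        print(output["choices"][0]["text"], end="", flush=True)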

python/ollama/engine.py Normal file

@@ -0,0 +1,121 @@
import os
import sys
from os import path
from contextlib import contextmanager

from thefuzz import process
from llama_cpp import Llama
from ctransformers import AutoModelForCausalLM

import ollama.prompt
from ollama.model import MODELS_CACHE_PATH


@contextmanager
def suppress(file):
    # temporarily redirect a file descriptor (stdout/stderr) to /dev/null
    original = os.dup(file.fileno())
    with open(os.devnull, "w") as devnull:
        os.dup2(devnull.fileno(), file.fileno())
        try:
            yield
        finally:
            # restore the original descriptor even if the body raises
            os.dup2(original, file.fileno())
            os.close(original)
def generate(model_name, prompt, models={}, *args, **kwargs):
    model = load(model_name, models=models)
    inputs = ollama.prompt.template(model_name, prompt)
    return model.generate(inputs, *args, **kwargs)


def load(model_name, models={}):
    if not models.get(model_name, None):
        model_path = path.expanduser(model_name)
        if not path.exists(model_path):
            model_path = str(MODELS_CACHE_PATH / (model_name + ".bin"))

        runners = {
            model_type: cls
            for cls in [LlamaCppRunner, CtransformerRunner]
            for model_type in cls.model_types()
        }

        # fuzzy-match the model path against known model types to pick a runner
        for match, _ in process.extract(model_path, runners.keys(), limit=len(runners)):
            try:
                model = runners.get(match)
                runner = model(model_path, match)
                models.update({model_name: runner})
                return runner
            except Exception:
                pass

        raise Exception("failed to load model", model_path, model_name)

    # already loaded: return the cached runner
    return models.get(model_name)


def unload(model_name, models={}):
    if model_name in models:
        models.pop(model_name)
class LlamaCppRunner:
    def __init__(self, model_path, model_type):
        try:
            with suppress(sys.stderr), suppress(sys.stdout):
                self.model = Llama(model_path, verbose=False, n_gpu_layers=1, seed=-1)
        except Exception:
            raise Exception("Failed to load model", model_path, model_type)

    @staticmethod
    def model_types():
        return [
            'llama',
            'orca',
            'vicuna',
            'ultralm',
        ]

    def generate(self, prompt, *args, **kwargs):
        if "max_tokens" not in kwargs:
            kwargs.update({"max_tokens": 512})

        if "stop" not in kwargs:
            kwargs.update({"stop": ["Q:"]})

        if "stream" not in kwargs:
            kwargs.update({"stream": True})

        with suppress(sys.stderr):
            for output in self.model(prompt, *args, **kwargs):
                yield output
class CtransformerRunner:
    def __init__(self, model_path, model_type):
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path, model_type=model_type, local_files_only=True
        )

    @staticmethod
    def model_types():
        return [
            'falcon',
            'mpt',
            'starcoder',
        ]

    def generate(self, prompt, *args, **kwargs):
        if "max_new_tokens" not in kwargs:
            kwargs.update({"max_new_tokens": 512})

        if "stop" not in kwargs:
            kwargs.update({"stop": ["User"]})

        if "stream" not in kwargs:
            kwargs.update({"stream": True})

        for output in self.model(prompt, *args, **kwargs):
            yield {
                'choices': [
                    {
                        'text': output,
                    },
                ],
            }
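
A minimal sketch of driving the engine module directly, bypassing the CLI and server; the model name is an illustrative assumption and must resolve to a file in the models cache or to a local path.

from ollama import engine

models = {}  # cache of loaded runners, keyed by model name
engine.load("orca", models=models)
for output in engine.generate("orca", "Why is the sky blue?", models=models):
    print(output["choices"][0]["text"], end="", flush=True)
engine.unload("orca", models=models)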

python/ollama/model.py Normal file

@@ -0,0 +1,157 @@
import requests
import validators
from pathlib import Path
from os import path, walk
from urllib.parse import urlsplit, urlunsplit
from tqdm import tqdm

MODELS_MANIFEST = 'https://ollama.ai/api/models'
MODELS_CACHE_PATH = Path.home() / '.ollama' / 'models'


def models(*args, **kwargs):
    for _, _, files in walk(MODELS_CACHE_PATH):
        for file in files:
            base, ext = path.splitext(file)
            if ext == '.bin':
                yield base


# search the directory and return all models which contain the search term as a substring,
# or all models if no search term is provided
def search_directory(query):
    response = requests.get(MODELS_MANIFEST)
    response.raise_for_status()
    directory = response.json()

    model_names = []
    for model_info in directory:
        if not query or query.lower() in model_info.get('name', '').lower():
            model_names.append(model_info.get('name'))

    return model_names


# get the url of the model from our curated directory
def get_url_from_directory(model):
    response = requests.get(MODELS_MANIFEST)
    response.raise_for_status()
    directory = response.json()
    for model_info in directory:
        if model_info.get('name').lower() == model.lower():
            return model_info.get('url')
    return model
def download_from_repo(url, file_name):
    parts = urlsplit(url)
    path_parts = parts.path.split('/tree/')

    if len(path_parts) == 1:
        location = path_parts[0]
        branch = 'main'
    else:
        location, branch = path_parts

    location = location.strip('/')
    if file_name == '':
        file_name = path.basename(location).lower()

    download_url = urlunsplit(
        (
            'https',
            parts.netloc,
            f'/api/models/{location}/tree/{branch}',
            parts.query,
            parts.fragment,
        )
    )
    response = requests.get(download_url)
    response.raise_for_status()
    json_response = response.json()

    download_url, file_size = find_bin_file(json_response, location, branch)
    return download_file(download_url, file_name, file_size)


def find_bin_file(json_response, location, branch):
    download_url = None
    file_size = 0
    for file_info in json_response:
        if file_info.get('type') == 'file' and file_info.get('path').endswith('.bin'):
            f_path = file_info.get('path')
            download_url = (
                f'https://huggingface.co/{location}/resolve/{branch}/{f_path}'
            )
            file_size = file_info.get('size')

    if download_url is None:
        raise Exception('No model found')

    return download_url, file_size
def download_file(download_url, file_name, file_size):
    local_filename = MODELS_CACHE_PATH / str(file_name + '.bin')

    first_byte = path.getsize(local_filename) if path.exists(local_filename) else 0

    if first_byte >= file_size:
        return local_filename

    print(f'Pulling {file_name}...')

    header = {'Range': f'bytes={first_byte}-'} if first_byte != 0 else {}

    response = requests.get(download_url, headers=header, stream=True)
    response.raise_for_status()

    total_size = int(response.headers.get('content-length', 0)) + first_byte

    with open(local_filename, 'ab' if first_byte else 'wb') as file, tqdm(
        total=total_size,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
        initial=first_byte,
        ascii=' ==',
        bar_format='Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}',
    ) as bar:
        for data in response.iter_content(chunk_size=1024):
            size = file.write(data)
            bar.update(size)

    return local_filename
def pull(model_name, *args, **kwargs):
    # check the remote model location and see if it needs to be downloaded
    url = model_name
    file_name = ""

    if not validators.url(url) and not url.startswith('huggingface.co'):
        try:
            url = get_url_from_directory(model_name)
        except Exception:
            # may not have been able to check the remote directory, return now
            return model_name

        if url is model_name:
            # this is not a model from our directory, so we can't check the remote
            maybe_existing_model_location = MODELS_CACHE_PATH / str(model_name + '.bin')
            if path.exists(model_name) or path.exists(maybe_existing_model_location):
                # a file on the filesystem is being specified
                return model_name
            raise Exception("unknown model")
        else:
            # this is a model from our directory, check the remote
            file_name = model_name

    if not (url.startswith('http://') or url.startswith('https://')):
        url = f'https://{url}'

    if not validators.url(url):
        if model_name in models(MODELS_CACHE_PATH):
            # the model is already downloaded, and specified by name
            return model_name
        raise Exception(f'Unknown model {model_name}')

    local_filename = download_from_repo(url, file_name)

    return local_filename
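
For completeness, a minimal sketch of the pull/list flow implemented above; "orca" is an illustrative assumption and must exist in the remote directory at MODELS_MANIFEST, be a Hugging Face repo URL, or already be a local .bin file.

from ollama import model

local_path = model.pull("orca")  # downloads into ~/.ollama/models when needed
print(local_path)

for name in model.models():  # base names of every cached *.bin file
    print(name)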

python/ollama/prompt.py Normal file

@@ -0,0 +1,12 @@
from os import path
from difflib import get_close_matches

from jinja2 import Environment, PackageLoader


def template(name, prompt):
    environment = Environment(loader=PackageLoader(__name__, 'templates'))
    best_templates = get_close_matches(
        path.basename(name), environment.list_templates(), n=1, cutoff=0
    )
    template = environment.get_template(best_templates.pop())
    return template.render(prompt=prompt)
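
A short sketch of how the template lookup above behaves: the closest-named template is chosen with get_close_matches, so a model file such as ~/.ollama/models/orca.bin would pick up an orca template if one is shipped in the templates directory (an assumption here).

from ollama import prompt

rendered = prompt.template("orca", "Why is the sky blue?")
print(rendered)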


@@ -0,0 +1,8 @@
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{{ prompt }}
### Response:


@@ -0,0 +1,3 @@
A helpful assistant who helps the user with any questions asked.
User: {{ prompt }}
Assistant:


@@ -0,0 +1,5 @@
### Instruction:
{{ prompt }}
### Response:


@@ -0,0 +1,5 @@
### Instruction:
{{ prompt }}
### Response:


@@ -0,0 +1,4 @@
Below is an instruction that describes a task. Write a response that appropriately completes the request. Be concise. Once the request is completed, include no other text.
### Instruction:
{{ prompt }}
### Response:


@@ -0,0 +1 @@
{{ prompt }}


@@ -0,0 +1,7 @@
### System:
You are an AI assistant that follows instruction extremely well. Help as much as you can.
### User:
{{ prompt }}
### Response:


@@ -0,0 +1,2 @@
### Human: {{ prompt }}
### Assistant:


@@ -0,0 +1,4 @@
{{ prompt }}


@@ -0,0 +1,2 @@
USER: {{ prompt }}
ASSISTANT:


@@ -0,0 +1,4 @@
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
USER: {{ prompt }}
ASSISTANT:


@@ -0,0 +1,5 @@
Below is an instruction that describes a task. Write a response that appropriately completes the request
### Instruction: {{ prompt }}
### Response:


@@ -0,0 +1,2 @@
{{ prompt }}
### Response: