mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-15 02:07:03 +00:00
157
python/ollama/model.py
Normal file
157
python/ollama/model.py
Normal file
@@ -0,0 +1,157 @@
|
||||
import requests
|
||||
import validators
|
||||
from pathlib import Path
|
||||
from os import path, walk
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
MODELS_MANIFEST = 'https://ollama.ai/api/models'
|
||||
MODELS_CACHE_PATH = Path.home() / '.ollama' / 'models'
|
||||
|
||||
|
||||
def models(*args, **kwargs):
    """Yield the base names of all locally cached models (files ending in .bin)."""
    for _, _, filenames in walk(MODELS_CACHE_PATH):
        for filename in filenames:
            candidate = Path(filename)
            # Path.suffix mirrors os.path.splitext: a bare dotfile like
            # '.bin' has no suffix and is therefore not treated as a model
            if candidate.suffix == '.bin':
                yield candidate.stem
|
||||
|
||||
|
||||
def search_directory(query):
    """Return names of models in the remote directory matching *query*.

    Matching is a case-insensitive substring test; an empty or None query
    returns every named model in the directory.

    Raises requests.HTTPError if the manifest request fails.
    """
    response = requests.get(MODELS_MANIFEST)
    response.raise_for_status()
    directory = response.json()
    needle = query.lower() if query else ''
    # skip entries without a usable name so None never leaks into the result
    # (the original appended model_info.get('name') unconditionally, which
    # could yield None for nameless entries when no query was given)
    return [
        model_info['name']
        for model_info in directory
        if model_info.get('name') and needle in model_info['name'].lower()
    ]
|
||||
|
||||
|
||||
# get the url of the model from our curated directory
def get_url_from_directory(model):
    """Resolve *model* to its download URL via the curated directory.

    Returns the matching entry's 'url'.  If no entry matches, returns the
    *model* argument itself unchanged — callers detect this sentinel with
    an identity check (``url is model``).

    Raises requests.HTTPError if the manifest request fails.
    """
    response = requests.get(MODELS_MANIFEST)
    response.raise_for_status()
    directory = response.json()
    for model_info in directory:
        # default to '' so a directory entry with a missing 'name' key
        # doesn't raise AttributeError on None.lower()
        if model_info.get('name', '').lower() == model.lower():
            return model_info.get('url')
    return model
|
||||
|
||||
|
||||
def download_from_repo(url, file_name):
    """Download the .bin weight file referenced by a huggingface-style repo URL.

    url: repo URL, optionally containing '/tree/<branch>'; the branch
        defaults to 'main' when absent.
    file_name: local name (without extension); when empty, it is derived
        from the basename of the repo path, lowercased.

    Returns the local path of the downloaded file.
    Raises requests.HTTPError on a failed API request, or Exception if the
    repo tree contains no .bin file.
    """
    parts = urlsplit(url)
    # split on the FIRST '/tree/' only — a second occurrence in the path
    # would otherwise make the two-name unpacking below raise ValueError
    path_parts = parts.path.split('/tree/', 1)

    if len(path_parts) == 1:
        location = path_parts[0]
        branch = 'main'
    else:
        location, branch = path_parts

    location = location.strip('/')
    if file_name == '':
        file_name = path.basename(location).lower()

    # list the repo tree via the hub API to locate the .bin file
    download_url = urlunsplit(
        (
            'https',
            parts.netloc,
            f'/api/models/{location}/tree/{branch}',
            parts.query,
            parts.fragment,
        )
    )
    response = requests.get(download_url)
    response.raise_for_status()
    json_response = response.json()

    download_url, file_size = find_bin_file(json_response, location, branch)
    return download_file(download_url, file_name, file_size)
|
||||
|
||||
|
||||
def find_bin_file(json_response, location, branch):
    """Locate the .bin weight file in a hub tree listing.

    json_response: list of file-info dicts (keys 'type', 'path', 'size').
    location: '<org>/<repo>' path of the repository.
    branch: branch name to resolve against.

    Returns (download_url, file_size) for the LAST matching .bin entry
    (preserving the original scan-to-end behavior).
    Raises Exception('No model found') when no .bin file is present.
    """
    download_url = None
    file_size = 0
    for file_info in json_response:
        # default 'path' to '' so entries missing the key don't crash on
        # None.endswith(...)
        if file_info.get('type') == 'file' and file_info.get('path', '').endswith('.bin'):
            f_path = file_info.get('path')
            download_url = (
                f'https://huggingface.co/{location}/resolve/{branch}/{f_path}'
            )
            # default size to 0 (not None) so the resume-size comparison in
            # download_file stays a valid int comparison
            file_size = file_info.get('size', 0)

    if download_url is None:
        raise Exception('No model found')

    return download_url, file_size
|
||||
|
||||
|
||||
def download_file(download_url, file_name, file_size):
    """Stream the model weights into the local cache, resuming partial files.

    Returns the path of the cached .bin file; if the cached copy already
    covers *file_size* bytes, no network request is made.
    """
    local_filename = MODELS_CACHE_PATH / str(file_name + '.bin')

    # resume from however many bytes are already on disk
    already_have = path.getsize(local_filename) if path.exists(local_filename) else 0

    if already_have >= file_size:
        return local_filename

    print(f'Pulling {file_name}...')

    # request only the missing tail when resuming
    headers = {'Range': f'bytes={already_have}-'} if already_have != 0 else {}

    response = requests.get(download_url, headers=headers, stream=True)
    response.raise_for_status()

    expected_total = int(response.headers.get('content-length', 0)) + already_have

    mode = 'ab' if already_have else 'wb'
    with open(local_filename, mode) as out, tqdm(
        total=expected_total,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
        initial=already_have,
        ascii=' ==',
        bar_format='Downloading [{bar}] {percentage:3.2f}% {rate_fmt}{postfix}',
    ) as bar:
        for chunk in response.iter_content(chunk_size=1024):
            written = out.write(chunk)
            bar.update(written)

    return local_filename
|
||||
|
||||
|
||||
def pull(model_name, *args, **kwargs):
    """Ensure *model_name* is available locally, downloading it if needed.

    Accepts a curated-directory model name, a direct or huggingface.co URL,
    or a path to an existing local file.  Returns the local filename, or the
    original *model_name* when it already refers to a usable local model.

    Raises Exception for names that cannot be resolved to a model.
    """
    # check the remote model location and see if it needs to be downloaded
    url = model_name
    file_name = ""
    if not validators.url(url) and not url.startswith('huggingface.co'):
        try:
            url = get_url_from_directory(model_name)
        except Exception:
            # may not have been able to check remote directory, return now
            return model_name

        # NOTE: get_url_from_directory returns the *same object* when the
        # model is not in the directory, so identity ('is') — not equality —
        # is the intended sentinel test here
        if url is model_name:
            # this is not a model from our directory, so can't check remote
            maybe_existing_model_location = MODELS_CACHE_PATH / str(model_name + '.bin')
            if path.exists(model_name) or path.exists(maybe_existing_model_location):
                # a file on the filesystem is being specified
                return model_name
            raise Exception("unknown model")
        else:
            # this is a model from our directory, check remote
            file_name = model_name

    if not (url.startswith('http://') or url.startswith('https://')):
        url = f'https://{url}'

    if not validators.url(url):
        if model_name in models(MODELS_CACHE_PATH):
            # the model is already downloaded, and specified by name
            return model_name
        raise Exception(f'Unknown model {model_name}')

    local_filename = download_from_repo(url, file_name)

    return local_filename
|
||||
Reference in New Issue
Block a user