Commit 6ee0586

Add remote server support for Ollama, Add template profile, Updated Llamafiles
2 parents 33d6e55 + fe34c1a commit 6ee0586

11 files changed (+1478 / -1087 lines)

docs/guides/profiles.mdx

Lines changed: 2 additions & 0 deletions
```diff
@@ -8,6 +8,8 @@ Profiles are Python files that configure Open Interpreter. A wide range of field
 
 You can access your Profiles by running `interpreter --profiles`. This will open the directory where all of your Profiles are stored.
 
+If you want to make your own profile, start with the [Template Profile](https://github.com/OpenInterpreter/open-interpreter/blob/main/interpreter/terminal_interface/profiles/defaults/template_profile.py).
+
 To apply a Profile to an Open Interpreter session, you can run `interpreter --profile <name>`
 
 # Example Profile
```
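
For context, a profile is just a Python file that mutates the shared `interpreter` object before the session starts. A stripped-down sketch of what a custom profile might contain (values are placeholders; the full reference is the `template_profile.py` added later in this commit):

```python
# my_profile.py - a minimal custom profile (model name and instructions are placeholders)
from interpreter import interpreter

interpreter.llm.model = "gpt-4o"            # any model your provider setup supports
interpreter.llm.context_window = 110000
interpreter.auto_run = False                # keep the confirmation prompt before executing code
interpreter.custom_instructions = "Prefer Python for scripting tasks."
```

Saved into the profiles directory, it would then be applied with `interpreter --profile my_profile.py`, per the docs above.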

interpreter/core/async_core.py

Lines changed: 7 additions & 3 deletions
```diff
@@ -47,7 +47,7 @@ def __init__(self, *args, **kwargs):
         self.output_queue = None
         self.unsent_messages = deque()
         self.id = os.getenv("INTERPRETER_ID", datetime.now().timestamp())
-        self.print = True  # Will print output
+        self.print = False  # Will print output
 
         self.require_acknowledge = (
             os.getenv("INTERPRETER_REQUIRE_ACKNOWLEDGE", "False").lower() == "true"
@@ -121,7 +121,7 @@ def respond(self, run_code=None):
             if self.stop_event.is_set():
                 return
 
-            if self.print:
+            if self.print or self.debug:
                 if "start" in chunk:
                     print("\n")
                 if chunk["type"] in ["code", "console"] and "format" in chunk:
@@ -133,7 +133,11 @@ def respond(self, run_code=None):
                 if "format" in chunk and "base64" in chunk["format"]:
                     print("\n[An image was produced]")
                 else:
-                    print(chunk.get("content", ""), end="", flush=True)
+                    content = chunk.get("content", "")
+                    content = (
+                        str(content).encode("ascii", "ignore").decode("ascii")
+                    )
+                    print(content, end="", flush=True)
 
             self.output_queue.sync_q.put(chunk)
```
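
The new `else` branch strips non-ASCII characters from streamed chunks before printing, so server-side logging can't crash on characters the terminal can't encode. A minimal standalone sketch of that sanitization step (the helper name is illustrative, not from the codebase):

```python
def print_ascii_safe(content, end="", flush=True):
    # encode(..., "ignore") silently drops any character outside ASCII
    safe = str(content).encode("ascii", "ignore").decode("ascii")
    print(safe, end=end, flush=flush)

print_ascii_safe("résumé ✓\n")  # prints "rsum \n" - accents and symbols are dropped
```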

interpreter/core/llm/llm.py

Lines changed: 14 additions & 14 deletions
```diff
@@ -310,34 +310,34 @@ def load(self):
 
         if self.model.startswith("ollama/"):
             model_name = self.model.replace("ollama/", "")
+            api_base = getattr(self, 'api_base', None) or "http://localhost:11434"
+            names = []
             try:
                 # List out all downloaded ollama models. Will fail if ollama isn't installed
-                result = subprocess.run(
-                    ["ollama", "list"], capture_output=True, text=True, check=True
-                )
+                response = requests.get(f"{api_base}/api/tags")
+                if response.ok:
+                    data = response.json()
+                    names = [
+                        model['name'].replace(":latest", "")
+                        for model in data['models']
+                        if 'name' in model and model['name']
+                    ]
+
             except Exception as e:
                 print(str(e))
                 self.interpreter.display_message(
                     f"> Ollama not found\n\nPlease download Ollama from [ollama.com](https://ollama.com/) to use `{model_name}`.\n"
                 )
                 exit()
 
-            lines = result.stdout.split("\n")
-            names = [
-                line.split()[0].replace(":latest", "")
-                for line in lines[1:]
-                if line.strip()
-            ]  # Extract names, trim out ":latest", skip header
-
+            # Download model if not already installed
             if model_name not in names:
                 self.interpreter.display_message(f"\nDownloading {model_name}...\n")
-                subprocess.run(["ollama", "pull", model_name], check=True)
+                requests.post(f"{api_base}/api/pull", json={"name": model_name})
 
             # Get context window if not set
             if self.context_window == None:
-                response = requests.post(
-                    "http://localhost:11434/api/show", json={"name": model_name}
-                )
+                response = requests.post(f"{api_base}/api/show", json={"name": model_name})
                 model_info = response.json().get("model_info", {})
                 context_length = None
                 for key in model_info:
```
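
This is the heart of the remote-server support: instead of shelling out to the `ollama` CLI, the loader talks to the Ollama HTTP API at a configurable `api_base`. A rough standalone sketch of the same flow against Ollama's documented endpoints (`/api/tags`, `/api/pull`, `/api/show`); error handling and pull-progress streaming are omitted, and the model name is just an example:

```python
import requests

api_base = "http://localhost:11434"  # could equally be a remote Ollama server
model_name = "llama3.1"              # example model

# List installed models (GET /api/tags)
tags = requests.get(f"{api_base}/api/tags").json()
names = [m["name"].replace(":latest", "") for m in tags.get("models", [])]

# Pull the model if it isn't installed yet (POST /api/pull)
if model_name not in names:
    requests.post(f"{api_base}/api/pull", json={"name": model_name})

# Inspect metadata, e.g. to discover the context length (POST /api/show)
model_info = requests.post(
    f"{api_base}/api/show", json={"name": model_name}
).json().get("model_info", {})
context_length = next(
    (v for k, v in model_info.items() if "context_length" in k), None
)
print(names, context_length)
```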

interpreter/core/llm/run_tool_calling_llm.py

Lines changed: 20 additions & 5 deletions
```diff
@@ -1,3 +1,5 @@
+import re
+
 from .utils.merge_deltas import merge_deltas
 from .utils.parse_partial_json import parse_partial_json
 
@@ -170,6 +172,7 @@ def run_tool_calling_llm(llm, request_params):
     function_call_detected = False
     accumulated_review = ""
     review_category = None
+    buffer = ""
 
     for chunk in llm.completions(**request_params):
         if "choices" not in chunk or len(chunk["choices"]) == 0:
@@ -222,11 +225,23 @@ def run_tool_calling_llm(llm, request_params):
                 ]:
                     delta["content"] = delta["content"].replace(tag, "")
 
-                yield {
-                    "type": "review",
-                    "format": review_category,
-                    "content": delta["content"],
-                }
+                if re.search("</.*>$", accumulated_review):
+                    buffer += delta["content"]
+                    continue
+                elif buffer:
+                    yield {
+                        "type": "review",
+                        "format": review_category,
+                        "content": buffer + delta["content"],
+                    }
+                    buffer = ""
+                else:
+                    yield {
+                        "type": "review",
+                        "format": review_category,
+                        "content": delta["content"],
+                    }
+                    buffer = ""
 
             else:
                 yield {"type": "message", "content": delta["content"]}
```

interpreter/core/respond.py

Lines changed: 17 additions & 17 deletions
```diff
@@ -98,8 +98,10 @@ def respond(interpreter):
                     """
                 )
                 break
-            # Provide extra information on how to change API keys, if we encounter that error
-            # (Many people writing GitHub issues were struggling with this)
+
+        # Provide extra information on how to change API keys, if we encounter that error
+        # (Many people writing GitHub issues were struggling with this)
+
         except Exception as e:
             error_message = str(e).lower()
             if (
@@ -115,36 +117,34 @@ def respond(interpreter):
                 interpreter.offline == False and "not have access" in str(e).lower()
             ):
                 """
-                Check for invalid model in error message and then fallback to groq, then OpenAI.
+                Check for invalid model in error message and then fallback.
                 """
                 if (
                     "invalid model" in error_message
                     or "model does not exist" in error_message
                 ):
-                    provider_message = f" The model '{interpreter.llm.model}' does not exist or is invalid. Please check the model name and try again.\n\nWould you like to try an alternative model instead? (y/n)\n\n "
+                    provider_message = f"\n\nThe model '{interpreter.llm.model}' does not exist or is invalid. Please check the model name and try again.\n\nWould you like to try Open Interpreter's hosted `i` model instead? (y/n)\n\n "
                 elif "groq" in error_message:
-                    provider_message = f" You do not have access to {interpreter.llm.model}. Please check with Groq for more details.\n\nWould you like to try an alternative model instead? (y/n)\n\n "
+                    provider_message = f"\n\nYou do not have access to {interpreter.llm.model}. Please check with Groq for more details.\n\nWould you like to try Open Interpreter's hosted `i` model instead? (y/n)\n\n "
                 else:
-                    provider_message = f" You do not have access to {interpreter.llm.model}. You will need to add a payment method and purchase credits for the OpenAI API billing page (different from ChatGPT) to use `GPT-4`.\n\nhttps://platform.openai.com/account/billing/overview\n\nWould you like to try GPT-3.5-TURBO instead? (y/n)\n\n "
+                    provider_message = f"\n\nYou do not have access to {interpreter.llm.model}. If you are using an OpenAI model, you may need to add a payment method and purchase credits for the OpenAI API billing page (this is different from ChatGPT Plus).\n\nhttps://platform.openai.com/account/billing/overview\n\nWould you like to try Open Interpreter's hosted `i` model instead? (y/n)\n\n"
 
-                response = input(provider_message)
+                print(provider_message)
+
+                response = input()
                 print("")  # <- Aesthetic choice
 
                 if response.strip().lower() == "y":
-                    interpreter.llm.model = "gpt-3.5-turbo-1106"
-                    interpreter.llm.context_window = 16000
-                    interpreter.llm.max_tokens = 4096
-                    interpreter.llm.supports_functions = True
+                    interpreter.llm.model = "i"
+                    display_markdown_message(f"> Model set to `i`")
                     display_markdown_message(
-                        f"> Model set to `{interpreter.llm.model}`"
+                        "***Note:*** *Conversations with this model will be used to train our open-source model.*\n"
                     )
+
                 else:
-                    raise Exception(
-                        "\n\nYou will need to add a payment method and purchase credits for the OpenAI API billing page (different from ChatGPT) to use GPT-4.\n\nhttps://platform.openai.com/account/billing/overview"
-                    )
+                    raise
             elif interpreter.offline and not interpreter.os:
-                print(traceback.format_exc())
-                raise Exception("Error occurred. " + str(e))
+                raise
             else:
                 raise
```

interpreter/terminal_interface/local_setup.py

Lines changed: 42 additions & 30 deletions
```diff
@@ -47,46 +47,58 @@ def download_model(models_dir, models, interpreter):
     try:
         model_list = [
             {
-                "name": "Llama-3-8B-Instruct",
-                "file_name": " Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile",
-                "size": 5.76,
-                "url": "https://huggingface.co/jartine/Meta-Llama-3-8B-Instruct-llamafile/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile?download=true",
+                "name": "Llama-3.1-8B-Instruct",
+                "file_name": "Meta-Llama-3-8B-Instruct.Q4_K_M.llamafile",
+                "size": 4.95,
+                "url": "https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.llamafile?download=true",
+            },
+            {
+                "name": "Gemma-2-9b",
+                "file_name": "gemma-2-9b-it.Q4_K_M.llamafile",
+                "size": 5.79,
+                "url": "https://huggingface.co/jartine/gemma-2-9b-it-llamafile/resolve/main/gemma-2-9b-it.Q4_K_M.llamafile?download=true",
             },
             {
                 "name": "Phi-3-mini",
-                "file_name": "Phi-3-mini-4k-instruct.Q5_K_M.llamafile",
-                "size": 2.84,
-                "url": "https://huggingface.co/jartine/Phi-3-mini-4k-instruct-llamafile/resolve/main/Phi-3-mini-4k-instruct.Q5_K_M.llamafile?download=true",
+                "file_name": "Phi-3-mini-4k-instruct.Q4_K_M.llamafile",
+                "size": 2.42,
+                "url": "https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/resolve/main/Phi-3-mini-4k-instruct.Q4_K_M.llamafile?download=true",
             },
             {
-                "name": "TinyLlama-1.1B",
-                "file_name": "TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile",
-                "size": 0.76,
-                "url": "https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q5_K_M.llamafile?download=true",
+                "name": "Moondream2 (vision)",
+                "file_name": "moondream2-q5km-050824.llamafile",
+                "size": 1.98,
+                "url": "https://huggingface.co/cjpais/moondream2-llamafile/resolve/main/moondream2-q5km-050824.llamafile?download=true",
             },
             {
-                "name": "Rocket-3B",
-                "file_name": "rocket-3b.Q5_K_M.llamafile",
-                "size": 1.89,
-                "url": "https://huggingface.co/jartine/rocket-3B-llamafile/resolve/main/rocket-3b.Q5_K_M.llamafile?download=true",
+                "name": "Mistral-7B-Instruct",
+                "file_name": "Mistral-7B-Instruct-v0.3.Q5_K_M.llamafile",
+                "size": 4.40,
+                "url": "https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile/resolve/main/Mistral-7B-Instruct-v0.3.Q4_K_M.llamafile?download=true",
             },
             {
-                "name": "Phi-2",
-                "file_name": "phi-2.Q5_K_M.llamafile",
-                "size": 1.96,
-                "url": "https://huggingface.co/jartine/phi-2-llamafile/resolve/main/phi-2.Q5_K_M.llamafile?download=true",
+                "name": "Gemma-2-27b",
+                "file_name": "gemma-2-27b-it.Q4_K_M.llamafile",
+                "size": 16.7,
+                "url": "https://huggingface.co/jartine/gemma-2-27b-it-llamafile/resolve/main/gemma-2-27b-it.Q4_K_M.llamafile?download=true",
             },
             {
-                "name": "LLaVA 1.5",
-                "file_name": "llava-v1.5-7b-q4.llamafile",
-                "size": 3.97,
-                "url": "https://huggingface.co/jartine/llava-v1.5-7B-GGUF/resolve/main/llava-v1.5-7b-q4.llamafile?download=true",
+                "name": "TinyLlama-1.1B",
+                "file_name": "TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile",
+                "size": 0.70,
+                "url": "https://huggingface.co/Mozilla/TinyLlama-1.1B-Chat-v1.0-llamafile/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile?download=true",
             },
             {
-                "name": "Mistral-7B-Instruct",
-                "file_name": "mistral-7b-instruct-v0.2.Q5_K_M.llamafile",
-                "size": 5.15,
-                "url": "https://huggingface.co/jartine/Mistral-7B-Instruct-v0.2-llamafile/resolve/main/mistral-7b-instruct-v0.2.Q5_K_M.llamafile?download=true",
+                "name": "Rocket-3B",
+                "file_name": "rocket-3b.Q4_K_M.llamafile",
+                "size": 1.74,
+                "url": "https://huggingface.co/Mozilla/rocket-3B-llamafile/resolve/main/rocket-3b.Q4_K_M.llamafile?download=true",
+            },
+            {
+                "name": "LLaVA 1.5 (vision)",
+                "file_name": "llava-v1.5-7b-q4.llamafile",
+                "size": 4.29,
+                "url": "https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true",
             },
             {
                 "name": "WizardCoder-Python-13B",
@@ -96,9 +108,9 @@ def download_model(models_dir, models, interpreter):
             },
             {
                 "name": "WizardCoder-Python-34B",
-                "file_name": "wizardcoder-python-34b-v1.0.Q5_K_M.llamafile",
-                "size": 22.23,
-                "url": "https://huggingface.co/jartine/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q5_K_M.llamafile?download=true",
+                "file_name": "wizardcoder-python-34b-v1.0.Q4_K_M.llamafile",
+                "size": 20.22,
+                "url": "https://huggingface.co/Mozilla/WizardCoder-Python-34B-V1.0-llamafile/resolve/main/wizardcoder-python-34b-v1.0.Q4_K_M.llamafile?download=true",
             },
             {
                 "name": "Mixtral-8x7B-Instruct",
```

interpreter/terminal_interface/profiles/defaults/template_profile.py

Lines changed: 44 additions & 0 deletions
```diff
@@ -0,0 +1,44 @@
+"""
+This is the template Open Interpreter profile.
+
+A starting point for creating a new profile.
+
+Learn about all the available settings - https://docs.openinterpreter.com/settings/all-settings
+
+"""
+
+# Import the interpreter
+from interpreter import interpreter
+
+# You can import other libraries too
+from datetime import date
+
+# You can set variables
+today = date.today()
+
+# LLM Settings
+interpreter.llm.model = "groq/llama-3.1-70b-versatile"
+interpreter.llm.context_window = 110000
+interpreter.llm.max_tokens = 4096
+interpreter.llm.api_base = "https://api.example.com"
+interpreter.llm.api_key = "your_api_key_here"
+interpreter.llm.supports_functions = False
+interpreter.llm.supports_vision = False
+
+
+# Interpreter Settings
+interpreter.offline = False
+interpreter.loop = True
+interpreter.auto_run = False
+
+# Toggle OS Mode - https://docs.openinterpreter.com/guides/os-mode
+interpreter.os = False
+
+# Import Computer API - https://docs.openinterpreter.com/code-execution/computer-api
+interpreter.computer.import_computer_api = True
+
+
+# Set Custom Instructions to improve your Interpreter's performance at a given task
+interpreter.custom_instructions = f"""
+Today's date is {today}.
+"""
```

interpreter/terminal_interface/start_terminal_interface.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -1,4 +1,5 @@
 import argparse
+import os
 import sys
 import time
 
@@ -289,13 +290,24 @@ def start_terminal_interface(interpreter):
         },
     ]
 
+    # i shortcut
     if len(sys.argv) > 1 and not sys.argv[1].startswith("-"):
         message = " ".join(sys.argv[1:])
         interpreter.messages.append(
             {"role": "user", "type": "message", "content": "I " + message}
         )
         sys.argv = sys.argv[:1]
 
+        interpreter.custom_instructions = "UPDATED INSTRUCTIONS: You are in ULTRA FAST, ULTRA CERTAIN mode. Do not ask the user any questions or run code to gather information. Go as quickly as you can. Run code quickly. Do not plan out loud, simply start doing the best thing. The user expects speed. Trust that the user knows best. Just interpret their ambiguous command as quickly and certainly as possible and try to fulfill it IN ONE COMMAND, assuming they have the right information. If they tell you to do something, just do it quickly in one command, DO NOT try to get more information (for example by running `cat` to get a file's information - this is probably unnecessary!). DIRECTLY DO THINGS AS FAST AS POSSIBLE."
+
+        files_in_directory = os.listdir()[:100]
+        interpreter.custom_instructions += (
+            "\nThe files in CWD, which THE USER MAY BE REFERRING TO, are: "
+            + ", ".join(files_in_directory)
+        )
+
+        # interpreter.debug = True
+
     # Check for deprecated flags before parsing arguments
     deprecated_flags = {
         "--debug_mode": "--verbose",
```
