diff --git a/docs/GEMMA3.md b/docs/GEMMA3.md
index 533c68ae09..da4370981d 100644
--- a/docs/GEMMA3.md
+++ b/docs/GEMMA3.md
@@ -12,7 +12,7 @@ The Python and HTTP APIs support sending images as:
 The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
 
 ## HTTP server
-You can find this example [here](../examples/server/gemma3.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -97,7 +97,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/gemma3/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Gemma 3 model with a dummy image.
@@ -142,7 +142,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/gemma3.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/IDEFICS2.md b/docs/IDEFICS2.md
index 4b93dc092b..8d38f2ed92 100644
--- a/docs/IDEFICS2.md
+++ b/docs/IDEFICS2.md
@@ -137,7 +137,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/phi3v.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/LLAMA4.md b/docs/LLAMA4.md
index 68422dcb10..99447b60ca 100644
--- a/docs/LLAMA4.md
+++ b/docs/LLAMA4.md
@@ -24,7 +24,7 @@ The Python and HTTP APIs support sending images as:
 The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
 
 ## HTTP server
-You can find this example [here](../examples/server/llama4.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -116,7 +116,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/llama4/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Llama 4 model with a dummy image.
@@ -162,7 +162,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/llama4.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/LLaVA.md b/docs/LLaVA.md
index 2eda71da09..d3b90045a9 100644
--- a/docs/LLaVA.md
+++ b/docs/LLaVA.md
@@ -24,7 +24,7 @@ The Rust API takes an image from the [image](https://docs.rs/image/latest/image/
 > It should be added to messages manually, and is of the format `<image>`.
 
 ## HTTP server
-You can find this example [here](../examples/server/llava_next.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -101,7 +101,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/llava_next/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the LLaVA and LLaVANext models with a dummy image.
@@ -146,7 +146,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/llava_next.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/MISTRAL3.md b/docs/MISTRAL3.md
index b2d863100a..4f401b0789 100644
--- a/docs/MISTRAL3.md
+++ b/docs/MISTRAL3.md
@@ -22,7 +22,7 @@ tool calling with Mistral Small 3.1, and you can use it by specifying the `jinja
 
 ## HTTP server
-You can find this example [here](../examples/server/mistral3.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -107,7 +107,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/mistral3/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Mistral 3 model with a dummy image.
@@ -152,7 +152,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/mistral3.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/PHI3V.md b/docs/PHI3V.md
index f96ea9af09..7119d7e541 100644
--- a/docs/PHI3V.md
+++ b/docs/PHI3V.md
@@ -19,7 +19,7 @@ The Rust API takes an image from the [image](https://docs.rs/image/latest/image/
 > They should be added to messages manually, and are of the format `<|image_{N}|>` where N starts from 1.
 
 ## HTTP server
-You can find this example [here](../examples/server/phi3v.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -96,7 +96,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/phi3v/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Phi 3 Vision model with a dummy image.
@@ -140,7 +140,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/phi3v.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/PHI4MM.md b/docs/PHI4MM.md
index 6609ba567f..8247d24fea 100644
--- a/docs/PHI4MM.md
+++ b/docs/PHI4MM.md
@@ -19,7 +19,7 @@ The Rust API takes an image from the [image](https://docs.rs/image/latest/image/
 > They should be added to messages manually, and are of the format `<|image_{N}|>` where N starts from 1.
 
 ## HTTP server
-You can find this example [here](../examples/server/phi3v.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -94,7 +94,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/phi3v/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Phi 4 Multimodal model with a dummy image.
@@ -139,7 +139,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/phi3v.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/QWEN2VL.md b/docs/QWEN2VL.md
index 5df9669e5f..d39b7fe5da 100644
--- a/docs/QWEN2VL.md
+++ b/docs/QWEN2VL.md
@@ -58,7 +58,7 @@ camellias are also known for their resilience and ability to thrive in a variety
 ```
 
 ## HTTP server
-You can find this example [here](../examples/server/qwen2vl.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -137,7 +137,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/qwen2vl/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 ```rust
 use anyhow::Result;
@@ -184,7 +184,7 @@
 ---
 
 ## Python
-You can find this example [here](../examples/python/qwen2vl.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/VISION_MODELS.md b/docs/VISION_MODELS.md
index 67b23ef7d8..42dc71e912 100644
--- a/docs/VISION_MODELS.md
+++ b/docs/VISION_MODELS.md
@@ -13,4 +13,4 @@ Please see docs for the following model types:
 - Phi 4 Multimodal: [PHI4MM.md](PHI4MM.md)
 
 > Note for the Python and HTTP APIs:
-> We follow the OpenAI specification for structuring the image messages and allow both base64 encoded images as well as a URL/path to the image. There are many examples of this, see [this Python example](../examples/python/phi3v.py).
\ No newline at end of file
+> We follow the OpenAI specification for structuring the image messages and allow both base64 encoded images as well as a URL/path to the image. There are many examples of this, see [this Python example](../examples/python/vision_chat.py).
diff --git a/docs/VLLAMA.md b/docs/VLLAMA.md
index 32f080a66a..eb66a81987 100644
--- a/docs/VLLAMA.md
+++ b/docs/VLLAMA.md
@@ -65,7 +65,7 @@ The image appears to be of Mount Washington, which is the highest peak in the No
 ```
 
 ## HTTP server
-You can find this example [here](../examples/server/llama_vision.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -152,7 +152,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/llama_vision/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 ```rust
 use anyhow::Result;
@@ -198,7 +198,7 @@
 ---
 
 ## Python
-You can find this example [here](../examples/python/llama_vision.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
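All of the per-model example scripts referenced above are deleted below in favor of the parameterized `vision_chat` examples. For orientation, here is a minimal sketch that pairs each model id with the `VisionArchitecture` name its deleted script used and drives the consolidated Python example; the `VISION_EXAMPLES` table and `run_vision_chat` helper are illustrative only, not part of this patch.

```python
import subprocess

# Model id -> VisionArchitecture pairs, taken verbatim from the per-model
# scripts deleted below. This table and helper are illustrative only.
VISION_EXAMPLES = {
    "google/gemma-3-12b-it": "Gemma3",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "Llama4",
    "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k": "VLlama",
    "llava-hf/llava-v1.6-mistral-7b-hf": "LLaVANext",
    "microsoft/Phi-3.5-vision-instruct": "Phi3V",
    "microsoft/Phi-4-multimodal-instruct": "Phi4MM",
    "Qwen/Qwen2-VL-2B-Instruct": "Qwen2VL",
    "HuggingFaceTB/SmolVLM-Instruct": "Idefics3",
}


def run_vision_chat(model_id: str) -> None:
    """Run the consolidated example with the architecture matching model_id."""
    subprocess.run(
        [
            "python",
            "examples/python/vision_chat.py",
            "--model-id", model_id,
            "--arch", VISION_EXAMPLES[model_id],
        ],
        check=True,
    )


if __name__ == "__main__":
    run_vision_chat("microsoft/Phi-3.5-vision-instruct")
```

Note that the consolidated Rust example later in this patch takes only `--model-id`, so the architecture there appears to be inferred by `VisionModelBuilder` rather than passed explicitly.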
diff --git a/examples/python/deepseekr1.py b/examples/python/deepseekr1.py deleted file mode 100644 index 6cf6747d36..0000000000 --- a/examples/python/deepseekr1.py +++ /dev/null @@ -1,23 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, Architecture - -runner = Runner( - which=Which.Plain( - model_id="deepseek-ai/DeepSeek-R1", - arch=Architecture.DeepseekV3, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="mistral", - messages=[ - {"role": "user", "content": "Tell me a story about the Rust type system."} - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/deepseekv2.py b/examples/python/deepseekv2.py deleted file mode 100644 index d3790253c1..0000000000 --- a/examples/python/deepseekv2.py +++ /dev/null @@ -1,23 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, Architecture - -runner = Runner( - which=Which.Plain( - model_id="deepseek-ai/DeepSeek-V2-Lite", - arch=Architecture.DeepseekV2, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="mistral", - messages=[ - {"role": "user", "content": "Tell me a story about the Rust type system."} - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/gemma3.py b/examples/python/gemma3.py deleted file mode 100644 index c52acaa83f..0000000000 --- a/examples/python/gemma3.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="google/gemma-3-12b-it", - arch=VisionArchitecture.Gemma3, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/llama4.py b/examples/python/llama4.py deleted file mode 100644 index ab1904f283..0000000000 --- a/examples/python/llama4.py +++ /dev/null @@ -1,38 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct", - arch=VisionArchitecture.Llama4, - ), - in_situ_quant="Q4K", -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/llama_vision.py b/examples/python/llama_vision.py deleted file mode 100644 index b7685d2694..0000000000 --- a/examples/python/llama_vision.py +++ /dev/null @@ -1,40 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -# 
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct" -MODEL_ID = "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k" - -runner = Runner( - which=Which.VisionPlain( - model_id=MODEL_ID, - arch=VisionArchitecture.VLlama, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="llama-vision", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image|>What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/llava_next.py b/examples/python/llava_next.py deleted file mode 100644 index d858259363..0000000000 --- a/examples/python/llava_next.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="llava-hf/llava-v1.6-mistral-7b-hf", - arch=VisionArchitecture.LLaVANext, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="llava_next", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/mistral3.py b/examples/python/mistral3.py deleted file mode 100644 index 3d3fa64658..0000000000 --- a/examples/python/mistral3.py +++ /dev/null @@ -1,38 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503", - arch=VisionArchitecture.Gemma3, - ), - in_situ_quant="Q4K", -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/phi4mm.py b/examples/python/phi4mm.py deleted file mode 100644 index afc589e3f8..0000000000 --- a/examples/python/phi4mm.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="microsoft/Phi-4-multimodal-instruct", - arch=VisionArchitecture.Phi4MM, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="phi4mm", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image_1|>\nWhat is shown in this image? 
Write a detailed response analyzing the scene.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/plain.py b/examples/python/plain.py index accd56eb8e..b630369613 100644 --- a/examples/python/plain.py +++ b/examples/python/plain.py @@ -1,15 +1,21 @@ +import argparse from mistralrs import Runner, Which, ChatCompletionRequest, Architecture +parser = argparse.ArgumentParser(description="Text model chat example") +parser.add_argument("--model-id", required=True, help="HuggingFace model id") +parser.add_argument("--arch", required=True, help="Architecture name") +args = parser.parse_args() + runner = Runner( which=Which.Plain( - model_id="mistralai/Mistral-7B-Instruct-v0.1", - arch=Architecture.Mistral, + model_id=args.model_id, + arch=Architecture[args.arch], ), ) res = runner.send_chat_completion_request( ChatCompletionRequest( - model="mistral", + model=args.arch.lower(), messages=[ {"role": "user", "content": "Tell me a story about the Rust type system."} ], diff --git a/examples/python/qwen2vl.py b/examples/python/qwen2vl.py deleted file mode 100644 index 27f196a50d..0000000000 --- a/examples/python/qwen2vl.py +++ /dev/null @@ -1,39 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" - -runner = Runner( - which=Which.VisionPlain( - model_id=MODEL_ID, - arch=VisionArchitecture.Qwen2VL, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="qwen2vl", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.garden-treasures.com/cdn/shop/products/IMG_6245.jpg" - }, - }, - { - "type": "text", - "text": "What type of flower is this? 
Give some fun facts.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/smolvlm.py b/examples/python/smolvlm.py deleted file mode 100644 index 9ac878c8f9..0000000000 --- a/examples/python/smolvlm.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="HuggingFaceTB/SmolVLM-Instruct", - arch=VisionArchitecture.Idefics3, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="idefics3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image?", - }, - ], - }, - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/phi3v.py b/examples/python/vision_chat.py similarity index 57% rename from examples/python/phi3v.py rename to examples/python/vision_chat.py index fc9a332783..e7db5e3cf8 100644 --- a/examples/python/phi3v.py +++ b/examples/python/vision_chat.py @@ -1,24 +1,32 @@ +import argparse from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture +parser = argparse.ArgumentParser(description="Vision model chat example") +parser.add_argument("--model-id", required=True, help="HuggingFace model id") +parser.add_argument("--arch", required=True, help="VisionArchitecture name") +parser.add_argument( + "--image-url", + default="https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", +) +args = parser.parse_args() + runner = Runner( which=Which.VisionPlain( - model_id="microsoft/Phi-3.5-vision-instruct", - arch=VisionArchitecture.Phi3V, + model_id=args.model_id, + arch=VisionArchitecture[args.arch], ), ) res = runner.send_chat_completion_request( ChatCompletionRequest( - model="phi3v", + model=args.arch.lower(), messages=[ { "role": "user", "content": [ { "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, + "image_url": {"url": args.image_url}, }, { "type": "text", diff --git a/examples/server/gemma3.py b/examples/server/gemma3.py deleted file mode 100644 index b09ac850e7..0000000000 --- a/examples/server/gemma3.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llama4.py b/examples/server/llama4.py deleted file mode 100644 index 648d62b315..0000000000 --- a/examples/server/llama4.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llama4", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://upload.wikimedia.org/wikipedia/commons/f/fd/Pink_flower.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llama_vision.py b/examples/server/llama_vision.py deleted file mode 100644 index ef5dca6835..0000000000 --- a/examples/server/llama_vision.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llama-vision", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image|>What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - # max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llava.py b/examples/server/llava.py deleted file mode 100644 index b8f66df8aa..0000000000 --- a/examples/server/llava.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llava", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llava_next.py b/examples/server/llava_next.py deleted file mode 100644 index cb777f8819..0000000000 --- a/examples/server/llava_next.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llava_next", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/mistral3.py b/examples/server/mistral3.py deleted file mode 100644 index 3dc3bcfe3a..0000000000 --- a/examples/server/mistral3.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="mistral3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://upload.wikimedia.org/wikipedia/commons/f/fd/Pink_flower.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/phi4mm.py b/examples/server/phi4mm.py deleted file mode 100644 index 03eb0ab698..0000000000 --- a/examples/server/phi4mm.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="phi4mm", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image_1|>\nWhat is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/qwen2vl.py b/examples/server/qwen2vl.py deleted file mode 100644 index fc6e1d1b83..0000000000 --- a/examples/server/qwen2vl.py +++ /dev/null @@ -1,30 +0,0 @@ -from openai import OpenAI - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -completion = client.chat.completions.create( - model="qwen2vl", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.garden-treasures.com/cdn/shop/products/IMG_6245.jpg" - }, - }, - { - "type": "text", - "text": "What type of flower is this? 
Give some fun facts.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/phi3v.py b/examples/server/vision_chat.py similarity index 70% rename from examples/server/phi3v.py rename to examples/server/vision_chat.py index 623a2564f8..6bfac36701 100644 --- a/examples/server/phi3v.py +++ b/examples/server/vision_chat.py @@ -1,3 +1,4 @@ +import argparse from openai import OpenAI import httpx import textwrap @@ -28,25 +29,26 @@ def log_response(response: httpx.Response): print(f" {key}: {value}") +parser = argparse.ArgumentParser(description="Send a vision chat request") +parser.add_argument("--model", required=True, help="model name for the API") +parser.add_argument( + "--image-url", + default="https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", +) +args = parser.parse_args() + client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) +# Uncomment to log HTTP requests +# client._client = httpx.Client(event_hooks={"request": [print], "response": [log_response]}) completion = client.chat.completions.create( - model="phi3v", + model=args.model, messages=[ { "role": "user", "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, + {"type": "image_url", "image_url": {"url": args.image_url}}, { "type": "text", "text": "<|image_1|>\nWhat is shown in this image? Write a detailed response analyzing the scene.", @@ -59,5 +61,4 @@ def log_response(response: httpx.Response): top_p=0.1, temperature=0, ) -resp = completion.choices[0].message.content -print(resp) +print(completion.choices[0].message.content) diff --git a/mistralrs/examples/deepseekr1/main.rs b/mistralrs/examples/deepseekr1/main.rs deleted file mode 100644 index 27d69269d9..0000000000 --- a/mistralrs/examples/deepseekr1/main.rs +++ /dev/null @@ -1,34 +0,0 @@ -use anyhow::Result; -use mistralrs::{ - IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder, -}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = TextModelBuilder::new("deepseek-ai/DeepSeek-R1") - .with_isq(IsqType::Q4K) - .with_logging() - .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? - .build() - .await?; - - let messages = TextMessages::new() - .add_message( - TextMessageRole::System, - "You are an AI agent with a specialty in programming.", - ) - .add_message( - TextMessageRole::User, - "Hello! How are you? 
Please write generic binary search function in Rust.", - ); - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/deepseekv2/main.rs b/mistralrs/examples/deepseekv2/main.rs deleted file mode 100644 index 7f71f08221..0000000000 --- a/mistralrs/examples/deepseekv2/main.rs +++ /dev/null @@ -1,34 +0,0 @@ -use anyhow::Result; -use mistralrs::{ - IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder, -}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = TextModelBuilder::new("deepseek-ai/DeepSeek-V2-Lite") - .with_isq(IsqType::Q4K) - .with_logging() - .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? - .build() - .await?; - - let messages = TextMessages::new() - .add_message( - TextMessageRole::System, - "You are an AI agent with a specialty in programming.", - ) - .add_message( - TextMessageRole::User, - "Hello! How are you? Please write generic binary search function in Rust.", - ); - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/gemma2/main.rs b/mistralrs/examples/gemma2/main.rs deleted file mode 100644 index 3f21be0be2..0000000000 --- a/mistralrs/examples/gemma2/main.rs +++ /dev/null @@ -1,25 +0,0 @@ -use anyhow::Result; -use mistralrs::{ - IsqType, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextModelBuilder, -}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = TextModelBuilder::new("google/gemma-2-9b-it") - .with_isq(IsqType::Q4K) - .with_logging() - .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? 
- .build() - .await?; - - let request = RequestBuilder::new().add_message( - TextMessageRole::User, - "Please write a mathematical equation where a few numbers are added.", - ); - - let response = model.send_chat_request(request).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - - Ok(()) -} diff --git a/mistralrs/examples/gemma3/main.rs b/mistralrs/examples/gemma3/main.rs deleted file mode 100644 index b94484e094..0000000000 --- a/mistralrs/examples/gemma3/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("google/gemma-3-12b-it") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llama4/main.rs b/mistralrs/examples/llama4/main.rs deleted file mode 100644 index 0efb6a84da..0000000000 --- a/mistralrs/examples/llama4/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("meta-llama/Llama-4-Scout-17B-16E-Instruct") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llama_vision/main.rs b/mistralrs/examples/llama_vision/main.rs deleted file mode 100644 index 48500b06d1..0000000000 --- a/mistralrs/examples/llama_vision/main.rs +++ /dev/null @@ -1,39 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -// const MODEL_ID: &str = "meta-llama/Llama-3.2-11B-Vision-Instruct"; -const MODEL_ID: &str = "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k"; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new(MODEL_ID) - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image 
= image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llava_next/main.rs b/mistralrs/examples/llava_next/main.rs deleted file mode 100644 index 034f1c5b08..0000000000 --- a/mistralrs/examples/llava_next/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("llava-hf/llava-v1.6-mistral-7b-hf") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/mistral3/main.rs b/mistralrs/examples/mistral3/main.rs deleted file mode 100644 index 09dc06b8f5..0000000000 --- a/mistralrs/examples/mistral3/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("mistralai/Mistral-Small-3.1-24B-Instruct-2503") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/phi3v/main.rs b/mistralrs/examples/phi3v/main.rs deleted file mode 100644 index 507477f736..0000000000 --- a/mistralrs/examples/phi3v/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("microsoft/Phi-3.5-vision-instruct") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => 
http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/phi4mm/main.rs b/mistralrs/examples/phi4mm/main.rs deleted file mode 100644 index 7e4e821f35..0000000000 --- a/mistralrs/examples/phi4mm/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("microsoft/Phi-4-multimodal-instruct") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/qwen2vl/main.rs b/mistralrs/examples/qwen2vl/main.rs deleted file mode 100644 index 8d833ccb41..0000000000 --- a/mistralrs/examples/qwen2vl/main.rs +++ /dev/null @@ -1,38 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -const MODEL_ID: &str = "Qwen/Qwen2-VL-2B-Instruct"; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new(MODEL_ID) - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.garden-treasures.com/cdn/shop/products/IMG_6245.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What type of flower is this? 
Give some fun facts.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/simple/main.rs b/mistralrs/examples/simple/main.rs index 3765ab54ba..175fc7fc9d 100644 --- a/mistralrs/examples/simple/main.rs +++ b/mistralrs/examples/simple/main.rs @@ -1,12 +1,20 @@ use anyhow::Result; +use clap::Parser; use mistralrs::{ IsqType, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages, TextModelBuilder, }; +#[derive(Parser)] +struct Args { + #[clap(long, default_value = "microsoft/Phi-3.5-mini-instruct")] + model_id: String, +} + #[tokio::main] async fn main() -> Result<()> { - let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct") + let args = Args::parse(); + let model = TextModelBuilder::new(&args.model_id) .with_isq(IsqType::Q8_0) .with_logging() .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? @@ -31,7 +39,6 @@ async fn main() -> Result<()> { response.usage.avg_compl_tok_per_sec ); - // Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability. let request = RequestBuilder::new().return_logprobs(true).add_message( TextMessageRole::User, "Please write a mathematical equation where a few numbers are added.", diff --git a/mistralrs/examples/smolvlm/main.rs b/mistralrs/examples/smolvlm/main.rs deleted file mode 100644 index 145b737958..0000000000 --- a/mistralrs/examples/smolvlm/main.rs +++ /dev/null @@ -1,38 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -const MODEL_ID: &str = "HuggingFaceTB/SmolVLM-Instruct"; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new(MODEL_ID) - .with_isq(IsqType::Q8_0) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llava/main.rs b/mistralrs/examples/vision_chat/main.rs similarity index 63% rename from mistralrs/examples/llava/main.rs rename to mistralrs/examples/vision_chat/main.rs index b2668a3703..b71f5d0972 100644 --- a/mistralrs/examples/llava/main.rs +++ b/mistralrs/examples/vision_chat/main.rs @@ -1,21 +1,28 @@ use anyhow::Result; +use clap::Parser; use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; +#[derive(Parser)] +struct Args { + #[clap(long)] + model_id: String, + #[clap( + long, + default_value = "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg" + )] + image_url: String, +} + #[tokio::main] async fn main() -> Result<()> { - let model = VisionModelBuilder::new("llava-hf/llava-1.5-7b-hf") + let args = Args::parse(); + let model = 
VisionModelBuilder::new(&args.model_id) .with_isq(IsqType::Q4K) - .with_chat_template("chat_templates/vicuna.json") .with_logging() .build() .await?; - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; + let bytes = reqwest::blocking::get(&args.image_url)?.bytes()?.to_vec(); let image = image::load_from_memory(&bytes)?; let messages = VisionMessages::new().add_image_message(
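Beyond the URL form used throughout these examples, the vision docs above note that the Python and HTTP APIs also accept base64-encoded images. Here is a minimal sketch of that variant against the OpenAI-compatible endpoint, assuming a server is already listening on localhost:1234 as in `examples/server/vision_chat.py`; the image path and model name are placeholders.

```python
import base64

from openai import OpenAI

client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/")

# Read a local file and embed it as a base64 data URL, per the OpenAI spec.
with open("image.jpg", "rb") as f:  # placeholder path
    encoded = base64.b64encode(f.read()).decode("utf-8")

completion = client.chat.completions.create(
    model="phi3v",  # placeholder; use the name the server was launched with
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
                },
                {"type": "text", "text": "What is shown in this image?"},
            ],
        }
    ],
    max_tokens=256,
)
print(completion.choices[0].message.content)
```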