diff --git a/docs/GEMMA3.md b/docs/GEMMA3.md
index 533c68ae09..da4370981d 100644
--- a/docs/GEMMA3.md
+++ b/docs/GEMMA3.md
@@ -12,7 +12,7 @@ The Python and HTTP APIs support sending images as:
 The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
 
 ## HTTP server
-You can find this example [here](../examples/server/gemma3.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -97,7 +97,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/gemma3/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Gemma 3 model with a dummy image.
@@ -142,7 +142,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/gemma3.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/IDEFICS2.md b/docs/IDEFICS2.md
index 4b93dc092b..8d38f2ed92 100644
--- a/docs/IDEFICS2.md
+++ b/docs/IDEFICS2.md
@@ -137,7 +137,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/phi3v.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/LLAMA4.md b/docs/LLAMA4.md
index 68422dcb10..99447b60ca 100644
--- a/docs/LLAMA4.md
+++ b/docs/LLAMA4.md
@@ -24,7 +24,7 @@ The Python and HTTP APIs support sending images as:
 The Rust API takes an image from the [image](https://docs.rs/image/latest/image/index.html) crate.
 
 ## HTTP server
-You can find this example [here](../examples/server/llama4.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -116,7 +116,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/llama4/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Llama 4 model with a dummy image.
@@ -162,7 +162,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/llama4.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/LLaVA.md b/docs/LLaVA.md
index 2eda71da09..d3b90045a9 100644
--- a/docs/LLaVA.md
+++ b/docs/LLaVA.md
@@ -24,7 +24,7 @@ The Rust API takes an image from the [image](https://docs.rs/image/latest/image/
 > It should be added to messages manually, and is of the format `<image>`.
 
 ## HTTP server
-You can find this example [here](../examples/server/llava_next.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -101,7 +101,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/llava_next/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the LLaVA and LLaVANext models with a dummy image.
@@ -146,7 +146,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/llava_next.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/MISTRAL3.md b/docs/MISTRAL3.md
index b2d863100a..4f401b0789 100644
--- a/docs/MISTRAL3.md
+++ b/docs/MISTRAL3.md
@@ -22,7 +22,7 @@ tool calling with Mistral Small 3.1, and you can use it by specifying the `jinja
 
 ## HTTP server
-You can find this example [here](../examples/server/mistral3.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -107,7 +107,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/mistral3/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Mistral 3 model with a dummy image.
@@ -152,7 +152,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/mistral3.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/PHI3V.md b/docs/PHI3V.md
index f96ea9af09..7119d7e541 100644
--- a/docs/PHI3V.md
+++ b/docs/PHI3V.md
@@ -19,7 +19,7 @@ The Rust API takes an image from the [image](https://docs.rs/image/latest/image/
 > They should be added to messages manually, and are of the format `<|image_{N}|>` where N starts from 1.
 
 ## HTTP server
-You can find this example [here](../examples/server/phi3v.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -96,7 +96,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/phi3v/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Phi 3 Vision model with a dummy image.
@@ -140,7 +140,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/phi3v.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/PHI4MM.md b/docs/PHI4MM.md
index 6609ba567f..8247d24fea 100644
--- a/docs/PHI4MM.md
+++ b/docs/PHI4MM.md
@@ -19,7 +19,7 @@ The Rust API takes an image from the [image](https://docs.rs/image/latest/image/
 > They should be added to messages manually, and are of the format `<|image_{N}|>` where N starts from 1.
 
 ## HTTP server
-You can find this example [here](../examples/server/phi3v.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -94,7 +94,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/phi3v/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 This is a minimal example of running the Phi 4 Multimodal model with a dummy image.
@@ -139,7 +139,7 @@ async fn main() -> Result<()> {
 ```
 
 ## Python
-You can find this example [here](../examples/python/phi3v.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/QWEN2VL.md b/docs/QWEN2VL.md
index 5df9669e5f..d39b7fe5da 100644
--- a/docs/QWEN2VL.md
+++ b/docs/QWEN2VL.md
@@ -58,7 +58,7 @@ camellias are also known for their resilience and ability to thrive in a variety
 ```
 
 ## HTTP server
-You can find this example [here](../examples/server/qwen2vl.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -137,7 +137,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/qwen2vl/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 ```rust
 use anyhow::Result;
@@ -184,7 +184,7 @@
 ---
 
 ## Python
-You can find this example [here](../examples/python/qwen2vl.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
diff --git a/docs/VISION_MODELS.md b/docs/VISION_MODELS.md
index 67b23ef7d8..42dc71e912 100644
--- a/docs/VISION_MODELS.md
+++ b/docs/VISION_MODELS.md
@@ -13,4 +13,4 @@ Please see docs for the following model types:
 - Phi 4 Multimodal: [PHI4MM.md](PHI4MM.md)
 
 > Note for the Python and HTTP APIs:
-> We follow the OpenAI specification for structuring the image messages and allow both base64 encoded images as well as a URL/path to the image. There are many examples of this, see [this Python example](../examples/python/phi3v.py).
\ No newline at end of file
+> We follow the OpenAI specification for structuring the image messages and allow both base64 encoded images as well as a URL/path to the image. There are many examples of this, see [this Python example](../examples/python/vision_chat.py).
diff --git a/docs/VLLAMA.md b/docs/VLLAMA.md
index 32f080a66a..eb66a81987 100644
--- a/docs/VLLAMA.md
+++ b/docs/VLLAMA.md
@@ -65,7 +65,7 @@ The image appears to be of Mount Washington, which is the highest peak in the No
 ```
 
 ## HTTP server
-You can find this example [here](../examples/server/llama_vision.py).
+You can find this example [here](../examples/server/vision_chat.py).
 
 We support an OpenAI compatible HTTP API for vision models. This example demonstrates sending a chat completion request with an image.
@@ -152,7 +152,7 @@ print(resp)
 ---
 
 ## Rust
-You can find this example [here](../mistralrs/examples/llama_vision/main.rs).
+You can find this example [here](../mistralrs/examples/vision_chat/main.rs).
 
 ```rust
 use anyhow::Result;
@@ -198,7 +198,7 @@
 ---
 
 ## Python
-You can find this example [here](../examples/python/llama_vision.py).
+You can find this example [here](../examples/python/vision_chat.py).
 
 This example demonstrates loading and sending a chat completion request with an image.
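All of the per-model example scripts referenced above are deleted below in favor of the parameterized `vision_chat` examples. For orientation, here is a minimal sketch that pairs each model id with the `VisionArchitecture` name its deleted script used and drives the consolidated Python example; the `VISION_EXAMPLES` table and `run_vision_chat` helper are illustrative only, not part of this patch.

```python
import subprocess

# Model id -> VisionArchitecture pairs, taken verbatim from the per-model
# scripts deleted below. This table and helper are illustrative only.
VISION_EXAMPLES = {
    "google/gemma-3-12b-it": "Gemma3",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "Llama4",
    "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k": "VLlama",
    "llava-hf/llava-v1.6-mistral-7b-hf": "LLaVANext",
    "microsoft/Phi-3.5-vision-instruct": "Phi3V",
    "microsoft/Phi-4-multimodal-instruct": "Phi4MM",
    "Qwen/Qwen2-VL-2B-Instruct": "Qwen2VL",
    "HuggingFaceTB/SmolVLM-Instruct": "Idefics3",
}


def run_vision_chat(model_id: str) -> None:
    """Run the consolidated example with the architecture matching model_id."""
    subprocess.run(
        [
            "python",
            "examples/python/vision_chat.py",
            "--model-id", model_id,
            "--arch", VISION_EXAMPLES[model_id],
        ],
        check=True,
    )


if __name__ == "__main__":
    run_vision_chat("microsoft/Phi-3.5-vision-instruct")
```

Note that the consolidated Rust example later in this patch takes only `--model-id`, so the architecture there appears to be inferred by `VisionModelBuilder` rather than passed explicitly.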
diff --git a/examples/python/deepseekr1.py b/examples/python/deepseekr1.py deleted file mode 100644 index 6cf6747d36..0000000000 --- a/examples/python/deepseekr1.py +++ /dev/null @@ -1,23 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, Architecture - -runner = Runner( - which=Which.Plain( - model_id="deepseek-ai/DeepSeek-R1", - arch=Architecture.DeepseekV3, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="mistral", - messages=[ - {"role": "user", "content": "Tell me a story about the Rust type system."} - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/deepseekv2.py b/examples/python/deepseekv2.py deleted file mode 100644 index d3790253c1..0000000000 --- a/examples/python/deepseekv2.py +++ /dev/null @@ -1,23 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, Architecture - -runner = Runner( - which=Which.Plain( - model_id="deepseek-ai/DeepSeek-V2-Lite", - arch=Architecture.DeepseekV2, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="mistral", - messages=[ - {"role": "user", "content": "Tell me a story about the Rust type system."} - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/gemma3.py b/examples/python/gemma3.py deleted file mode 100644 index c52acaa83f..0000000000 --- a/examples/python/gemma3.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="google/gemma-3-12b-it", - arch=VisionArchitecture.Gemma3, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/llama4.py b/examples/python/llama4.py deleted file mode 100644 index ab1904f283..0000000000 --- a/examples/python/llama4.py +++ /dev/null @@ -1,38 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct", - arch=VisionArchitecture.Llama4, - ), - in_situ_quant="Q4K", -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/llama_vision.py b/examples/python/llama_vision.py deleted file mode 100644 index b7685d2694..0000000000 --- a/examples/python/llama_vision.py +++ /dev/null @@ -1,40 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -# 
MODEL_ID = "meta-llama/Llama-3.2-11B-Vision-Instruct" -MODEL_ID = "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k" - -runner = Runner( - which=Which.VisionPlain( - model_id=MODEL_ID, - arch=VisionArchitecture.VLlama, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="llama-vision", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image|>What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/llava_next.py b/examples/python/llava_next.py deleted file mode 100644 index d858259363..0000000000 --- a/examples/python/llava_next.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="llava-hf/llava-v1.6-mistral-7b-hf", - arch=VisionArchitecture.LLaVANext, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="llava_next", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/mistral3.py b/examples/python/mistral3.py deleted file mode 100644 index 3d3fa64658..0000000000 --- a/examples/python/mistral3.py +++ /dev/null @@ -1,38 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503", - arch=VisionArchitecture.Gemma3, - ), - in_situ_quant="Q4K", -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/phi4mm.py b/examples/python/phi4mm.py deleted file mode 100644 index afc589e3f8..0000000000 --- a/examples/python/phi4mm.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="microsoft/Phi-4-multimodal-instruct", - arch=VisionArchitecture.Phi4MM, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="phi4mm", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image_1|>\nWhat is shown in this image? 
Write a detailed response analyzing the scene.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/plain.py b/examples/python/plain.py index accd56eb8e..b630369613 100644 --- a/examples/python/plain.py +++ b/examples/python/plain.py @@ -1,15 +1,21 @@ +import argparse from mistralrs import Runner, Which, ChatCompletionRequest, Architecture +parser = argparse.ArgumentParser(description="Text model chat example") +parser.add_argument("--model-id", required=True, help="HuggingFace model id") +parser.add_argument("--arch", required=True, help="Architecture name") +args = parser.parse_args() + runner = Runner( which=Which.Plain( - model_id="mistralai/Mistral-7B-Instruct-v0.1", - arch=Architecture.Mistral, + model_id=args.model_id, + arch=Architecture[args.arch], ), ) res = runner.send_chat_completion_request( ChatCompletionRequest( - model="mistral", + model=args.arch.lower(), messages=[ {"role": "user", "content": "Tell me a story about the Rust type system."} ], diff --git a/examples/python/qwen2vl.py b/examples/python/qwen2vl.py deleted file mode 100644 index 27f196a50d..0000000000 --- a/examples/python/qwen2vl.py +++ /dev/null @@ -1,39 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" - -runner = Runner( - which=Which.VisionPlain( - model_id=MODEL_ID, - arch=VisionArchitecture.Qwen2VL, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="qwen2vl", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.garden-treasures.com/cdn/shop/products/IMG_6245.jpg" - }, - }, - { - "type": "text", - "text": "What type of flower is this? 
Give some fun facts.", - }, - ], - } - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/smolvlm.py b/examples/python/smolvlm.py deleted file mode 100644 index 9ac878c8f9..0000000000 --- a/examples/python/smolvlm.py +++ /dev/null @@ -1,37 +0,0 @@ -from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture - -runner = Runner( - which=Which.VisionPlain( - model_id="HuggingFaceTB/SmolVLM-Instruct", - arch=VisionArchitecture.Idefics3, - ), -) - -res = runner.send_chat_completion_request( - ChatCompletionRequest( - model="idefics3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image?", - }, - ], - }, - ], - max_tokens=256, - presence_penalty=1.0, - top_p=0.1, - temperature=0.1, - ) -) -print(res.choices[0].message.content) -print(res.usage) diff --git a/examples/python/phi3v.py b/examples/python/vision_chat.py similarity index 57% rename from examples/python/phi3v.py rename to examples/python/vision_chat.py index fc9a332783..e7db5e3cf8 100644 --- a/examples/python/phi3v.py +++ b/examples/python/vision_chat.py @@ -1,24 +1,32 @@ +import argparse from mistralrs import Runner, Which, ChatCompletionRequest, VisionArchitecture +parser = argparse.ArgumentParser(description="Vision model chat example") +parser.add_argument("--model-id", required=True, help="HuggingFace model id") +parser.add_argument("--arch", required=True, help="VisionArchitecture name") +parser.add_argument( + "--image-url", + default="https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", +) +args = parser.parse_args() + runner = Runner( which=Which.VisionPlain( - model_id="microsoft/Phi-3.5-vision-instruct", - arch=VisionArchitecture.Phi3V, + model_id=args.model_id, + arch=VisionArchitecture[args.arch], ), ) res = runner.send_chat_completion_request( ChatCompletionRequest( - model="phi3v", + model=args.arch.lower(), messages=[ { "role": "user", "content": [ { "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, + "image_url": {"url": args.image_url}, }, { "type": "text", diff --git a/examples/server/gemma3.py b/examples/server/gemma3.py deleted file mode 100644 index b09ac850e7..0000000000 --- a/examples/server/gemma3.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="gemma3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llama4.py b/examples/server/llama4.py deleted file mode 100644 index 648d62b315..0000000000 --- a/examples/server/llama4.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llama4", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://upload.wikimedia.org/wikipedia/commons/f/fd/Pink_flower.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llama_vision.py b/examples/server/llama_vision.py deleted file mode 100644 index ef5dca6835..0000000000 --- a/examples/server/llama_vision.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llama-vision", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image|>What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - # max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llava.py b/examples/server/llava.py deleted file mode 100644 index b8f66df8aa..0000000000 --- a/examples/server/llava.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llava", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/llava_next.py b/examples/server/llava_next.py deleted file mode 100644 index cb777f8819..0000000000 --- a/examples/server/llava_next.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="llava_next", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "What is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/mistral3.py b/examples/server/mistral3.py deleted file mode 100644 index 3dc3bcfe3a..0000000000 --- a/examples/server/mistral3.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." 
- print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="mistral3", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://upload.wikimedia.org/wikipedia/commons/f/fd/Pink_flower.jpg" - }, - }, - { - "type": "text", - "text": "What is this?", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/phi4mm.py b/examples/server/phi4mm.py deleted file mode 100644 index 03eb0ab698..0000000000 --- a/examples/server/phi4mm.py +++ /dev/null @@ -1,63 +0,0 @@ -from openai import OpenAI -import httpx -import textwrap -import json - - -def log_response(response: httpx.Response): - request = response.request - print(f"Request: {request.method} {request.url}") - print(" Headers:") - for key, value in request.headers.items(): - if key.lower() == "authorization": - value = "[...]" - if key.lower() == "cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - print(" Body:") - try: - request_body = json.loads(request.content) - print(textwrap.indent(json.dumps(request_body, indent=2), " ")) - except json.JSONDecodeError: - print(textwrap.indent(request.content.decode(), " ")) - print(f"Response: status_code={response.status_code}") - print(" Headers:") - for key, value in response.headers.items(): - if key.lower() == "set-cookie": - value = value.split("=")[0] + "=..." - print(f" {key}: {value}") - - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) - -completion = client.chat.completions.create( - model="phi4mm", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, - { - "type": "text", - "text": "<|image_1|>\nWhat is shown in this image? Write a detailed response analyzing the scene.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/qwen2vl.py b/examples/server/qwen2vl.py deleted file mode 100644 index fc6e1d1b83..0000000000 --- a/examples/server/qwen2vl.py +++ /dev/null @@ -1,30 +0,0 @@ -from openai import OpenAI - -client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") - -completion = client.chat.completions.create( - model="qwen2vl", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.garden-treasures.com/cdn/shop/products/IMG_6245.jpg" - }, - }, - { - "type": "text", - "text": "What type of flower is this? 
Give some fun facts.", - }, - ], - }, - ], - max_tokens=256, - frequency_penalty=1.0, - top_p=0.1, - temperature=0, -) -resp = completion.choices[0].message.content -print(resp) diff --git a/examples/server/phi3v.py b/examples/server/vision_chat.py similarity index 70% rename from examples/server/phi3v.py rename to examples/server/vision_chat.py index 623a2564f8..6bfac36701 100644 --- a/examples/server/phi3v.py +++ b/examples/server/vision_chat.py @@ -1,3 +1,4 @@ +import argparse from openai import OpenAI import httpx import textwrap @@ -28,25 +29,26 @@ def log_response(response: httpx.Response): print(f" {key}: {value}") +parser = argparse.ArgumentParser(description="Send a vision chat request") +parser.add_argument("--model", required=True, help="model name for the API") +parser.add_argument( + "--image-url", + default="https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", +) +args = parser.parse_args() + client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/") -# Enable this to log requests and responses -# client._client = httpx.Client( -# event_hooks={"request": [print], "response": [log_response]} -# ) +# Uncomment to log HTTP requests +# client._client = httpx.Client(event_hooks={"request": [print], "response": [log_response]}) completion = client.chat.completions.create( - model="phi3v", + model=args.model, messages=[ { "role": "user", "content": [ - { - "type": "image_url", - "image_url": { - "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg" - }, - }, + {"type": "image_url", "image_url": {"url": args.image_url}}, { "type": "text", "text": "<|image_1|>\nWhat is shown in this image? Write a detailed response analyzing the scene.", @@ -59,5 +61,4 @@ def log_response(response: httpx.Response): top_p=0.1, temperature=0, ) -resp = completion.choices[0].message.content -print(resp) +print(completion.choices[0].message.content) diff --git a/mistralrs/examples/deepseekr1/main.rs b/mistralrs/examples/deepseekr1/main.rs deleted file mode 100644 index 27d69269d9..0000000000 --- a/mistralrs/examples/deepseekr1/main.rs +++ /dev/null @@ -1,34 +0,0 @@ -use anyhow::Result; -use mistralrs::{ - IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder, -}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = TextModelBuilder::new("deepseek-ai/DeepSeek-R1") - .with_isq(IsqType::Q4K) - .with_logging() - .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? - .build() - .await?; - - let messages = TextMessages::new() - .add_message( - TextMessageRole::System, - "You are an AI agent with a specialty in programming.", - ) - .add_message( - TextMessageRole::User, - "Hello! How are you? 
Please write generic binary search function in Rust.", - ); - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/deepseekv2/main.rs b/mistralrs/examples/deepseekv2/main.rs deleted file mode 100644 index 7f71f08221..0000000000 --- a/mistralrs/examples/deepseekv2/main.rs +++ /dev/null @@ -1,34 +0,0 @@ -use anyhow::Result; -use mistralrs::{ - IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder, -}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = TextModelBuilder::new("deepseek-ai/DeepSeek-V2-Lite") - .with_isq(IsqType::Q4K) - .with_logging() - .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? - .build() - .await?; - - let messages = TextMessages::new() - .add_message( - TextMessageRole::System, - "You are an AI agent with a specialty in programming.", - ) - .add_message( - TextMessageRole::User, - "Hello! How are you? Please write generic binary search function in Rust.", - ); - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/gemma2/main.rs b/mistralrs/examples/gemma2/main.rs deleted file mode 100644 index 3f21be0be2..0000000000 --- a/mistralrs/examples/gemma2/main.rs +++ /dev/null @@ -1,25 +0,0 @@ -use anyhow::Result; -use mistralrs::{ - IsqType, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextModelBuilder, -}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = TextModelBuilder::new("google/gemma-2-9b-it") - .with_isq(IsqType::Q4K) - .with_logging() - .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? 
- .build() - .await?; - - let request = RequestBuilder::new().add_message( - TextMessageRole::User, - "Please write a mathematical equation where a few numbers are added.", - ); - - let response = model.send_chat_request(request).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - - Ok(()) -} diff --git a/mistralrs/examples/gemma3/main.rs b/mistralrs/examples/gemma3/main.rs deleted file mode 100644 index b94484e094..0000000000 --- a/mistralrs/examples/gemma3/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("google/gemma-3-12b-it") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llama4/main.rs b/mistralrs/examples/llama4/main.rs deleted file mode 100644 index 0efb6a84da..0000000000 --- a/mistralrs/examples/llama4/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("meta-llama/Llama-4-Scout-17B-16E-Instruct") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llama_vision/main.rs b/mistralrs/examples/llama_vision/main.rs deleted file mode 100644 index 48500b06d1..0000000000 --- a/mistralrs/examples/llama_vision/main.rs +++ /dev/null @@ -1,39 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -// const MODEL_ID: &str = "meta-llama/Llama-3.2-11B-Vision-Instruct"; -const MODEL_ID: &str = "lamm-mit/Cephalo-Llama-3.2-11B-Vision-Instruct-128k"; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new(MODEL_ID) - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image 
= image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llava_next/main.rs b/mistralrs/examples/llava_next/main.rs deleted file mode 100644 index 034f1c5b08..0000000000 --- a/mistralrs/examples/llava_next/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("llava-hf/llava-v1.6-mistral-7b-hf") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/mistral3/main.rs b/mistralrs/examples/mistral3/main.rs deleted file mode 100644 index 09dc06b8f5..0000000000 --- a/mistralrs/examples/mistral3/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("mistralai/Mistral-Small-3.1-24B-Instruct-2503") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/phi3v/main.rs b/mistralrs/examples/phi3v/main.rs deleted file mode 100644 index 507477f736..0000000000 --- a/mistralrs/examples/phi3v/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("microsoft/Phi-3.5-vision-instruct") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => 
http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/phi4mm/main.rs b/mistralrs/examples/phi4mm/main.rs deleted file mode 100644 index 7e4e821f35..0000000000 --- a/mistralrs/examples/phi4mm/main.rs +++ /dev/null @@ -1,36 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new("microsoft/Phi-4-multimodal-instruct") - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is depicted here? Please describe the scene in detail.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/qwen2vl/main.rs b/mistralrs/examples/qwen2vl/main.rs deleted file mode 100644 index 8d833ccb41..0000000000 --- a/mistralrs/examples/qwen2vl/main.rs +++ /dev/null @@ -1,38 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -const MODEL_ID: &str = "Qwen/Qwen2-VL-2B-Instruct"; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new(MODEL_ID) - .with_isq(IsqType::Q4K) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://www.garden-treasures.com/cdn/shop/products/IMG_6245.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What type of flower is this? 
Give some fun facts.", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/simple/main.rs b/mistralrs/examples/simple/main.rs index 3765ab54ba..175fc7fc9d 100644 --- a/mistralrs/examples/simple/main.rs +++ b/mistralrs/examples/simple/main.rs @@ -1,12 +1,20 @@ use anyhow::Result; +use clap::Parser; use mistralrs::{ IsqType, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages, TextModelBuilder, }; +#[derive(Parser)] +struct Args { + #[clap(long, default_value = "microsoft/Phi-3.5-mini-instruct")] + model_id: String, +} + #[tokio::main] async fn main() -> Result<()> { - let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct") + let args = Args::parse(); + let model = TextModelBuilder::new(&args.model_id) .with_isq(IsqType::Q8_0) .with_logging() .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())? @@ -31,7 +39,6 @@ async fn main() -> Result<()> { response.usage.avg_compl_tok_per_sec ); - // Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability. let request = RequestBuilder::new().return_logprobs(true).add_message( TextMessageRole::User, "Please write a mathematical equation where a few numbers are added.", diff --git a/mistralrs/examples/smolvlm/main.rs b/mistralrs/examples/smolvlm/main.rs deleted file mode 100644 index 145b737958..0000000000 --- a/mistralrs/examples/smolvlm/main.rs +++ /dev/null @@ -1,38 +0,0 @@ -use anyhow::Result; -use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; - -const MODEL_ID: &str = "HuggingFaceTB/SmolVLM-Instruct"; - -#[tokio::main] -async fn main() -> Result<()> { - let model = VisionModelBuilder::new(MODEL_ID) - .with_isq(IsqType::Q8_0) - .with_logging() - .build() - .await?; - - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; - let image = image::load_from_memory(&bytes)?; - - let messages = VisionMessages::new().add_image_message( - TextMessageRole::User, - "What is this?", - vec![image], - &model, - )?; - - let response = model.send_chat_request(messages).await?; - - println!("{}", response.choices[0].message.content.as_ref().unwrap()); - dbg!( - response.usage.avg_prompt_tok_per_sec, - response.usage.avg_compl_tok_per_sec - ); - - Ok(()) -} diff --git a/mistralrs/examples/llava/main.rs b/mistralrs/examples/vision_chat/main.rs similarity index 63% rename from mistralrs/examples/llava/main.rs rename to mistralrs/examples/vision_chat/main.rs index b2668a3703..b71f5d0972 100644 --- a/mistralrs/examples/llava/main.rs +++ b/mistralrs/examples/vision_chat/main.rs @@ -1,21 +1,28 @@ use anyhow::Result; +use clap::Parser; use mistralrs::{IsqType, TextMessageRole, VisionMessages, VisionModelBuilder}; +#[derive(Parser)] +struct Args { + #[clap(long)] + model_id: String, + #[clap( + long, + default_value = "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg" + )] + image_url: String, +} + #[tokio::main] async fn main() -> Result<()> { - let model = VisionModelBuilder::new("llava-hf/llava-1.5-7b-hf") + let args = Args::parse(); + let model = 
VisionModelBuilder::new(&args.model_id) .with_isq(IsqType::Q4K) - .with_chat_template("chat_templates/vicuna.json") .with_logging() .build() .await?; - let bytes = match reqwest::blocking::get( - "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg", - ) { - Ok(http_resp) => http_resp.bytes()?.to_vec(), - Err(e) => anyhow::bail!(e), - }; + let bytes = reqwest::blocking::get(&args.image_url)?.bytes()?.to_vec(); let image = image::load_from_memory(&bytes)?; let messages = VisionMessages::new().add_image_message(
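Beyond the URL form used throughout these examples, the vision docs above note that the Python and HTTP APIs also accept base64-encoded images. Here is a minimal sketch of that variant against the OpenAI-compatible endpoint, assuming a server is already listening on localhost:1234 as in `examples/server/vision_chat.py`; the image path and model name are placeholders.

```python
import base64

from openai import OpenAI

client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/")

# Read a local file and embed it as a base64 data URL, per the OpenAI spec.
with open("image.jpg", "rb") as f:  # placeholder path
    encoded = base64.b64encode(f.read()).decode("utf-8")

completion = client.chat.completions.create(
    model="phi3v",  # placeholder; use the name the server was launched with
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
                },
                {"type": "text", "text": "What is shown in this image?"},
            ],
        }
    ],
    max_tokens=256,
)
print(completion.choices[0].message.content)
```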