road-core · jpodivin · May 26, 2025 · May 26, 2025 · May 26, 2025
diff --git a/pdm.lock b/pdm.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "torch==2.6.0+cpu",
     "road-core @ git+https://github.com/road-core/service.git",
     "matplotlib>=3.10.1",
+    "pyarrow>=20.0.0",
 ]
 requires-python = ">=3.11.1,<=3.12.8"
 readme = "README.md"

diff --git a/src/road_core_eval/evaluate.py b/src/road_core_eval/evaluate.py
@@ -1,6 +1,6 @@
 """Driver for evaluation."""
 
-import argparse
+from argparse import Namespace, ArgumentParser
 import os
 from httpx import Client
 from road_core_eval.response_evaluation import ResponseEvaluation
@@ -11,9 +11,10 @@
 )
 
 
-def main():
-    """Evaluate response."""
-    parser = argparse.ArgumentParser(description="Response validation module.")
+def parse_args() -> Namespace:
+    """Parse CLI arguments for response evaluation tool."""
+
+    parser = ArgumentParser(description="Response validation module.")
     parser.add_argument(
         "--eval_provider_model_id",
         nargs="+",
@@ -95,7 +96,13 @@ def main():
         type=str,
         help="Path to text file with API token (applicable when deployed on cluster)",
     )
-    args = parser.parse_args()
+    return parser.parse_args()
+
+
+def main():
+    """Evaluate response."""
+    args = parse_args()
+
     client = Client(base_url=args.eval_api_url, verify=False)  # noqa: S501
 
     if "localhost" not in args.eval_api_url:

diff --git a/tests/test_response_evaluation.py b/tests/test_response_evaluation.py
@@ -0,0 +1,35 @@
+"""Tests for the response_evaluation module."""
+
+from argparse import Namespace
+from unittest.mock import patch
+
+from httpx import Client
+
+from road_core_eval.response_evaluation import ResponseEvaluation
+
+
+def test_response_evaluation_init(tmpdir):
+    """Smoke test: ResponseEvaluation can be constructed from a Namespace
+    standing in for the parsed CLI arguments of road_core_eval.evaluate.
+    """
+    out_dir = tmpdir.mkdir("out_dir")
+    args = Namespace(
+        eval_provider_model_id=["watsonx+ibm/granite-3-8b-instruct"],
+        judge_provider="ollama",
+        judge_model="llama3.1:latest",
+        eval_data_src="eval_data/question_answer_pair.json",
+        eval_out_dir=out_dir,
+        eval_query_ids=None,
+        eval_scenario="with_rag",
+        qna_pool_file=None,
+        eval_type="model",
+        eval_metrics=["cos_score"],
+        eval_modes=["ols"],
+        eval_api_url="http://localhost:8080",
+        eval_api_token_file="ols_api_key.txt",
+    )
+
+    # Mock HF class to prevent model download; the client is closed on exit.
+    with Client(base_url=args.eval_api_url, verify=False) as client:
+        with patch("llama_index.embeddings.huggingface.HuggingFaceEmbedding"):
+            ResponseEvaluation(args, client)