diff --git a/.github/workflows/config.json b/.github/workflows/config.json
index 71f90067..b20185f5 100644
--- a/.github/workflows/config.json
+++ b/.github/workflows/config.json
@@ -68,10 +68,19 @@
         "min_query_expansion_related_question_similarity_score": 90,
         "expand_to_multiple_questions": true
     },
-    "openai": {
-        "azure_oai_chat_deployment_name": "gpt-35-turbo",
-        "azure_oai_eval_deployment_name": "gpt-35-turbo",
-        "temperature": 0
+    "llm": {
+        "chat_llm": {
+            "llm_type": "openai",
+            "model_name": "gpt-35-turbo",
+            "temperature": 0,
+            "max_tokens": 4096
+        },
+        "eval_llm": {
+            "llm_type": "openai",
+            "model_name": "gpt-35-turbo",
+            "temperature": 0,
+            "max_tokens": 4096
+        }
     },
     "eval": {
         "metric_types": [
diff --git a/config.sample.json b/config.sample.json
index cfd162ed..40c8009d 100644
--- a/config.sample.json
+++ b/config.sample.json
@@ -69,10 +69,19 @@
         "min_query_expansion_related_question_similarity_score": 90,
         "expand_to_multiple_questions": false
     },
-    "openai": {
-        "azure_oai_chat_deployment_name": "gpt-35-turbo",
-        "azure_oai_eval_deployment_name": "gpt-35-turbo",
-        "temperature": 0
+    "llm": {
+        "chat_llm": {
+            "llm_type": "openai",
+            "model_name": "gpt-35-turbo",
+            "temperature": 0,
+            "max_tokens": 4096
+        },
+        "eval_llm": {
+            "llm_type": "openai",
+            "model_name": "gpt-35-turbo",
+            "temperature": 0,
+            "max_tokens": 4096
+        }
     },
     "eval": {
         "metric_types": [
diff --git a/config.schema.json b/config.schema.json
index 8ab02676..f1918b87 100644
--- a/config.schema.json
+++ b/config.schema.json
@@ -520,31 +520,65 @@
         "expand_to_multiple_questions"
       ]
     },
-    "openai": {
+    "llm": {
       "type": "object",
       "properties": {
-        "azure_oai_chat_deployment_name": {
-          "type": "string",
-          "minLength": 1,
-          "description": "Azure OpenAI deployment name"
-        },
-        "azure_oai_eval_deployment_name": {
-          "type": "string",
-          "minLength": 1,
-          "description": "Azure OpenAI evaluation deployment name"
+        "chat_llm": {
+          "type": "object",
+          "properties": {
+            "llm_type": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Type of the LLM provider, e.g., 'openai'"
+            },
+            "model_name": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Model name, e.g., 'gpt-3.5-turbo'"
+            },
+            "temperature": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1,
+              "description": "Temperature for generating responses"
+            },
+            "max_tokens": {
+              "type": "integer",
+              "minimum": 1,
+              "description": "Maximum number of tokens allowed for the response"
+            }
+          },
+          "required": ["llm_type", "model_name", "temperature", "max_tokens"]
         },
-        "temperature": {
-          "type": "number",
-          "minimum": 0,
-          "maximum": 1,
-          "description": "Temperature for OpenAI API"
+        "eval_llm": {
+          "type": "object",
+          "properties": {
+            "llm_type": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Type of the LLM provider, e.g., 'openai'"
+            },
+            "model_name": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Model name, e.g., 'gpt-3.5-turbo'"
+            },
+            "temperature": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1,
+              "description": "Temperature for generating responses"
+            },
+            "max_tokens": {
+              "type": "integer",
+              "minimum": 1,
+              "description": "Maximum number of tokens allowed for the response"
+            }
+          },
+          "required": ["llm_type", "model_name", "temperature", "max_tokens"]
         }
       },
-      "required": [
-        "azure_oai_chat_deployment_name",
-        "azure_oai_eval_deployment_name",
-        "temperature"
-      ]
+      "required": ["chat_llm", "eval_llm"]
     },
     "eval": {
       "type": "object",
@@ -599,7 +633,7 @@
     "language",
     "rerank",
     "search",
-    "openai",
+    "llm",
     "eval"
   ]
 }
\ No newline at end of file
diff --git a/rag_experiment_accelerator/config/config.py b/rag_experiment_accelerator/config/config.py
index 9a708be0..4d1a3158 100644
--- a/rag_experiment_accelerator/config/config.py
+++ b/rag_experiment_accelerator/config/config.py
@@ -12,7 +12,7 @@
 from rag_experiment_accelerator.config.rerank_config import RerankConfig
 from rag_experiment_accelerator.config.search_config import SearchConfig
 from rag_experiment_accelerator.config.query_expansion import QueryExpansionConfig
-from rag_experiment_accelerator.config.openai_config import OpenAIConfig
+from rag_experiment_accelerator.config.llm_config import LLMConfig
 from rag_experiment_accelerator.config.eval_config import EvalConfig
 
 from rag_experiment_accelerator.embedding.embedding_model import EmbeddingModel
@@ -47,7 +47,7 @@ class Config(BaseConfig):
     rerank: RerankConfig = field(default_factory=RerankConfig)
     search: SearchConfig = field(default_factory=SearchConfig)
     query_expansion: QueryExpansionConfig = field(default_factory=QueryExpansionConfig)
-    openai: OpenAIConfig = field(default_factory=OpenAIConfig)
+    llm: LLMConfig = field(default_factory=LLMConfig)
     eval: EvalConfig = field(default_factory=EvalConfig)
 
     @classmethod
diff --git a/rag_experiment_accelerator/config/llm_config.py b/rag_experiment_accelerator/config/llm_config.py
new file mode 100644
index 00000000..c3d164a0
--- /dev/null
+++ b/rag_experiment_accelerator/config/llm_config.py
@@ -0,0 +1,16 @@
+from dataclasses import dataclass, field
+from rag_experiment_accelerator.config.base_config import BaseConfig
+
+
+@dataclass
+class BaseLLMConfig(BaseConfig):
+    llm_type: str = "openai"
+    model_name: str = "gpt-3.5-turbo"
+    temperature: float = 0.0
+    max_tokens: int = 100
+
+
+@dataclass
+class LLMConfig(BaseConfig):
+    chat_llm: BaseLLMConfig = field(default_factory=BaseLLMConfig)
+    eval_llm: BaseLLMConfig = field(default_factory=BaseLLMConfig)
diff --git a/rag_experiment_accelerator/config/openai_config.py b/rag_experiment_accelerator/config/openai_config.py
deleted file mode 100644
index c4284808..00000000
--- a/rag_experiment_accelerator/config/openai_config.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from dataclasses import dataclass
-from rag_experiment_accelerator.config.base_config import BaseConfig
-
-
-@dataclass
-class OpenAIConfig(BaseConfig):
-    azure_oai_chat_deployment_name: str = "gpt-35-turbo"
-    azure_oai_eval_deployment_name: str = "gpt-35-turbo"
-    temperature: float = 0.0
diff --git a/rag_experiment_accelerator/config/tests/data/config.json b/rag_experiment_accelerator/config/tests/data/config.json
index 90083d05..fe423d3c 100644
--- a/rag_experiment_accelerator/config/tests/data/config.json
+++ b/rag_experiment_accelerator/config/tests/data/config.json
@@ -82,10 +82,19 @@
         "min_query_expansion_related_question_similarity_score": 90,
         "expand_to_multiple_questions": false
     },
-    "openai": {
-        "azure_oai_chat_deployment_name": "test_chat_deployment_name",
-        "azure_oai_eval_deployment_name": "test_eval_deployment_name",
-        "temperature": 10
+    "llm": {
+        "chat_llm": {
+            "llm_type": "openai",
+            "model_name": "gpt-3.5-turbo",
+            "temperature": 0,
+            "max_tokens": 4096
+        },
+        "eval_llm": {
+            "llm_type": "openai",
+            "model_name": "gpt-3.5-turbo",
+            "temperature": 0,
+            "max_tokens": 4096
+        }
     },
     "eval": {
         "metric_types": [
diff --git a/rag_experiment_accelerator/config/tests/test_config.py b/rag_experiment_accelerator/config/tests/test_config.py
index b0782250..165c6f1c 100644
--- a/rag_experiment_accelerator/config/tests/test_config.py
+++ b/rag_experiment_accelerator/config/tests/test_config.py
@@ -45,7 +45,12 @@ def test_config_init(mock_validate_json_with_schema, mock_create_embedding_model
     embedding_model_4.model_name.return_value = "text-embedding-3-small"
     embedding_model_4.dimension.return_value = 256
     embedding_model_4.shorten_dimensions.return_value = True
-    mock_create_embedding_model.side_effect = [embedding_model_1, embedding_model_2, embedding_model_3, embedding_model_4]
+    mock_create_embedding_model.side_effect = [
+        embedding_model_1,
+        embedding_model_2,
+        embedding_model_3,
+        embedding_model_4,
+    ]
     mock_validate_json_with_schema.return_value = (True, None)
 
     config = Config.from_path(environment, config_path)
@@ -105,7 +110,10 @@ def test_config_init(mock_validate_json_with_schema, mock_create_embedding_model
     assert index.embedding_model[3].type == mock_embedding[3]["type"]
     assert index.embedding_model[3].model_name == mock_embedding[3]["model_name"]
     assert index.embedding_model[3].dimension == mock_embedding[3]["dimension"]
-    assert index.embedding_model[3].shorten_dimensions == mock_embedding[3]["shorten_dimensions"]
+    assert (
+        index.embedding_model[3].shorten_dimensions
+        == mock_embedding[3]["shorten_dimensions"]
+    )
 
     model1 = config.get_embedding_model(config.index.embedding_model[0].model_name)
     assert model1.model_name.return_value == "all-MiniLM-L6-v2"
@@ -156,17 +164,17 @@ def test_config_init(mock_validate_json_with_schema, mock_create_embedding_model
         == mock_query_expansion["expand_to_multiple_questions"]
     )
 
-    openai = config.openai
-    mock_openai = mock_config["openai"]
-    assert (
-        openai.azure_oai_chat_deployment_name
-        == mock_openai["azure_oai_chat_deployment_name"]
-    )
-    assert (
-        openai.azure_oai_eval_deployment_name
-        == mock_openai["azure_oai_eval_deployment_name"]
-    )
-    assert openai.temperature == mock_openai["temperature"]
+    llm = config.llm
+    mock_llm = mock_config["llm"]
+    assert llm.chat_llm.model_name == mock_llm["chat_llm"]["model_name"]
+    assert llm.chat_llm.llm_type == mock_llm["chat_llm"]["llm_type"]
+    assert llm.chat_llm.temperature == mock_llm["chat_llm"]["temperature"]
+    assert llm.chat_llm.max_tokens == mock_llm["chat_llm"]["max_tokens"]
+
+    assert llm.eval_llm.model_name == mock_llm["eval_llm"]["model_name"]
+    assert llm.eval_llm.llm_type == mock_llm["eval_llm"]["llm_type"]
+    assert llm.eval_llm.temperature == mock_llm["eval_llm"]["temperature"]
+    assert llm.eval_llm.max_tokens == mock_llm["eval_llm"]["max_tokens"]
 
     assert config.eval.metric_types == mock_config["eval"]["metric_types"]
diff --git a/rag_experiment_accelerator/evaluation/eval.py b/rag_experiment_accelerator/evaluation/eval.py
index 6244c064..1552464a 100644
--- a/rag_experiment_accelerator/evaluation/eval.py
+++ b/rag_experiment_accelerator/evaluation/eval.py
@@ -31,6 +31,9 @@
     compute_transformer_based_score,
 )
 
+from rag_experiment_accelerator.llm.response_generator_factory import (
+    get_response_generator,
+)
 from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
 from rag_experiment_accelerator.utils.logging import get_logger
 from rag_experiment_accelerator.config.environment import Environment
@@ -102,7 +105,9 @@ def compute_metrics(
     """
 
     if metric_type.startswith("rouge"):
-        return plain_metrics.rouge_score(ground_truth=expected, prediction=actual, rouge_metric_name=metric_type)
+        return plain_metrics.rouge_score(
+            ground_truth=expected, prediction=actual, rouge_metric_name=metric_type
+        )
     else:
         plain_metric_func = getattr(plain_metrics, metric_type, None)
         if plain_metric_func:
@@ -207,9 +212,7 @@ def evaluate_prompts(
 
     handler = QueryOutputHandler(config.path.query_data_dir)
 
-    response_generator = ResponseGenerator(
-        environment, config, config.openai.azure_oai_eval_deployment_name
-    )
+    response_generator = get_response_generator(config.llm.eval_llm, environment)
 
     query_data_load = handler.load(
         index_config.index_name(), config.experiment_name, config.job_name
diff --git a/rag_experiment_accelerator/ingest_data/acs_ingest.py b/rag_experiment_accelerator/ingest_data/acs_ingest.py
index f6e28d04..d162d25a 100644
--- a/rag_experiment_accelerator/ingest_data/acs_ingest.py
+++ b/rag_experiment_accelerator/ingest_data/acs_ingest.py
@@ -7,6 +7,9 @@
 from azure.search.documents import SearchClient
 from rag_experiment_accelerator.checkpoint import cache_with_checkpoint
 from rag_experiment_accelerator.config.config import Config
+from rag_experiment_accelerator.llm.response_generator_factory import (
+    get_response_generator,
+)
 from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
 from rag_experiment_accelerator.llm.prompt import (
     do_need_multiple_prompt_instruction,
@@ -103,9 +106,7 @@ def generate_qna(environment, config, docs, azure_oai_deployment_name):
     column_names = ["user_prompt", "output_prompt", "context"]
     new_df = pd.DataFrame(columns=column_names)
 
-    response_generator = ResponseGenerator(
-        environment, config, azure_oai_deployment_name
-    )
+    response_generator = get_response_generator(config.llm.chat_llm, environment)
 
     for doc in docs:
         chunk = list(doc.values())[0]
diff --git a/rag_experiment_accelerator/ingest_data/tests/test_acs_ingest.py b/rag_experiment_accelerator/ingest_data/tests/test_acs_ingest.py
index 7aac83f6..5d1f4b6e 100644
--- a/rag_experiment_accelerator/ingest_data/tests/test_acs_ingest.py
+++ b/rag_experiment_accelerator/ingest_data/tests/test_acs_ingest.py
@@ -58,7 +58,7 @@ def test_my_hash_with_numbers():
     assert result == expected_hash
 
 
-@patch("rag_experiment_accelerator.run.index.ResponseGenerator")
+@patch("rag_experiment_accelerator.run.index.get_response_generator")
 def test_generate_title(mock_response_generator):
     # Arrange
     mock_response = "Test Title"
@@ -80,7 +80,7 @@ def test_generate_title(mock_response_generator):
     assert result == mock_response
 
 
-@patch("rag_experiment_accelerator.run.index.ResponseGenerator")
+@patch("rag_experiment_accelerator.run.index.get_response_generator")
 def test_generate_summary(mock_response_generator):
     # Arrange
     mock_summary = "Test Summary"
diff --git a/rag_experiment_accelerator/llm/huggingface_response_generator.py b/rag_experiment_accelerator/llm/huggingface_response_generator.py
new file mode 100644
index 00000000..b0054a43
--- /dev/null
+++ b/rag_experiment_accelerator/llm/huggingface_response_generator.py
@@ -0,0 +1,41 @@
+from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
+from rag_experiment_accelerator.utils.logging import get_logger
+from rag_experiment_accelerator.llm.prompt.prompt import (
+    Prompt,
+    PromptTag,
+)
+
+logger = get_logger(__name__)
+
+
+class HuggingfaceResponseGenerator(ResponseGenerator):
+    def __init__(self, config: BaseLLMConfig):
+        super().__init__(config)
+
+        self._tokenizer = AutoTokenizer.from_pretrained(self.config.model_name)
+        self._model = AutoModelForCausalLM.from_pretrained(self.config.model_name)
+
+    def _get_response(
+        self,
+        messages,
+        prompt: Prompt,
+    ) -> any:
+        kwargs = {}
+
+        if self.json_object_supported and PromptTag.JSON in prompt.tags:
+            kwargs["response_format"] = {"type": "json_object"}
+
+        input_ids = self._tokenizer.encode(messages, return_tensors="pt")
+        output_ids = self._model.generate(
+            input_ids,
+            num_return_sequences=1,
+            no_repeat_ngram_size=2,
+            temperature=self.config.temperature,
+            max_length=self.config.max_tokens,
+        )
+        response_text = self._tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+        return self._interpret_response(response_text, prompt)
diff --git a/rag_experiment_accelerator/llm/openai_response_generator.py b/rag_experiment_accelerator/llm/openai_response_generator.py
new file mode 100644
index 00000000..95a2d272
--- /dev/null
+++ b/rag_experiment_accelerator/llm/openai_response_generator.py
@@ -0,0 +1,79 @@
+import logging
+
+import openai
+
+from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
+
+from openai import AzureOpenAI
+from tenacity import (
+    after_log,
+    before_sleep_log,
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+    retry_if_not_exception_type,
+)
+
+from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
+from rag_experiment_accelerator.llm.exceptions import ContentFilteredException
+from rag_experiment_accelerator.utils.logging import get_logger
+from rag_experiment_accelerator.config.environment import Environment
+from rag_experiment_accelerator.llm.prompt.prompt import (
+    Prompt,
+    PromptTag,
+)
+
+logger = get_logger(__name__)
+
+
+class OpenAIResponseGenerator(ResponseGenerator):
+    def __init__(self, config: BaseLLMConfig, environment: Environment):
+        super().__init__(config)
+        self.client = self._initialize_azure_openai_client(environment)
+
+    def _initialize_azure_openai_client(self, environment: Environment):
+        return AzureOpenAI(
+            azure_endpoint=environment.openai_endpoint,
+            api_key=environment.openai_api_key,
+            api_version=environment.openai_api_version,
+        )
+
+    @retry(
+        before_sleep=before_sleep_log(logger, logging.CRITICAL),
+        after=after_log(logger, logging.CRITICAL),
+        wait=wait_random_exponential(min=1, max=60),
+        stop=stop_after_attempt(6),
+        retry=retry_if_not_exception_type(
+            (ContentFilteredException, TypeError, KeyboardInterrupt)
+        ),
+    )
+    def _get_response(self, messages, prompt: Prompt, temperature: float) -> any:
+        kwargs = {}
+
+        if self.json_object_supported and PromptTag.JSON in prompt.tags:
+            kwargs["response_format"] = {"type": "json_object"}
+
+        try:
+            response = self.client.chat.completions.create(
+                model=self.config.model_name,
+                messages=messages,
+                temperature=temperature
+                if temperature is not None
+                else self.config.temperature,
+                **kwargs,
+            )
+        except openai.BadRequestError as e:
+            if e.param == "response_format":
+                self.json_object_supported = False
+                return self._get_response(messages, prompt, temperature)
+            raise e
+
+        if response.choices[0].finish_reason == "content_filter":
+            logger.error(
+                f"Response was filtered {response.choices[0].finish_reason}:\n{response}"
+            )
+            raise ContentFilteredException("Content was filtered.")
+
+        response_text = response.choices[0].message.content
+
+        return self._interpret_response(response_text, prompt)
diff --git a/rag_experiment_accelerator/llm/response_generator.py b/rag_experiment_accelerator/llm/response_generator.py
index f38575a3..e9aeb9c9 100644
--- a/rag_experiment_accelerator/llm/response_generator.py
+++ b/rag_experiment_accelerator/llm/response_generator.py
@@ -1,24 +1,10 @@
-import logging
-
 import json
-import openai
 from string import Template
+import abc
-
-from openai import AzureOpenAI
-from tenacity import (
-    after_log,
-    before_sleep_log,
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-    retry_if_not_exception_type,
-)
-
-from rag_experiment_accelerator.config.config import Config
-from rag_experiment_accelerator.llm.exceptions import ContentFilteredException
+from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
 from rag_experiment_accelerator.utils.logging import get_logger
-from rag_experiment_accelerator.config.environment import Environment
 from rag_experiment_accelerator.llm.prompt.prompt import (
     StructuredPrompt,
     CoTPrompt,
@@ -30,21 +16,11 @@
 
 
 class ResponseGenerator:
-    def __init__(self, environment: Environment, config: Config, deployment_name: str):
+    def __init__(self, config: BaseLLMConfig, **kwargs):
         self.config = config
-        self.deployment_name = deployment_name
-        self.temperature = self.config.openai.temperature
         self.use_long_prompt = True
-        self.client = self._initialize_azure_openai_client(environment)
         self.json_object_supported = True
 
-    def _initialize_azure_openai_client(self, environment: Environment):
-        return AzureOpenAI(
-            azure_endpoint=environment.openai_endpoint,
-            api_key=environment.openai_api_key,
-            api_version=environment.openai_api_version,
-        )
-
     def _interpret_response(self, response: str, prompt: Prompt) -> any:
         interpreted_response = response
 
@@ -75,47 +51,9 @@ def _interpret_response(self, response: str, prompt: Prompt) -> any:
 
         return interpreted_response
 
-    @retry(
-        before_sleep=before_sleep_log(logger, logging.CRITICAL),
-        after=after_log(logger, logging.CRITICAL),
-        wait=wait_random_exponential(min=1, max=60),
-        stop=stop_after_attempt(6),
-        retry=retry_if_not_exception_type(
-            (ContentFilteredException, TypeError, KeyboardInterrupt)
-        ),
-    )
-    def _get_response(
-        self, messages, prompt: Prompt, temperature: float | None = None
-    ) -> any:
-        kwargs = {}
-
-        if self.json_object_supported and PromptTag.JSON in prompt.tags:
-            kwargs["response_format"] = {"type": "json_object"}
-
-        try:
-            response = self.client.chat.completions.create(
-                model=self.deployment_name,
-                messages=messages,
-                temperature=temperature
-                if temperature is not None
-                else self.temperature,
-                **kwargs,
-            )
-        except openai.BadRequestError as e:
-            if e.param == "response_format":
-                self.json_object_supported = False
-                return self._get_response(messages, prompt, temperature)
-            raise e
-
-        if response.choices[0].finish_reason == "content_filter":
-            logger.error(
-                f"Response was filtered {response.choices[0].finish_reason}:\n{response}"
-            )
-            raise ContentFilteredException("Content was filtered.")
-
-        response_text = response.choices[0].message.content
-
-        return self._interpret_response(response_text, prompt)
+    @abc.abstractmethod
+    def _get_response(self, messages, prompt: Prompt, temperature: float) -> any:
+        raise NotImplementedError
 
     def generate_response(
         self,
@@ -127,6 +65,9 @@ def generate_response(
         system_arguments = Prompt.arguments_in_prompt(prompt.system_message)
         user_arguments = Prompt.arguments_in_prompt(prompt.user_template)
 
+        if temperature is None:
+            temperature = self.config.temperature
+
         for key in system_arguments:
             assert key in kwargs, f"Missing argument {key} in system message."
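Reviewer note on the provider split above: ResponseGenerator._get_response is now abstract with the signature (messages, prompt, temperature), where messages is the OpenAI-style list of role/content dicts that generate_response builds. The Huggingface override added earlier in this diff omits the temperature parameter, assembles a response_format kwargs dict that is never passed to generate() (and has no Hugging Face equivalent), and feeds the message list straight into tokenizer.encode(). A minimal sketch of how a causal-LM backend could consume those messages instead — the helper name and the chat-template fallback are illustrative assumptions, not part of this change:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def hf_chat_completion(model_name: str, messages: list, max_tokens: int, temperature: float) -> str:
    # Sketch only: flatten OpenAI-style role/content dicts into one prompt string,
    # then generate with the configured limits.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    if tokenizer.chat_template:
        # Models that ship a chat template can render the messages directly.
        prompt_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    else:
        # Fallback assumption: plain concatenation of the message contents.
        prompt_text = "\n".join(m["content"] for m in messages)

    input_ids = tokenizer.encode(prompt_text, return_tensors="pt")
    gen_kwargs = {"max_new_tokens": max_tokens, "pad_token_id": tokenizer.eos_token_id}
    if temperature and temperature > 0:
        # Temperature only has an effect when sampling is enabled.
        gen_kwargs.update(do_sample=True, temperature=temperature)
    with torch.no_grad():
        output_ids = model.generate(input_ids, **gen_kwargs)
    # Return only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens=True)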
diff --git a/rag_experiment_accelerator/llm/response_generator_factory.py b/rag_experiment_accelerator/llm/response_generator_factory.py
new file mode 100644
index 00000000..962a32e1
--- /dev/null
+++ b/rag_experiment_accelerator/llm/response_generator_factory.py
@@ -0,0 +1,21 @@
+from rag_experiment_accelerator.config.llm_config import BaseLLMConfig
+from rag_experiment_accelerator.config.environment import Environment
+
+from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
+from rag_experiment_accelerator.llm.huggingface_response_generator import (
+    HuggingfaceResponseGenerator,
+)
+from rag_experiment_accelerator.llm.openai_response_generator import (
+    OpenAIResponseGenerator,
+)
+
+
+def get_response_generator(
+    config: BaseLLMConfig, environment: Environment
+) -> ResponseGenerator:
+    if config.llm_type == "openai":
+        return OpenAIResponseGenerator(config, environment)
+    elif config.llm_type == "huggingface":
+        return HuggingfaceResponseGenerator(config)
+    else:
+        raise ValueError(f"Unsupported LLM type: {config.llm_type}")
diff --git a/rag_experiment_accelerator/llm/tests/test_response_generator.py b/rag_experiment_accelerator/llm/tests/test_response_generator.py
index 14f88e35..f1c49b57 100644
--- a/rag_experiment_accelerator/llm/tests/test_response_generator.py
+++ b/rag_experiment_accelerator/llm/tests/test_response_generator.py
@@ -1,8 +1,10 @@
 import unittest
 import json
-from unittest.mock import patch, Mock
+from unittest.mock import patch, Mock, MagicMock
 from rag_experiment_accelerator.llm.exceptions import ContentFilteredException
-from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
+from rag_experiment_accelerator.llm.openai_response_generator import (
+    OpenAIResponseGenerator,
+)
 from rag_experiment_accelerator.llm.prompt import (
     StructuredPrompt,
     CoTPrompt,
@@ -11,10 +13,15 @@
 )
 
 
-class TestResponseGenerator(unittest.TestCase):
+class TestOpenAIResponseGenerator(unittest.TestCase):
     def setUp(self):
-        self.generator = ResponseGenerator.__new__(ResponseGenerator)
-        self.generator.config = Mock()
+        self.generator = OpenAIResponseGenerator.__new__(OpenAIResponseGenerator)
+        self.generator.config = MagicMock()
+        self.generator.config.llm = MagicMock()
+        self.generator.config.llm.chat_llm = MagicMock()
+        self.generator.config.llm.chat_llm.llm_type = "openai"
+        self.generator.config.llm.eval_llm = MagicMock()
+        self.generator.config.llm.eval_llm.llm_type = "openai"
         self.generator.temperature = 0.5
         self.generator.deployment_name = "deployment_name"
         self.generator.client = Mock()
@@ -75,7 +82,7 @@ def test_get_response_normal(self, mock_logger):
         self.generator.client.chat.completions.create.return_value = mock_response
 
         # Test
-        result = self.generator._get_response("message", self.prompt)
+        result = self.generator._get_response("message", self.prompt, temperature=0.0)
 
         self.assertEqual(result, "test response")
         self.generator.client.chat.completions.create.assert_called_once()
@@ -90,7 +97,7 @@ def test_get_response_content_filtered(self, mock_logger):
 
         # Test and assert exception
         with self.assertRaises(ContentFilteredException):
-            self.generator._get_response("message", self.prompt)
+            self.generator._get_response("message", self.prompt, temperature=0.0)
 
     @patch("rag_experiment_accelerator.llm.response_generator.logger")
     def test_get_response_retries_on_random_exception(self, mock_logger):
@@ -108,12 +115,12 @@ def test_get_response_retries_on_random_exception(self, mock_logger):
         ]
 
         # Test
-        result = self.generator._get_response("message", self.prompt)
+        result = self.generator._get_response("message", self.prompt, temperature=0.0)
         self.assertEqual(result, "recovered response")
         self.assertEqual(self.generator.client.chat.completions.create.call_count, 2)
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._get_response"
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._get_response"
     )
     def test_generate_response_full_system_message(self, mock_get_response):
         # Setup
@@ -129,7 +136,7 @@ def test_generate_response_full_system_message(self, mock_get_response):
         self.assertEqual(response, "valid response")
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._get_response"
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._get_response"
     )
     def test_generate_response_full_user_template(self, mock_get_response):
         # Setup
@@ -145,7 +152,7 @@ def test_generate_response_full_user_template(self, mock_get_response):
         self.assertEqual(response, "valid response")
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._get_response"
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._get_response"
    )
     def test_generate_response_mixed_messages(self, mock_get_response):
         # Setup
@@ -161,7 +168,7 @@ def test_generate_response_mixed_messages(self, mock_get_response):
         self.assertEqual(response, "valid response")
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._get_response"
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._get_response"
    )
     def test_generate_response_missing_system_argument(self, mock_get_response):
         # Setup
@@ -173,7 +180,7 @@ def test_generate_response_missing_system_argument(self, mock_get_response):
             self.generator.generate_response(prompt, None, **kwargs)
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._get_response"
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._get_response"
    )
     def test_generate_response_missing_user_argument_non_strict(
         self, mock_get_response
@@ -190,7 +197,7 @@ def test_generate_response_missing_user_argument_non_strict(
         self.assertIsNone(response)
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._get_response",
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._get_response",
         side_effect=Exception("Random failure"),
     )
     def test_generate_response_exception_handling_strict(self, mock_get_response):
@@ -203,7 +210,7 @@ def test_generate_response_exception_handling_strict(self, mock_get_response):
             self.generator.generate_response(prompt, None, **kwargs)
 
     @patch(
-        "rag_experiment_accelerator.llm.response_generator.ResponseGenerator._initialize_azure_openai_client"
+        "rag_experiment_accelerator.llm.openai_response_generator.OpenAIResponseGenerator._initialize_azure_openai_client"
    )
     def test_initialize_azure_openai_client(self, mock_initialize_azure_openai_client):
         # Arrange
diff --git a/rag_experiment_accelerator/run/index.py b/rag_experiment_accelerator/run/index.py
index 6f5237e6..bb3d39b4 100644
--- a/rag_experiment_accelerator/run/index.py
+++ b/rag_experiment_accelerator/run/index.py
@@ -12,7 +12,9 @@
 
 from rag_experiment_accelerator.ingest_data.acs_ingest import upload_data
 from rag_experiment_accelerator.init_Index.create_index import create_acs_index
-from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
+from rag_experiment_accelerator.llm.response_generator_factory import (
+    get_response_generator,
+)
 from rag_experiment_accelerator.llm.prompt import (
     prompt_instruction_title,
     prompt_instruction_summary,
@@ -310,7 +312,7 @@ def process_title(
     if index_config.chunking.generate_title:
         title = generate_title(
             chunk["content"],
-            config.openai.azure_oai_chat_deployment_name,
+            config.llm.chat_llm.model_name,
             environment,
             config,
         )
@@ -353,7 +355,7 @@ def process_summary(
     if index_config.chunking.generate_summary:
         summary = generate_summary(
             chunk["content"],
-            config.openai.azure_oai_chat_deployment_name,
+            config.llm.chat_llm.model_name,
             environment,
             config,
         )
@@ -382,10 +384,8 @@ def generate_title(chunk, azure_oai_deployment_name, environment, config):
     Returns:
         str: The generated title.
     """
-    response = ResponseGenerator(
-        environment=environment,
-        config=config,
-        deployment_name=azure_oai_deployment_name,
+    response = get_response_generator(
+        config.llm.chat_llm, environment
     ).generate_response(prompt_instruction_title, text=chunk)
     return response
 
@@ -404,9 +404,7 @@ def generate_summary(chunk, azure_oai_deployment_name, environment, config):
     Returns:
         str: The generated summary.
     """
-    response = ResponseGenerator(
-        environment=environment,
-        config=config,
-        deployment_name=azure_oai_deployment_name,
+    response = get_response_generator(
+        config.llm.chat_llm, environment
     ).generate_response(prompt_instruction_summary, text=chunk)
     return response
diff --git a/rag_experiment_accelerator/run/qa_generation.py b/rag_experiment_accelerator/run/qa_generation.py
index e9b6624d..c7aadef8 100644
--- a/rag_experiment_accelerator/run/qa_generation.py
+++ b/rag_experiment_accelerator/run/qa_generation.py
@@ -68,9 +68,7 @@ def run(
     )
 
     # generate qna
-    df = generate_qna(
-        environment, config, all_docs, config.openai.azure_oai_chat_deployment_name
-    )
+    df = generate_qna(environment, config, all_docs, config.llm.chat_llm.model_name)
     # write to jsonl
     df.to_json(config.path.eval_data_file, orient="records", lines=True)
     # create data asset in mlstudio
diff --git a/rag_experiment_accelerator/run/querying.py b/rag_experiment_accelerator/run/querying.py
index 77e91056..d823e597 100644
--- a/rag_experiment_accelerator/run/querying.py
+++ b/rag_experiment_accelerator/run/querying.py
@@ -46,6 +46,9 @@
 from rag_experiment_accelerator.utils.logging import get_logger
 from rag_experiment_accelerator.config.environment import Environment
 
+from rag_experiment_accelerator.llm.response_generator_factory import (
+    get_response_generator,
+)
 from rag_experiment_accelerator.llm.response_generator import ResponseGenerator
 from rag_experiment_accelerator.llm.prompt import (
     prompt_generate_hypothetical_answer,
@@ -484,9 +487,7 @@ def get_query_output(
 ):
     search_evals = []
 
-    response_generator = ResponseGenerator(
-        environment, config, config.openai.azure_oai_chat_deployment_name
-    )
+    response_generator = get_response_generator(config.llm.chat_llm, environment)
 
     embedding_model = config.get_embedding_model(
         index_config.embedding_model.model_name
@@ -582,9 +583,7 @@ def run(
     evaluator = SpacyEvaluator(config.search.search_relevancy_threshold)
     handler = QueryOutputHandler(config.path.query_data_dir)
 
-    response_generator = ResponseGenerator(
-        environment, config, config.openai.azure_oai_chat_deployment_name
-    )
+    response_generator = get_response_generator(config.llm.chat_llm, environment)
     for index_config in config.index.flatten():
         index_name = index_config.index_name()
         logger.info(f"Processing index: {index_name}")
diff --git a/rag_experiment_accelerator/run/tests/test_index.py b/rag_experiment_accelerator/run/tests/test_index.py
index 33319f56..88687b60 100644
--- a/rag_experiment_accelerator/run/tests/test_index.py
+++ b/rag_experiment_accelerator/run/tests/test_index.py
@@ -8,7 +8,7 @@
 )
 from rag_experiment_accelerator.config.index_config import IndexConfig
 from rag_experiment_accelerator.config.language_config import LanguageConfig
-from rag_experiment_accelerator.config.openai_config import OpenAIConfig
+from rag_experiment_accelerator.config.llm_config import LLMConfig, BaseLLMConfig
 from rag_experiment_accelerator.config.query_expansion import QueryExpansionConfig
 from rag_experiment_accelerator.config.sampling_config import SamplingConfig
 from rag_experiment_accelerator.run.index import run
@@ -81,8 +81,10 @@ def test_run(
         spec=QueryExpansionConfig, query_expansion=False
     )
 
-    mock_config.openai = MagicMock(spec=OpenAIConfig)
-    mock_config.openai.azure_oai_chat_deployment_name = "test-deployment"
+    mock_config.llm = MagicMock(spec=LLMConfig)
+    mock_config.llm.chat_llm = MagicMock(spec=BaseLLMConfig)
+    mock_config.llm.chat_llm.model_name = "model_name"
+    mock_config.llm.chat_llm.llm_type = "openai"
 
     mock_environment.azure_search_service_endpoint = "service_endpoint"
     mock_environment.azure_search_admin_key = "admin_key"
diff --git a/rag_experiment_accelerator/run/tests/test_qa_generation.py b/rag_experiment_accelerator/run/tests/test_qa_generation.py
index 99e1fee4..9c6f4be0 100644
--- a/rag_experiment_accelerator/run/tests/test_qa_generation.py
+++ b/rag_experiment_accelerator/run/tests/test_qa_generation.py
@@ -52,7 +52,7 @@ def test_run(
         mock_environment,
         mock_config,
         all_docs_instance,
-        mock_config.openai.azure_oai_chat_deployment_name,
+        mock_config.llm.chat_llm.model_name,
     )
     df_instance.to_json.assert_called_once_with(
         mock_config.path.eval_data_file, orient="records", lines=True
diff --git a/rag_experiment_accelerator/run/tests/test_querying.py b/rag_experiment_accelerator/run/tests/test_querying.py
index 66f20cb8..08ed89c5 100644
--- a/rag_experiment_accelerator/run/tests/test_querying.py
+++ b/rag_experiment_accelerator/run/tests/test_querying.py
@@ -4,7 +4,7 @@
 from azure.search.documents import SearchClient
 from rag_experiment_accelerator.checkpoint import init_checkpoint
 from rag_experiment_accelerator.config.chunking_config import ChunkingConfig
-from rag_experiment_accelerator.config.openai_config import OpenAIConfig
+from rag_experiment_accelerator.config.llm_config import LLMConfig
 from rag_experiment_accelerator.config.path_config import PathConfig
 from rag_experiment_accelerator.config.query_expansion import QueryExpansionConfig
 from rag_experiment_accelerator.config.rerank_config import RerankConfig
@@ -49,8 +49,11 @@ def setUp(self):
         )
         self.mock_config.query_expansion.expand_to_multiple_questions = True
 
-        self.mock_config.openai = MagicMock(spec=OpenAIConfig)
-        self.mock_config.openai.azure_oai_chat_deployment_name = "test-deployment"
+        self.mock_config.llm = MagicMock(spec=LLMConfig)
+        self.mock_config.llm.chat_llm = MagicMock(spec=LLMConfig)
+        self.mock_config.llm.chat_llm.llm_type = "openai"
+        self.mock_config.llm.chat_llm.model_name = "test-deployment"
+        self.mock_config.llm.chat_llm.temperature = 0.0
 
         self.mock_config.rerank = MagicMock(spec=RerankConfig)
         self.mock_config.rerank.enabled = True
@@ -309,6 +312,7 @@ def test_query_and_eval_acs_multi_no_rerank(
@patch("rag_experiment_accelerator.run.querying.SpacyEvaluator") @patch("rag_experiment_accelerator.run.querying.QueryOutputHandler") @patch("rag_experiment_accelerator.run.querying.ResponseGenerator") + @patch("rag_experiment_accelerator.run.querying.get_response_generator") @patch("rag_experiment_accelerator.run.querying.QueryOutput") @patch("rag_experiment_accelerator.run.querying.do_we_need_multiple_questions") @patch("rag_experiment_accelerator.run.querying.query_and_eval_acs") @@ -318,6 +322,7 @@ def test_run_no_multi_no_rerank( mock_do_we_need_multiple_questions, mock_query_output, mock_response_generator, + mock_get_response_generator, mock_query_output_handler, mock_spacy_evaluator, mock_environment, diff --git a/requirements.txt b/requirements.txt index cb1fcb0c..7eca7d43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,6 +35,8 @@ sentence-transformers==3.1.1 spacy==3.7.6 textdistance==4.6.3 tiktoken==0.7.0 +torch==2.3.0 +transformers==4.40.1 tqdm==4.66.5 umap-learn==0.5.6 unstructured==0.15.13