5 changes: 5 additions & 0 deletions README.md
@@ -2,6 +2,11 @@

**This is a mirror of the [service repo](https://github.com/road-core/service.git). This repo targets an agentic-flow implementation and is currently at an experimental stage.**

- Tools are defined as dummy functions (for experimentation)
- Currently works only with GPT models
- Basic implementation

---
Road Core Service (RCS) is an AI-powered assistant that runs on OpenShift
and provides answers to product questions using backend LLM services. Currently
[OpenAI](https://openai.com/), [Azure
2 changes: 2 additions & 0 deletions ols/src/prompts/prompt_generator.py
@@ -90,6 +90,7 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:
prompt_message.append(MessagesPlaceholder("chat_history"))

prompt_message.append(HumanMessagePromptTemplate.from_template("{query}"))
prompt_message.append(MessagesPlaceholder(variable_name="agent_scratchpad"))
return ChatPromptTemplate.from_messages(prompt_message), llm_input_values

def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:
@@ -111,6 +112,7 @@ def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:
prompt_message = prompt_message + "\n{chat_history}"

prompt_message = prompt_message + "\n<|user|>\n{query}\n<|assistant|>\n"
# prompt_message = prompt_message + "\n{agent_scratchpad}"
return PromptTemplate.from_template(prompt_message), llm_input_values

def generate_prompt(
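For context, LangChain's tool-calling agents fill the `agent_scratchpad` placeholder with their intermediate tool calls and results, which is why `_generate_prompt_gpt` now appends it after the user query. A minimal standalone sketch of the resulting prompt shape, with an empty scratchpad supplied so the prompt can still be formatted for token counting (the system prompt text and query below are illustrative, not taken from the repo):

```python
from langchain_core.messages import HumanMessage
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)

# Same ordering as _generate_prompt_gpt: system prompt, prior turns,
# the current query, then the agent scratchpad.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template("You are a helpful assistant."),
        MessagesPlaceholder("chat_history"),
        HumanMessagePromptTemplate.from_template("{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# The scratchpad must still be supplied when the prompt is formatted outside
# the agent (e.g. for token counting), hence the empty HumanMessage.
formatted = prompt.format(
    chat_history=[],
    query="List pods in the lightspeed namespace",
    agent_scratchpad=[HumanMessage(content="")],
)
print(formatted)
```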
29 changes: 27 additions & 2 deletions ols/src/query_helpers/docs_summarizer.py
@@ -3,7 +3,9 @@
import logging
from typing import Any, Optional

from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.chains import LLMChain
from langchain_core.messages import HumanMessage
from llama_index.core import VectorStoreIndex

from ols import config
@@ -14,6 +16,7 @@
from ols.src.prompts.prompt_generator import GeneratePrompt
from ols.src.prompts.prompts import QUERY_SYSTEM_INSTRUCTION
from ols.src.query_helpers.query_helper import QueryHelper
from ols.src.tools.func_def import tools
from ols.utils.token_handler import TokenHandler

logger = logging.getLogger(__name__)
@@ -90,6 +93,16 @@ def summarize(
temp_prompt, temp_prompt_input = GeneratePrompt(
query, ["sample"], ["ai: sample"], self._system_prompt
).generate_prompt(self.model)

temp_msg_placeholder = None
if self.model and ("granite" in self.model):
    # Granite uses a plain string template with no agent scratchpad slot.
    # temp_msg_placeholder = ""
    pass
else:
    # The tool-calling (GPT) prompt expects an agent_scratchpad value; an
    # empty message lets the prompt be formatted for token counting.
    temp_msg_placeholder = [HumanMessage(content="")]

if temp_msg_placeholder is not None:
    temp_prompt_input["agent_scratchpad"] = temp_msg_placeholder
available_tokens = token_handler.calculate_and_check_available_tokens(
temp_prompt.format(**temp_prompt_input),
model_config.context_window_size,
@@ -119,6 +132,8 @@
# Tokens-check: We trigger the computation of the token count
# without care about the return value. This is to ensure that
# the query is within the token limit.
if temp_msg_placeholder is not None:
    llm_input_values["agent_scratchpad"] = temp_msg_placeholder
token_handler.calculate_and_check_available_tokens(
final_prompt.format(**llm_input_values),
model_config.context_window_size,
@@ -131,18 +146,28 @@
verbose=verbose,
)

if self.model and ("granite" in self.model):
    # Granite: keep using the plain LLM chain.
    model_engine = chat_engine
else:
    # GPT: wrap the LLM in a tool-calling agent backed by the dummy tools.
    agent = create_tool_calling_agent(bare_llm, tools, final_prompt)
    model_engine = AgentExecutor(agent=agent, tools=tools, verbose=True)

with TokenMetricUpdater(
llm=bare_llm,
provider=provider_config.type,
model=self.model,
) as token_counter:
summary = model_engine.invoke(
verbose=True,
input=llm_input_values,
config={"callbacks": [token_counter]},
)

# Retrieve the text response returned from the LLM and strip whitespace from
# beginning/end; LLMChain returns it under "text", AgentExecutor under "output".
if "text" in summary:
    response = summary["text"].strip()
else:
    response = summary["output"].strip()

if len(rag_context) == 0:
    logger.debug("Using llm to answer the query without reference content")
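For orientation, here is a minimal, self-contained sketch of the non-Granite path that `summarize` now wires up, assuming `langchain_openai` is installed; the model name, system prompt, and query are illustrative and not taken from the service configuration:

```python
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI  # assumed available; any tool-calling chat model works

from ols.src.tools.func_def import tools

# The prompt must expose an agent_scratchpad placeholder for the agent's
# intermediate tool calls and tool results.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human", "{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

llm = ChatOpenAI(model="gpt-4o-mini")  # illustrative model choice
agent = create_tool_calling_agent(llm, tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# AgentExecutor returns its answer under the "output" key, which is why
# docs_summarizer branches on "text" vs "output" above.
result = executor.invoke({"query": "How much memory does lightspeed_pod1 use?"})
print(result["output"])
```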
1 change: 1 addition & 0 deletions ols/src/tools/__init__.py
@@ -0,0 +1 @@
"""Functions/Tools definition."""
37 changes: 37 additions & 0 deletions ols/src/tools/func_def.py
@@ -0,0 +1,37 @@
"""Functions/Tools definition."""

from typing import Optional, Union

from langchain.tools import tool


# Using dummy functions for experimentation
@tool
def get_pods(namespace: str) -> str:
    """Get pod names from a specific namespace."""
    if namespace == "lightspeed":
        return f"{namespace}_pod1"
    return "I don't have information"


# @tool
# def get_pods_memory(namespace: str = None, pod: str = None) -> float:
# """Get memory usage by namespace."""
# if pod:
# pass
# elif namespace == "lightspeed":
# pod = get_pods(namespace)
# else:
# return "I don't have information"
# return 2 * len(pod)


@tool
def get_pods_memory(pod: Optional[str] = None) -> Union[float, str]:
    """Get memory usage for a specific pod."""
    if pod:
        return 2 * len(pod)
    return "I don't have information"


tools = [get_pods, get_pods_memory]
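As a quick sanity check, the dummy tools can also be invoked directly, outside the agent; a brief usage sketch, assuming the module is importable (expected outputs shown as comments):

```python
from ols.src.tools.func_def import get_pods, get_pods_memory, tools

# LangChain tools are invoked with a dict of their arguments.
print(get_pods.invoke({"namespace": "lightspeed"}))        # -> "lightspeed_pod1"
print(get_pods.invoke({"namespace": "other"}))             # -> "I don't have information"
print(get_pods_memory.invoke({"pod": "lightspeed_pod1"}))  # -> 30 (2 * len of the pod name)

# This is the same list that docs_summarizer passes to create_tool_calling_agent.
print([t.name for t in tools])  # -> ['get_pods', 'get_pods_memory']
```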