
Commit 832518e

basic agentic flow
1 parent 68fa2fd commit 832518e

File tree

5 files changed: +73 additions, -2 deletions

- README.md
- ols/src/prompts/prompt_generator.py
- ols/src/query_helpers/docs_summarizer.py
- ols/src/tools/__init__.py
- ols/src/tools/func_def.py


README.md

Lines changed: 5 additions & 0 deletions
@@ -2,6 +2,11 @@
 
 **This is a mirror of [service repo](https://github.com/road-core/service.git). This repo will target to implement agentic flow. Currently in experimental stage.**
 
+- Dummy functions are used to define tools
+- Only works with GPT
+- Basic implementation
+
+---
 Road Core Service (RCS) is an AI powered assistant that runs on OpenShift
 and provides answers to product questions using backend LLM services. Currently
 [OpenAI](https://openai.com/), [Azure
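The bullets summarize what this commit adds: LangChain tools defined as dummy functions, wired into a tool-calling agent that currently only works with GPT-style chat models. A minimal, standalone sketch of that flow is shown below; the tool body, system message, and model name are illustrative assumptions, and it presumes `langchain` and `langchain-openai` are installed with `OPENAI_API_KEY` set (the real service builds its prompt and tools in the files diffed further down).

```python
# Standalone sketch of the tool-calling flow (not the service code).
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools import tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI


@tool
def get_pods(namespace: str) -> str:
    """Get pod names from a specific namespace (dummy data)."""
    return f"{namespace}_pod1"


prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an assistant for an OpenShift cluster."),
        ("human", "{query}"),
        # create_tool_calling_agent requires this placeholder; it holds the
        # intermediate tool calls and tool outputs between model turns.
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

llm = ChatOpenAI(model="gpt-4o-mini")  # illustrative model; tool calling needs a chat model that supports it
agent = create_tool_calling_agent(llm, [get_pods], prompt)
executor = AgentExecutor(agent=agent, tools=[get_pods], verbose=True)

result = executor.invoke({"query": "Which pods are in the lightspeed namespace?"})
print(result["output"])  # AgentExecutor returns the final answer under "output"
```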

ols/src/prompts/prompt_generator.py

Lines changed: 2 additions & 0 deletions
@@ -90,6 +90,7 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:
         prompt_message.append(MessagesPlaceholder("chat_history"))
 
         prompt_message.append(HumanMessagePromptTemplate.from_template("{query}"))
+        prompt_message.append(MessagesPlaceholder(variable_name="agent_scratchpad"))
         return ChatPromptTemplate.from_messages(prompt_message), llm_input_values
 
     def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:
@@ -111,6 +112,7 @@ def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:
         prompt_message = prompt_message + "\n{chat_history}"
 
         prompt_message = prompt_message + "\n<|user|>\n{query}\n<|assistant|>\n"
+        # prompt_message = prompt_message + "\n{agent_scratchpad}"
        return PromptTemplate.from_template(prompt_message), llm_input_values
 
     def generate_prompt(
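For GPT models the prompt gains a `MessagesPlaceholder("agent_scratchpad")`, the slot where `create_tool_calling_agent` records intermediate tool calls and tool results; for Granite the equivalent plain-text placeholder stays commented out, which is why the flow is GPT-only for now. A rough sketch of the resulting GPT prompt shape follows; the system text and example values are illustrative, not the service's real prompt.

```python
from langchain_core.messages import HumanMessage
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)

prompt_message = [
    SystemMessagePromptTemplate.from_template("You are a helpful assistant."),
    MessagesPlaceholder("chat_history"),
    HumanMessagePromptTemplate.from_template("{query}"),
    # New in this commit: the slot the tool-calling agent fills at runtime.
    MessagesPlaceholder(variable_name="agent_scratchpad"),
]
prompt = ChatPromptTemplate.from_messages(prompt_message)

# When the template is rendered outside the agent (e.g. for token counting,
# as docs_summarizer.py does below), the placeholder still needs a value,
# so an empty HumanMessage list stands in for it.
rendered = prompt.format(
    chat_history=[HumanMessage(content="ai: sample")],
    query="How much memory does the pod use?",
    agent_scratchpad=[HumanMessage(content="")],
)
print(rendered)
```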

ols/src/query_helpers/docs_summarizer.py

Lines changed: 28 additions & 2 deletions
@@ -5,6 +5,8 @@
 
 from langchain.chains import LLMChain
 from llama_index.core import VectorStoreIndex
+from langchain_core.messages import HumanMessage
+from langchain.agents import AgentExecutor, create_tool_calling_agent
 
 from ols import config
 from ols.app.metrics import TokenMetricUpdater
@@ -13,6 +15,7 @@
 from ols.constants import RAG_CONTENT_LIMIT, GenericLLMParameters
 from ols.src.prompts.prompt_generator import GeneratePrompt
 from ols.src.prompts.prompts import QUERY_SYSTEM_INSTRUCTION
+from ols.src.tools.func_def import tools
 from ols.src.query_helpers.query_helper import QueryHelper
 from ols.utils.token_handler import TokenHandler
 
@@ -90,6 +93,16 @@ def summarize(
         temp_prompt, temp_prompt_input = GeneratePrompt(
             query, ["sample"], ["ai: sample"], self._system_prompt
         ).generate_prompt(self.model)
+
+        temp_msg_placeholder = None
+        if "granite" in self.model:
+            # temp_msg_placeholder = ""
+            pass
+        else:
+            temp_msg_placeholder = [HumanMessage(content="")]
+
+        if temp_msg_placeholder is not None:
+            temp_prompt_input["agent_scratchpad"] = temp_msg_placeholder
         available_tokens = token_handler.calculate_and_check_available_tokens(
             temp_prompt.format(**temp_prompt_input),
             model_config.context_window_size,
@@ -119,6 +132,8 @@ def summarize(
         # Tokens-check: We trigger the computation of the token count
         # without care about the return value. This is to ensure that
         # the query is within the token limit.
+        if temp_msg_placeholder is not None:
+            llm_input_values["agent_scratchpad"] = temp_msg_placeholder
         token_handler.calculate_and_check_available_tokens(
             final_prompt.format(**llm_input_values),
             model_config.context_window_size,
@@ -131,18 +146,29 @@ def summarize(
             verbose=verbose,
         )
 
+        if "granite" in self.model:
+            model_engine = chat_engine
+        else:
+            agent = create_tool_calling_agent(bare_llm, tools, final_prompt)
+            model_engine = AgentExecutor(agent=agent, tools=tools, verbose=True)
+
         with TokenMetricUpdater(
             llm=bare_llm,
             provider=provider_config.type,
             model=self.model,
         ) as token_counter:
-            summary = chat_engine.invoke(
+            summary = model_engine.invoke(
+                # summary = agent_executor.invoke(
+                verbose=True,
                 input=llm_input_values,
                 config={"callbacks": [token_counter]},
             )
 
         # retrieve text response returned from LLM, strip whitespace characters from beginning/end
-        response = summary["text"].strip()
+        if "text" in summary:
+            response = summary["text"].strip()
+        else:
+            response = summary["output"].strip()
 
         if len(rag_context) == 0:
             logger.debug("Using llm to answer the query without reference content")
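The summarizer now picks its engine per model: Granite keeps the plain `LLMChain` (`chat_engine`), while GPT models get an `AgentExecutor` built from the tools and the final prompt. The two engines also return their result under different keys, which is what the `"text"`/`"output"` branch handles. A small self-contained sketch of that behaviour, with dummy dictionaries standing in for the real invoke results:

```python
# LLMChain.invoke returns its completion under the key "text", while
# AgentExecutor.invoke returns the final answer under "output".
summary_from_chain = {"text": " plain completion "}      # LLMChain (granite path)
summary_from_agent = {"output": " agent final answer "}  # AgentExecutor (GPT path)


def extract_response(summary: dict) -> str:
    """Pick the response key depending on which engine produced the result."""
    if "text" in summary:
        return summary["text"].strip()
    return summary["output"].strip()


print(extract_response(summary_from_chain))  # -> plain completion
print(extract_response(summary_from_agent))  # -> agent final answer
```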

ols/src/tools/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+"""Functions/Tools definition."""

ols/src/tools/func_def.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+"""Functions/Tools definition."""
+
+from typing import Optional
+
+from langchain.tools import tool
+
+
+# Using dummy functions for experimentation
+@tool
+def get_pods(namespace: str) -> str:
+    """Get pod names from specific namespace."""
+    if namespace == "lightspeed":
+        return f"{namespace}_pod1"
+    return "I don't have information"
+
+
+# @tool
+# def get_pods_memory(namespace: str = None, pod: str = None) -> float:
+#     """Get memory usage by namespace."""
+#     if pod:
+#         pass
+#     elif namespace == "lightspeed":
+#         pod = get_pods(namespace)
+#     else:
+#         return "I don't have information"
+#     return 2 * len(pod)
+
+
+@tool
+def get_pods_memory(pod: Optional[str] = None) -> float:
+    """Get memory usage by namespace."""
+    if pod:
+        return 2 * len(pod)
+    return "I don't have information"
+
+
+tools = [get_pods, get_pods_memory]
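The `@tool` decorator wraps each dummy function in a LangChain tool object, taking its name and description from the function name and docstring, so the tools are invoked with a dict of arguments rather than called directly. A quick interactive check of these tools in isolation (assumes the `ols` package is importable, e.g. running from the repo root):

```python
from ols.src.tools.func_def import get_pods, get_pods_memory, tools

print([t.name for t in tools])                       # ['get_pods', 'get_pods_memory']
print(get_pods.invoke({"namespace": "lightspeed"}))  # lightspeed_pod1
print(get_pods.invoke({"namespace": "other"}))       # I don't have information
print(get_pods_memory.invoke({"pod": "lightspeed_pod1"}))  # 30 (2 * len(pod))
```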
