5 changes: 5 additions & 0 deletions README.md
@@ -2,6 +2,11 @@

**This is a mirror of the [service repo](https://github.com/road-core/service.git). This repo targets an agentic-flow implementation and is currently at an experimental stage.**

- Tools are defined as dummy functions (for experimentation)
- Currently works only with GPT models
- Basic implementation

---
Road Core Service (RCS) is an AI-powered assistant that runs on OpenShift
and provides answers to product questions using backend LLM services. Currently
[OpenAI](https://openai.com/), [Azure
2 changes: 2 additions & 0 deletions ols/src/prompts/prompt_generator.py
@@ -90,6 +90,7 @@ def _generate_prompt_gpt(self) -> tuple[ChatPromptTemplate, dict]:
prompt_message.append(MessagesPlaceholder("chat_history"))

prompt_message.append(HumanMessagePromptTemplate.from_template("{query}"))
prompt_message.append(MessagesPlaceholder(variable_name="agent_scratchpad"))
return ChatPromptTemplate.from_messages(prompt_message), llm_input_values

def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:
@@ -111,6 +112,7 @@ def _generate_prompt_granite(self) -> tuple[PromptTemplate, dict]:
prompt_message = prompt_message + "\n{chat_history}"

prompt_message = prompt_message + "\n<|user|>\n{query}\n<|assistant|>\n"
# prompt_message = prompt_message + "\n{agent_scratchpad}"
return PromptTemplate.from_template(prompt_message), llm_input_values

def generate_prompt(
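For context, LangChain's tool-calling agents fill the `agent_scratchpad` placeholder with their intermediate tool calls and results, which is why `_generate_prompt_gpt` now appends it after the user query. A minimal standalone sketch of the resulting prompt shape, with an empty scratchpad supplied so the prompt can still be formatted for token counting (the system prompt text and query below are illustrative, not taken from the repo):

```python
from langchain_core.messages import HumanMessage
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)

# Same ordering as _generate_prompt_gpt: system prompt, prior turns,
# the current query, then the agent scratchpad.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template("You are a helpful assistant."),
        MessagesPlaceholder("chat_history"),
        HumanMessagePromptTemplate.from_template("{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# The scratchpad must still be supplied when the prompt is formatted outside
# the agent (e.g. for token counting), hence the empty HumanMessage.
formatted = prompt.format(
    chat_history=[],
    query="List pods in the lightspeed namespace",
    agent_scratchpad=[HumanMessage(content="")],
)
print(formatted)
```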
29 changes: 27 additions & 2 deletions ols/src/query_helpers/docs_summarizer.py
@@ -3,7 +3,9 @@
import logging
from typing import Any, Optional

from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.chains import LLMChain
from langchain_core.messages import HumanMessage
from llama_index.core import VectorStoreIndex

from ols import config
@@ -14,6 +16,7 @@
from ols.src.prompts.prompt_generator import GeneratePrompt
from ols.src.prompts.prompts import QUERY_SYSTEM_INSTRUCTION
from ols.src.query_helpers.query_helper import QueryHelper
from ols.src.tools.func_def import tools
from ols.utils.token_handler import TokenHandler

logger = logging.getLogger(__name__)
@@ -90,6 +93,16 @@ def summarize(
temp_prompt, temp_prompt_input = GeneratePrompt(
query, ["sample"], ["ai: sample"], self._system_prompt
).generate_prompt(self.model)

temp_msg_placeholder = None
if self.model and ("granite" in self.model):
    # Granite uses a plain string template with no agent scratchpad slot.
    # temp_msg_placeholder = ""
    pass
else:
    # The tool-calling (GPT) prompt expects an agent_scratchpad value; an
    # empty message lets the prompt be formatted for token counting.
    temp_msg_placeholder = [HumanMessage(content="")]

if temp_msg_placeholder is not None:
    temp_prompt_input["agent_scratchpad"] = temp_msg_placeholder
available_tokens = token_handler.calculate_and_check_available_tokens(
temp_prompt.format(**temp_prompt_input),
model_config.context_window_size,
@@ -119,6 +132,8 @@
# Tokens-check: We trigger the computation of the token count
# without care about the return value. This is to ensure that
# the query is within the token limit.
if temp_msg_placeholder is not None:
    llm_input_values["agent_scratchpad"] = temp_msg_placeholder
token_handler.calculate_and_check_available_tokens(
final_prompt.format(**llm_input_values),
model_config.context_window_size,
@@ -131,18 +146,28 @@
verbose=verbose,
)

if self.model and ("granite" in self.model):
    # Granite: keep using the plain LLM chain.
    model_engine = chat_engine
else:
    # GPT: wrap the LLM in a tool-calling agent backed by the dummy tools.
    agent = create_tool_calling_agent(bare_llm, tools, final_prompt)
    model_engine = AgentExecutor(agent=agent, tools=tools, verbose=True)

with TokenMetricUpdater(
llm=bare_llm,
provider=provider_config.type,
model=self.model,
) as token_counter:
summary = model_engine.invoke(
verbose=True,
input=llm_input_values,
config={"callbacks": [token_counter]},
)

# Retrieve the text response returned from the LLM and strip whitespace from
# beginning/end; LLMChain returns it under "text", AgentExecutor under "output".
if "text" in summary:
    response = summary["text"].strip()
else:
    response = summary["output"].strip()

if len(rag_context) == 0:
    logger.debug("Using llm to answer the query without reference content")
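For orientation, here is a minimal, self-contained sketch of the non-Granite path that `summarize` now wires up, assuming `langchain_openai` is installed; the model name, system prompt, and query are illustrative and not taken from the service configuration:

```python
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI  # assumed available; any tool-calling chat model works

from ols.src.tools.func_def import tools

# The prompt must expose an agent_scratchpad placeholder for the agent's
# intermediate tool calls and tool results.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        ("human", "{query}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

llm = ChatOpenAI(model="gpt-4o-mini")  # illustrative model choice
agent = create_tool_calling_agent(llm, tools, prompt)
executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# AgentExecutor returns its answer under the "output" key, which is why
# docs_summarizer branches on "text" vs "output" above.
result = executor.invoke({"query": "How much memory does lightspeed_pod1 use?"})
print(result["output"])
```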
1 change: 1 addition & 0 deletions ols/src/tools/__init__.py
@@ -0,0 +1 @@
"""Functions/Tools definition."""
37 changes: 37 additions & 0 deletions ols/src/tools/func_def.py
@@ -0,0 +1,37 @@
"""Functions/Tools definition."""

from typing import Optional, Union

from langchain.tools import tool


# Using dummy functions for experimentation
@tool
def get_pods(namespace: str) -> str:
    """Get pod names from a specific namespace."""
    if namespace == "lightspeed":
        return f"{namespace}_pod1"
    return "I don't have information"


# @tool
# def get_pods_memory(namespace: str = None, pod: str = None) -> float:
# """Get memory usage by namespace."""
# if pod:
# pass
# elif namespace == "lightspeed":
# pod = get_pods(namespace)
# else:
# return "I don't have information"
# return 2 * len(pod)


@tool
def get_pods_memory(pod: Optional[str] = None) -> Union[float, str]:
    """Get memory usage for a specific pod."""
    if pod:
        return 2 * len(pod)
    return "I don't have information"


tools = [get_pods, get_pods_memory]
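As a quick sanity check, the dummy tools can also be invoked directly, outside the agent; a brief usage sketch, assuming the module is importable (expected outputs shown as comments):

```python
from ols.src.tools.func_def import get_pods, get_pods_memory, tools

# LangChain tools are invoked with a dict of their arguments.
print(get_pods.invoke({"namespace": "lightspeed"}))        # -> "lightspeed_pod1"
print(get_pods.invoke({"namespace": "other"}))             # -> "I don't have information"
print(get_pods_memory.invoke({"pod": "lightspeed_pod1"}))  # -> 30 (2 * len of the pod name)

# This is the same list that docs_summarizer passes to create_tool_calling_agent.
print([t.name for t in tools])  # -> ['get_pods', 'get_pods_memory']
```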