Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ data/
# Workspace
workspace/

config/

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@
},
"files.insertFinalNewline": true,
"files.trimTrailingWhitespace": true,
"editor.formatOnSave": true
"editor.formatOnSave": true,
"liveServer.settings.port": 5501
}
52 changes: 50 additions & 2 deletions app/agent/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,23 @@
from app.config import config
from app.prompt.visualization import NEXT_STEP_PROMPT, SYSTEM_PROMPT
from app.tool import Terminate, ToolCollection
from app.tool.chart_visualization.python_execute import NormalPythonExecute
# from app.tool.chart_visualization.chart_prepare import VisualizationPrepare
# from app.tool.chart_visualization.data_visualization import DataVisualization
# from app.tool.chart_visualization.initial_report_generation import GenerateInitialReport
# from app.tool.chart_visualization.final_report_generation import GenerateFinalReport
# from app.tool.chart_visualization.search_report_template import SearchReportTemplate
# from app.tool.chart_visualization.report_template_generation import ReportTemplateGeneration
# from app.tool.chart_visualization.initial_information_collection import InitialInformationCollection
from app.tool.chart_visualization.chart_prepare import VisualizationPrepare
from app.tool.chart_visualization.select_insights import SelectInsights
from app.tool.chart_visualization.add_insights import AddInsights
from app.tool.chart_visualization.data_visualization import DataVisualization
from app.tool.chart_visualization.python_execute import NormalPythonExecute
from app.tool.chart_visualization.v2.search_html_library import SearchHtmlLibrary
from app.tool.chart_visualization.v2.initial_report_generation import GenerateInitialReport
from app.tool.chart_visualization.v2.report_template_generation import ReportTemplateGeneration
from app.tool.chart_visualization.v2.final_report_generation import GenerateFinalReport
from app.tool.chart_visualization.v2.report_beautify import ReportBeautify


class DataAnalysis(ToolCallAgent):
Expand All @@ -18,7 +32,34 @@ class DataAnalysis(ToolCallAgent):
"""

name: str = "Data_Analysis"
description: str = "An analytical agent that utilizes python and data visualization tools to solve diverse data analysis tasks"
description: str = """
A data science agent specializing in Python-based analytics and advanced visualization techniques
for solving complex data analysis challenges.

Standard Report Generation Workflow:
1. Template Preparation:
- SearchHtmlLibrary: Identify suitable visualization templates
- ReportTemplateGeneration & GenerateInitialReport: Create initial report structure

2. Visualization Pipeline:
- VisualizationPrepare: Configure data for visualization
- DataVisualization: Generate interactive charts and graphs

3. Insight Enhancement:
- SelectInsights: Extract key findings from visualizations
- AddInsights: Annotate charts with analytical insights

4. Report Finalization:
- GenerateFinalReport: Replace the placeholders with charts
- ReportBeautify: Apply professional styling and formatting

Operational Protocol:
- First determine optimal visualization types based on dataset characteristics
- Utilize HTML template library to establish report framework
- Execute visualization pipeline to create data representations
- Enhance each chart with key insights you selected
- Assemble final report by embedding enriched visualizations
"""

system_prompt: str = SYSTEM_PROMPT.format(directory=config.workspace_root)
next_step_prompt: str = NEXT_STEP_PROMPT
Expand All @@ -30,8 +71,15 @@ class DataAnalysis(ToolCallAgent):
available_tools: ToolCollection = Field(
default_factory=lambda: ToolCollection(
NormalPythonExecute(),
SearchHtmlLibrary(),
ReportTemplateGeneration(),
GenerateInitialReport(),
GenerateFinalReport(),
ReportBeautify(),
AddInsights(),
VisualizationPrepare(),
DataVisualization(),
SelectInsights(),
Terminate(),
)
)
28 changes: 27 additions & 1 deletion app/prompt/visualization.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,33 @@
SYSTEM_PROMPT = """You are an AI agent designed to data analysis / visualization task. You have various tools at your disposal that you can call upon to efficiently complete complex requests.
# Note:
1. The workspace directory is: {directory}; Read / write file in workspace
2. Generate analysis conclusion report in the end"""
2. Generate analysis conclusion report in the end

Standard Report Generation Workflow:
1. Template Preparation:
- SearchHtmlLibrary: Identify suitable visualization templates
- ReportTemplateGeneration & GenerateInitialReport: Create initial report structure

2. Visualization Pipeline:
- VisualizationPrepare: Configure data for visualization
- DataVisualization: Generate interactive charts and graphs

3. Insight Enhancement:
- SelectInsights: Extract key findings from visualizations
- AddInsights: Annotate charts with analytical insights

4. Report Finalization:
- GenerateFinalReport: Replace the placeholders with charts
- ReportBeautify: Apply professional styling and formatting

Operational Protocol:
- First determine optimal visualization types based on dataset characteristics
- Utilize HTML template library to establish report framework
- Execute visualization pipeline to create data representations
- Enhance each chart with key insights you selected
- Assemble final report by embedding enriched visualizations

"""

NEXT_STEP_PROMPT = """Based on user needs, break down the problem and use different tools step by step to solve it.
# Note
Expand Down
3 changes: 0 additions & 3 deletions app/tool/chart_visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
from app.tool.chart_visualization.chart_prepare import VisualizationPrepare
from app.tool.chart_visualization.data_visualization import DataVisualization
from app.tool.chart_visualization.python_execute import NormalPythonExecute


__all__ = ["DataVisualization", "VisualizationPrepare", "NormalPythonExecute"]
228 changes: 228 additions & 0 deletions app/tool/chart_visualization/add_insights.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import sys
import asyncio
import json
import os
# Walk up from this file to the directory three levels above its own folder
# (for app/tool/chart_visualization/add_insights.py that is the parent of
# `app`) and append it to sys.path — presumably so the `app.*` imports below
# resolve when this module is run directly; verify against how it is launched.
# NOTE(review): the print looks like a debugging leftover that fires on every
# import — confirm whether it can be removed.
_this_dir = os.path.dirname(os.path.abspath(__file__))
_project_root = os.path.dirname(os.path.dirname(os.path.dirname(_this_dir)))
print(_project_root)
sys.path.append(_project_root)

from typing import Any, Hashable

import pandas as pd
from pydantic import Field, model_validator

from app.config import config
from app.llm import LLM
from app.logger import logger
from app.tool.base import BaseTool


# Tool that annotates already-generated charts with selected insights.
# It reads a JSON file describing which insight ids belong to which chart and
# asks the chartVisualize.ts script (see invoke_vmind) to render them.
class AddInsights(BaseTool):
    # Tool identifier; initialize_llm lower-cases it to build the LLM config name.
    name: str = "add_insights"
    # Human/LLM-readable summary of what the tool does.
    description: str = (
        "Enhances charts by adding insights markers and annotations "
        "using JSON data generated by the insights_selection tool. "
        "This creates the final annotated visualization output."
    )

    # JSON-schema style description of the arguments accepted by execute().
    # Only `json_path` is required; `output_type` defaults to "html".
    parameters: dict = {
        "type": "object",
        "properties": {
            "json_path": {
                "type": "string",
                "description": """Path to the JSON file generated by insights_selection tool.
Contains chart insights data in format:
{
"chartPath": string,
"insights_id": number[]
}""",
            },
            "output_type": {
                "type": "string",
                "description": "Visualization output format selection",
                "default": "html",
                "enum": [
                    "png",  # Static image format
                    "html"  # Interactive web format (recommended)
                ],
            },
        },
        "required": ["json_path"],
    }
    # LLM whose base_url / model / api_key are forwarded to the chart script.
    llm: LLM = Field(default_factory=LLM, description="Language model instance")

@model_validator(mode="after")
def initialize_llm(self):
    """Guarantee that ``self.llm`` holds an ``LLM`` instance.

    Runs as an "after" model validator. When the current value is missing or
    is not an ``LLM``, a new one is created using the lower-cased tool name
    as its ``config_name``.

    Returns:
        The validated instance itself.
    """
    # A None value is not an LLM instance either, so one check covers both.
    if isinstance(self.llm, LLM):
        return self
    self.llm = LLM(config_name=self.name.lower())
    return self

def load_chart_with_css(self, chart_path):
    """Rewrite an HTML chart file in place so the chart fills the page.

    Two transformations are applied to the file content:

    * every backtick is replaced with a single quote;
    * a ``<style>`` block that zeroes page margins/padding, hides overflow
      and sizes ``#chart-container`` to 100% width and height is inserted
      right after the first ``<head>`` tag, or prepended to the document
      when it contains no ``<head>`` tag.

    The style block is inserted at most once: only the first ``<head>``
    occurrence is touched (the literal text may appear again inside
    scripts), and a file that already contains the block is not given a
    second copy when the method is called again.

    Args:
        chart_path: Path of the HTML file to modify in place.
    """
    # Read the HTML file.
    with open(chart_path, "r", encoding="utf-8") as f:
        html_content = f.read()
    html_content = html_content.replace("`", "'")

    # CSS to be inserted into <head>.
    css = """
    <style>
        body, html {
            margin: 0;
            padding: 0;
            height: 100%;
            overflow: hidden;
        }
        #chart-container {
            width: 100%;
            height: 100%;
        }
    </style>
    """

    # Skip the injection when an earlier call already added the block.
    if css not in html_content:
        if "<head>" in html_content:
            # count=1: only the real (first) <head> tag gets the styles.
            html_content = html_content.replace("<head>", "<head>" + css, 1)
        else:
            # No <head> in the original file: put the styles at the very top.
            html_content = css + html_content

    with open(chart_path, "w", encoding="utf-8") as f:
        f.write(html_content)

def get_file_path(
self,
json_info: list[dict[str, str]],
path_str: str,
directory: str = None,
) -> list[str]:
res = []
for item in json_info:
if os.path.exists(item[path_str]):
res.append(item[path_str])
elif os.path.exists(
os.path.join(f"{directory or config.workspace_root}", item[path_str])
):
res.append(
os.path.join(
f"{directory or config.workspace_root}", item[path_str]
)
)
else:
raise Exception(f"No such file or directory: {item[path_str]}")
return res

async def add_insights(
    self, json_info: list[dict[str, str]], output_type: str
) -> dict:
    """Render the selected insights onto each chart listed in ``json_info``.

    Chart paths are resolved through ``get_file_path`` (falling back to the
    ``visualization`` folder of the workspace). Every entry that carries an
    ``insights_id`` list is sent to ``invoke_vmind`` concurrently; entries
    without one are skipped.

    Args:
        json_info: Entries with a ``chartPath`` and, optionally, an
            ``insights_id`` list.
        output_type: Output format; its ``.<output_type>`` suffix is removed
            from the chart file name before it is passed to the script.

    Returns:
        A dict with an ``observation`` message. When at least one chart
        failed, the message lists the errors and ``success`` is ``False``.
    """
    chart_file_path = self.get_file_path(
        json_info, "chartPath", os.path.join(config.workspace_root, "visualization")
    )
    # Keep every chart path next to its insight ids. Entries without
    # "insights_id" are dropped here, so results must be matched against
    # this filtered list, not against chart_file_path by position.
    pending = [
        (chart_path, item["insights_id"])
        for chart_path, item in zip(chart_file_path, json_info)
        if "insights_id" in item
    ]
    results = await asyncio.gather(
        *(
            self.invoke_vmind(
                insights_id=insights_id,
                file_name=os.path.basename(chart_path).replace(
                    f".{output_type}", ""
                ),
                output_type=output_type,
                task_type="insight",
            )
            for chart_path, insights_id in pending
        )
    )

    error_list = []
    success_list = []
    for (chart_path, _), result in zip(pending, results):
        # A reply counts as a failure only when it reports an error and
        # carries no chart path.
        if "error" in result and "chart_path" not in result:
            error_list.append(f"Error in {chart_path}: {result['error']}")
            continue
        success_list.append(chart_path)
        # The CSS post-processing rewrites the file as UTF-8 text, so it is
        # only applied to HTML charts (a PNG would fail to decode).
        if os.path.splitext(chart_path)[1].lower() in (".html", ".htm"):
            self.load_chart_with_css(chart_path)

    success_template = (
        f"# Charts Update with Insights\n{','.join(success_list)}"
        if success_list
        else ""
    )
    if error_list:
        # Joined outside the f-string: a backslash inside an f-string
        # expression is a syntax error before Python 3.12.
        joined_errors = "\n".join(error_list)
        return {
            "observation": f"# Error in chart insights:{joined_errors}\n{success_template}",
            "success": False,
        }
    return {"observation": f"{success_template}"}

async def execute(
    self,
    json_path: str,
    output_type: str | None = "html",
    tool_type: str | None = "visualization",
    language: str | None = "en",
) -> dict:
    """Load the insights JSON file and annotate the charts it references.

    Args:
        json_path: Path to the insights JSON file. If it does not exist
            as given, it is also looked up under ``config.workspace_root``.
        output_type: Chart output format; ``None`` is treated as ``"html"``.
        tool_type: Only used in the log message.
        language: Accepted but not used by this method.
            NOTE(review): it is not forwarded to ``invoke_vmind``, which has
            its own ``language`` parameter — confirm whether it should be.

    Returns:
        The dict produced by ``add_insights``, or a dict with an error
        ``observation`` and ``success`` set to ``False`` if anything raised.
    """
    try:
        logger.info(f"📈 data_visualization with {json_path} in: {tool_type} ")
        # Same fallback as chart paths get in get_file_path: try the path
        # as-is first, then relative to the workspace.
        if not os.path.exists(json_path):
            candidate = os.path.join(str(config.workspace_root), json_path)
            if os.path.exists(candidate):
                json_path = candidate
        with open(json_path, "r", encoding="utf-8") as file:
            json_info = json.load(file)
        # The documented file format is a single object, while add_insights
        # iterates a list of objects; accept both.
        if isinstance(json_info, dict):
            json_info = [json_info]
        return await self.add_insights(json_info, output_type or "html")
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising.
        return {
            "observation": f"Error: {e}",
            "success": False,
        }

async def invoke_vmind(
    self,
    file_name: str,
    output_type: str,
    task_type: str,
    insights_id: list[str] | None = None,
    dict_data: list[dict[Hashable, Any]] | None = None,
    chart_description: str | None = None,
    language: str = "en",
):
    """Run ``src/chartVisualize.ts`` through ``npx ts-node`` and return its reply.

    The request (LLM connection settings, chart parameters and the workspace
    directory) is serialised to JSON and written to the subprocess' stdin.
    The subprocess runs with this file's directory as working directory.

    Args:
        file_name: Chart file name passed to the script.
        output_type: Requested output format.
        task_type: Task selector passed to the script.
        insights_id: Insight ids to render, if any.
        dict_data: Dataset rows, if any.
        chart_description: Sent to the script as ``user_prompt``.
        language: Language code passed to the script.

    Returns:
        The JSON object printed by the script on success. On any failure
        (process could not be started, non-zero exit code, undecodable or
        non-JSON output) a dict with a single ``error`` message.
    """
    llm_config = {
        "base_url": self.llm.base_url,
        "model": self.llm.model,
        "api_key": self.llm.api_key,
    }
    vmind_params = {
        "llm_config": llm_config,
        "user_prompt": chart_description,
        "dataset": dict_data,
        "file_name": file_name,
        "output_type": output_type,
        "insights_id": insights_id,
        "task_type": task_type,
        "directory": str(config.workspace_root),
        "language": language,
    }
    # Encode before spawning so a serialisation failure cannot leave a
    # started process behind.
    input_json = json.dumps(vmind_params, ensure_ascii=False).encode("utf-8")

    try:
        # Creating the process is inside the try as well: a missing `npx`
        # should yield the same error dict as any other subprocess failure.
        process = await asyncio.create_subprocess_exec(
            "npx",
            "ts-node",
            "src/chartVisualize.ts",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=os.path.dirname(__file__),
        )
        stdout, stderr = await process.communicate(input_json)
        stdout_str = stdout.decode("utf-8")
        stderr_str = stderr.decode("utf-8")
        if process.returncode == 0:
            return json.loads(stdout_str)
        return {"error": f"Node.js Error: {stderr_str}"}
    except Exception as e:
        return {"error": f"Subprocess Error: {str(e)}"}

Loading