小小优化,不成敬意

This commit is contained in:
2026-03-09 10:21:33 +08:00
parent dc9e4bd0ef
commit 237c96f629
19 changed files with 243 additions and 322 deletions

View File

@@ -63,13 +63,23 @@ def run_analysis(
"""
Run the full AI-driven analysis pipeline.
Each run creates a timestamped subdirectory under output_dir:
output_dir/run_20260309_143025/
├── analysis_report.md
└── charts/
├── bar_chart.png
└── ...
Args:
data_file: Path to any CSV file
user_requirement: Natural language requirement (optional)
template_file: Report template path (optional)
output_dir: Output directory
output_dir: Base output directory
"""
os.makedirs(output_dir, exist_ok=True)
# 每次运行创建带时间戳的子目录
run_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
run_dir = os.path.join(output_dir, f"run_{run_timestamp}")
os.makedirs(run_dir, exist_ok=True)
config = get_config()
print("\n" + "=" * 70)
@@ -77,6 +87,7 @@ def run_analysis(
print("=" * 70)
print(f"Start: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Data: {data_file}")
print(f"Output: {run_dir}")
if template_file:
print(f"Template: {template_file}")
print("=" * 70)
@@ -117,6 +128,7 @@ def run_analysis(
# ── Stage 4: AI Task Execution ──
print("\n[4/5] AI Executing Tasks...")
# Reuse DAL from Stage 1 — no need to load data again
dal.set_output_dir(run_dir)
results: List[AnalysisResult] = []
sorted_tasks = sorted(analysis_plan.tasks, key=lambda t: t.priority, reverse=True)
@@ -137,12 +149,12 @@ def run_analysis(
# ── Stage 5: Report Generation ──
print("\n[5/5] Generating Report...")
report_path = os.path.join(output_dir, "analysis_report.md")
report_path = os.path.join(run_dir, "analysis_report.md")
if template_file and os.path.exists(template_file):
report = _generate_template_report(profile, results, template_file, config)
report = _generate_template_report(profile, results, template_file, config, run_dir)
else:
report = generate_report(results, requirement, profile)
report = generate_report(results, requirement, profile, output_path=run_dir)
# Save report
with open(report_path, 'w', encoding='utf-8') as f:
@@ -155,7 +167,7 @@ def run_analysis(
print("\n" + "=" * 70)
print("Analysis Complete!")
print(f"End: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Output: {report_path}")
print(f"Output: {run_dir}")
print("=" * 70)
return True
@@ -165,7 +177,8 @@ def _generate_template_report(
profile: DataProfile,
results: List[AnalysisResult],
template_path: str,
config
config,
run_dir: str = ""
) -> str:
"""Use AI to fill a template with data from task execution results."""
client = OpenAI(api_key=config.llm.api_key, base_url=config.llm.base_url)
@@ -210,12 +223,17 @@ def _generate_template_report(
{template}
```
## 图表文件
以下是分析过程中生成的图表文件,请在报告适当位置嵌入:
{_collect_chart_paths(results, run_dir)}
## 要求
1. 用实际数据填充模板中所有占位符
2. 根据数据中的字段,智能映射到模板分类
3. 所有数字必须来自分析结果,不要编造
4. 如果某个模板分类在数据中没有对应,标注"本期无数据"
5. 保持Markdown格式
6. 在报告中嵌入图表,使用 ![描述](图表路径) 格式,让报告图文结合
"""
print(" AI filling template with analysis results...")
@@ -243,6 +261,28 @@ def _generate_template_report(
return header + report
def _collect_chart_paths(results: List[AnalysisResult], run_dir: str = "") -> str:
"""Collect all chart paths from task results for embedding in reports."""
paths = []
for r in results:
if not r.success:
continue
# From visualizations list
for viz in (r.visualizations or []):
if viz and viz not in paths:
paths.append(viz)
# From data dict (chart_path in tool results)
if isinstance(r.data, dict):
for key, val in r.data.items():
if isinstance(val, dict) and val.get('chart_path'):
cp = val['chart_path']
if cp not in paths:
paths.append(cp)
if not paths:
return "(无图表)"
return "\n".join(f"- {p}" for p in paths)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="AI-Driven Data Analysis")