2026-03-19 12:21:04 +08:00
|
|
|
|
"""
|
|
|
|
|
|
Agent 编排层 —— 调度四层架构完成分析
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
Layer 1: Planner 意图规划
|
|
|
|
|
|
Layer 1.5: Playbook 预设匹配
|
|
|
|
|
|
Layer 2: Explorer 自适应探索
|
|
|
|
|
|
Layer 3: Insight 异常洞察(与图表并行)
|
|
|
|
|
|
Layer 4: Context 上下文记忆
|
|
|
|
|
|
Output: Reporter + Chart + Consolidator
|
2026-03-19 12:21:04 +08:00
|
|
|
|
"""
|
2026-03-20 13:20:31 +08:00
|
|
|
|
import os
|
2026-03-31 14:39:17 +08:00
|
|
|
|
import time
|
2026-03-19 12:21:04 +08:00
|
|
|
|
from typing import Optional
|
2026-03-20 13:20:31 +08:00
|
|
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
2026-03-31 14:39:17 +08:00
|
|
|
|
from core.config import DB_PATH, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR, CHARTS_DIR, PROJECT_ROOT
|
2026-03-20 13:20:31 +08:00
|
|
|
|
from core.schema import extract_schema, schema_to_text
|
|
|
|
|
|
from core.sandbox import SandboxExecutor
|
|
|
|
|
|
from layers.planner import Planner
|
|
|
|
|
|
from layers.explorer import Explorer
|
|
|
|
|
|
from layers.insights import InsightEngine, quick_detect
|
|
|
|
|
|
from layers.context import ContextManager
|
|
|
|
|
|
from layers.playbook import PlaybookManager
|
|
|
|
|
|
from output.reporter import ReportGenerator
|
|
|
|
|
|
from output.chart import ChartGenerator
|
|
|
|
|
|
from output.consolidator import ReportConsolidator
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class DataAnalysisAgent:
|
2026-03-20 13:20:31 +08:00
|
|
|
|
"""数据分析 Agent —— 四层架构编排"""
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
def __init__(self, db_path: str = DB_PATH):
|
2026-03-19 12:21:04 +08:00
|
|
|
|
self.db_path = db_path
|
|
|
|
|
|
self.schema = extract_schema(db_path)
|
|
|
|
|
|
self.schema_text = schema_to_text(self.schema)
|
|
|
|
|
|
self.executor = SandboxExecutor(db_path)
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# 各层组件
|
2026-03-19 12:21:04 +08:00
|
|
|
|
self.planner = Planner()
|
|
|
|
|
|
self.explorer = Explorer(self.executor)
|
|
|
|
|
|
self.insight_engine = InsightEngine()
|
|
|
|
|
|
self.reporter = ReportGenerator()
|
|
|
|
|
|
self.context = ContextManager()
|
2026-03-20 13:20:31 +08:00
|
|
|
|
self.playbook_mgr = PlaybookManager(PLAYBOOK_DIR)
|
|
|
|
|
|
self.chart_gen = ChartGenerator(output_dir=CHARTS_DIR)
|
|
|
|
|
|
self.consolidator = ReportConsolidator()
|
|
|
|
|
|
|
|
|
|
|
|
# 累积图表
|
|
|
|
|
|
self._all_charts: list[dict] = []
|
|
|
|
|
|
|
2026-03-31 14:39:17 +08:00
|
|
|
|
# 报告输出目录
|
|
|
|
|
|
self.reports_dir = os.path.join(PROJECT_ROOT, "reports")
|
|
|
|
|
|
os.makedirs(self.reports_dir, exist_ok=True)
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# 自动生成 Playbook
|
|
|
|
|
|
if not self.playbook_mgr.playbooks:
|
|
|
|
|
|
print("\n🤖 [Playbook] 未发现预设剧本,AI 自动生成中...")
|
|
|
|
|
|
generated = self.playbook_mgr.auto_generate(self.schema_text, save_dir=PLAYBOOK_DIR)
|
|
|
|
|
|
if generated:
|
|
|
|
|
|
print(f" ✅ 自动生成 {len(generated)} 个剧本:")
|
|
|
|
|
|
for pb in generated:
|
|
|
|
|
|
print(f" 📋 {pb.name} — {pb.description}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(" ⚠️ 自动生成失败,将使用纯自适应模式")
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
|
|
|
|
|
def analyze(self, question: str, max_rounds: Optional[int] = None) -> str:
|
2026-03-20 13:20:31 +08:00
|
|
|
|
"""完整分析流程"""
|
2026-03-19 12:21:04 +08:00
|
|
|
|
max_rounds = max_rounds or MAX_EXPLORATION_ROUNDS
|
|
|
|
|
|
|
|
|
|
|
|
print(f"\n{'='*60}")
|
|
|
|
|
|
print(f"📊 {question}")
|
|
|
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# Layer 0: 上下文
|
|
|
|
|
|
prev = self.context.get_context_for(question)
|
|
|
|
|
|
if prev:
|
|
|
|
|
|
print("📎 发现历史上下文")
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# Layer 1: 意图规划
|
2026-03-19 12:21:04 +08:00
|
|
|
|
print("\n🎯 [Layer 1] 意图规划...")
|
|
|
|
|
|
plan = self.planner.plan(question, self.schema_text)
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# 注入历史上下文到 plan 中,让 Explorer 能看到
|
|
|
|
|
|
if prev:
|
|
|
|
|
|
plan["_prev_context"] = prev
|
|
|
|
|
|
print(f" 类型: {plan.get('analysis_type', 'unknown')}")
|
|
|
|
|
|
print(f" 维度: {', '.join(plan.get('dimensions', [])) or '自动发现'}")
|
2026-03-19 12:21:04 +08:00
|
|
|
|
rationale = plan.get("rationale", "")
|
|
|
|
|
|
print(f" 理由: {rationale[:80]}{'...' if len(rationale) > 80 else ''}")
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# Layer 1.5: Playbook 匹配
|
|
|
|
|
|
playbook_result = None
|
|
|
|
|
|
if self.playbook_mgr.playbooks:
|
|
|
|
|
|
print(f"\n📋 [Playbook] 匹配预设剧本 ({len(self.playbook_mgr.playbooks)} 个可用)...")
|
|
|
|
|
|
playbook_result = self.playbook_mgr.match(plan, self.schema_text)
|
|
|
|
|
|
if playbook_result:
|
|
|
|
|
|
n = len(playbook_result.get("preset_queries", []))
|
|
|
|
|
|
print(f" ✅ 匹配: {playbook_result['playbook_name']} ({n} 条预设查询)")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(" ❌ 无匹配,走纯自适应路径")
|
|
|
|
|
|
|
|
|
|
|
|
# Layer 2: 自适应探索
|
2026-03-19 12:21:04 +08:00
|
|
|
|
print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...")
|
2026-03-20 13:20:31 +08:00
|
|
|
|
steps = self.explorer.explore(plan, self.schema_text, max_rounds=max_rounds, playbook_result=playbook_result)
|
2026-03-19 12:21:04 +08:00
|
|
|
|
successful = sum(1 for s in steps if s.success)
|
|
|
|
|
|
print(f"\n 完成: {len(steps)} 轮, {successful} 条成功查询")
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
# Layer 3 + Charts: 并行执行洞察和图表生成
|
|
|
|
|
|
print("\n🔎 [Layer 3] 异常洞察 + 📊 图表生成(并行)...")
|
2026-03-19 12:21:04 +08:00
|
|
|
|
rule_alerts = quick_detect(steps)
|
|
|
|
|
|
for alert in rule_alerts:
|
|
|
|
|
|
print(f" {alert}")
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
insights = []
|
|
|
|
|
|
charts = []
|
|
|
|
|
|
|
|
|
|
|
|
with ThreadPoolExecutor(max_workers=2) as pool:
|
|
|
|
|
|
future_insights = pool.submit(self.insight_engine.analyze, steps, question)
|
|
|
|
|
|
future_charts = pool.submit(self.chart_gen.generate, steps, question)
|
|
|
|
|
|
|
|
|
|
|
|
for future in as_completed([future_insights, future_charts]):
|
|
|
|
|
|
try:
|
|
|
|
|
|
result = future.result()
|
|
|
|
|
|
if future == future_insights:
|
|
|
|
|
|
insights = result
|
|
|
|
|
|
if insights:
|
|
|
|
|
|
print(f" 🔎 发现 {len(insights)} 条洞察")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(" 🔎 未发现异常")
|
|
|
|
|
|
else:
|
|
|
|
|
|
charts = result
|
|
|
|
|
|
if charts:
|
|
|
|
|
|
print(f" 📊 生成 {len(charts)} 张图表:")
|
|
|
|
|
|
for c in charts:
|
|
|
|
|
|
print(f" 🖼️ {c['title']} → {c['path']}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
print(" 📊 无需生成图表")
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f" ⚠️ 并行任务出错: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
if charts:
|
|
|
|
|
|
self._all_charts.extend(charts)
|
|
|
|
|
|
|
|
|
|
|
|
# 生成报告
|
2026-03-19 12:21:04 +08:00
|
|
|
|
print("\n📝 正在生成报告...")
|
2026-03-20 13:20:31 +08:00
|
|
|
|
report = self.reporter.generate(question, plan, steps, insights, charts=charts)
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
|
|
|
|
|
if insights:
|
2026-03-20 13:20:31 +08:00
|
|
|
|
report += f"\n\n---\n\n{self.insight_engine.format_insights(insights)}"
|
|
|
|
|
|
|
|
|
|
|
|
# Layer 4: 记录上下文
|
|
|
|
|
|
self.context.add_session(question=question, plan=plan, steps=steps, insights=insights, report=report)
|
2026-03-19 12:21:04 +08:00
|
|
|
|
|
2026-03-31 14:39:17 +08:00
|
|
|
|
# 自动保存报告
|
|
|
|
|
|
self.save_report(report, question, charts=charts)
|
|
|
|
|
|
|
2026-03-19 12:21:04 +08:00
|
|
|
|
return report
|
|
|
|
|
|
|
2026-03-20 13:20:31 +08:00
|
|
|
|
def full_report(self, question: str = "") -> str:
|
|
|
|
|
|
"""整合所有历史分析为综合报告"""
|
|
|
|
|
|
sessions = self.context.sessions
|
|
|
|
|
|
if not sessions:
|
|
|
|
|
|
return "(还没有分析记录,请先执行分析)"
|
|
|
|
|
|
print(f"\n📑 整合 {len(sessions)} 次分析为综合报告...")
|
|
|
|
|
|
report = self.consolidator.consolidate(sessions=sessions, question=question, charts=self._all_charts or None)
|
|
|
|
|
|
print(" ✅ 综合报告生成完成")
|
|
|
|
|
|
return report
|
|
|
|
|
|
|
2026-03-19 12:21:04 +08:00
|
|
|
|
def get_schema(self) -> str:
|
|
|
|
|
|
return self.schema_text
|
|
|
|
|
|
|
|
|
|
|
|
def get_history(self) -> str:
|
|
|
|
|
|
return self.context.get_history_summary()
|
|
|
|
|
|
|
|
|
|
|
|
def get_audit(self) -> str:
|
|
|
|
|
|
return self.executor.get_execution_summary()
|
|
|
|
|
|
|
|
|
|
|
|
def clear_history(self):
|
|
|
|
|
|
self.context.clear()
|
2026-03-20 13:20:31 +08:00
|
|
|
|
self._all_charts.clear()
|
|
|
|
|
|
|
|
|
|
|
|
def close(self):
|
|
|
|
|
|
"""释放资源"""
|
|
|
|
|
|
self.executor.close()
|
2026-03-31 14:39:17 +08:00
|
|
|
|
|
|
|
|
|
|
def save_report(self, report: str, question: str, charts: list[dict] | None = None) -> str:
|
|
|
|
|
|
"""将报告保存为 Markdown 文件,返回文件路径"""
|
|
|
|
|
|
ts = time.strftime("%Y%m%d_%H%M%S")
|
|
|
|
|
|
# 取问题前 20 字作为文件名
|
|
|
|
|
|
import re
|
|
|
|
|
|
safe_q = re.sub(r'[^\w\u4e00-\u9fff]', '_', question)[:20].strip('_')
|
|
|
|
|
|
fname = f"{ts}_{safe_q}.md"
|
|
|
|
|
|
fpath = os.path.join(self.reports_dir, fname)
|
|
|
|
|
|
|
|
|
|
|
|
with open(fpath, "w", encoding="utf-8") as f:
|
|
|
|
|
|
f.write(f"# 分析报告: {question}\n\n")
|
|
|
|
|
|
f.write(f"_生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}_\n\n")
|
|
|
|
|
|
f.write(report)
|
|
|
|
|
|
if charts:
|
|
|
|
|
|
f.write("\n\n---\n\n## 📊 图表索引\n\n")
|
|
|
|
|
|
for c in charts:
|
|
|
|
|
|
f.write(f"### {c['title']}\n![{c['title']}]({os.path.abspath(c['path'])})\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
print(f" 💾 报告已保存: {fpath}")
|
|
|
|
|
|
return fpath
|
|
|
|
|
|
|
|
|
|
|
|
def export_data(self, steps: list, format: str = "csv") -> str | None:
|
|
|
|
|
|
"""导出探索结果为 CSV"""
|
|
|
|
|
|
import csv
|
|
|
|
|
|
import io
|
|
|
|
|
|
|
|
|
|
|
|
all_rows = []
|
|
|
|
|
|
all_cols = set()
|
|
|
|
|
|
for step in steps:
|
|
|
|
|
|
if step.success and step.rows:
|
|
|
|
|
|
for row in step.rows:
|
|
|
|
|
|
row["_query"] = step.purpose
|
|
|
|
|
|
all_rows.append(row)
|
|
|
|
|
|
all_cols.update(row.keys())
|
|
|
|
|
|
|
|
|
|
|
|
if not all_rows:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
ts = time.strftime("%Y%m%d_%H%M%S")
|
|
|
|
|
|
fname = f"export_{ts}.csv"
|
|
|
|
|
|
fpath = os.path.join(self.reports_dir, fname)
|
|
|
|
|
|
|
|
|
|
|
|
cols = sorted(all_cols)
|
|
|
|
|
|
with open(fpath, "w", encoding="utf-8-sig", newline="") as f:
|
|
|
|
|
|
writer = csv.DictWriter(f, fieldnames=cols)
|
|
|
|
|
|
writer.writeheader()
|
|
|
|
|
|
writer.writerows(all_rows)
|
|
|
|
|
|
|
|
|
|
|
|
print(f" 📁 数据已导出: {fpath} ({len(all_rows)} 行)")
|
|
|
|
|
|
return fpath
|