""" Agent 编排层 —— 调度四层架构完成分析 Layer 1: Planner 意图规划 Layer 1.5: Playbook 预设匹配 Layer 2: Explorer 自适应探索 Layer 3: Insight 异常洞察(与图表并行) Layer 4: Context 上下文记忆 Output: Reporter + Chart + Consolidator """ import os import time from typing import Optional from concurrent.futures import ThreadPoolExecutor, as_completed from core.config import DB_PATH, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR, CHARTS_DIR, PROJECT_ROOT from core.schema import extract_schema, schema_to_text from core.sandbox import SandboxExecutor from layers.planner import Planner from layers.explorer import Explorer from layers.insights import InsightEngine, quick_detect from layers.context import ContextManager from layers.playbook import PlaybookManager from output.reporter import ReportGenerator from output.chart import ChartGenerator from output.consolidator import ReportConsolidator class DataAnalysisAgent: """数据分析 Agent —— 四层架构编排""" def __init__(self, db_path: str = DB_PATH): self.db_path = db_path self.schema = extract_schema(db_path) self.schema_text = schema_to_text(self.schema) self.executor = SandboxExecutor(db_path) # 各层组件 self.planner = Planner() self.explorer = Explorer(self.executor) self.insight_engine = InsightEngine() self.reporter = ReportGenerator() self.context = ContextManager() self.playbook_mgr = PlaybookManager(PLAYBOOK_DIR) self.chart_gen = ChartGenerator(output_dir=CHARTS_DIR) self.consolidator = ReportConsolidator() # 累积图表 self._all_charts: list[dict] = [] # 报告输出目录 self.reports_dir = os.path.join(PROJECT_ROOT, "reports") os.makedirs(self.reports_dir, exist_ok=True) # 自动生成 Playbook if not self.playbook_mgr.playbooks: print("\n🤖 [Playbook] 未发现预设剧本,AI 自动生成中...") generated = self.playbook_mgr.auto_generate(self.schema_text, save_dir=PLAYBOOK_DIR) if generated: print(f" ✅ 自动生成 {len(generated)} 个剧本:") for pb in generated: print(f" 📋 {pb.name} — {pb.description}") else: print(" ⚠️ 自动生成失败,将使用纯自适应模式") def analyze(self, question: str, max_rounds: Optional[int] = None) -> str: """完整分析流程""" max_rounds = max_rounds or MAX_EXPLORATION_ROUNDS print(f"\n{'='*60}") print(f"📊 {question}") print(f"{'='*60}") # Layer 0: 上下文 prev = self.context.get_context_for(question) if prev: print("📎 发现历史上下文") # Layer 1: 意图规划 print("\n🎯 [Layer 1] 意图规划...") plan = self.planner.plan(question, self.schema_text) # 注入历史上下文到 plan 中,让 Explorer 能看到 if prev: plan["_prev_context"] = prev print(f" 类型: {plan.get('analysis_type', 'unknown')}") print(f" 维度: {', '.join(plan.get('dimensions', [])) or '自动发现'}") rationale = plan.get("rationale", "") print(f" 理由: {rationale[:80]}{'...' if len(rationale) > 80 else ''}") # Layer 1.5: Playbook 匹配 playbook_result = None if self.playbook_mgr.playbooks: print(f"\n📋 [Playbook] 匹配预设剧本 ({len(self.playbook_mgr.playbooks)} 个可用)...") playbook_result = self.playbook_mgr.match(plan, self.schema_text) if playbook_result: n = len(playbook_result.get("preset_queries", [])) print(f" ✅ 匹配: {playbook_result['playbook_name']} ({n} 条预设查询)") else: print(" ❌ 无匹配,走纯自适应路径") # Layer 2: 自适应探索 print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...") steps = self.explorer.explore(plan, self.schema_text, max_rounds=max_rounds, playbook_result=playbook_result) successful = sum(1 for s in steps if s.success) print(f"\n 完成: {len(steps)} 轮, {successful} 条成功查询") # Layer 3 + Charts: 并行执行洞察和图表生成 print("\n🔎 [Layer 3] 异常洞察 + 📊 图表生成(并行)...") rule_alerts = quick_detect(steps) for alert in rule_alerts: print(f" {alert}") insights = [] charts = [] with ThreadPoolExecutor(max_workers=2) as pool: future_insights = pool.submit(self.insight_engine.analyze, steps, question) future_charts = pool.submit(self.chart_gen.generate, steps, question) for future in as_completed([future_insights, future_charts]): try: result = future.result() if future == future_insights: insights = result if insights: print(f" 🔎 发现 {len(insights)} 条洞察") else: print(" 🔎 未发现异常") else: charts = result if charts: print(f" 📊 生成 {len(charts)} 张图表:") for c in charts: print(f" 🖼️ {c['title']} → {c['path']}") else: print(" 📊 无需生成图表") except Exception as e: print(f" ⚠️ 并行任务出错: {e}") if charts: self._all_charts.extend(charts) # 生成报告 print("\n📝 正在生成报告...") report = self.reporter.generate(question, plan, steps, insights, charts=charts) if insights: report += f"\n\n---\n\n{self.insight_engine.format_insights(insights)}" # Layer 4: 记录上下文 self.context.add_session(question=question, plan=plan, steps=steps, insights=insights, report=report) # 自动保存报告 self.save_report(report, question, charts=charts) return report def full_report(self, question: str = "") -> str: """整合所有历史分析为综合报告""" sessions = self.context.sessions if not sessions: return "(还没有分析记录,请先执行分析)" print(f"\n📑 整合 {len(sessions)} 次分析为综合报告...") report = self.consolidator.consolidate(sessions=sessions, question=question, charts=self._all_charts or None) print(" ✅ 综合报告生成完成") return report def get_schema(self) -> str: return self.schema_text def get_history(self) -> str: return self.context.get_history_summary() def get_audit(self) -> str: return self.executor.get_execution_summary() def clear_history(self): self.context.clear() self._all_charts.clear() def close(self): """释放资源""" self.executor.close() def save_report(self, report: str, question: str, charts: list[dict] | None = None) -> str: """将报告保存为 Markdown 文件,返回文件路径""" ts = time.strftime("%Y%m%d_%H%M%S") # 取问题前 20 字作为文件名 import re safe_q = re.sub(r'[^\w\u4e00-\u9fff]', '_', question)[:20].strip('_') fname = f"{ts}_{safe_q}.md" fpath = os.path.join(self.reports_dir, fname) with open(fpath, "w", encoding="utf-8") as f: f.write(f"# 分析报告: {question}\n\n") f.write(f"_生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}_\n\n") f.write(report) if charts: f.write("\n\n---\n\n## 📊 图表索引\n\n") for c in charts: f.write(f"### {c['title']}\n![{c['title']}]({os.path.abspath(c['path'])})\n\n") print(f" 💾 报告已保存: {fpath}") return fpath def export_data(self, steps: list, format: str = "csv") -> str | None: """导出探索结果为 CSV""" import csv import io all_rows = [] all_cols = set() for step in steps: if step.success and step.rows: for row in step.rows: row["_query"] = step.purpose all_rows.append(row) all_cols.update(row.keys()) if not all_rows: return None ts = time.strftime("%Y%m%d_%H%M%S") fname = f"export_{ts}.csv" fpath = os.path.join(self.reports_dir, fname) cols = sorted(all_cols) with open(fpath, "w", encoding="utf-8-sig", newline="") as f: writer = csv.DictWriter(f, fieldnames=cols) writer.writeheader() writer.writerows(all_rows) print(f" 📁 数据已导出: {fpath} ({len(all_rows)} 行)") return fpath