SQLite 持久连接 — sandbox 不再每次查询开关连接，改为 __init__ 时建连、close() 时释放

Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT"，减少 LLM 因生成违规 SQL 而浪费的轮次

LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例，不再各建各的

sanitize 顺序修复 — 小样本抑制放在 float round 之前，避免被 round 干扰

quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次，加去重，不再对每行重复算整列统计

历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里，多轮分析时 LLM 能看到之前的发现
2026-03-20 13:20:31 +08:00
parent 96927a789d
commit b7a27b12bd
39 changed files with 2637 additions and 1133 deletions
--- a/agent.py
+++ b/agent.py
@@ -1,139 +1,175 @@
 """
 Agent 编排层 —— 调度四层架构完成分析

-Layer 1: Planner      意图规划
-Layer 2: Explorer     自适应探索
-Layer 3: Insight      异常洞察
-Layer 4: Context      上下文记忆
+Layer 1:   Planner    意图规划
+Layer 1.5: Playbook   预设匹配
+Layer 2:   Explorer   自适应探索
+Layer 3:   Insight    异常洞察（与图表并行）
+Layer 4:   Context    上下文记忆
+Output:    Reporter + Chart + Consolidator
 """
+import os
 from typing import Optional
+from concurrent.futures import ThreadPoolExecutor, as_completed

-from config import DB_PATH, MAX_EXPLORATION_ROUNDS
-from schema_extractor import extract_schema, schema_to_text
-from sandbox_executor import SandboxExecutor
-from planner import Planner
-from explorer import Explorer
-from insights import InsightEngine, quick_detect
-from reporter import ReportGenerator
-from context import ContextManager
+from core.config import DB_PATH, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR, CHARTS_DIR
+from core.schema import extract_schema, schema_to_text
+from core.sandbox import SandboxExecutor
+from layers.planner import Planner
+from layers.explorer import Explorer
+from layers.insights import InsightEngine, quick_detect
+from layers.context import ContextManager
+from layers.playbook import PlaybookManager
+from output.reporter import ReportGenerator
+from output.chart import ChartGenerator
+from output.consolidator import ReportConsolidator


 class DataAnalysisAgent:
-    """
-    数据分析 Agent
+    """数据分析 Agent —— 四层架构编排"""

-    四层架构：
-    1. Planner  - 理解用户意图，生成分析计划
-    2. Explorer - 基于计划多轮迭代探索
-    3. Insights - 从结果中检测异常、输出主动洞察
-    4. Context  - 管理多轮对话上下文
-
-    Agent 负责编排这四层，从问题到报告。
-    """
-
-    def __init__(self, db_path: str):
-        # 数据层
+    def __init__(self, db_path: str = DB_PATH):
        self.db_path = db_path
        self.schema = extract_schema(db_path)
        self.schema_text = schema_to_text(self.schema)
        self.executor = SandboxExecutor(db_path)

-        # 四层组件
+        # 各层组件
        self.planner = Planner()
        self.explorer = Explorer(self.executor)
        self.insight_engine = InsightEngine()
        self.reporter = ReportGenerator()
        self.context = ContextManager()
+        self.playbook_mgr = PlaybookManager(PLAYBOOK_DIR)
+        self.chart_gen = ChartGenerator(output_dir=CHARTS_DIR)
+        self.consolidator = ReportConsolidator()
+
+        # 累积图表
+        self._all_charts: list[dict] = []
+
+        # 自动生成 Playbook
+        if not self.playbook_mgr.playbooks:
+            print("\n🤖 [Playbook] 未发现预设剧本，AI 自动生成中...")
+            generated = self.playbook_mgr.auto_generate(self.schema_text, save_dir=PLAYBOOK_DIR)
+            if generated:
+                print(f"   ✅ 自动生成 {len(generated)} 个剧本：")
+                for pb in generated:
+                    print(f"      📋 {pb.name} — {pb.description}")
+            else:
+                print("   ⚠️ 自动生成失败，将使用纯自适应模式")

    def analyze(self, question: str, max_rounds: Optional[int] = None) -> str:
-        """
-        完整分析流程
-
-        Args:
-            question: 用户分析问题
-            max_rounds: 最大探索轮数（默认用配置值）
-
-        Returns:
-            格式化的分析报告
-        """
+        """完整分析流程"""
        max_rounds = max_rounds or MAX_EXPLORATION_ROUNDS

        print(f"\n{'='*60}")
        print(f"📊 {question}")
        print(f"{'='*60}")

-        # ── Layer 0: 检查上下文 ──────────────────────────
-        prev_context = self.context.get_context_for(question)
-        if prev_context:
-            print("📎 发现历史分析上下文，将结合之前的发现")
+        # Layer 0: 上下文
+        prev = self.context.get_context_for(question)
+        if prev:
+            print("📎 发现历史上下文")

-        # ── Layer 1: 意图规划 ────────────────────────────
+        # Layer 1: 意图规划
        print("\n🎯 [Layer 1] 意图规划...")
        plan = self.planner.plan(question, self.schema_text)
-
-        analysis_type = plan.get("analysis_type", "unknown")
-        dimensions = plan.get("dimensions", [])
+        # 注入历史上下文到 plan 中，让 Explorer 能看到
+        if prev:
+            plan["_prev_context"] = prev
+        print(f"   类型: {plan.get('analysis_type', 'unknown')}")
+        print(f"   维度: {', '.join(plan.get('dimensions', [])) or '自动发现'}")
        rationale = plan.get("rationale", "")
-        print(f"   类型: {analysis_type}")
-        print(f"   维度: {', '.join(dimensions) if dimensions else '自动发现'}")
        print(f"   理由: {rationale[:80]}{'...' if len(rationale) > 80 else ''}")

-        # ── Layer 2: 自适应探索 ──────────────────────────
-        print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...")
-        steps = self.explorer.explore(plan, self.schema_text, max_rounds=max_rounds)
+        # Layer 1.5: Playbook 匹配
+        playbook_result = None
+        if self.playbook_mgr.playbooks:
+            print(f"\n📋 [Playbook] 匹配预设剧本 ({len(self.playbook_mgr.playbooks)} 个可用)...")
+            playbook_result = self.playbook_mgr.match(plan, self.schema_text)
+            if playbook_result:
+                n = len(playbook_result.get("preset_queries", []))
+                print(f"   ✅ 匹配: {playbook_result['playbook_name']} ({n} 条预设查询)")
+            else:
+                print("   ❌ 无匹配，走纯自适应路径")

+        # Layer 2: 自适应探索
+        print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...")
+        steps = self.explorer.explore(plan, self.schema_text, max_rounds=max_rounds, playbook_result=playbook_result)
        successful = sum(1 for s in steps if s.success)
        print(f"\n   完成: {len(steps)} 轮, {successful} 条成功查询")

-        # ── Layer 3: 异常洞察 ────────────────────────────
-        print("\n🔎 [Layer 3] 异常洞察...")
-
-        # 先做规则检测
+        # Layer 3 + Charts: 并行执行洞察和图表生成
+        print("\n🔎 [Layer 3] 异常洞察 + 📊 图表生成（并行）...")
        rule_alerts = quick_detect(steps)
        for alert in rule_alerts:
            print(f"   {alert}")

-        # 再做 LLM 深度分析
-        insights = self.insight_engine.analyze(steps, question)
-        if insights:
-            print(f"   发现 {len(insights)} 条洞察")
-            for insight in insights:
-                print(f"   {insight}")
-        else:
-            print("   未发现异常")
+        insights = []
+        charts = []

-        # ── 生成报告 ────────────────────────────────────
+        with ThreadPoolExecutor(max_workers=2) as pool:
+            future_insights = pool.submit(self.insight_engine.analyze, steps, question)
+            future_charts = pool.submit(self.chart_gen.generate, steps, question)
+
+            for future in as_completed([future_insights, future_charts]):
+                try:
+                    result = future.result()
+                    if future == future_insights:
+                        insights = result
+                        if insights:
+                            print(f"   🔎 发现 {len(insights)} 条洞察")
+                        else:
+                            print("   🔎 未发现异常")
+                    else:
+                        charts = result
+                        if charts:
+                            print(f"   📊 生成 {len(charts)} 张图表：")
+                            for c in charts:
+                                print(f"      🖼️ {c['title']} → {c['path']}")
+                        else:
+                            print("   📊 无需生成图表")
+                except Exception as e:
+                    print(f"   ⚠️ 并行任务出错: {e}")
+
+        if charts:
+            self._all_charts.extend(charts)
+
+        # 生成报告
        print("\n📝 正在生成报告...")
-        report = self.reporter.generate(question, plan, steps, insights)
+        report = self.reporter.generate(question, plan, steps, insights, charts=charts)

-        # 追加主动洞察
        if insights:
-            insight_text = self.insight_engine.format_insights(insights)
-            report += f"\n\n---\n\n{insight_text}"
+            report += f"\n\n---\n\n{self.insight_engine.format_insights(insights)}"

-        # ── Layer 4: 记录上下文 ──────────────────────────
-        self.context.add_session(
-            question=question,
-            plan=plan,
-            steps=steps,
-            insights=insights,
-            report=report,
-        )
+        # Layer 4: 记录上下文
+        self.context.add_session(question=question, plan=plan, steps=steps, insights=insights, report=report)

        return report

+    def full_report(self, question: str = "") -> str:
+        """整合所有历史分析为综合报告"""
+        sessions = self.context.sessions
+        if not sessions:
+            return "（还没有分析记录，请先执行分析）"
+        print(f"\n📑 整合 {len(sessions)} 次分析为综合报告...")
+        report = self.consolidator.consolidate(sessions=sessions, question=question, charts=self._all_charts or None)
+        print("   ✅ 综合报告生成完成")
+        return report
+
    def get_schema(self) -> str:
-        """获取 Schema 文本"""
        return self.schema_text

    def get_history(self) -> str:
-        """获取分析历史摘要"""
        return self.context.get_history_summary()

    def get_audit(self) -> str:
-        """获取执行审计日志"""
        return self.executor.get_execution_summary()

    def clear_history(self):
-        """清空分析历史"""
        self.context.clear()
+        self._all_charts.clear()
+
+    def close(self):
+        """释放资源"""
+        self.executor.close()