SQLite 持久连接 — sandbox 不再每次查询开关连接，改为 __init__ 时建连、close() 时释放

Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT"，减少 LLM 生成违规 SQL 浪费轮次

LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例，不再各建各的

sanitize 顺序修复 — 小样本抑制放在 float round 之前，避免被 round 干扰

quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次，加去重，不再对每行重复算整列统计

历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里，多轮分析时 LLM 能看到之前的发现
2026-03-20 13:20:31 +08:00
parent 96927a789d
commit b7a27b12bd
39 changed files with 2637 additions and 1133 deletions
--- a/output/reporter.py
+++ b/output/reporter.py
@@ -0,0 +1,84 @@
+"""
+报告生成器 —— 单次分析报告
+"""
+import json
+from typing import Any
+
+from core.config import LLM_CONFIG
+from core.utils import get_llm_client
+from layers.explorer import ExplorationStep
+from layers.insights import Insight
+
+
+# Prompt template for the report-writing request. It is filled in with
+# str.format and expects five placeholders: question, plan, exploration,
+# insights_text and charts_text. The template text is Chinese and asks the
+# model for a Chinese, briefing-style report with embedded chart references
+# and the executed SQL listed at the end.
+REPORT_PROMPT = """你是一个数据分析报告撰写专家。基于以下信息撰写报告。
+
+## 用户问题
+{question}
+
+## 分析计划
+{plan}
+
+## 探索过程
+{exploration}
+
+## 主动洞察
+{insights_text}
+
+## 可用图表
+{charts_text}
+
+## 撰写要求
+1. **开头**：一句话总结核心结论
+2. **核心发现**：按重要性排列，带具体数字
+3. **图表引用**：用 `![标题](路径)` 嵌入到相关段落
+4. **深入洞察**：异常、趋势、关联
+5. **建议**：基于数据的行动建议
+6. **审计**：末尾附上所有 SQL
+
+中文，专业简报风格。图表自然嵌入对应段落。"""
+
+
+class ReportGenerator:
+    """Build a single-analysis report by sending the collected material to the LLM.
+
+    The client and model name are obtained once in ``__init__`` and reused by
+    every ``generate`` call.
+    """
+
+    def __init__(self):
+        # get_llm_client yields a (client, model) pair for the given config.
+        self.client, self.model = get_llm_client(LLM_CONFIG)
+
+    def generate(self, question: str, plan: dict, steps: list[ExplorationStep],
+                 insights: list[Insight], charts: list[dict] | None = None) -> str:
+        """Render the report prompt and return the model's report text.
+
+        Args:
+            question: The user's original question.
+            plan: Analysis plan; embedded in the prompt as indented JSON.
+            steps: Exploration steps, rendered by ``_build_exploration``.
+            insights: Detected insights, one ``str()`` rendering per line; a
+                fixed placeholder line is used when this is empty.
+            charts: Optional chart descriptors; each entry must provide the
+                ``title`` and ``path`` keys.
+
+        Returns:
+            The content of the first completion choice, or an empty string
+            when the API returns no text content.
+        """
+        exploration = self._build_exploration(steps)
+        insights_text = "\n".join(str(i) for i in insights) if insights else "未检测到异常。"
+        chart_lines = [
+            f"{i}. 标题: {c['title']}, 路径: {c['path']}"
+            for i, c in enumerate(charts or [], 1)
+        ]
+        charts_text = "\n".join(chart_lines) or "无图表。"
+
+        prompt = REPORT_PROMPT.format(
+            question=question,
+            plan=self._dump_json(plan, indent=2),
+            exploration=exploration,
+            insights_text=insights_text,
+            charts_text=charts_text,
+        )
+
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": "你是专业的数据分析师，撰写清晰、有洞察力的分析报告。"},
+                {"role": "user", "content": prompt},
+            ],
+            temperature=0.3, max_tokens=4096,
+        )
+        # message.content may be None; keep the declared ``str`` return type
+        # so callers never have to special-case a missing body.
+        return response.choices[0].message.content or ""
+
+    @staticmethod
+    def _dump_json(value: Any, **kwargs: Any) -> str:
+        """Serialize *value* as JSON for prompt embedding, keeping non-ASCII text.
+
+        Values the JSON encoder cannot handle natively (for example dates,
+        decimals or bytes coming back from a query) are rendered with
+        ``str()`` instead of raising ``TypeError``; the output is only read
+        by the LLM, so a readable fallback is preferable to a failed report.
+        """
+        return json.dumps(value, ensure_ascii=False, default=str, **kwargs)
+
+    def _build_exploration(self, steps: list[ExplorationStep]) -> str:
+        """Render the exploration steps as Markdown sections for the prompt.
+
+        A step whose ``action`` is ``"done"`` becomes a closing section with
+        its reasoning; a successful step shows its SQL, row count and rows as
+        JSON; any other step shows its SQL and error.
+
+        Returns:
+            The sections joined by blank lines, or a fixed placeholder when
+            there are no steps.
+        """
+        parts = []
+        for step in steps:
+            if step.action == "done":
+                parts.append(f"### 结束\n{step.reasoning}")
+            elif step.success:
+                parts.append(
+                    f"### 第 {step.round_num} 轮：{step.purpose}\n"
+                    f"SQL: `{step.sql}`\n结果 ({step.row_count} 行):\n"
+                    f"数据: {self._dump_json(step.rows)}"
+                )
+            else:
+                parts.append(f"### 第 {step.round_num} 轮：{step.purpose}\nSQL: `{step.sql}`\n失败: {step.error}")
+        return "\n\n".join(parts) if parts else "无探索步骤"