SQLite 持久连接 — sandbox 不再每次查询开关连接，改为 __init__ 时建连、close() 时释放

Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT"，减少 LLM 生成违规 SQL 浪费轮次

LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例，不再各建各的

sanitize 顺序修复 — 小样本抑制放在 float round 之前，避免被 round 干扰

quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次，加去重，不再对每行重复算整列统计

历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里，多轮分析时 LLM 能看到之前的发现
2026-03-20 13:20:31 +08:00
parent 96927a789d
commit b7a27b12bd
39 changed files with 2637 additions and 1133 deletions
--- a/layers/planner.py
+++ b/layers/planner.py
@@ -0,0 +1,74 @@
+"""
+Layer 1: 意图规划器
+"""
+import json
+from typing import Any
+
+from core.config import LLM_CONFIG
+from core.utils import get_llm_client, extract_json_object
+
+# System prompt for the planning call: Planner.plan sends this text verbatim as
+# the "system" message. It asks the model to answer with a single strict-JSON
+# analysis plan (intent, analysis_type, primary_table, dimensions, metrics,
+# aggregations, filters, join_needed, join_info, expected_rounds, rationale).
+# The text is runtime data read by the LLM, so any wording change alters
+# planner behavior.
+PROMPT = """你是一个数据分析规划专家。
+
+## 你的任务
+根据用户的分析问题和数据库 Schema，生成一个结构化的分析计划。
+
+## 输出格式（严格 JSON）
+```json
+{
+  "intent": "一句话描述用户想了解什么",
+  "analysis_type": "ranking" | "distribution" | "trend" | "comparison" | "anomaly" | "overview",
+  "primary_table": "主要分析的表名",
+  "dimensions": ["分组维度列名"],
+  "metrics": ["需要聚合的数值列名"],
+  "aggregations": ["SUM", "AVG", "COUNT", ...],
+  "filters": [{"column": "列名", "condition": "过滤条件（可选）"}],
+  "join_needed": false,
+  "join_info": {"tables": [], "on": ""},
+  "expected_rounds": 3,
+  "rationale": "为什么这样规划，需要关注什么"
+}
+```
+
+## 分析类型说明
+- ranking: 按某维度排名
+- distribution: 分布/占比
+- trend: 时间趋势
+- comparison: 对比分析
+- anomaly: 异常检测
+- overview: 全局概览
+
+## 规划原则
+1. 只选择与问题相关的表和列
+2. 如果需要 JOIN，说明关联条件
+3. 预估需要几轮探索（1-6）
+4. 标注可能的异常关注点
+5. metrics 不要包含 id 列"""
+
+
+class Planner:
+    """意图规划器"""
+
+    def __init__(self):
+        self.client, self.model = get_llm_client(LLM_CONFIG)
+
+    def plan(self, question: str, schema_text: str) -> dict[str, Any]:
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": PROMPT},
+                {"role": "user", "content": f"## Schema\n{schema_text}\n\n## 用户问题\n{question}"},
+            ],
+            temperature=0.1,
+            max_tokens=1024,
+        )
+        content = response.choices[0].message.content.strip()
+        plan = extract_json_object(content)
+
+        if not plan:
+            plan = {"intent": content[:100], "analysis_type": "overview"}
+
+        plan.setdefault("analysis_type", "overview")
+        plan.setdefault("expected_rounds", 3)
+        plan.setdefault("filters", [])
+        plan.setdefault("join_needed", False)
+        return plan