SQLite 持久连接 — sandbox 不再每次查询开关连接,改为 __init__ 时建连、close() 时释放

Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT",减少 LLM 生成违规 SQL 浪费轮次
LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例,不再各建各的
sanitize 顺序修复 — 小样本抑制放在 float round 之前,避免被 round 干扰
quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次,加去重,不再对每行重复算整列统计
历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里,多轮分析时 LLM 能看到之前的发现
This commit is contained in:
2026-03-20 13:20:31 +08:00
parent 96927a789d
commit b7a27b12bd
39 changed files with 2637 additions and 1133 deletions

192
agent.py
View File

@@ -1,139 +1,175 @@
"""
Agent 编排层 —— 调度四层架构完成分析
Layer 1: Planner 意图规划
Layer 2: Explorer 自适应探索
Layer 3: Insight 异常洞察
Layer 4: Context 上下文记忆
Layer 1: Planner 意图规划
Layer 1.5: Playbook 预设匹配
Layer 2: Explorer 自适应探索
Layer 3: Insight 异常洞察(与图表并行)
Layer 4: Context 上下文记忆
Output: Reporter + Chart + Consolidator
"""
import os
from typing import Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from config import DB_PATH, MAX_EXPLORATION_ROUNDS
from schema_extractor import extract_schema, schema_to_text
from sandbox_executor import SandboxExecutor
from planner import Planner
from explorer import Explorer
from insights import InsightEngine, quick_detect
from reporter import ReportGenerator
from context import ContextManager
from core.config import DB_PATH, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR, CHARTS_DIR
from core.schema import extract_schema, schema_to_text
from core.sandbox import SandboxExecutor
from layers.planner import Planner
from layers.explorer import Explorer
from layers.insights import InsightEngine, quick_detect
from layers.context import ContextManager
from layers.playbook import PlaybookManager
from output.reporter import ReportGenerator
from output.chart import ChartGenerator
from output.consolidator import ReportConsolidator
class DataAnalysisAgent:
    """Data-analysis agent that orchestrates the layered pipeline.

    Pipeline stages driven by :meth:`analyze`:

    * Layer 0   - look up context from earlier sessions
    * Layer 1   - Planner turns the question into an analysis plan
    * Layer 1.5 - PlaybookManager tries to match a preset playbook
    * Layer 2   - Explorer runs the exploration rounds
    * Layer 3   - InsightEngine and ChartGenerator run in parallel
    * Output    - ReportGenerator builds the report
    * Layer 4   - ContextManager records the finished session

    The instance owns a ``SandboxExecutor``; call :meth:`close` (or use the
    agent as a context manager) to release it.
    """

    def __init__(self, db_path: str = DB_PATH):
        """Extract the schema, build every layer component, and make sure
        at least one playbook exists (auto-generating them if none is found).

        Args:
            db_path: Path of the database to analyse.
        """
        self.db_path = db_path
        self.schema = extract_schema(db_path)
        self.schema_text = schema_to_text(self.schema)
        self.executor = SandboxExecutor(db_path)

        # Layer components
        self.planner = Planner()
        self.explorer = Explorer(self.executor)
        self.insight_engine = InsightEngine()
        self.reporter = ReportGenerator()
        self.context = ContextManager()
        self.playbook_mgr = PlaybookManager(PLAYBOOK_DIR)
        self.chart_gen = ChartGenerator(output_dir=CHARTS_DIR)
        self.consolidator = ReportConsolidator()

        # Charts accumulated across all analyze() calls, used by full_report()
        self._all_charts: list[dict] = []

        # Auto-generate playbooks when none were loaded
        if not self.playbook_mgr.playbooks:
            # NOTE(review): this message reads as if a separator between
            # "预设剧本" and "AI" was lost — confirm the intended text.
            print("\n🤖 [Playbook] 未发现预设剧本AI 自动生成中...")
            generated = self.playbook_mgr.auto_generate(
                self.schema_text, save_dir=PLAYBOOK_DIR
            )
            if generated:
                print(f" ✅ 自动生成 {len(generated)} 个剧本:")
                for pb in generated:
                    # NOTE(review): name and description are printed with no
                    # separator between them — confirm this is intended.
                    print(f" 📋 {pb.name}{pb.description}")
            else:
                print(" ⚠️ 自动生成失败,将使用纯自适应模式")

    def __enter__(self):
        """Allow ``with DataAnalysisAgent(...) as agent:`` usage."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Release resources on exit; exceptions are never suppressed."""
        self.close()

    def analyze(self, question: str, max_rounds: Optional[int] = None) -> str:
        """Run the complete analysis flow for one question.

        Args:
            question: The user's analysis question.
            max_rounds: Maximum number of exploration rounds; a falsy value
                (``None`` or ``0``) falls back to ``MAX_EXPLORATION_ROUNDS``.

        Returns:
            The generated report, with formatted insights appended when any
            were found.
        """
        max_rounds = max_rounds or MAX_EXPLORATION_ROUNDS

        print(f"\n{'='*60}")
        print(f"📊 {question}")
        print(f"{'='*60}")

        # Layer 0: context from earlier sessions
        prev = self.context.get_context_for(question)
        if prev:
            print("📎 发现历史上下文")

        # Layer 1: intent planning
        print("\n🎯 [Layer 1] 意图规划...")
        plan = self.planner.plan(question, self.schema_text)

        # Inject the historical context into the plan so the Explorer can see it
        if prev:
            plan["_prev_context"] = prev

        # `or` guards against keys that are present but set to None, which
        # would otherwise break join() / slicing below.
        dimensions = plan.get("dimensions") or []
        rationale = plan.get("rationale") or ""
        print(f" 类型: {plan.get('analysis_type', 'unknown')}")
        print(f" 维度: {', '.join(dimensions) or '自动发现'}")
        print(f" 理由: {rationale[:80]}{'...' if len(rationale) > 80 else ''}")

        # Layer 1.5: playbook matching (skipped when no playbook is available)
        playbook_result = None
        if self.playbook_mgr.playbooks:
            print(f"\n📋 [Playbook] 匹配预设剧本 ({len(self.playbook_mgr.playbooks)} 个可用)...")
            playbook_result = self.playbook_mgr.match(plan, self.schema_text)
            if playbook_result:
                n = len(playbook_result.get("preset_queries", []))
                print(f" ✅ 匹配: {playbook_result['playbook_name']} ({n} 条预设查询)")
            else:
                print(" ❌ 无匹配,走纯自适应路径")

        # Layer 2: adaptive exploration
        print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...")
        steps = self.explorer.explore(
            plan,
            self.schema_text,
            max_rounds=max_rounds,
            playbook_result=playbook_result,
        )
        successful = sum(1 for s in steps if s.success)
        print(f"\n 完成: {len(steps)} 轮, {successful} 条成功查询")

        # Layer 3 + charts: rule-based alerts first, then insight analysis and
        # chart generation in parallel.
        print("\n🔎 [Layer 3] 异常洞察 + 📊 图表生成(并行)...")
        rule_alerts = quick_detect(steps)
        for alert in rule_alerts:
            print(f" {alert}")

        # Defaults are kept when the corresponding task fails.
        insights = []
        charts = []

        with ThreadPoolExecutor(max_workers=2) as pool:
            future_insights = pool.submit(self.insight_engine.analyze, steps, question)
            future_charts = pool.submit(self.chart_gen.generate, steps, question)

            for future in as_completed([future_insights, future_charts]):
                # Best effort: a failure in one task must not abort the
                # report, so the error is printed and the default is used.
                try:
                    result = future.result()
                    if future is future_insights:
                        insights = result
                        if insights:
                            print(f" 🔎 发现 {len(insights)} 条洞察")
                        else:
                            print(" 🔎 未发现异常")
                    else:
                        charts = result
                        if charts:
                            print(f" 📊 生成 {len(charts)} 张图表:")
                            for c in charts:
                                # NOTE(review): title and path are printed
                                # with no separator — confirm intended.
                                print(f" 🖼️ {c['title']}{c['path']}")
                        else:
                            print(" 📊 无需生成图表")
                except Exception as e:
                    print(f" ⚠️ 并行任务出错: {e}")

        if charts:
            self._all_charts.extend(charts)

        # Generate the report
        print("\n📝 正在生成报告...")
        report = self.reporter.generate(question, plan, steps, insights, charts=charts)

        # Append the proactive insights
        if insights:
            report += f"\n\n---\n\n{self.insight_engine.format_insights(insights)}"

        # Layer 4: record this session exactly once
        self.context.add_session(
            question=question,
            plan=plan,
            steps=steps,
            insights=insights,
            report=report,
        )

        return report

    def full_report(self, question: str = "") -> str:
        """Consolidate every recorded session into one combined report.

        Args:
            question: Optional question passed through to the consolidator.

        Returns:
            The consolidated report, or a placeholder message when no
            analysis has been recorded yet.
        """
        sessions = self.context.sessions
        if not sessions:
            return "(还没有分析记录,请先执行分析)"

        print(f"\n📑 整合 {len(sessions)} 次分析为综合报告...")
        report = self.consolidator.consolidate(
            sessions=sessions,
            question=question,
            # An empty chart list is passed as None
            charts=self._all_charts or None,
        )
        print(" ✅ 综合报告生成完成")
        return report

    def get_schema(self) -> str:
        """Return the schema text."""
        return self.schema_text

    def get_history(self) -> str:
        """Return the analysis-history summary from the context manager."""
        return self.context.get_history_summary()

    def get_audit(self) -> str:
        """Return the executor's execution summary (audit log)."""
        return self.executor.get_execution_summary()

    def clear_history(self):
        """Clear the recorded sessions and the accumulated charts."""
        self.context.clear()
        self._all_charts.clear()

    def close(self):
        """Release resources held by the executor."""
        self.executor.close()