Files
iov_ana/agent.py
Jeason b7a27b12bd SQLite 持久连接 — sandbox 不再每次查询开关连接,改为 __init__ 时建连、close() 时释放
Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT",减少 LLM 生成违规 SQL 浪费轮次
LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例,不再各建各的
sanitize 顺序修复 — 小样本抑制放在 float round 之前,避免被 round 干扰
quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次,加去重,不再对每行重复算整列统计
历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里,多轮分析时 LLM 能看到之前的发现
2026-03-20 13:20:31 +08:00

176 lines
6.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent 编排层 —— 调度四层架构完成分析
Layer 1: Planner 意图规划
Layer 1.5: Playbook 预设匹配
Layer 2: Explorer 自适应探索
Layer 3: Insight 异常洞察(与图表并行)
Layer 4: Context 上下文记忆
Output: Reporter + Chart + Consolidator
"""
import os
from typing import Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from core.config import DB_PATH, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR, CHARTS_DIR
from core.schema import extract_schema, schema_to_text
from core.sandbox import SandboxExecutor
from layers.planner import Planner
from layers.explorer import Explorer
from layers.insights import InsightEngine, quick_detect
from layers.context import ContextManager
from layers.playbook import PlaybookManager
from output.reporter import ReportGenerator
from output.chart import ChartGenerator
from output.consolidator import ReportConsolidator
class DataAnalysisAgent:
    """Data-analysis agent that orchestrates the layered pipeline.

    A single instance wires together the planner, playbook manager, explorer,
    insight engine, chart generator, reporter, consolidator and context
    manager, and drives them in order from :meth:`analyze`.

    The agent owns a ``SandboxExecutor`` that must be released with
    :meth:`close`; the class also works as a context manager::

        with DataAnalysisAgent() as agent:
            print(agent.analyze("..."))
    """

    def __init__(self, db_path: str = DB_PATH):
        """Build every pipeline component for the database at *db_path*.

        If no playbooks are loaded, an automatic generation pass is attempted
        and its outcome is printed.

        Args:
            db_path: Path of the database handed to the schema extractor and
                the sandbox executor.

        Raises:
            Whatever the component constructors or playbook generation raise.
            The already-created executor is closed before the exception
            propagates, so a failed construction does not leak it.
        """
        self.db_path = db_path
        self.schema = extract_schema(db_path)
        self.schema_text = schema_to_text(self.schema)
        self.executor = SandboxExecutor(db_path)
        try:
            # Per-layer components.
            self.planner = Planner()
            self.explorer = Explorer(self.executor)
            self.insight_engine = InsightEngine()
            self.reporter = ReportGenerator()
            self.context = ContextManager()
            self.playbook_mgr = PlaybookManager(PLAYBOOK_DIR)
            self.chart_gen = ChartGenerator(output_dir=CHARTS_DIR)
            self.consolidator = ReportConsolidator()
            # Charts accumulated across all analyze() calls, for full_report().
            self._all_charts: list[dict] = []
            # Generate playbooks automatically when none were loaded.
            if not self.playbook_mgr.playbooks:
                self._bootstrap_playbooks()
        except BaseException:
            # The caller never receives the instance, so nobody else could
            # call close(); release the executor here and re-raise.
            self.executor.close()
            raise

    def __enter__(self) -> "DataAnalysisAgent":
        """Return the agent itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Release resources on leaving the ``with`` block; never suppresses."""
        self.close()

    def _bootstrap_playbooks(self) -> None:
        """Ask the playbook manager to auto-generate playbooks and report it."""
        print("\n🤖 [Playbook] 未发现预设剧本AI 自动生成中...")
        generated = self.playbook_mgr.auto_generate(self.schema_text, save_dir=PLAYBOOK_DIR)
        if not generated:
            print(" ⚠️ 自动生成失败,将使用纯自适应模式")
            return
        print(f" ✅ 自动生成 {len(generated)} 个剧本:")
        for pb in generated:
            # Separator added: name and description were printed fused together.
            print(f" 📋 {pb.name}: {pb.description}")

    def analyze(self, question: str, max_rounds: Optional[int] = None) -> str:
        """Run the complete analysis pipeline for *question*.

        Steps, in order: look up earlier context, plan, match a playbook,
        explore, run insight analysis and chart generation in parallel,
        generate the report, and record the session in the context manager.

        Args:
            question: The analysis question.
            max_rounds: Upper bound on exploration rounds. ``None`` (or any
                other falsy value such as ``0``) selects
                ``MAX_EXPLORATION_ROUNDS``.

        Returns:
            The report text, with the formatted insights appended when any
            were found.
        """
        max_rounds = max_rounds or MAX_EXPLORATION_ROUNDS
        print(f"\n{'='*60}")
        print(f"📊 {question}")
        print(f"{'='*60}")

        # Layer 0: context from earlier sessions.
        prev = self.context.get_context_for(question)
        if prev:
            print("📎 发现历史上下文")

        # Layer 1: intent planning.
        print("\n🎯 [Layer 1] 意图规划...")
        plan = self.planner.plan(question, self.schema_text)
        # Attach the earlier context to the plan that is handed to the explorer.
        if prev:
            plan["_prev_context"] = prev
        print(f" 类型: {plan.get('analysis_type', 'unknown')}")
        # A key that is present but None (or holds non-string items) must not
        # crash the progress output, so normalise before joining/slicing.
        dimensions = plan.get("dimensions") or []
        if isinstance(dimensions, str):
            dimensions = [dimensions]
        dimension_text = ", ".join(map(str, dimensions))
        print(f" 维度: {dimension_text or '自动发现'}")
        rationale = str(plan.get("rationale") or "")
        print(f" 理由: {rationale[:80]}{'...' if len(rationale) > 80 else ''}")

        # Layer 1.5: playbook matching.
        playbook_result = self._match_playbook(plan)

        # Layer 2: adaptive exploration.
        print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...")
        steps = self.explorer.explore(
            plan,
            self.schema_text,
            max_rounds=max_rounds,
            playbook_result=playbook_result,
        )
        successful = sum(1 for s in steps if s.success)
        print(f"\n 完成: {len(steps)} 轮, {successful} 条成功查询")

        # Layer 3 + charts: rule-based alerts first, then the parallel tasks.
        print("\n🔎 [Layer 3] 异常洞察 + 📊 图表生成(并行)...")
        for alert in quick_detect(steps):
            print(f" {alert}")
        insights, charts = self._run_insights_and_charts(steps, question)
        if charts:
            self._all_charts.extend(charts)

        # Report generation.
        print("\n📝 正在生成报告...")
        report = self.reporter.generate(question, plan, steps, insights, charts=charts)
        if insights:
            report += f"\n\n---\n\n{self.insight_engine.format_insights(insights)}"

        # Layer 4: record this session.
        self.context.add_session(
            question=question,
            plan=plan,
            steps=steps,
            insights=insights,
            report=report,
        )
        return report

    def _match_playbook(self, plan):
        """Return the playbook match for *plan*, or ``None`` when there is none.

        Nothing is attempted (and nothing printed) when no playbooks are loaded.
        """
        if not self.playbook_mgr.playbooks:
            return None
        print(f"\n📋 [Playbook] 匹配预设剧本 ({len(self.playbook_mgr.playbooks)} 个可用)...")
        playbook_result = self.playbook_mgr.match(plan, self.schema_text)
        if playbook_result:
            n = len(playbook_result.get("preset_queries", []))
            print(f" ✅ 匹配: {playbook_result['playbook_name']} ({n} 条预设查询)")
        else:
            print(" ❌ 无匹配,走纯自适应路径")
        return playbook_result

    def _run_insights_and_charts(self, steps, question: str) -> tuple[list, list]:
        """Run insight analysis and chart generation on two worker threads.

        A failure in either task is reported and leaves that task's result as
        an empty list; it does not abort the analysis.

        Returns:
            ``(insights, charts)``.
        """
        insights: list = []
        charts: list = []
        with ThreadPoolExecutor(max_workers=2) as pool:
            future_insights = pool.submit(self.insight_engine.analyze, steps, question)
            future_charts = pool.submit(self.chart_gen.generate, steps, question)
            # Handle whichever task finishes first; print order may vary.
            for future in as_completed([future_insights, future_charts]):
                try:
                    result = future.result()
                    if future is future_insights:
                        insights = result or []
                        if insights:
                            print(f" 🔎 发现 {len(insights)} 条洞察")
                        else:
                            print(" 🔎 未发现异常")
                    else:
                        charts = result or []
                        if charts:
                            print(f" 📊 生成 {len(charts)} 张图表:")
                            for c in charts:
                                # Separator added: title and path were fused.
                                print(f" 🖼️ {c['title']}: {c['path']}")
                        else:
                            print(" 📊 无需生成图表")
                except Exception as e:
                    # Deliberate best effort: report and continue.
                    print(f" ⚠️ 并行任务出错: {e}")
        return insights, charts

    def full_report(self, question: str = "") -> str:
        """Consolidate every recorded session into one combined report.

        Args:
            question: Optional question passed through to the consolidator.

        Returns:
            The consolidated report, or a fixed placeholder message when no
            session has been recorded yet.
        """
        sessions = self.context.sessions
        if not sessions:
            return "(还没有分析记录,请先执行分析)"
        print(f"\n📑 整合 {len(sessions)} 次分析为综合报告...")
        report = self.consolidator.consolidate(
            sessions=sessions,
            question=question,
            charts=self._all_charts or None,  # None when no chart was produced
        )
        print(" ✅ 综合报告生成完成")
        return report

    def get_schema(self) -> str:
        """Return the schema text computed at construction time."""
        return self.schema_text

    def get_history(self) -> str:
        """Return the context manager's history summary."""
        return self.context.get_history_summary()

    def get_audit(self) -> str:
        """Return the sandbox executor's execution summary."""
        return self.executor.get_execution_summary()

    def clear_history(self):
        """Clear the recorded sessions and the accumulated charts."""
        self.context.clear()
        self._all_charts.clear()

    def close(self):
        """Release resources by closing the sandbox executor."""
        self.executor.close()