Files
iov_ana/agent.py
openclaw e8f8e2f1ba feat: 四层架构全面增强
安全与稳定性:
- 移除硬编码 API Key,改用 .env + 环境变量
- LLM 调用统一重试机制(指数退避,3 次重试,处理 429/5xx/超时)
- 中文字体检测增强(CJK 关键词兜底 + 无字体时英文 fallback)
- 缺失 API Key 给出友好提示而非崩溃

分析能力提升:
- 异常检测新增 z-score 检测(标准差>2 标记异常)
- 新增变异系数 CV 检测(数据波动性)
- 新增零值/缺失检测
- 上下文管理器升级为关键词语义匹配(替代简单取最近 2 条)

用户体验:
- 报告自动保存为 Markdown(reports/ 目录)
- 新增 export 命令导出查询结果为 CSV
- 新增 reports 命令查看已保存报告
- CLI 支持 readline 命令历史(方向键翻阅)
- CSV 导入工具重写:自动列名映射、容错处理、dry-run 模式
- 新增 .env.example 配置模板
2026-03-31 14:39:17 +08:00

235 lines
9.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Agent 编排层 —— 调度四层架构完成分析
Layer 1: Planner 意图规划
Layer 1.5: Playbook 预设匹配
Layer 2: Explorer 自适应探索
Layer 3: Insight 异常洞察(与图表并行)
Layer 4: Context 上下文记忆
Output: Reporter + Chart + Consolidator
"""
import os
import time
from typing import Optional
from concurrent.futures import ThreadPoolExecutor, as_completed
from core.config import DB_PATH, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR, CHARTS_DIR, PROJECT_ROOT
from core.schema import extract_schema, schema_to_text
from core.sandbox import SandboxExecutor
from layers.planner import Planner
from layers.explorer import Explorer
from layers.insights import InsightEngine, quick_detect
from layers.context import ContextManager
from layers.playbook import PlaybookManager
from output.reporter import ReportGenerator
from output.chart import ChartGenerator
from output.consolidator import ReportConsolidator
class DataAnalysisAgent:
    """Data-analysis agent — orchestrates the four-layer architecture.

    Wires the planner, explorer, insight engine, context memory, playbook
    manager and the output components (reporter, charts, consolidator)
    around a sandboxed SQL executor for one database.
    """

    def __init__(self, db_path: str = DB_PATH):
        """Build every layer component for the database at *db_path*.

        Side effects: creates the reports/ directory, and when no playbooks
        exist yet, auto-generates them from the schema (prints progress).
        """
        self.db_path = db_path
        # Extract the schema once; the text form is what the LLM layers consume.
        self.schema = extract_schema(db_path)
        self.schema_text = schema_to_text(self.schema)
        self.executor = SandboxExecutor(db_path)
        # Layer components
        self.planner = Planner()
        self.explorer = Explorer(self.executor)
        self.insight_engine = InsightEngine()
        self.reporter = ReportGenerator()
        self.context = ContextManager()
        self.playbook_mgr = PlaybookManager(PLAYBOOK_DIR)
        self.chart_gen = ChartGenerator(output_dir=CHARTS_DIR)
        self.consolidator = ReportConsolidator()
        # Charts accumulated across analyze() calls; consumed by full_report().
        self._all_charts: list[dict] = []
        # Report output directory (created eagerly so saves never fail on it)
        self.reports_dir = os.path.join(PROJECT_ROOT, "reports")
        os.makedirs(self.reports_dir, exist_ok=True)
        # Auto-generate playbooks when none were found on disk
        if not self.playbook_mgr.playbooks:
            print("\n🤖 [Playbook] 未发现预设剧本AI 自动生成中...")
            generated = self.playbook_mgr.auto_generate(self.schema_text, save_dir=PLAYBOOK_DIR)
            if generated:
                print(f" ✅ 自动生成 {len(generated)} 个剧本:")
                for pb in generated:
                    print(f" 📋 {pb.name}{pb.description}")
            else:
                # Best-effort: fall back to the purely adaptive path.
                print(" ⚠️ 自动生成失败,将使用纯自适应模式")
def analyze(self, question: str, max_rounds: Optional[int] = None) -> str:
"""完整分析流程"""
max_rounds = max_rounds or MAX_EXPLORATION_ROUNDS
print(f"\n{'='*60}")
print(f"📊 {question}")
print(f"{'='*60}")
# Layer 0: 上下文
prev = self.context.get_context_for(question)
if prev:
print("📎 发现历史上下文")
# Layer 1: 意图规划
print("\n🎯 [Layer 1] 意图规划...")
plan = self.planner.plan(question, self.schema_text)
# 注入历史上下文到 plan 中,让 Explorer 能看到
if prev:
plan["_prev_context"] = prev
print(f" 类型: {plan.get('analysis_type', 'unknown')}")
print(f" 维度: {', '.join(plan.get('dimensions', [])) or '自动发现'}")
rationale = plan.get("rationale", "")
print(f" 理由: {rationale[:80]}{'...' if len(rationale) > 80 else ''}")
# Layer 1.5: Playbook 匹配
playbook_result = None
if self.playbook_mgr.playbooks:
print(f"\n📋 [Playbook] 匹配预设剧本 ({len(self.playbook_mgr.playbooks)} 个可用)...")
playbook_result = self.playbook_mgr.match(plan, self.schema_text)
if playbook_result:
n = len(playbook_result.get("preset_queries", []))
print(f" ✅ 匹配: {playbook_result['playbook_name']} ({n} 条预设查询)")
else:
print(" ❌ 无匹配,走纯自适应路径")
# Layer 2: 自适应探索
print(f"\n🔍 [Layer 2] 自适应探索 (最多 {max_rounds} 轮)...")
steps = self.explorer.explore(plan, self.schema_text, max_rounds=max_rounds, playbook_result=playbook_result)
successful = sum(1 for s in steps if s.success)
print(f"\n 完成: {len(steps)} 轮, {successful} 条成功查询")
# Layer 3 + Charts: 并行执行洞察和图表生成
print("\n🔎 [Layer 3] 异常洞察 + 📊 图表生成(并行)...")
rule_alerts = quick_detect(steps)
for alert in rule_alerts:
print(f" {alert}")
insights = []
charts = []
with ThreadPoolExecutor(max_workers=2) as pool:
future_insights = pool.submit(self.insight_engine.analyze, steps, question)
future_charts = pool.submit(self.chart_gen.generate, steps, question)
for future in as_completed([future_insights, future_charts]):
try:
result = future.result()
if future == future_insights:
insights = result
if insights:
print(f" 🔎 发现 {len(insights)} 条洞察")
else:
print(" 🔎 未发现异常")
else:
charts = result
if charts:
print(f" 📊 生成 {len(charts)} 张图表:")
for c in charts:
print(f" 🖼️ {c['title']}{c['path']}")
else:
print(" 📊 无需生成图表")
except Exception as e:
print(f" ⚠️ 并行任务出错: {e}")
if charts:
self._all_charts.extend(charts)
# 生成报告
print("\n📝 正在生成报告...")
report = self.reporter.generate(question, plan, steps, insights, charts=charts)
if insights:
report += f"\n\n---\n\n{self.insight_engine.format_insights(insights)}"
# Layer 4: 记录上下文
self.context.add_session(question=question, plan=plan, steps=steps, insights=insights, report=report)
# 自动保存报告
self.save_report(report, question, charts=charts)
return report
def full_report(self, question: str = "") -> str:
"""整合所有历史分析为综合报告"""
sessions = self.context.sessions
if not sessions:
return "(还没有分析记录,请先执行分析)"
print(f"\n📑 整合 {len(sessions)} 次分析为综合报告...")
report = self.consolidator.consolidate(sessions=sessions, question=question, charts=self._all_charts or None)
print(" ✅ 综合报告生成完成")
return report
    def get_schema(self) -> str:
        """Return the database schema as LLM-readable text."""
        return self.schema_text

    def get_history(self) -> str:
        """Return a summary of past analysis sessions (Layer 4 context)."""
        return self.context.get_history_summary()

    def get_audit(self) -> str:
        """Return the sandbox executor's query-execution audit summary."""
        return self.executor.get_execution_summary()

    def clear_history(self):
        """Drop all recorded context sessions and accumulated charts."""
        self.context.clear()
        self._all_charts.clear()

    def close(self):
        """Release resources (closes the sandboxed DB executor)."""
        self.executor.close()
def save_report(self, report: str, question: str, charts: list[dict] | None = None) -> str:
"""将报告保存为 Markdown 文件,返回文件路径"""
ts = time.strftime("%Y%m%d_%H%M%S")
# 取问题前 20 字作为文件名
import re
safe_q = re.sub(r'[^\w\u4e00-\u9fff]', '_', question)[:20].strip('_')
fname = f"{ts}_{safe_q}.md"
fpath = os.path.join(self.reports_dir, fname)
with open(fpath, "w", encoding="utf-8") as f:
f.write(f"# 分析报告: {question}\n\n")
f.write(f"_生成时间: {time.strftime('%Y-%m-%d %H:%M:%S')}_\n\n")
f.write(report)
if charts:
f.write("\n\n---\n\n## 📊 图表索引\n\n")
for c in charts:
f.write(f"### {c['title']}\n![{c['title']}]({os.path.abspath(c['path'])})\n\n")
print(f" 💾 报告已保存: {fpath}")
return fpath
def export_data(self, steps: list, format: str = "csv") -> str | None:
"""导出探索结果为 CSV"""
import csv
import io
all_rows = []
all_cols = set()
for step in steps:
if step.success and step.rows:
for row in step.rows:
row["_query"] = step.purpose
all_rows.append(row)
all_cols.update(row.keys())
if not all_rows:
return None
ts = time.strftime("%Y%m%d_%H%M%S")
fname = f"export_{ts}.csv"
fpath = os.path.join(self.reports_dir, fname)
cols = sorted(all_cols)
with open(fpath, "w", encoding="utf-8-sig", newline="") as f:
writer = csv.DictWriter(f, fieldnames=cols)
writer.writeheader()
writer.writerows(all_rows)
print(f" 📁 数据已导出: {fpath} ({len(all_rows)} 行)")
return fpath