SQLite 持久连接 — sandbox 不再每次查询开关连接,改为 __init__ 时建连、close() 时释放

Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT",减少 LLM 生成违规 SQL 浪费轮次
LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例,不再各建各的
sanitize 顺序修复 — 小样本抑制放在 float round 之前,避免被 round 干扰
quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次,加去重,不再对每行重复算整列统计
历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里,多轮分析时 LLM 能看到之前的发现
This commit is contained in:
2026-03-20 13:20:31 +08:00
parent 96927a789d
commit b7a27b12bd
39 changed files with 2637 additions and 1133 deletions

102
cli.py
View File

@@ -1,27 +1,30 @@
"""
交互式 CLI —— 四层架构自适应分析
用法: python3 cli.py [数据库路径]
用法: python cli.py [数据库路径]
"""
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
from config import DB_PATH, LLM_CONFIG, MAX_EXPLORATION_ROUNDS
from core.config import DB_PATH, LLM_CONFIG, MAX_EXPLORATION_ROUNDS, PLAYBOOK_DIR
from agent import DataAnalysisAgent
# Print the reference list of interactive CLI commands; the main loop calls
# this when the user enters `help`.
# NOTE(review): this span is a rendered diff hunk with indentation and +/-
# markers stripped, so removed and added lines appear together. Several
# entries below (<问题>, schema, history, audit, clear, help, quit) therefore
# show up twice — confirm the final help text and its column alignment
# against the checked-out cli.py.
def print_help():
print("""
可用命令:
<问题> 分析一个问题
<问题> 分析一个问题
rounds=<N> <问题> 设置探索轮数
schema 查看数据库 Schema
history 查看分析历史
audit 查看 SQL 审计日志
clear 清空分析历史
help 显示帮助
quit / q 退出
report [主题] 整合所有分析,生成综合报告
schema 查看数据库 Schema
playbooks 查看已加载的预设剧本
regen 重新生成预设剧本
history 查看分析历史
audit 查看 SQL 审计日志
clear 清空分析历史
help 显示帮助
quit / q 退出
""")
@@ -30,17 +33,12 @@ def main():
if not os.path.exists(db_path):
print(f"❌ 数据库不存在: {db_path}")
print(f" 请先运行 python3 demo.py 创建示例数据库,或指定已有数据库路径")
sys.exit(1)
if not LLM_CONFIG["api_key"]:
print("⚠️ 未配置 LLM_API_KEY,请先设置环境变量:")
print(" export LLM_API_KEY=sk-xxx")
print(" export LLM_BASE_URL=https://api.openai.com/v1 # 或 Ollama 等")
print(" export LLM_MODEL=gpt-4o")
print("⚠️ 未配置 LLM_API_KEY")
sys.exit(1)
# 初始化 Agent
agent = DataAnalysisAgent(db_path)
print("=" * 60)
@@ -49,6 +47,7 @@ def main():
print(f"\n🔗 LLM: {LLM_CONFIG['model']} @ {LLM_CONFIG['base_url']}")
print(f"🔄 最大探索轮数: {MAX_EXPLORATION_ROUNDS}")
print(f"💾 数据库: {db_path}")
print(f"📋 预设剧本: {len(agent.playbook_mgr.playbooks)}")
print(f"\n💬 输入分析问题help 查看命令)\n")
while True:
@@ -68,44 +67,69 @@ def main():
break
elif cmd == "help":
print_help()
continue
elif cmd == "schema":
print(agent.get_schema())
continue
elif cmd == "history":
print(agent.get_history())
continue
elif cmd == "audit":
print(agent.get_audit())
continue
elif cmd == "clear":
agent.clear_history()
print("✅ 历史已清空")
continue
# 解析可选参数rounds=3
max_rounds = MAX_EXPLORATION_ROUNDS
question = user_input
if "rounds=" in question.lower():
parts = question.split("rounds=")
question = parts[0].strip()
elif cmd.startswith("report"):
topic = user_input[6:].strip()
try:
max_rounds = int(parts[1].strip().split()[0])
except (ValueError, IndexError):
pass
report = agent.full_report(question=topic)
print("\n" + report)
print("\n" + "~" * 60)
except Exception as e:
print(f"\n❌ 报告整合出错: {e}")
import traceback
traceback.print_exc()
elif cmd == "playbooks":
if not agent.playbook_mgr.playbooks:
print("(无预设剧本,输入 regen 让 AI 自动生成)")
else:
for i, pb in enumerate(agent.playbook_mgr.playbooks, 1):
print(f" {i}. 📋 {pb.name}{pb.description} ({len(pb.preset_queries)} 条预设)")
elif cmd == "regen":
if os.path.isdir(PLAYBOOK_DIR):
for f in os.listdir(PLAYBOOK_DIR):
if f.startswith("auto_") and f.endswith(".json"):
os.remove(os.path.join(PLAYBOOK_DIR, f))
agent.playbook_mgr.playbooks.clear()
print("🤖 AI 正在重新生成预设剧本...")
generated = agent.playbook_mgr.auto_generate(agent.schema_text, save_dir=PLAYBOOK_DIR)
if generated:
print(f"✅ 生成 {len(generated)} 个剧本:")
for pb in generated:
print(f" 📋 {pb.name}{pb.description}")
else:
print("⚠️ 生成失败")
else:
# 解析 rounds=N
max_rounds = MAX_EXPLORATION_ROUNDS
question = user_input
if "rounds=" in question.lower():
parts = question.split("rounds=")
question = parts[0].strip()
try:
max_rounds = int(parts[1].strip().split()[0])
except (ValueError, IndexError):
pass
try:
report = agent.analyze(question, max_rounds=max_rounds)
print("\n" + report)
print("\n" + "~" * 60)
except Exception as e:
print(f"\n❌ 分析出错: {e}")
import traceback
traceback.print_exc()
try:
report = agent.analyze(question, max_rounds=max_rounds)
print("\n" + report)
print("\n" + "~" * 60)
except Exception as e:
print(f"\n❌ 分析出错: {e}")
import traceback
traceback.print_exc()
# 退出时显示审计
print("\n📋 本次会话审计:")
print(agent.get_audit())
agent.close()
if __name__ == "__main__":