Files
iov_ana/output/consolidator.py
Jeason b7a27b12bd SQLite 持久连接 — sandbox 不再每次查询开关连接,改为 __init__ 时建连、close() 时释放
Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT",减少 LLM 生成违规 SQL 浪费轮次
LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例,不再各建各的
sanitize 顺序修复 — 小样本抑制放在 float round 之前,避免被 round 干扰
quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次,加去重,不再对每行重复算整列统计
历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里,多轮分析时 LLM 能看到之前的发现
2026-03-20 13:20:31 +08:00

85 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
报告整合器 —— 将多次分析结果合并为一份完整报告
"""
import json
from core.config import LLM_CONFIG
from core.utils import get_llm_client
from layers.context import AnalysisSession
# User-message template for the consolidation request sent to the LLM.
# It is filled with str.format() and expects exactly three fields:
#   {question}    - the core question the combined report must answer
#   {sections}    - the per-session text produced by _build_sections()
#   {charts_text} - a numbered "title: path" chart list, or a "no charts" note
# The text is written in Chinese and asks for a Chinese report, so the
# literal below is runtime behaviour and must not be edited casually.
CONSOLIDATE_PROMPT = """你是一个高级数据分析总监。下面是你的团队针对同一份数据做的多次分析,请整合为一份完整的综合报告。
## 核心问题
{question}
## 各次分析结果
{sections}
## 可用图表
{charts_text}
## 整合要求
1. **执行摘要**3-5 句话概括全局结论
2. **核心发现**:从所有分析中提炼最重要的发现,去重,按重要性排列
3. **交叉洞察**:不同维度之间的关联
4. **图表引用**:用 `![标题](路径)` 嵌入相关段落
5. **风险与建议**:按优先级排列
6. **数据附录**:关键统计数字
中文,专业简报风格。先结论后细节。"""
class ReportConsolidator:
    """Merge several analysis sessions into one consolidated report.

    The report is normally written by the LLM. When the LLM call fails or
    returns no text, the per-session reports are concatenated instead so the
    caller always receives a usable string.
    """

    # Maximum number of result rows per step that are quoted in the prompt.
    _PREVIEW_ROWS = 8

    def __init__(self):
        # get_llm_client() yields the client object and the model name to use.
        self.client, self.model = get_llm_client(LLM_CONFIG)

    def consolidate(self, sessions: list[AnalysisSession], question: str = "",
                    charts: list[dict] | None = None) -> str:
        """Produce one combined report for *sessions*.

        Args:
            sessions: Analysis sessions to merge.
            question: Core question for the report; when empty, the question
                of the first session is used.
            charts: Optional chart descriptors; each dict is read through its
                ``'title'`` and ``'path'`` keys.

        Returns:
            The LLM-written report, or the concatenated fallback report when
            the LLM call raises or yields empty content. A fixed placeholder
            string is returned when *sessions* is empty.
        """
        if not sessions:
            return "(无分析数据可整合)"
        if not question:
            question = sessions[0].question

        sections = self._build_sections(sessions)
        charts_text = "\n".join(
            f"{idx}. {chart['title']}: {chart['path']}"
            for idx, chart in enumerate(charts or [], 1)
        ) or "无图表。"
        prompt = CONSOLIDATE_PROMPT.format(
            question=question, sections=sections, charts_text=charts_text,
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "你是高级数据分析总监,整合多维度分析结果。"},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.3, max_tokens=4096,
            )
            report = response.choices[0].message.content
        except Exception as e:
            # Deliberate best-effort boundary: any LLM failure degrades to
            # plain concatenation rather than aborting the whole run.
            print(f" ⚠️ LLM 整合失败: {e},使用拼接模式")
            return self._fallback_concat(sessions, charts)

        # The message content may be None or blank; honour the ``-> str``
        # contract by falling back instead of returning it.
        if not report or not report.strip():
            print(" ⚠️ LLM 返回内容为空,使用拼接模式")
            return self._fallback_concat(sessions, charts)
        return report

    def _build_sections(self, sessions: list[AnalysisSession]) -> str:
        """Render every session as a Markdown section for the prompt.

        Each section lists the session question, its analysis type, a preview
        of the rows of every successful non-"done" step that returned data,
        and the session insights. Sections are separated by ``---`` rules.
        """
        parts = []
        for idx, session in enumerate(sessions, 1):
            lines = [
                f"### 分析 {idx}: {session.question}\n",
                f"类型: {session.plan.get('analysis_type', '未知')}\n\n",
            ]
            for step in session.steps:
                if not step.success or not step.rows or step.action == "done":
                    continue
                # default=str keeps dates, Decimals and similar values from
                # raising TypeError; the text is only read by the LLM.
                preview = json.dumps(
                    step.rows[:self._PREVIEW_ROWS],
                    ensure_ascii=False,
                    default=str,
                )
                lines.append(f"- {step.purpose} ({step.row_count} 行)\n")
                lines.append(f" 数据: {preview}\n\n")
            if session.insights:
                bullets = "\n".join(f"- {insight}" for insight in session.insights)
                lines.append("#### 洞察\n" + bullets + "\n")
            parts.append("".join(lines))
        return "\n---\n".join(parts)

    def _fallback_concat(self, sessions: list[AnalysisSession], charts: list[dict] | None) -> str:
        """Build a report without the LLM by concatenating session reports.

        Each session contributes a numbered heading followed by its own
        ``report`` text; when *charts* is non-empty, a final section embeds
        every chart as a Markdown image.
        """
        parts = ["# 综合分析报告\n"]
        parts.extend(
            f"## 第 {idx} 部分: {session.question}\n{session.report}\n"
            for idx, session in enumerate(sessions, 1)
        )
        if charts:
            images = "\n".join(f"![{chart['title']}]({chart['path']})" for chart in charts)
            parts.append("\n## 可视化\n" + images)
        return "\n".join(parts)