Files
iov_ana/layers/context.py
openclaw e8f8e2f1ba feat: 四层架构全面增强
安全与稳定性:
- 移除硬编码 API Key,改用 .env + 环境变量
- LLM 调用统一重试机制(指数退避,3 次重试,处理 429/5xx/超时)
- 中文字体检测增强(CJK 关键词兜底 + 无字体时英文 fallback)
- 缺失 API Key 给出友好提示而非崩溃

分析能力提升:
- 异常检测新增 z-score 检测(标准差>2 标记异常)
- 新增变异系数 CV 检测(数据波动性)
- 新增零值/缺失检测
- 上下文管理器升级为关键词语义匹配(替代简单取最近 2 条)

用户体验:
- 报告自动保存为 Markdown(reports/ 目录)
- 新增 export 命令导出查询结果为 CSV
- 新增 reports 命令查看已保存报告
- CLI 支持 readline 命令历史(方向键翻阅)
- CSV 导入工具重写:自动列名映射、容错处理、dry-run 模式
- 新增 .env.example 配置模板
2026-03-31 14:39:17 +08:00

120 lines
4.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Layer 4: 上下文管理器 —— 增强版
- 关键词语义匹配,替代简单取最近 N 条
- 会话摘要去重
"""
import time
import re
from dataclasses import dataclass, field
from typing import Optional
from layers.explorer import ExplorationStep
from layers.insights import Insight
@dataclass
class AnalysisSession:
    """Complete record of one analysis run.

    Holds the user's question, the plan that was executed, the exploration
    steps and insights that were produced, and the final report text.
    """

    # The question as asked by the user.
    question: str
    # Analysis plan; this class reads the keys 'intent', 'dimensions'
    # and 'analysis_type' from it.
    plan: dict
    # Executed exploration steps; this class reads .success, .rows,
    # .purpose and .row_count from each one.
    steps: list[ExplorationStep]
    # Insights derived from the steps; rendered with str() in summaries.
    insights: list[Insight]
    # Final report text.
    report: str
    # Creation time in seconds since the epoch.
    timestamp: float = field(default_factory=time.time)

    @staticmethod
    def _tokenize(text: str) -> set[str]:
        """Split *text* into a set of keywords.

        CJK ideographs (U+4E00..U+9FFF) are emitted one character at a time;
        ASCII letter runs of length >= 2 are emitted as lower-cased words.
        Matching whole CJK runs instead would make two questions overlap only
        when they share an identical contiguous phrase.
        """
        cn_chars = set(re.findall(r"[\u4e00-\u9fff]", text))
        en_words = set(re.findall(r"[a-zA-Z]{2,}", text.lower()))
        return cn_chars | en_words

    @property
    def keywords(self) -> set[str]:
        """Keywords of this session.

        Built from the question plus the plan's 'intent' and 'dimensions'.
        A missing/empty plan and missing or non-string dimensions are
        tolerated.
        """
        plan = self.plan or {}
        dimensions = " ".join(str(d) for d in plan.get("dimensions") or [])
        text = f"{self.question} {plan.get('intent') or ''} {dimensions}"
        return self._tokenize(text)

    def similarity(self, question: str) -> float:
        """Return the fraction of *question*'s keywords found in this session.

        The score is ``len(overlap) / len(question keywords)``, i.e. coverage
        of the new question rather than a symmetric Jaccard index. Returns
        0.0 when *question* yields no keywords.
        """
        query_keywords = self._tokenize(question)
        if not query_keywords:
            return 0.0
        return len(self.keywords & query_keywords) / len(query_keywords)

    def summary(self) -> str:
        """Return a Markdown summary of the session.

        Contains the question, the plan's analysis type and dimensions (when
        a plan is present), the first row of up to five successful steps, and
        up to three insights.
        """
        parts = [f"**问题**: {self.question}"]
        if self.plan:
            dimensions = ", ".join(
                str(d) for d in self.plan.get("dimensions") or []
            )
            parts.append(
                f"**分析类型**: {self.plan.get('analysis_type', 'unknown')}"
            )
            parts.append(f"**维度**: {dimensions}")

        key_findings = []
        for step in self.steps:
            if not (step.success and step.rows):
                continue
            # Only the first row is shown; "id" columns carry no information
            # for the reader and are dropped.
            columns = ", ".join(
                f"{k}={v}" for k, v in step.rows[0].items() if k.lower() != "id"
            )
            key_findings.append(f"{step.purpose}: {columns}")

        if key_findings:
            parts.append("**核心发现**:")
            parts.extend(f" - {finding}" for finding in key_findings[:5])

        if self.insights:
            parts.append("**洞察**:")
            parts.extend(f" - {insight}" for insight in self.insights[:3])

        return "\n".join(parts)

    def to_reference_text(self) -> str:
        """Return this session rendered as reference context for a new question.

        The text holds the question, the Markdown summary, and one
        ``purpose: row_count`` line per successful step.
        """
        step_lines = "\n".join(
            f"- {s.purpose}: {s.row_count}" for s in self.steps if s.success
        )
        return (
            f"## 之前的分析\n### 问题\n{self.question}\n"
            f"### 摘要\n{self.summary()}\n### 发现\n"
            + step_lines
        )
class ContextManager:
    """Keeps a bounded history of analysis sessions and selects relevant ones.

    Relevance is decided by each session's ``similarity(question)`` score.
    """

    # A session is cited only when its similarity is strictly above this.
    SIMILARITY_THRESHOLD = 0.3
    # Maximum number of past sessions returned as context.
    MAX_REFERENCES = 3

    def __init__(self, max_history: int = 20):
        """Create an empty manager that retains at most *max_history* sessions."""
        self.sessions: list[AnalysisSession] = []
        self.max_history = max_history

    def add_session(self, question: str, plan: dict, steps: list[ExplorationStep],
                    insights: list[Insight], report: str) -> AnalysisSession:
        """Record a finished analysis and return the stored session.

        The oldest sessions are dropped once the history exceeds
        ``max_history``.
        """
        session = AnalysisSession(
            question=question, plan=plan, steps=steps, insights=insights, report=report
        )
        self.sessions.append(session)
        # Slice by the number of surplus entries rather than by
        # ``-max_history``: ``[-0:]`` is the whole list, which would make a
        # cap of 0 keep everything.
        overflow = len(self.sessions) - self.max_history
        if overflow > 0:
            self.sessions = self.sessions[overflow:]
        return session

    def get_context_for(self, new_question: str) -> Optional[str]:
        """Return reference text from the most relevant past sessions.

        Sessions scoring above ``SIMILARITY_THRESHOLD`` are ordered by
        similarity (highest first, the more recent session winning ties) and
        at most ``MAX_REFERENCES`` are joined with a Markdown rule. When no
        session passes the threshold, the most recent session is returned as
        a fallback. Returns ``None`` when there is no history at all.
        """
        if not self.sessions:
            return None

        scored = []
        for index, session in enumerate(self.sessions):
            score = session.similarity(new_question)
            if score > self.SIMILARITY_THRESHOLD:
                # The index records recency: later sessions have larger values.
                scored.append((score, index, session))

        if not scored:
            # Nothing relevant: fall back to the latest session.
            return self.sessions[-1].to_reference_text()

        scored.sort(key=lambda item: (item[0], item[1]), reverse=True)
        top = scored[:self.MAX_REFERENCES]
        return "\n\n---\n\n".join(
            session.to_reference_text() for _, _, session in top
        )

    def get_history_summary(self) -> str:
        """Return a numbered list of stored questions with local HH:MM times."""
        if not self.sessions:
            return "(无历史分析)"
        lines = [f"{len(self.sessions)} 次分析:"]
        for number, session in enumerate(self.sessions, 1):
            stamp = time.strftime("%H:%M", time.localtime(session.timestamp))
            lines.append(f" {number}. [{stamp}] {session.question}")
        return "\n".join(lines)

    def clear(self):
        """Remove every stored session."""
        self.sessions.clear()