feat: 四层架构全面增强

安全与稳定性:
- 移除硬编码 API Key，改用 .env + 环境变量
- LLM 调用统一重试机制（指数退避，3 次重试，处理 429/5xx/超时）
- 中文字体检测增强（CJK 关键词兜底 + 无字体时英文 fallback）
- 缺失 API Key 给出友好提示而非崩溃

分析能力提升:
- 异常检测新增 z-score 检测（标准差>2 标记异常）
- 新增变异系数 CV 检测（数据波动性）
- 新增零值/缺失检测
- 上下文管理器升级为关键词语义匹配（替代简单取最近 2 条）

用户体验:
- 报告自动保存为 Markdown（reports/ 目录）
- 新增 export 命令导出查询结果为 CSV
- 新增 reports 命令查看已保存报告
- CLI 支持 readline 命令历史（方向键翻阅）
- CSV 导入工具重写：自动列名映射、容错处理、dry-run 模式
- 新增 .env.example 配置模板
2026-03-31 14:39:17 +08:00
parent b7a27b12bd
commit e8f8e2f1ba
14 changed files with 588 additions and 115 deletions
--- a/core/config.py
+++ b/core/config.py
@@ -1,13 +1,38 @@
 """
-配置文件
+配置文件 —— 支持环境变量 + .env 文件
 """
 import os

+
+def _load_dotenv(path: str = ".env"):
+    """Load KEY=VALUE pairs from a .env file into ``os.environ``.
+
+    A minimal loader that avoids a python-dotenv dependency. Blank lines,
+    ``#`` comment lines and lines without ``=`` are skipped. Variables that
+    already exist in the environment are never overwritten.
+
+    Args:
+        path: Location of the .env file. A missing file is silently ignored.
+    """
+    if not os.path.isfile(path):
+        return
+    # utf-8-sig drops a leading BOM (common for files saved on Windows) that
+    # would otherwise become part of the first key.
+    with open(path, "r", encoding="utf-8-sig") as f:
+        for line in f:
+            line = line.strip()
+            if not line or line.startswith("#") or "=" not in line:
+                continue
+            key, _, val = line.partition("=")
+            key, val = key.strip(), val.strip()
+            # Accept the shell-style "export KEY=value" form.
+            if key.startswith("export "):
+                key = key[len("export "):].strip()
+            # Remove exactly one pair of matching surrounding quotes; quotes
+            # that belong to the value itself are left alone.
+            if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
+                val = val[1:-1]
+            if key and key not in os.environ:  # the real environment wins
+                os.environ[key] = val
+
+
+# Project root (defined first because the .env loading below needs it)
+PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
+
+# Load .env files: the project-root file first, then the one in the current
+# working directory. _load_dotenv never overwrites a variable that is already
+# set, so values from the project-root file take precedence.
+_load_dotenv(os.path.join(PROJECT_ROOT, ".env"))
+_load_dotenv(".env")
+
 # LLM 配置（兼容 OpenAI API 格式，包括 Ollama / vLLM / DeepSeek 等）
 LLM_CONFIG = {
-    "api_key": os.getenv("LLM_API_KEY", "sk-c44i1hy64xgzwox6x08o4zug93frq6rgn84oqugf2pje1tg4"),
-    "base_url": os.getenv("LLM_BASE_URL", "https://api.xiaomimimo.com/v1"),
-    "model": os.getenv("LLM_MODEL", "mimo-v2-flash"),
+    "api_key": os.getenv("LLM_API_KEY", ""),
+    "base_url": os.getenv("LLM_BASE_URL", "https://api.openai.com/v1"),
+    "model": os.getenv("LLM_MODEL", "gpt-4o-mini"),
 }

 # 沙箱安全规则
--- a/core/utils.py
+++ b/core/utils.py
@@ -1,8 +1,9 @@
 """
-公共工具 —— JSON 提取、LLM 客户端单例
+公共工具 —— JSON 提取、LLM 客户端单例、重试机制
 """
 import json
 import re
+import time
 from typing import Any

 import openai
@@ -17,14 +18,77 @@ def get_llm_client(config: dict) -> tuple[openai.OpenAI, str]:
    """获取 LLM 客户端（单例），避免每个组件各建一个"""
    global _llm_client, _llm_model
    if _llm_client is None:
+        api_key = config.get("api_key", "")
+        if not api_key:
+            raise RuntimeError(
+                "LLM_API_KEY 未配置！请设置环境变量或在 .env 文件中添加：\n"
+                "  LLM_API_KEY=your-key\n"
+                "  LLM_BASE_URL=https://api.openai.com/v1\n"
+                "  LLM_MODEL=gpt-4o-mini"
+            )
        _llm_client = openai.OpenAI(
-            api_key=config["api_key"],
+            api_key=api_key,
            base_url=config["base_url"],
        )
        _llm_model = config["model"]
    return _llm_client, _llm_model


+# ── LLM 调用重试包装 ────────────────────────────────
+
+class LLMCallError(Exception):
+    """Raised when an LLM call has ultimately failed."""
+
+
+def llm_chat(client: openai.OpenAI, model: str, messages: list[dict],
+             max_retries: int = 3, **kwargs) -> str:
+    """Call the chat-completions endpoint with exponential-backoff retries.
+
+    Rate limits (429), timeouts, connection failures and 5xx responses are
+    retried. Other 4xx responses (bad request, invalid credentials, ...)
+    cannot succeed on a retry, so they fail immediately.
+
+    Args:
+        client: OpenAI-compatible client.
+        model: Model name passed to the API.
+        messages: Chat messages in OpenAI format.
+        max_retries: Total number of attempts.
+        **kwargs: Forwarded unchanged to ``chat.completions.create``.
+
+    Returns:
+        The stripped content of the first choice, or an empty string when the
+        response carries no text content.
+
+    Raises:
+        LLMCallError: All attempts failed, or the API error is not retryable.
+        Exception: An unexpected (non-API) error raised by the final attempt
+            is re-raised unchanged.
+    """
+    last_err = None
+    for attempt in range(max_retries):
+        # No point in sleeping once there is no further attempt to wait for.
+        is_last = attempt >= max_retries - 1
+        try:
+            response = client.chat.completions.create(
+                model=model, messages=messages, **kwargs
+            )
+            # content can be None (e.g. tool-call responses); treat as empty.
+            return (response.choices[0].message.content or "").strip()
+        except openai.RateLimitError as e:
+            last_err = e
+            if is_last:
+                break
+            # Use Retry-After when the server sent one, else default backoff
+            wait = _get_retry_delay(e, attempt)
+            print(f"     ⏳ 限频，等待 {wait:.1f}s 后重试 ({attempt+1}/{max_retries})...")
+            time.sleep(wait)
+        except (openai.APITimeoutError, openai.APIConnectionError, openai.APIStatusError) as e:
+            # Connection/timeout errors carry no status code and are retried.
+            status = getattr(e, "status_code", None)
+            if status is not None and status < 500 and status not in (408, 409):
+                raise LLMCallError(f"LLM 调用失败（不可重试，HTTP {status}）: {e}") from e
+            last_err = e
+            if is_last:
+                break
+            wait = min(2 ** attempt * 2, 30)
+            print(f"     ⚠️ API 错误: {type(e).__name__}，等待 {wait:.1f}s ({attempt+1}/{max_retries})...")
+            time.sleep(wait)
+        except Exception as e:
+            last_err = e
+            if is_last:
+                raise
+            time.sleep(min(2 ** attempt * 2, 30))
+
+    raise LLMCallError(f"LLM 调用失败（{max_retries} 次重试）: {last_err}") from last_err
+
+
+def _get_retry_delay(error, attempt: int, max_delay: float = 30) -> float:
+    """Return the number of seconds to wait before the next retry.
+
+    A numeric ``Retry-After`` header on ``error.response`` is preferred when
+    it is a finite, non-negative number; anything else (missing header,
+    HTTP-date form, negative/NaN/infinite value) falls back to exponential
+    backoff. The result never exceeds ``max_delay``, so it is always safe to
+    pass to ``time.sleep``.
+
+    Args:
+        error: The raised API error; may or may not carry a ``response``.
+        attempt: Zero-based index of the attempt that just failed.
+        max_delay: Upper bound in seconds for the returned delay.
+    """
+    response = getattr(error, "response", None)
+    if response is not None:
+        try:
+            retry_after = response.headers.get("Retry-After")
+            delay = float(retry_after) if retry_after else None
+        except (AttributeError, TypeError, ValueError):
+            # No usable headers, or Retry-After given as an HTTP date.
+            delay = None
+        # The chained comparison is False for NaN and for +/-infinity.
+        if delay is not None and 0 <= delay < float("inf"):
+            return min(delay, max_delay)
+    # Exponential backoff: 2s, 4s, 8s, ... capped at max_delay
+    return min(2 ** (attempt + 1), max_delay)
+
+
 # ── JSON 提取 ────────────────────────────────────────

 def extract_json_object(text: str) -> dict: