- Explorer 的 system prompt 明确告知 sandbox 规则 — "每条 SQL 必须包含聚合函数或 LIMIT",减少 LLM 生成违规 SQL 浪费轮次
- LLM 客户端单例 — 所有组件共享一个 openai.OpenAI 实例,不再各建各的
- sanitize 顺序修复 — 小样本抑制放在 float round 之前,避免被 round 干扰
- quick_detect 从 O(n²) 改为 O(n) — 按列聚合一次,加去重,不再对每行重复算整列统计
- 历史上下文实际生效 — get_context_for 的结果现在会注入到 Explorer 的初始 prompt 里,多轮分析时 LLM 能看到之前的发现
104 lines
3.1 KiB
Python
104 lines
3.1 KiB
Python
"""
|
|
将工单 CSV 数据导入 SQLite 数据库
|
|
"""
|
|
import csv
|
|
import sqlite3
|
|
import os
|
|
import sys
|
|
|
|
def _parse_days(value):
    """Parse the 关闭时长(天) CSV field into a float.

    Returns None for empty/missing values and — unlike a bare float() —
    also for malformed values, so one bad cell cannot abort the import.
    """
    if not value:
        return None
    try:
        return float(value)
    except ValueError:
        return None


# CSV columns copied verbatim into the same-named TEXT columns, in table order.
_TEXT_COLUMNS = (
    "工单号", "来源", "创建日期", "问题类型", "问题描述", "处理过程",
    "跟踪记录", "严重程度", "工单状态", "模块", "责任人", "关闭日期",
    "车型", "VIN", "SIM", "Notes", "Attachment", "创建人",
)


def import_csv(csv_path: str, db_path: str):
    """Import a ticket CSV into a fresh SQLite database.

    Any existing database at *db_path* is deleted first, the ``tickets``
    table is recreated, all rows from *csv_path* are bulk-inserted, and a
    few sanity-check summaries (row count, distinct values of key columns)
    are printed.

    Args:
        csv_path: Source CSV path (read as UTF-8; a leading BOM is tolerated).
        db_path: SQLite database file to (re)create.
    """
    if os.path.exists(db_path):
        os.remove(db_path)
        print(f"🗑️ 已删除旧数据库: {db_path}")

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()

        # Schema mirrors the CSV layout: 18 TEXT columns, one REAL duration,
        # then the two pre-parsed date columns.
        cur.execute("""
        CREATE TABLE tickets (
            工单号 TEXT PRIMARY KEY,
            来源 TEXT,
            创建日期 TEXT,
            问题类型 TEXT,
            问题描述 TEXT,
            处理过程 TEXT,
            跟踪记录 TEXT,
            严重程度 TEXT,
            工单状态 TEXT,
            模块 TEXT,
            责任人 TEXT,
            关闭日期 TEXT,
            车型 TEXT,
            VIN TEXT,
            SIM TEXT,
            Notes TEXT,
            Attachment TEXT,
            创建人 TEXT,
            关闭时长_天 REAL,
            创建日期_解析 TEXT,
            关闭日期_解析 TEXT
        )
        """)

        # utf-8-sig strips the BOM that Excel tends to prepend.
        with open(csv_path, "r", encoding="utf-8-sig") as f:
            rows = list(csv.DictReader(f))

        records = [
            tuple(row.get(col, "") for col in _TEXT_COLUMNS) + (
                _parse_days(row.get("关闭时长(天)")),
                row.get("创建日期_解析", ""),
                row.get("关闭日期_解析", ""),
            )
            for row in rows
        ]

        # executemany: one prepared statement for all rows instead of a
        # Python-level loop of per-row execute() calls.
        placeholders = ", ".join("?" * 21)
        cur.executemany(f"INSERT INTO tickets VALUES ({placeholders})", records)
        conn.commit()
        print(f"✅ 导入 {len(rows)} 条工单到 {db_path}")

        # Sanity checks: total count plus distinct values of key columns.
        cur.execute("SELECT COUNT(*) FROM tickets")
        print(f" 数据库中共 {cur.fetchone()[0]} 条记录")

        for column in ("问题类型", "工单状态", "车型"):
            cur.execute(f"SELECT DISTINCT {column} FROM tickets")
            values = [r[0] for r in cur.fetchall()]
            print(f" {column}: {', '.join(values)}")
    finally:
        # Close the connection even if the CSV read or an INSERT fails.
        conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: an optional first argument overrides the default CSV;
    # the database always lives next to this script.
    source_csv = "cleaned_data.csv" if len(sys.argv) < 2 else sys.argv[1]
    database = os.path.join(os.path.dirname(__file__), "demo.db")
    import_csv(source_csv, database)
|