180 lines
6.5 KiB
Python
180 lines
6.5 KiB
Python
|
|
"""
|
|||
|
|
Layer 1.5: 预设分析剧本
|
|||
|
|
"""
|
|||
|
|
import json
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
from typing import Optional
|
|||
|
|
|
|||
|
|
from core.config import LLM_CONFIG
|
|||
|
|
from core.utils import get_llm_client, extract_json_object, extract_json_array
|
|||
|
|
|
|||
|
|
|
|||
|
|
class Playbook:
|
|||
|
|
"""一个预设分析剧本"""
|
|||
|
|
def __init__(self, data: dict):
|
|||
|
|
self.name = data["name"]
|
|||
|
|
self.description = data["description"]
|
|||
|
|
self.tags = data.get("tags", [])
|
|||
|
|
self.preset_queries: list[dict] = data.get("preset_queries", [])
|
|||
|
|
self.exploration_hints = data.get("exploration_hints", "")
|
|||
|
|
self.placeholders = data.get("placeholders", {})
|
|||
|
|
|
|||
|
|
def to_summary(self) -> str:
|
|||
|
|
return f"[{self.name}] {self.description} (标签: {', '.join(self.tags)})"
|
|||
|
|
|
|||
|
|
def render_queries(self, schema: dict) -> list[dict]:
|
|||
|
|
rendered = []
|
|||
|
|
for q in self.preset_queries:
|
|||
|
|
sql, purpose = q["sql"], q.get("purpose", "")
|
|||
|
|
for key, val in self.placeholders.items():
|
|||
|
|
sql = sql.replace(f"{{{{{key}}}}}", val)
|
|||
|
|
purpose = purpose.replace(f"{{{{{key}}}}}", val)
|
|||
|
|
rendered.append({"sql": sql, "purpose": purpose})
|
|||
|
|
return rendered
|
|||
|
|
|
|||
|
|
|
|||
|
|
class PlaybookManager:
|
|||
|
|
"""加载和匹配 Playbook"""
|
|||
|
|
|
|||
|
|
def __init__(self, playbook_dir: str = ""):
|
|||
|
|
self.playbooks: list[Playbook] = []
|
|||
|
|
self.client, self.model = get_llm_client(LLM_CONFIG)
|
|||
|
|
if playbook_dir and os.path.isdir(playbook_dir):
|
|||
|
|
self._load_from_dir(playbook_dir)
|
|||
|
|
|
|||
|
|
def _load_from_dir(self, dir_path: str):
|
|||
|
|
for fname in sorted(os.listdir(dir_path)):
|
|||
|
|
if not fname.endswith(".json"):
|
|||
|
|
continue
|
|||
|
|
try:
|
|||
|
|
with open(os.path.join(dir_path, fname), "r", encoding="utf-8") as f:
|
|||
|
|
data = json.load(f)
|
|||
|
|
items = data if isinstance(data, list) else [data]
|
|||
|
|
for item in items:
|
|||
|
|
self.playbooks.append(Playbook(item))
|
|||
|
|
except (json.JSONDecodeError, KeyError) as e:
|
|||
|
|
print(f" ⚠️ 加载 playbook 失败 {fname}: {e}")
|
|||
|
|
|
|||
|
|
def add(self, playbook: Playbook):
|
|||
|
|
self.playbooks.append(playbook)
|
|||
|
|
|
|||
|
|
def auto_generate(self, schema_text: str, save_dir: str = "") -> list[Playbook]:
|
|||
|
|
"""让 LLM 根据 Schema 自动生成 Playbook"""
|
|||
|
|
prompt = f"""你是一个数据分析专家。根据以下数据库 Schema,生成 3-5 个预设分析剧本。
|
|||
|
|
|
|||
|
|
## 数据库 Schema
|
|||
|
|
{schema_text}
|
|||
|
|
|
|||
|
|
## 输出格式(严格 JSON 数组)
|
|||
|
|
```json
|
|||
|
|
[
|
|||
|
|
{{
|
|||
|
|
"name": "剧本名称",
|
|||
|
|
"description": "一句话描述",
|
|||
|
|
"tags": ["关键词1", "关键词2"],
|
|||
|
|
"preset_queries": [
|
|||
|
|
{{"purpose": "查询目的", "sql": "SELECT ... GROUP BY ..."}}
|
|||
|
|
],
|
|||
|
|
"exploration_hints": "后续探索提示"
|
|||
|
|
}}
|
|||
|
|
]
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## SQL 规则
|
|||
|
|
- 只用 SELECT,必须有聚合函数或 GROUP BY
|
|||
|
|
- 禁止 SELECT *,用 ROUND 控制精度,合理 LIMIT
|
|||
|
|
- 直接使用实际表名和列名"""
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
response = self.client.chat.completions.create(
|
|||
|
|
model=self.model,
|
|||
|
|
messages=[
|
|||
|
|
{"role": "system", "content": "你是数据分析专家。只输出 JSON,不要其他内容。"},
|
|||
|
|
{"role": "user", "content": prompt},
|
|||
|
|
],
|
|||
|
|
temperature=0.3, max_tokens=4096,
|
|||
|
|
)
|
|||
|
|
content = response.choices[0].message.content.strip()
|
|||
|
|
playbooks_data = extract_json_array(content)
|
|||
|
|
if not playbooks_data:
|
|||
|
|
return []
|
|||
|
|
|
|||
|
|
generated = []
|
|||
|
|
for i, pb_data in enumerate(playbooks_data):
|
|||
|
|
pb_data.setdefault("tags", [])
|
|||
|
|
pb_data.setdefault("exploration_hints", "")
|
|||
|
|
pb_data.setdefault("placeholders", {})
|
|||
|
|
try:
|
|||
|
|
pb = Playbook(pb_data)
|
|||
|
|
self.playbooks.append(pb)
|
|||
|
|
generated.append(pb)
|
|||
|
|
if save_dir:
|
|||
|
|
os.makedirs(save_dir, exist_ok=True)
|
|||
|
|
safe = re.sub(r'[^\w\u4e00-\u9fff]', '_', pb.name)[:30]
|
|||
|
|
fpath = os.path.join(save_dir, f"auto_{i+1}_{safe}.json")
|
|||
|
|
with open(fpath, "w", encoding="utf-8") as f:
|
|||
|
|
json.dump(pb_data, f, ensure_ascii=False, indent=2)
|
|||
|
|
except (KeyError, TypeError) as e:
|
|||
|
|
print(f" ⚠️ 跳过无效 Playbook: {e}")
|
|||
|
|
return generated
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f" ⚠️ 自动生成 Playbook 出错: {e}")
|
|||
|
|
return []
|
|||
|
|
|
|||
|
|
def match(self, plan: dict, schema_text: str) -> Optional[dict]:
|
|||
|
|
"""用 LLM 判断当前分析计划是否匹配某个 Playbook"""
|
|||
|
|
if not self.playbooks:
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
pb_summaries = []
|
|||
|
|
for i, pb in enumerate(self.playbooks):
|
|||
|
|
queries_desc = "\n".join(f" - {q.get('purpose', '')}: {q['sql'][:100]}" for q in pb.preset_queries)
|
|||
|
|
pb_summaries.append(f"{i+1}. {pb.to_summary()}\n 预设查询:\n{queries_desc}")
|
|||
|
|
|
|||
|
|
prompt = f"""判断当前分析计划是否适合使用某个预设剧本。
|
|||
|
|
|
|||
|
|
## 分析计划
|
|||
|
|
```json
|
|||
|
|
{json.dumps(plan, ensure_ascii=False, indent=2)}
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
## Schema
|
|||
|
|
{schema_text}
|
|||
|
|
|
|||
|
|
## 可用剧本
|
|||
|
|
{chr(10).join(pb_summaries)}
|
|||
|
|
|
|||
|
|
## 输出(严格 JSON)
|
|||
|
|
匹配: {{"matched": true, "playbook_index": 1, "reasoning": "原因", "placeholders": {{}}}}
|
|||
|
|
不匹配: {{"matched": false, "reasoning": "原因"}}"""
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
response = self.client.chat.completions.create(
|
|||
|
|
model=self.model,
|
|||
|
|
messages=[
|
|||
|
|
{"role": "system", "content": "你是分析计划匹配器。"},
|
|||
|
|
{"role": "user", "content": prompt},
|
|||
|
|
],
|
|||
|
|
temperature=0.1, max_tokens=512,
|
|||
|
|
)
|
|||
|
|
result = extract_json_object(response.choices[0].message.content.strip())
|
|||
|
|
if not result.get("matched"):
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
idx = result.get("playbook_index", 1) - 1
|
|||
|
|
if idx < 0 or idx >= len(self.playbooks):
|
|||
|
|
return None
|
|||
|
|
|
|||
|
|
pb = self.playbooks[idx]
|
|||
|
|
pb.placeholders = {**pb.placeholders, **result.get("placeholders", {})}
|
|||
|
|
return {
|
|||
|
|
"matched": True, "playbook_name": pb.name,
|
|||
|
|
"reasoning": result.get("reasoning", ""),
|
|||
|
|
"preset_queries": pb.render_queries({}),
|
|||
|
|
"exploration_hints": pb.exploration_hints,
|
|||
|
|
}
|
|||
|
|
except Exception as e:
|
|||
|
|
print(f" ⚠️ Playbook 匹配出错: {e}")
|
|||
|
|
return None
|