#!/usr/bin/env python
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
知识库体检脚本
|
|||
|
|
|
|||
|
|
对 KnowledgeEntry 做简单统计,供 kb-audit Skill 调用。
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import sys
|
|||
|
|
from datetime import datetime, timedelta
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
|
|||
|
|
def add_project_root_to_path():
    """Make the project root importable by prepending it to ``sys.path``.

    The root is taken to be ``parents[4]`` of this file's resolved path,
    which corresponds to a script stored under
    ``.claude/skills/kb-audit/scripts/``. ``sys.path`` is left untouched
    when the root is already listed in it.
    """
    root_dir = str(Path(__file__).resolve().parents[4])
    if root_dir in sys.path:
        return
    sys.path.insert(0, root_dir)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Print a health report for the knowledge base to stdout.

    The project root is first put on ``sys.path`` so the ``src`` package can
    be imported. A single database session is then used to report:

    * the total number of ``KnowledgeEntry`` rows;
    * rows whose ``confidence_score`` is set and below 0.7;
    * rows whose ``usage_count`` is NULL or below 3;
    * rows whose ``updated_at`` is set and earlier than 90 days ago;
    * a one-line preview of the five most recently created rows.

    A short list of review suggestions is printed at the end.
    """
    add_project_root_to_path()

    # Imported here rather than at module level: ``src`` only becomes
    # importable after the project root has been added to ``sys.path``.
    from src.core.database import db_manager
    from src.core.models import KnowledgeEntry

    # Maximum number of question characters shown for each sample entry.
    preview_len = 40

    print("=== 知识库健康检查 ===\n")

    with db_manager.get_session() as session:
        total = session.query(KnowledgeEntry).count()
        print(f"知识条目总数: {total}")

        # Low confidence (< 0.7); rows without a score are excluded.
        low_conf = (
            session.query(KnowledgeEntry)
            .filter(KnowledgeEntry.confidence_score.isnot(None))
            .filter(KnowledgeEntry.confidence_score < 0.7)
            .count()
        )
        print(f"低置信度条目数 (confidence_score < 0.7): {low_conf}")

        # Very low usage (usage_count < 3, or NULL).
        low_usage = (
            session.query(KnowledgeEntry)
            .filter(
                (KnowledgeEntry.usage_count.is_(None))
                | (KnowledgeEntry.usage_count < 3)
            )
            .count()
        )
        print(f"使用次数极低条目数 (usage_count < 3 或空): {low_usage}")

        # Not updated for a long time (> 90 days).
        # NOTE(review): the cutoff is naive local time; this assumes
        # updated_at is stored the same way -- confirm against the model.
        cutoff = datetime.now() - timedelta(days=90)
        old_entries = (
            session.query(KnowledgeEntry)
            .filter(
                (KnowledgeEntry.updated_at.isnot(None))
                & (KnowledgeEntry.updated_at < cutoff)
            )
            .count()
        )
        print(f"长期未更新条目数 (updated_at > 90 天未更新): {old_entries}")

        print("\n示例问题条目(不含完整答案,仅展示前若干个):")
        sample_entries = (
            session.query(KnowledgeEntry)
            .order_by(KnowledgeEntry.created_at.desc())
            .limit(5)
            .all()
        )
        for e in sample_entries:
            question = e.question or ""
            q_preview = question[:preview_len]
            # Only mark the preview as truncated when text was actually cut;
            # short or empty questions are shown without a trailing ellipsis.
            ellipsis = "..." if len(question) > preview_len else ""
            print(
                f" ID={e.id}, category={e.category}, "
                f"confidence={e.confidence_score}, usage={e.usage_count}, "
                f"Q='{q_preview}{ellipsis}'"
            )

    print("\n提示:")
    print(" - 建议优先审查低置信度且 usage_count 较高的条目;")
    print(" - 对长期未更新且 usage_count 较高的条目,可考虑人工复查内容是否过时;")
    print(" - 对 usage_count 极低且从未触发的条目,可考虑合并或归档。")
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|||
|
|
|