524 lines
17 KiB
Python
524 lines
17 KiB
Python
"""报告生成引擎的单元测试。"""
|
||
|
||
import pytest
|
||
import tempfile
|
||
import os
|
||
|
||
from src.engines.report_generation import (
|
||
extract_key_findings,
|
||
organize_report_structure,
|
||
generate_report,
|
||
_categorize_insight,
|
||
_calculate_importance,
|
||
_generate_report_title,
|
||
_generate_default_sections
|
||
)
|
||
from src.models.analysis_result import AnalysisResult
|
||
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
|
||
from src.models.data_profile import DataProfile, ColumnInfo
|
||
|
||
|
||
@pytest.fixture
def sample_results():
    """Return three analysis results: two successful tasks and one failed task.

    Each successful result carries two insights and one visualization; the
    failed result has empty data, no insights and an error message.
    """
    status_distribution = AnalysisResult(
        task_id='task1',
        task_name='状态分布分析',
        success=True,
        data={'open': 50, 'closed': 30, 'pending': 20},
        visualizations=['chart1.png'],
        insights=[
            '待处理工单占比50%,异常高',
            '已关闭工单占比30%',
        ],
        execution_time=2.5,
    )
    ticket_trend = AnalysisResult(
        task_id='task2',
        task_name='趋势分析',
        success=True,
        data={'trend': 'increasing'},
        visualizations=['chart2.png'],
        insights=[
            '工单数量呈上升趋势',
            '增长率为15%',
        ],
        execution_time=3.2,
    )
    # Failed task: used by tests that check failures are skipped or reported.
    failed_type_analysis = AnalysisResult(
        task_id='task3',
        task_name='类型分析',
        success=False,
        data={},
        visualizations=[],
        insights=[],
        error='数据缺少类型字段',
        execution_time=0.1,
    )
    return [status_distribution, ticket_trend, failed_type_analysis]
|
||
|
||
|
||
@pytest.fixture
def sample_requirement():
    """Return a requirement spec with a single priority-5 objective and no template."""
    health_objective = AnalysisObjective(
        name='健康度分析',
        description='评估工单处理的健康状况',
        metrics=['关闭率', '处理时长', '积压情况'],
        priority=5,
    )
    return RequirementSpec(
        user_input='分析工单健康度',
        objectives=[health_objective],
    )
|
||
|
||
|
||
@pytest.fixture
def sample_data_profile():
    """Return a data profile with inferred type 'ticket' and two described columns."""
    status_column = ColumnInfo(
        name='status',
        dtype='categorical',
        missing_rate=0.0,
        unique_count=3,
        sample_values=['open', 'closed', 'pending'],
    )
    created_at_column = ColumnInfo(
        name='created_at',
        dtype='datetime',
        missing_rate=0.0,
        unique_count=1000,
    )
    return DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=[status_column, created_at_column],
        inferred_type='ticket',
        key_fields={'status': '状态', 'created_at': '创建时间'},
        quality_score=85.0,
        summary='工单数据,包含1000条记录',
    )
|
||
|
||
|
||
class TestExtractKeyFindings:
    """Tests for extract_key_findings."""

    def test_basic_functionality(self, sample_results):
        """Findings come back as a list of dicts carrying the required keys."""
        findings = extract_key_findings(sample_results)

        assert isinstance(findings, list)

        # Only the successful results contribute: 2 tasks x 2 insights each.
        assert len(findings) == 4

        # Every finding must expose all required fields.
        required_keys = ('finding', 'importance', 'source_task', 'category')
        for entry in findings:
            for key in required_keys:
                assert key in entry

    def test_importance_sorting(self, sample_results):
        """Findings are ordered by non-increasing importance."""
        findings = extract_key_findings(sample_results)

        # Compare each finding with its immediate successor.
        for earlier, later in zip(findings, findings[1:]):
            assert earlier['importance'] >= later['importance']

    def test_empty_results(self):
        """An empty result list yields an empty list of findings."""
        findings = extract_key_findings([])

        assert isinstance(findings, list)
        assert len(findings) == 0

    def test_only_failed_results(self):
        """A result list containing only a failed task yields no findings."""
        failed_task = AnalysisResult(
            task_id='task1',
            task_name='失败任务',
            success=False,
            error='测试错误',
        )

        findings = extract_key_findings([failed_task])

        # Failed tasks must not produce findings.
        assert len(findings) == 0
|
||
|
||
|
||
class TestCategorizeInsight:
    """Tests for _categorize_insight."""

    def test_anomaly_detection(self):
        """An insight describing an abnormally high share is categorized as 'anomaly'."""
        assert _categorize_insight('待处理工单占比50%,异常高') == 'anomaly'

    def test_trend_detection(self):
        """An insight describing a rising trend is categorized as 'trend'."""
        assert _categorize_insight('工单数量呈上升趋势') == 'trend'

    def test_general_insight(self):
        """An insight with no anomaly or trend wording is categorized as 'insight'."""
        assert _categorize_insight('数据质量良好') == 'insight'

    def test_english_keywords(self):
        """English anomaly and trend wording is categorized as well."""
        expectations = (
            ('This is an anomaly', 'anomaly'),
            ('Showing growth trend', 'trend'),
        )
        for text, expected_category in expectations:
            assert _categorize_insight(text) == expected_category
|
||
|
||
|
||
class TestCalculateImportance:
    """Tests for _calculate_importance."""

    def test_anomaly_importance(self):
        """A severe-anomaly insight scores at least 4."""
        score = _calculate_importance('严重异常:系统故障', {})

        # Anomaly wording plus severity wording should give high importance.
        assert score >= 4

    def test_percentage_importance(self):
        """An insight containing a percentage scores at least 4."""
        score = _calculate_importance('占比达到80%', {})

        assert score >= 4

    def test_normal_importance(self):
        """An ordinary insight gets the medium score of 3."""
        score = _calculate_importance('数据正常', {})

        assert score == 3

    def test_importance_range(self):
        """Scores stay within 1..5 for a mix of insight wordings."""
        sample_insights = (
            '严重异常问题',
            '占比80%',
            '正常数据',
            '轻微变化',
        )

        for text in sample_insights:
            # A fresh dict per call, matching how each insight is scored independently.
            assert 1 <= _calculate_importance(text, {}) <= 5
|
||
|
||
|
||
class TestOrganizeReportStructure:
    """Tests for organize_report_structure."""

    def test_basic_structure(self, sample_results, sample_requirement, sample_data_profile):
        """The structure exposes all top-level keys."""
        structure = organize_report_structure(
            extract_key_findings(sample_results),
            sample_requirement,
            sample_data_profile,
        )

        expected_keys = (
            'title',
            'sections',
            'executive_summary',
            'detailed_analysis',
            'conclusions',
        )
        for key in expected_keys:
            assert key in structure

    def test_with_template(self, sample_results, sample_data_profile):
        """When the requirement carries a template, its sections are used verbatim."""
        objective = AnalysisObjective(
            name='分析',
            description='按模板分析',
            metrics=['指标1'],
            priority=5,
        )
        template_requirements = {
            'sections': ['第一章', '第二章', '第三章'],
            'required_metrics': ['指标1', '指标2'],
            'required_charts': ['图表1'],
        }
        # Requirement that references a template.
        requirement = RequirementSpec(
            user_input='按模板分析',
            objectives=[objective],
            template_path='template.md',
            template_requirements=template_requirements,
        )

        findings = extract_key_findings(sample_results)
        structure = organize_report_structure(findings, requirement, sample_data_profile)

        # The template structure must be used.
        assert structure['use_template'] is True
        assert structure['sections'] == ['第一章', '第二章', '第三章']

    def test_without_template(self, sample_results, sample_requirement, sample_data_profile):
        """Without a template, a non-empty default section list is generated."""
        findings = extract_key_findings(sample_results)
        structure = organize_report_structure(findings, sample_requirement, sample_data_profile)

        assert structure['use_template'] is False

        sections = structure['sections']
        assert len(sections) > 0
        assert '执行摘要' in sections

    def test_executive_summary(self, sample_results, sample_requirement, sample_data_profile):
        """The executive summary lists key findings and category counts."""
        findings = extract_key_findings(sample_results)
        structure = organize_report_structure(findings, sample_requirement, sample_data_profile)

        summary = structure['executive_summary']

        # Key findings are present and are a list.
        assert 'key_findings' in summary
        assert isinstance(summary['key_findings'], list)

        # Statistics are present.
        for counter_key in ('anomaly_count', 'trend_count'):
            assert counter_key in summary

    def test_detailed_analysis(self, sample_results, sample_requirement, sample_data_profile):
        """The detailed analysis groups findings into three category lists."""
        findings = extract_key_findings(sample_results)
        structure = organize_report_structure(findings, sample_requirement, sample_data_profile)

        by_category = structure['detailed_analysis']
        categories = ('anomaly', 'trend', 'insight')

        # Every category key is present ...
        for category in categories:
            assert category in by_category

        # ... and every category maps to a list.
        for category in categories:
            assert isinstance(by_category[category], list)
|
||
|
||
|
||
class TestGenerateReportTitle:
    """Tests for _generate_report_title."""

    def test_health_analysis_title(self, sample_data_profile):
        """A health-analysis request yields a title naming tickets and health."""
        spec = RequirementSpec(user_input='分析工单健康度', objectives=[])

        title = _generate_report_title(spec, sample_data_profile)

        for fragment in ('工单', '健康度'):
            assert fragment in title

    def test_trend_analysis_title(self, sample_data_profile):
        """A trend-analysis request yields a title naming tickets and trend."""
        spec = RequirementSpec(user_input='分析趋势', objectives=[])

        title = _generate_report_title(spec, sample_data_profile)

        for fragment in ('工单', '趋势'):
            assert fragment in title

    def test_generic_title(self, sample_data_profile):
        """A generic request yields a title naming tickets and 'analysis report'."""
        spec = RequirementSpec(user_input='分析数据', objectives=[])

        title = _generate_report_title(spec, sample_data_profile)

        for fragment in ('工单', '分析报告'):
            assert fragment in title
|
||
|
||
|
||
class TestGenerateDefaultSections:
    """Tests for _generate_default_sections."""

    def test_with_anomalies(self):
        """An anomaly finding adds the anomaly-analysis section."""
        anomaly_finding = {
            'finding': '异常情况',
            'category': 'anomaly',
            'importance': 5,
        }
        profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='ticket',
        )

        sections = _generate_default_sections([anomaly_finding], profile)

        assert '异常分析' in sections

    def test_with_trends(self):
        """A trend finding adds the trend-analysis section."""
        trend_finding = {
            'finding': '上升趋势',
            'category': 'trend',
            'importance': 4,
        }
        profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='sales',
        )

        sections = _generate_default_sections([trend_finding], profile)

        assert '趋势分析' in sections

    def test_ticket_data_sections(self):
        """Ticket data with no findings still gets a status or type section."""
        profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='ticket',
        )

        sections = _generate_default_sections([], profile)

        # At least one ticket-specific section must be present.
        assert '状态分析' in sections or '类型分析' in sections
|
||
|
||
|
||
class TestGenerateReport:
    """End-to-end tests for generate_report."""

    def test_basic_report_generation(self, sample_results, sample_requirement, sample_data_profile):
        """The report is a non-empty string with a heading and a summary."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # The report is returned as a non-empty string.
        assert isinstance(report, str)
        assert len(report) > 0

        # A heading marker is present.
        assert '#' in report

        # An executive summary is present.
        assert '执行摘要' in report or '摘要' in report

    def test_report_with_skipped_tasks(self, sample_results, sample_requirement, sample_data_profile):
        """The report mentions the failed task from the fixture."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Skipped or failed tasks are mentioned.
        assert '跳过' in report or '失败' in report

        # The failed task's name appears.
        assert '类型分析' in report

    def test_report_with_visualizations(self, sample_results, sample_requirement, sample_data_profile):
        """The report references at least one chart or contains an image link."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        assert 'chart1.png' in report or 'chart2.png' in report or '![' in report

    def test_report_with_insights(self, sample_results, sample_requirement, sample_data_profile):
        """The report contains wording from the fixture insights."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        assert '待处理工单' in report or '趋势' in report

    def test_report_save_to_file(self, sample_results, sample_requirement, sample_data_profile):
        """Passing output_path writes the returned report text to that file."""
        # Use a path that does not exist yet: a pre-created temp file would make
        # the existence assertion below pass even if nothing was written.
        # The temporary directory is removed on exit, including on failure.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, 'report.md')

            report = generate_report(
                sample_results,
                sample_requirement,
                sample_data_profile,
                output_path=output_path,
            )

            # The file was created by generate_report.
            assert os.path.exists(output_path)

            # The saved content equals the returned content.
            with open(output_path, 'r', encoding='utf-8') as f:
                saved_content = f.read()

            assert saved_content == report

    def test_empty_results(self, sample_requirement, sample_data_profile):
        """An empty result list still produces a report with a summary."""
        report = generate_report([], sample_requirement, sample_data_profile)

        # A report is still generated.
        assert isinstance(report, str)
        assert len(report) > 0

        # The basic structure is present.
        assert '执行摘要' in report or '摘要' in report

    def test_all_failed_results(self, sample_requirement, sample_data_profile):
        """When every task failed, a report is still produced and mentions it."""
        results = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务1',
                success=False,
                error='错误1',
            ),
            AnalysisResult(
                task_id='task2',
                task_name='失败任务2',
                success=False,
                error='错误2',
            ),
        ]

        report = generate_report(results, sample_requirement, sample_data_profile)

        # Report generation itself succeeds.
        assert isinstance(report, str)
        assert len(report) > 0

        # The failure is mentioned.
        assert '失败' in report or '跳过' in report
|