Files
vibe_data_ana/tests/test_report_generation.py

524 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""报告生成引擎的单元测试。"""
import pytest
import tempfile
import os
from src.engines.report_generation import (
extract_key_findings,
organize_report_structure,
generate_report,
_categorize_insight,
_calculate_importance,
_generate_report_title,
_generate_default_sections
)
from src.models.analysis_result import AnalysisResult
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
from src.models.data_profile import DataProfile, ColumnInfo
@pytest.fixture
def sample_results():
    """Build a sample list of analysis results.

    The list holds two successful results (each with two insights and one
    visualization) followed by one failed result that carries an error
    message and no insights.
    """
    status_distribution = AnalysisResult(
        task_id='task1',
        task_name='状态分布分析',
        success=True,
        data={'open': 50, 'closed': 30, 'pending': 20},
        visualizations=['chart1.png'],
        insights=[
            '待处理工单占比50%,异常高',
            '已关闭工单占比30%',
        ],
        execution_time=2.5,
    )
    trend = AnalysisResult(
        task_id='task2',
        task_name='趋势分析',
        success=True,
        data={'trend': 'increasing'},
        visualizations=['chart2.png'],
        insights=[
            '工单数量呈上升趋势',
            '增长率为15%',
        ],
        execution_time=3.2,
    )
    # The failed task: no data, charts or insights, only an error message.
    failed_type_analysis = AnalysisResult(
        task_id='task3',
        task_name='类型分析',
        success=False,
        data={},
        visualizations=[],
        insights=[],
        error='数据缺少类型字段',
        execution_time=0.1,
    )
    return [status_distribution, trend, failed_type_analysis]
@pytest.fixture
def sample_requirement():
    """Build a sample requirement spec containing a single analysis objective."""
    health_objective = AnalysisObjective(
        name='健康度分析',
        description='评估工单处理的健康状况',
        metrics=['关闭率', '处理时长', '积压情况'],
        priority=5,
    )
    return RequirementSpec(
        user_input='分析工单健康度',
        objectives=[health_objective],
    )
@pytest.fixture
def sample_data_profile():
    """Build a sample data profile describing a ticket dataset with two columns."""
    status_column = ColumnInfo(
        name='status',
        dtype='categorical',
        missing_rate=0.0,
        unique_count=3,
        sample_values=['open', 'closed', 'pending'],
    )
    # No sample_values are supplied for this column.
    created_at_column = ColumnInfo(
        name='created_at',
        dtype='datetime',
        missing_rate=0.0,
        unique_count=1000,
    )
    return DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=[status_column, created_at_column],
        inferred_type='ticket',
        key_fields={'status': '状态', 'created_at': '创建时间'},
        quality_score=85.0,
        summary='工单数据包含1000条记录',
    )
class TestExtractKeyFindings:
    """Tests for extract_key_findings."""

    def test_basic_functionality(self, sample_results):
        """The sample results yield four findings, each with the required fields."""
        findings = extract_key_findings(sample_results)

        # The result is a list.
        assert isinstance(findings, list)
        # Only successful results contribute: 2 tasks x 2 insights each.
        assert len(findings) == 4
        # Every finding carries the required fields.
        required_fields = ('finding', 'importance', 'source_task', 'category')
        for entry in findings:
            for field in required_fields:
                assert field in entry

    def test_importance_sorting(self, sample_results):
        """Findings are ordered by non-increasing importance."""
        findings = extract_key_findings(sample_results)

        # Compare each finding with its successor.
        for current, following in zip(findings, findings[1:]):
            assert current['importance'] >= following['importance']

    def test_empty_results(self):
        """An empty result list produces an empty list of findings."""
        findings = extract_key_findings([])

        assert isinstance(findings, list)
        assert len(findings) == 0

    def test_only_failed_results(self):
        """A result list containing only a failed task produces no findings."""
        failed_only = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务',
                success=False,
                error='测试错误',
            ),
        ]

        findings = extract_key_findings(failed_only)

        # Failed tasks should not produce findings.
        assert len(findings) == 0
class TestCategorizeInsight:
    """Tests for _categorize_insight."""

    def test_anomaly_detection(self):
        """An insight mentioning an anomaly is categorized as 'anomaly'."""
        assert _categorize_insight('待处理工单占比50%,异常高') == 'anomaly'

    def test_trend_detection(self):
        """An insight mentioning a trend is categorized as 'trend'."""
        assert _categorize_insight('工单数量呈上升趋势') == 'trend'

    def test_general_insight(self):
        """An insight without anomaly or trend wording is categorized as 'insight'."""
        assert _categorize_insight('数据质量良好') == 'insight'

    def test_english_keywords(self):
        """English anomaly and trend wording is categorized the same way."""
        anomaly_category = _categorize_insight('This is an anomaly')
        assert anomaly_category == 'anomaly'
        trend_category = _categorize_insight('Showing growth trend')
        assert trend_category == 'trend'
class TestCalculateImportance:
    """Tests for _calculate_importance."""

    def test_anomaly_importance(self):
        """An insight worded as a severe anomaly scores at least 4."""
        # Anomaly + severe wording is expected to give high importance.
        score = _calculate_importance('严重异常:系统故障', {})
        assert score >= 4

    def test_percentage_importance(self):
        """An insight containing a percentage scores at least 4."""
        # A percentage is expected to raise the importance.
        score = _calculate_importance('占比达到80%', {})
        assert score >= 4

    def test_normal_importance(self):
        """A plain insight gets the default medium importance of 3."""
        score = _calculate_importance('数据正常', {})
        assert score == 3

    def test_importance_range(self):
        """Importance stays within 1-5 for a variety of insights."""
        samples = (
            '严重异常问题',
            '占比80%',
            '正常数据',
            '轻微变化',
        )
        for text in samples:
            score = _calculate_importance(text, {})
            assert 1 <= score <= 5
class TestOrganizeReportStructure:
    """Tests for organize_report_structure."""

    def test_basic_structure(self, sample_results, sample_requirement, sample_data_profile):
        """The structure exposes every required top-level key."""
        findings = extract_key_findings(sample_results)
        layout = organize_report_structure(findings, sample_requirement, sample_data_profile)

        expected_keys = (
            'title',
            'sections',
            'executive_summary',
            'detailed_analysis',
            'conclusions',
        )
        for key in expected_keys:
            assert key in layout

    def test_with_template(self, sample_results, sample_data_profile):
        """When the requirement carries a template, its sections are used as-is."""
        template_sections = ['第一章', '第二章', '第三章']
        objective = AnalysisObjective(
            name='分析',
            description='按模板分析',
            metrics=['指标1'],
            priority=5,
        )
        # Requirement that references a template and its parsed requirements.
        templated_requirement = RequirementSpec(
            user_input='按模板分析',
            objectives=[objective],
            template_path='template.md',
            template_requirements={
                'sections': ['第一章', '第二章', '第三章'],
                'required_metrics': ['指标1', '指标2'],
                'required_charts': ['图表1'],
            },
        )

        findings = extract_key_findings(sample_results)
        layout = organize_report_structure(findings, templated_requirement, sample_data_profile)

        # The template structure is adopted.
        assert layout['use_template'] is True
        assert layout['sections'] == template_sections

    def test_without_template(self, sample_results, sample_requirement, sample_data_profile):
        """Without a template, a non-empty default section list is generated."""
        findings = extract_key_findings(sample_results)
        layout = organize_report_structure(findings, sample_requirement, sample_data_profile)

        assert layout['use_template'] is False
        sections = layout['sections']
        assert len(sections) > 0
        assert '执行摘要' in sections

    def test_executive_summary(self, sample_results, sample_requirement, sample_data_profile):
        """The executive summary holds key findings and category counts."""
        findings = extract_key_findings(sample_results)
        layout = organize_report_structure(findings, sample_requirement, sample_data_profile)
        summary = layout['executive_summary']

        # Key findings are present as a list.
        assert 'key_findings' in summary
        assert isinstance(summary['key_findings'], list)
        # Count statistics are present.
        assert 'anomaly_count' in summary
        assert 'trend_count' in summary

    def test_detailed_analysis(self, sample_results, sample_requirement, sample_data_profile):
        """The detailed analysis groups findings into list-valued categories."""
        findings = extract_key_findings(sample_results)
        layout = organize_report_structure(findings, sample_requirement, sample_data_profile)
        by_category = layout['detailed_analysis']

        categories = ('anomaly', 'trend', 'insight')
        # Every category is present ...
        for name in categories:
            assert name in by_category
        # ... and every category maps to a list.
        for name in categories:
            assert isinstance(by_category[name], list)
class TestGenerateReportTitle:
    """Tests for _generate_report_title."""

    def test_health_analysis_title(self, sample_data_profile):
        """A health-analysis request produces a title naming tickets and health."""
        health_request = RequirementSpec(user_input='分析工单健康度', objectives=[])

        heading = _generate_report_title(health_request, sample_data_profile)

        assert '工单' in heading
        assert '健康度' in heading

    def test_trend_analysis_title(self, sample_data_profile):
        """A trend-analysis request produces a title naming tickets and trend."""
        trend_request = RequirementSpec(user_input='分析趋势', objectives=[])

        heading = _generate_report_title(trend_request, sample_data_profile)

        assert '工单' in heading
        assert '趋势' in heading

    def test_generic_title(self, sample_data_profile):
        """A generic request produces a title naming tickets and 'analysis report'."""
        generic_request = RequirementSpec(user_input='分析数据', objectives=[])

        heading = _generate_report_title(generic_request, sample_data_profile)

        assert '工单' in heading
        assert '分析报告' in heading
class TestGenerateDefaultSections:
    """Tests for _generate_default_sections."""

    def test_with_anomalies(self):
        """An anomaly finding adds the anomaly-analysis section."""
        anomaly_finding = {
            'finding': '异常情况',
            'category': 'anomaly',
            'importance': 5,
        }
        ticket_profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='ticket',
        )

        section_names = _generate_default_sections([anomaly_finding], ticket_profile)

        assert '异常分析' in section_names

    def test_with_trends(self):
        """A trend finding adds the trend-analysis section."""
        trend_finding = {
            'finding': '上升趋势',
            'category': 'trend',
            'importance': 4,
        }
        sales_profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='sales',
        )

        section_names = _generate_default_sections([trend_finding], sales_profile)

        assert '趋势分析' in section_names

    def test_ticket_data_sections(self):
        """Ticket data with no findings still gets a ticket-specific section."""
        ticket_profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='ticket',
        )

        section_names = _generate_default_sections([], ticket_profile)

        # Either of the ticket-related sections is accepted.
        assert '状态分析' in section_names or '类型分析' in section_names
class TestGenerateReport:
    """End-to-end tests for generate_report."""

    def test_basic_report_generation(self, sample_results, sample_requirement, sample_data_profile):
        """The report is a non-empty string with a heading and a summary."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # A non-empty string is returned.
        assert isinstance(report, str)
        assert len(report) > 0
        # A heading marker is present.
        assert '#' in report
        # An executive summary is present.
        assert '执行摘要' in report or '摘要' in report

    def test_report_with_skipped_tasks(self, sample_results, sample_requirement, sample_data_profile):
        """The report mentions the failed/skipped task by name."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Skipped or failed tasks are mentioned.
        assert '跳过' in report or '失败' in report
        # The name of the failed task from the fixture appears.
        assert '类型分析' in report

    def test_report_with_visualizations(self, sample_results, sample_requirement, sample_data_profile):
        """The report references at least one chart."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Either a chart file name or an image-markup marker is accepted.
        assert 'chart1.png' in report or 'chart2.png' in report or '![' in report

    def test_report_with_insights(self, sample_results, sample_requirement, sample_data_profile):
        """The report includes insight text from the results."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        assert '待处理工单' in report or '趋势' in report

    def test_report_save_to_file(self, sample_results, sample_requirement, sample_data_profile):
        """Passing output_path writes the returned report text to that file."""
        # Target a path that does not exist yet, so the existence check can
        # only pass if generate_report itself created the file. The temporary
        # directory is removed automatically, file included.
        with tempfile.TemporaryDirectory() as tmp_dir:
            output_path = os.path.join(tmp_dir, 'report.md')
            assert not os.path.exists(output_path)

            report = generate_report(
                sample_results,
                sample_requirement,
                sample_data_profile,
                output_path=output_path
            )

            # The file has been created.
            assert os.path.exists(output_path)
            # The file content matches the returned report.
            with open(output_path, 'r', encoding='utf-8') as f:
                saved_content = f.read()
            assert saved_content == report

    def test_empty_results(self, sample_requirement, sample_data_profile):
        """An empty result list still yields a report with a summary."""
        report = generate_report([], sample_requirement, sample_data_profile)

        # A report is still generated.
        assert isinstance(report, str)
        assert len(report) > 0
        # The basic structure is present.
        assert '执行摘要' in report or '摘要' in report

    def test_all_failed_results(self, sample_requirement, sample_data_profile):
        """When every task failed, the report is still generated and says so."""
        results = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务1',
                success=False,
                error='错误1'
            ),
            AnalysisResult(
                task_id='task2',
                task_name='失败任务2',
                success=False,
                error='错误2'
            )
        ]

        report = generate_report(results, sample_requirement, sample_data_profile)

        # The report is generated successfully.
        assert isinstance(report, str)
        assert len(report) > 0
        # The failure is mentioned.
        assert '失败' in report or '跳过' in report