"""Unit tests for the report generation engine."""

import os
import tempfile

import pytest

from src.engines.report_generation import (
    extract_key_findings,
    organize_report_structure,
    generate_report,
    _categorize_insight,
    _calculate_importance,
    _generate_report_title,
    _generate_default_sections,
)
from src.models.analysis_result import AnalysisResult
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
from src.models.data_profile import DataProfile, ColumnInfo


@pytest.fixture
def sample_results():
    """Return three analysis results: two successful tasks and one failed task.

    Each successful task carries two insights and one visualization; the
    failed task carries an error message and no insights.
    """
    return [
        AnalysisResult(
            task_id='task1',
            task_name='状态分布分析',
            success=True,
            data={'open': 50, 'closed': 30, 'pending': 20},
            visualizations=['chart1.png'],
            insights=[
                '待处理工单占比50%,异常高',
                '已关闭工单占比30%',
            ],
            execution_time=2.5,
        ),
        AnalysisResult(
            task_id='task2',
            task_name='趋势分析',
            success=True,
            data={'trend': 'increasing'},
            visualizations=['chart2.png'],
            insights=[
                '工单数量呈上升趋势',
                '增长率为15%',
            ],
            execution_time=3.2,
        ),
        AnalysisResult(
            task_id='task3',
            task_name='类型分析',
            success=False,
            data={},
            visualizations=[],
            insights=[],
            error='数据缺少类型字段',
            execution_time=0.1,
        ),
    ]


@pytest.fixture
def sample_requirement():
    """Return a requirement spec with a single objective and no template."""
    return RequirementSpec(
        user_input='分析工单健康度',
        objectives=[
            AnalysisObjective(
                name='健康度分析',
                description='评估工单处理的健康状况',
                metrics=['关闭率', '处理时长', '积压情况'],
                priority=5,
            ),
        ],
    )


@pytest.fixture
def sample_data_profile():
    """Return a data profile whose inferred type is 'ticket'."""
    return DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=[
            ColumnInfo(
                name='status',
                dtype='categorical',
                missing_rate=0.0,
                unique_count=3,
                sample_values=['open', 'closed', 'pending'],
            ),
            ColumnInfo(
                name='created_at',
                dtype='datetime',
                missing_rate=0.0,
                unique_count=1000,
            ),
        ],
        inferred_type='ticket',
        key_fields={'status': '状态', 'created_at': '创建时间'},
        quality_score=85.0,
        summary='工单数据,包含1000条记录',
    )


class TestExtractKeyFindings:
    """Tests for extract_key_findings."""

    def test_basic_functionality(self, sample_results):
        """Findings come back as a list of dicts with the required keys."""
        key_findings = extract_key_findings(sample_results)

        # The result must be a list.
        assert isinstance(key_findings, list)

        # Only successful results contribute: 2 tasks with 2 insights each.
        assert len(key_findings) == 4

        # Every finding must carry the required fields.
        for finding in key_findings:
            assert 'finding' in finding
            assert 'importance' in finding
            assert 'source_task' in finding
            assert 'category' in finding

    def test_importance_sorting(self, sample_results):
        """Findings are ordered by importance, highest first."""
        key_findings = extract_key_findings(sample_results)

        # Compare each finding with its successor; importance must never rise.
        for current, following in zip(key_findings, key_findings[1:]):
            assert current['importance'] >= following['importance']

    def test_empty_results(self):
        """An empty result list yields an empty list of findings."""
        key_findings = extract_key_findings([])

        assert isinstance(key_findings, list)
        assert len(key_findings) == 0

    def test_only_failed_results(self):
        """Failed tasks alone produce no findings."""
        results = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务',
                success=False,
                error='测试错误',
            ),
        ]

        key_findings = extract_key_findings(results)

        # A failed task must not produce any finding.
        assert len(key_findings) == 0


class TestCategorizeInsight:
    """Tests for _categorize_insight."""

    def test_anomaly_detection(self):
        """An insight mentioning an anomaly is categorized as 'anomaly'."""
        insight = '待处理工单占比50%,异常高'
        category = _categorize_insight(insight)

        assert category == 'anomaly'

    def test_trend_detection(self):
        """An insight mentioning a trend is categorized as 'trend'."""
        insight = '工单数量呈上升趋势'
        category = _categorize_insight(insight)

        assert category == 'trend'

    def test_general_insight(self):
        """An insight without special keywords is categorized as 'insight'."""
        insight = '数据质量良好'
        category = _categorize_insight(insight)

        assert category == 'insight'

    def test_english_keywords(self):
        """English keywords are categorized as well."""
        assert _categorize_insight('This is an anomaly') == 'anomaly'
        assert _categorize_insight('Showing growth trend') == 'trend'


class TestCalculateImportance:
    """Tests for _calculate_importance."""

    def test_anomaly_importance(self):
        """A severe anomaly scores at least 4."""
        insight = '严重异常:系统故障'
        importance = _calculate_importance(insight, {})

        # Anomaly + severe => high importance.
        assert importance >= 4

    def test_percentage_importance(self):
        """An insight containing a percentage scores at least 4."""
        insight = '占比达到80%'
        importance = _calculate_importance(insight, {})

        # Contains a percentage => elevated importance.
        assert importance >= 4

    def test_normal_importance(self):
        """An ordinary insight gets the default score of 3."""
        insight = '数据正常'
        importance = _calculate_importance(insight, {})

        # Default is medium importance.
        assert importance == 3

    def test_importance_range(self):
        """Importance always stays within the 1-5 range."""
        insights = [
            '严重异常问题',
            '占比80%',
            '正常数据',
            '轻微变化',
        ]

        for insight in insights:
            importance = _calculate_importance(insight, {})
            assert 1 <= importance <= 5


class TestOrganizeReportStructure:
    """Tests for organize_report_structure."""

    def test_basic_structure(self, sample_results, sample_requirement, sample_data_profile):
        """The structure exposes all required top-level keys."""
        key_findings = extract_key_findings(sample_results)
        structure = organize_report_structure(
            key_findings, sample_requirement, sample_data_profile
        )

        # Required fields must be present.
        assert 'title' in structure
        assert 'sections' in structure
        assert 'executive_summary' in structure
        assert 'detailed_analysis' in structure
        assert 'conclusions' in structure

    def test_with_template(self, sample_results, sample_data_profile):
        """When a template is supplied, its sections are used verbatim."""
        # Build a requirement that carries template information.
        requirement = RequirementSpec(
            user_input='按模板分析',
            objectives=[
                AnalysisObjective(
                    name='分析',
                    description='按模板分析',
                    metrics=['指标1'],
                    priority=5,
                ),
            ],
            template_path='template.md',
            template_requirements={
                'sections': ['第一章', '第二章', '第三章'],
                'required_metrics': ['指标1', '指标2'],
                'required_charts': ['图表1'],
            },
        )

        key_findings = extract_key_findings(sample_results)
        structure = organize_report_structure(
            key_findings, requirement, sample_data_profile
        )

        # The template structure must be used.
        assert structure['use_template'] is True
        assert structure['sections'] == ['第一章', '第二章', '第三章']

    def test_without_template(self, sample_results, sample_requirement, sample_data_profile):
        """Without a template, a default section list is generated."""
        key_findings = extract_key_findings(sample_results)
        structure = organize_report_structure(
            key_findings, sample_requirement, sample_data_profile
        )

        # A default structure must be generated.
        assert structure['use_template'] is False
        assert len(structure['sections']) > 0
        assert '执行摘要' in structure['sections']

    def test_executive_summary(self, sample_results, sample_requirement, sample_data_profile):
        """The executive summary holds key findings and category counts."""
        key_findings = extract_key_findings(sample_results)
        structure = organize_report_structure(
            key_findings, sample_requirement, sample_data_profile
        )

        exec_summary = structure['executive_summary']

        # Key findings must be included.
        assert 'key_findings' in exec_summary
        assert isinstance(exec_summary['key_findings'], list)

        # Statistics must be included.
        assert 'anomaly_count' in exec_summary
        assert 'trend_count' in exec_summary

    def test_detailed_analysis(self, sample_results, sample_requirement, sample_data_profile):
        """The detailed analysis groups findings into per-category lists."""
        key_findings = extract_key_findings(sample_results)
        structure = organize_report_structure(
            key_findings, sample_requirement, sample_data_profile
        )

        detailed = structure['detailed_analysis']

        # All categories must be present.
        assert 'anomaly' in detailed
        assert 'trend' in detailed
        assert 'insight' in detailed

        # Each category must be a list.
        assert isinstance(detailed['anomaly'], list)
        assert isinstance(detailed['trend'], list)
        assert isinstance(detailed['insight'], list)


class TestGenerateReportTitle:
    """Tests for _generate_report_title."""

    def test_health_analysis_title(self, sample_data_profile):
        """A health-analysis request yields a ticket health title."""
        requirement = RequirementSpec(
            user_input='分析工单健康度',
            objectives=[],
        )

        title = _generate_report_title(requirement, sample_data_profile)

        assert '工单' in title
        assert '健康度' in title

    def test_trend_analysis_title(self, sample_data_profile):
        """A trend-analysis request yields a ticket trend title."""
        requirement = RequirementSpec(
            user_input='分析趋势',
            objectives=[],
        )

        title = _generate_report_title(requirement, sample_data_profile)

        assert '工单' in title
        assert '趋势' in title

    def test_generic_title(self, sample_data_profile):
        """A generic request yields a generic ticket analysis-report title."""
        requirement = RequirementSpec(
            user_input='分析数据',
            objectives=[],
        )

        title = _generate_report_title(requirement, sample_data_profile)

        assert '工单' in title
        assert '分析报告' in title


class TestGenerateDefaultSections:
    """Tests for _generate_default_sections."""

    def test_with_anomalies(self):
        """An anomaly finding adds the anomaly-analysis section."""
        key_findings = [
            {
                'finding': '异常情况',
                'category': 'anomaly',
                'importance': 5,
            },
        ]

        data_profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='ticket',
        )

        sections = _generate_default_sections(key_findings, data_profile)

        # The anomaly-analysis section must be present.
        assert '异常分析' in sections

    def test_with_trends(self):
        """A trend finding adds the trend-analysis section."""
        key_findings = [
            {
                'finding': '上升趋势',
                'category': 'trend',
                'importance': 4,
            },
        ]

        data_profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='sales',
        )

        sections = _generate_default_sections(key_findings, data_profile)

        # The trend-analysis section must be present.
        assert '趋势分析' in sections

    def test_ticket_data_sections(self):
        """Ticket data yields at least one ticket-specific section."""
        data_profile = DataProfile(
            file_path='test.csv',
            row_count=100,
            column_count=3,
            columns=[],
            inferred_type='ticket',
        )

        sections = _generate_default_sections([], data_profile)

        # A ticket-related section must be present.
        assert '状态分析' in sections or '类型分析' in sections


class TestGenerateReport:
    """Tests for the end-to-end generate_report entry point."""

    def test_basic_report_generation(self, sample_results, sample_requirement, sample_data_profile):
        """A non-empty report string with a heading and a summary is returned."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # The report must be a string.
        assert isinstance(report, str)

        # The report must not be empty.
        assert len(report) > 0

        # The report must contain a heading marker.
        assert '#' in report

        # The report must contain an executive summary.
        assert '执行摘要' in report or '摘要' in report

    def test_report_with_skipped_tasks(self, sample_results, sample_requirement, sample_data_profile):
        """The report mentions skipped/failed tasks and names the failed task."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Skipped or failed tasks must be mentioned.
        assert '跳过' in report or '失败' in report

        # The failed task's name must be mentioned.
        # NOTE(review): '类型分析' may also be a default section title for
        # ticket data (see TestGenerateDefaultSections.test_ticket_data_sections),
        # so this check might pass even if the failed task is not reported
        # -- confirm against the report format.
        assert '类型分析' in report

    def test_report_with_visualizations(self, sample_results, sample_requirement, sample_data_profile):
        """The report references at least one chart."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # A chart reference must be present.
        assert 'chart1.png' in report or 'chart2.png' in report or '![' in report

    def test_report_with_insights(self, sample_results, sample_requirement, sample_data_profile):
        """The report includes insight text from the analysis results."""
        report = generate_report(sample_results, sample_requirement, sample_data_profile)

        # Insight content must be present.
        assert '待处理工单' in report or '趋势' in report

    def test_report_save_to_file(self, sample_results, sample_requirement, sample_data_profile):
        """Passing output_path writes the returned report text to that file."""
        # delete=False keeps the file on disk after the handle is closed, so
        # only its path is handed to generate_report.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.md', delete=False, encoding='utf-8'
        ) as tmp:
            output_path = tmp.name

        try:
            report = generate_report(
                sample_results,
                sample_requirement,
                sample_data_profile,
                output_path=output_path,
            )

            # The file must exist.
            assert os.path.exists(output_path)

            # The file content must match the returned report.
            with open(output_path, 'r', encoding='utf-8') as saved_file:
                saved_content = saved_file.read()

            assert saved_content == report

        finally:
            # Always remove the temporary file, even when an assertion fails.
            if os.path.exists(output_path):
                os.unlink(output_path)

    def test_empty_results(self, sample_requirement, sample_data_profile):
        """An empty result list still produces a report with a summary."""
        report = generate_report([], sample_requirement, sample_data_profile)

        # A report must still be generated.
        assert isinstance(report, str)
        assert len(report) > 0

        # The basic structure must be present.
        assert '执行摘要' in report or '摘要' in report

    def test_all_failed_results(self, sample_requirement, sample_data_profile):
        """When every task failed, a report is still produced and says so."""
        results = [
            AnalysisResult(
                task_id='task1',
                task_name='失败任务1',
                success=False,
                error='错误1',
            ),
            AnalysisResult(
                task_id='task2',
                task_name='失败任务2',
                success=False,
                error='错误2',
            ),
        ]

        report = generate_report(results, sample_requirement, sample_data_profile)

        # The report must be generated successfully.
        assert isinstance(report, str)
        assert len(report) > 0

        # The failure must be mentioned.
        assert '失败' in report or '跳过' in report