Files
vibe_data_ana/tests/test_report_generation_properties.py

333 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""报告生成引擎的属性测试。
使用 hypothesis 进行基于属性的测试,验证报告生成的通用正确性属性。
"""
import pytest
from hypothesis import given, strategies as st, settings
import tempfile
import os
from src.engines.report_generation import (
extract_key_findings,
organize_report_structure,
generate_report
)
from src.models.analysis_result import AnalysisResult
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
from src.models.data_profile import DataProfile, ColumnInfo
# Strategy: builds randomized analysis results.
@st.composite
def analysis_result_strategy(draw):
    """Draw an ``AnalysisResult`` whose fields are filled with random values.

    The generated result has:
      * ``task_id`` / ``task_name``: non-empty text (up to 20 / 50 characters),
      * ``success``: an arbitrary boolean,
      * ``insights``: 0-5 strings of 10-100 characters,
      * ``visualizations``: 0-3 strings of 5-50 characters,
      * ``data``: always ``{'result': 'test'}``,
      * ``error``: ``None`` on success, otherwise ``"Test error"``,
      * ``execution_time``: a float in ``[0.1, 100.0]``.
    """
    identifier = draw(st.text(min_size=1, max_size=20))
    name = draw(st.text(min_size=1, max_size=50))
    succeeded = draw(st.booleans())

    # Insight texts attached to the result.
    insight_text = st.text(min_size=10, max_size=100)
    drawn_insights = draw(st.lists(insight_text, min_size=0, max_size=5))

    # Arbitrary strings standing in for visualization paths.
    chart_text = st.text(min_size=5, max_size=50)
    drawn_charts = draw(st.lists(chart_text, min_size=0, max_size=3))

    elapsed = draw(st.floats(min_value=0.1, max_value=100.0))

    # Only unsuccessful results carry an error message.
    if succeeded:
        failure = None
    else:
        failure = "Test error"

    return AnalysisResult(
        task_id=identifier,
        task_name=name,
        success=succeeded,
        data={'result': 'test'},
        visualizations=drawn_charts,
        insights=drawn_insights,
        error=failure,
        execution_time=elapsed,
    )
# Strategy: builds randomized requirement specifications.
@st.composite
def requirement_spec_strategy(draw):
    """Draw a ``RequirementSpec`` with random objectives and an optional template.

    The spec always has non-empty ``user_input`` (up to 100 characters) and
    1-5 ``AnalysisObjective`` instances. A drawn boolean decides whether a
    template is attached: if so, ``template_path`` is ``"template.md"`` and
    ``template_requirements`` is a fixed dictionary of sections, metrics and
    charts; otherwise both are ``None``.
    """
    request_text = draw(st.text(min_size=1, max_size=100))

    # Each objective gets a name, a description, 1-5 metrics and a priority in 1..5.
    metric_names = st.lists(st.text(min_size=1, max_size=20), min_size=1, max_size=5)
    objective = st.builds(
        AnalysisObjective,
        name=st.text(min_size=1, max_size=30),
        description=st.text(min_size=1, max_size=100),
        metrics=metric_names,
        priority=st.integers(min_value=1, max_value=5),
    )
    drawn_objectives = draw(st.lists(objective, min_size=1, max_size=5))

    # The template path and its requirements are either both set or both None.
    if draw(st.booleans()):
        path = "template.md"
        requirements = {
            'sections': ['执行摘要', '详细分析', '结论'],
            'required_metrics': ['指标1', '指标2'],
            'required_charts': ['图表1'],
        }
    else:
        path = None
        requirements = None

    return RequirementSpec(
        user_input=request_text,
        objectives=drawn_objectives,
        template_path=path,
        template_requirements=requirements,
    )
# Strategy: builds randomized data profiles.
@st.composite
def data_profile_strategy(draw):
    """Draw a ``DataProfile`` describing 1-10 randomly generated columns.

    ``column_count`` is always the length of the generated column list; the
    remaining fields (file path, row count, inferred type, key fields,
    quality score and summary) are drawn independently.
    """
    # A single column: random name, one of four dtypes, and loose statistics.
    column = st.builds(
        ColumnInfo,
        name=st.text(min_size=1, max_size=20),
        dtype=st.sampled_from(['numeric', 'categorical', 'datetime', 'text']),
        missing_rate=st.floats(min_value=0.0, max_value=1.0),
        unique_count=st.integers(min_value=1, max_value=1000),
        sample_values=st.lists(st.text(), min_size=0, max_size=5),
        statistics=st.dictionaries(st.text(), st.floats()),
    )
    drawn_columns = draw(st.lists(column, min_size=1, max_size=10))

    # Remaining fields are drawn in the same order as the constructor arguments.
    path = draw(st.text(min_size=1, max_size=50))
    rows = draw(st.integers(min_value=1, max_value=1000000))
    kind = draw(st.sampled_from(['ticket', 'sales', 'user', 'unknown']))
    keys = draw(st.dictionaries(st.text(), st.text()))
    quality = draw(st.floats(min_value=0.0, max_value=100.0))
    description = draw(st.text(min_size=0, max_size=200))

    return DataProfile(
        file_path=path,
        row_count=rows,
        column_count=len(drawn_columns),
        columns=drawn_columns,
        inferred_type=kind,
        key_fields=keys,
        quality_score=quality,
        summary=description,
    )
# Feature: true-ai-agent, Property 16: report structure completeness
@given(
    results=st.lists(analysis_result_strategy(), min_size=1, max_size=10),
    requirement=requirement_spec_strategy(),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_property_16_report_structure_completeness(results, requirement, data_profile):
    """Property 16: report structure completeness.

    For any set of analysis results and any requirement spec, the generated
    report should contain three main parts: an executive summary, a detailed
    analysis, and conclusions/recommendations. When a template is used, the
    report structure should follow the template's section organization.

    Validates requirements: Scenario 3 Acceptance.3, FR-6.2.
    """
    report = generate_report(results, requirement, data_profile)

    # The report must have content at all.
    assert len(report) > 0, "报告内容不应为空"

    # Executive summary heading (Chinese or English wording accepted).
    assert '执行摘要' in report or 'Executive Summary' in report or '摘要' in report, \
        "报告应包含执行摘要部分"

    # Detailed analysis heading.
    assert '详细分析' in report or 'Detailed Analysis' in report or '分析' in report, \
        "报告应包含详细分析部分"

    # Conclusions or recommendations heading.
    assert '结论' in report or '建议' in report or 'Conclusion' in report or 'Recommendation' in report, \
        "报告应包含结论与建议部分"

    # When a template is in play, the report has to reference its sections.
    if requirement.template_path and requirement.template_requirements:
        template_sections = requirement.template_requirements.get('sections', [])
        if template_sections:
            # At least one template section must actually appear in the report.
            # (The previous check, ``count >= 0``, could never fail.)
            assert any(section in report for section in template_sections), \
                "报告应该参考模板结构"
# Feature: true-ai-agent, Property 17: report content traceability
@given(
    results=st.lists(analysis_result_strategy(), min_size=1, max_size=10),
    requirement=requirement_spec_strategy(),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_property_17_report_content_traceability(results, requirement, data_profile):
    """Property 17: report content traceability.

    For any generated report and set of analysis results, every finding and
    figure mentioned in the report should be traceable to some analysis
    result, and if planned analyses were skipped the report should say why.

    Validates requirements: Scenario 3 Acceptance.4, Scenario 4 Acceptance.4,
    FR-6.1.

    What is actually asserted here is deliberately weaker than the property:
    the report must mention failure/skipping when any task failed, and must
    exceed 100 characters when any task succeeded.
    """
    report = generate_report(results, requirement, data_profile)

    # The report must have content at all.
    assert len(report) > 0, "报告内容不应为空"

    if any(not result.success for result in results):
        # Failed tasks must be acknowledged with some skip/failure wording.
        failure_keywords = ['跳过', '失败', 'skipped', 'failed', '错误', 'error']
        assert any(keyword in report for keyword in failure_keywords), \
            "报告应该说明哪些分析被跳过或失败"
        # NOTE: the name/ID of a failed task is intentionally not required to
        # appear: generated names can be very short or generic, so such a
        # check would not be reliable.

    if any(result.success for result in results):
        # NOTE: specific insights or task names are intentionally not required
        # to appear verbatim in the report; only a minimum amount of analysis
        # content is enforced.
        assert len(report) > 100, "报告应该包含足够的分析内容"
# Auxiliary test: checks key-finding extraction.
@given(results=st.lists(analysis_result_strategy(), min_size=1, max_size=20))
@settings(max_examples=20, deadline=None)
def test_extract_key_findings_structure(results):
    """Check the shape and ordering of what ``extract_key_findings`` returns.

    The result must be a list; every entry must carry the ``finding``,
    ``importance``, ``source_task`` and ``category`` keys, with importance in
    1..5 and a category of ``anomaly``, ``trend`` or ``insight``; and entries
    must be ordered by non-increasing importance.
    """
    key_findings = extract_key_findings(results)

    assert isinstance(key_findings, list), "应该返回列表"

    # Required keys, paired with the message reported when one is missing.
    required_fields = (
        ('finding', "发现应该包含finding字段"),
        ('importance', "发现应该包含importance字段"),
        ('source_task', "发现应该包含source_task字段"),
        ('category', "发现应该包含category字段"),
    )
    allowed_categories = ['anomaly', 'trend', 'insight']

    for entry in key_findings:
        for field, message in required_fields:
            assert field in entry, message

        # Importance is bounded to the 1..5 scale.
        assert 1 <= entry['importance'] <= 5, "重要性应该在1-5范围内"

        # Category must be one of the known kinds.
        assert entry['category'] in allowed_categories, \
            "类别应该是anomaly、trend或insight之一"

    # Adjacent pairs must be in non-increasing order of importance.
    for current, following in zip(key_findings, key_findings[1:]):
        assert current['importance'] >= following['importance'], \
            "关键发现应该按重要性降序排列"
# Auxiliary test: checks report structure organization.
@given(
    results=st.lists(analysis_result_strategy(), min_size=1, max_size=10),
    requirement=requirement_spec_strategy(),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_organize_report_structure_completeness(results, requirement, data_profile):
    """Check that ``organize_report_structure`` returns every expected part.

    Key findings are first extracted from ``results`` and then organized; the
    resulting structure must expose a non-empty title, a list of sections,
    and executive-summary, detailed-analysis and conclusions entries with
    their expected sub-keys.
    """
    findings = extract_key_findings(results)
    structure = organize_report_structure(findings, requirement, data_profile)

    # Top-level keys that must exist, with the message for each.
    top_level_checks = (
        ('title', "结构应该包含标题"),
        ('sections', "结构应该包含章节列表"),
        ('executive_summary', "结构应该包含执行摘要"),
        ('detailed_analysis', "结构应该包含详细分析"),
        ('conclusions', "结构应该包含结论"),
    )
    for key, message in top_level_checks:
        assert key in structure, message

    # The title must not be empty.
    title = structure['title']
    assert len(title) > 0, "标题不应为空"

    # Sections must be a list.
    sections = structure['sections']
    assert isinstance(sections, list), "章节应该是列表"

    # The executive summary carries the key findings.
    summary_part = structure['executive_summary']
    assert 'key_findings' in summary_part, "执行摘要应该包含关键发现"

    # The detailed analysis is split into the three finding categories.
    analysis_part = structure['detailed_analysis']
    category_checks = (
        ('anomaly', "详细分析应该包含异常分类"),
        ('trend', "详细分析应该包含趋势分类"),
        ('insight', "详细分析应该包含洞察分类"),
    )
    for category, message in category_checks:
        assert category in analysis_part, message

    # Conclusions hold both a summary and recommendations.
    conclusions_part = structure['conclusions']
    conclusion_checks = (
        ('summary', "结论应该包含摘要"),
        ('recommendations', "结论应该包含建议"),
    )
    for key, message in conclusion_checks:
        assert key in conclusions_part, message
# Auxiliary test: checks that report generation does not crash.
@given(
    results=st.lists(analysis_result_strategy(), min_size=0, max_size=5),
    requirement=requirement_spec_strategy(),
    data_profile=data_profile_strategy()
)
@settings(max_examples=10, deadline=None)
def test_generate_report_no_crash(results, requirement, data_profile):
    """Check that ``generate_report`` never raises, even with an empty result list.

    The report must also be a non-empty string: a basic structure is expected
    even when there are no analysis results.
    """
    # Only the call under test is guarded. Keeping the assertions outside the
    # ``try`` matters because ``AssertionError`` is an ``Exception`` subclass:
    # inside the guard, a failed assertion would be reported as a crash.
    try:
        report = generate_report(results, requirement, data_profile)
    except Exception as e:
        # Report generation must not raise.
        pytest.fail(f"报告生成不应该崩溃: {e}")

    # The result must be a string.
    assert isinstance(report, str), "应该返回字符串"
    # The report must not be empty, even without any results.
    assert len(report) > 0, "报告不应为空"