Files
vibe_data_ana/tests/test_requirement_understanding_properties.py

245 lines
8.4 KiB
Python

"""Property-based tests for requirement understanding engine."""
import pytest
from hypothesis import given, strategies as st, settings, assume
import tempfile
import os
from src.engines.requirement_understanding import (
understand_requirement,
parse_template,
check_data_requirement_match
)
from src.models.data_profile import DataProfile, ColumnInfo
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
# Strategies for generating test data
@st.composite
def column_info_strategy(draw):
    """Hypothesis strategy that builds one randomized ``ColumnInfo``.

    Values are drawn in the order name, dtype, missing rate, unique count.
    The name is 1-20 characters restricted to the Unicode letter ('L') and
    number ('N') categories; ``sample_values`` and ``statistics`` are always
    left empty.
    """
    # Keyword arguments are evaluated left to right, so the draws below
    # happen in exactly the order they are written.
    return ColumnInfo(
        name=draw(
            st.text(
                min_size=1,
                max_size=20,
                alphabet=st.characters(whitelist_categories=('L', 'N')),
            )
        ),
        dtype=draw(st.sampled_from(['numeric', 'categorical', 'datetime', 'text'])),
        missing_rate=draw(st.floats(min_value=0.0, max_value=1.0)),
        unique_count=draw(st.integers(min_value=1, max_value=1000)),
        sample_values=[],
        statistics={},
    )
@st.composite
def data_profile_strategy(draw):
    """Hypothesis strategy that builds one randomized ``DataProfile``.

    Draws a row count in [10, 100000], a list of 2-20 columns from
    ``column_info_strategy``, one of four inferred-type labels and a quality
    score in [0.0, 100.0].  ``file_path`` is fixed to ``'test.csv'``,
    ``key_fields`` is empty, and ``column_count`` always equals the number of
    generated columns.
    """
    n_rows = draw(st.integers(min_value=10, max_value=100000))
    cols = draw(st.lists(column_info_strategy(), min_size=2, max_size=20))
    kind = draw(st.sampled_from(['ticket', 'sales', 'user', 'unknown']))
    score = draw(st.floats(min_value=0.0, max_value=100.0))

    # Computed once so the count field and the summary text cannot disagree.
    n_cols = len(cols)

    return DataProfile(
        file_path='test.csv',
        row_count=n_rows,
        column_count=n_cols,
        columns=cols,
        inferred_type=kind,
        key_fields={},
        quality_score=score,
        summary=f"Test data with {n_cols} columns",
    )
# Feature: true-ai-agent, Property 3: abstract requirement transformation
@given(
    user_input=st.sampled_from([
        "分析健康度",
        "我想了解数据质量",
        "帮我分析趋势",
        "查看分布情况",
        "完整分析"
    ]),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_abstract_requirement_transformation(user_input, data_profile):
    """
    Property 3: an abstract user request (e.g. "health", "quality analysis")
    is turned into a concrete, non-empty list of analysis objectives.

    Checks that ``understand_requirement`` returns a ``RequirementSpec`` whose
    objectives are ``AnalysisObjective`` instances with a non-empty name, a
    non-empty description, a list of metrics and a priority in 1..5, and that
    the original user input is kept on the spec.

    Validates: Scenario 2 acceptance 1, Scenario 2 acceptance 2
    """
    spec = understand_requirement(user_input, data_profile)

    # The result must be the spec type before any of its fields are read.
    assert isinstance(spec, RequirementSpec)

    goals = spec.objectives
    assert len(goals) > 0, "Should generate at least one objective"

    # Every objective has to carry the full set of required fields.
    for goal in goals:
        assert isinstance(goal, AnalysisObjective)
        assert len(goal.name) > 0, "Objective name should not be empty"
        assert len(goal.description) > 0, "Objective description should not be empty"
        assert isinstance(goal.metrics, list), "Metrics should be a list"
        assert 1 <= goal.priority <= 5, "Priority should be between 1 and 5"

    # The raw request text must round-trip unchanged.
    assert spec.user_input == user_input
# Feature: true-ai-agent, Property 4: template parsing
@given(
    template_content=st.text(min_size=10, max_size=500)
)
@settings(max_examples=20, deadline=None)
def test_template_parsing(template_content):
    """
    Property 4: parsing a template file yields the template structure as a
    dictionary with ``sections``, ``required_metrics`` and ``required_charts``
    keys, each holding a list.

    Note that the generated input is arbitrary text of 10-500 characters, not
    necessarily a well-formed template, so this test only pins the shape of
    the parse result, not its contents.

    Validates: Scenario 3 acceptance 1
    """
    # delete=False so the file survives being closed and can be reopened by
    # path; cleanup is therefore our responsibility.
    handle = tempfile.NamedTemporaryFile(
        mode='w', suffix='.md', delete=False, encoding='utf-8'
    )
    template_path = handle.name
    try:
        # The write happens inside the try so that a failed write still
        # reaches the unlink below instead of leaking the temp file.
        with handle:
            handle.write(template_content)

        template_req = parse_template(template_path)

        # The result must be a dictionary exposing all three expected keys.
        assert isinstance(template_req, dict)
        expected_keys = ('sections', 'required_metrics', 'required_charts')
        for key in expected_keys:
            assert key in template_req

        # Every one of those values must be a list.
        for key in expected_keys:
            assert isinstance(template_req[key], list)
    finally:
        # Always remove the temp file, whether or not the assertions passed.
        os.unlink(template_path)
# Feature: true-ai-agent, Property 5: data-requirement match check
@given(
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_data_requirement_matching(data_profile):
    """
    Property 5: matching a requirement spec against a data profile reports
    which objectives are satisfied and which are not, along with any missing
    fields.

    Uses a fixed two-objective requirement and checks the shape and internal
    consistency of the dictionary returned by ``check_data_requirement_match``.

    Validates: Scenario 3 acceptance 2
    """
    # A small fixed requirement: one time-oriented and one status-oriented goal.
    time_objective = AnalysisObjective(
        name="时间分析",
        description="分析时间趋势",
        metrics=["时间序列", "趋势"],
        priority=5
    )
    status_objective = AnalysisObjective(
        name="状态分析",
        description="分析状态分布",
        metrics=["状态分布"],
        priority=4
    )
    requirement = RequirementSpec(
        user_input="测试需求",
        objectives=[time_objective, status_objective]
    )

    match_result = check_data_requirement_match(requirement, data_profile)

    # Shape: a dictionary that exposes every expected key.
    assert isinstance(match_result, dict)
    for key in (
        'all_satisfied',
        'satisfied_objectives',
        'unsatisfied_objectives',
        'missing_fields',
        'can_proceed',
    ):
        assert key in match_result

    # Types: flags are booleans, collections are lists.
    for flag_key in ('all_satisfied', 'can_proceed'):
        assert isinstance(match_result[flag_key], bool)
    for list_key in ('satisfied_objectives', 'unsatisfied_objectives', 'missing_fields'):
        assert isinstance(match_result[list_key], list)

    satisfied = match_result['satisfied_objectives']
    unsatisfied = match_result['unsatisfied_objectives']

    # Every objective must land in exactly one of the two buckets.
    total_checked = len(satisfied) + len(unsatisfied)
    assert total_checked == len(requirement.objectives)

    # "All satisfied" leaves nothing unsatisfied and nothing missing.
    if match_result['all_satisfied']:
        assert len(unsatisfied) == 0
        assert len(match_result['missing_fields']) == 0

    # Being able to proceed requires at least one satisfied objective.
    if match_result['can_proceed']:
        assert len(satisfied) > 0
# Feature: true-ai-agent, Property 3: abstract requirement transformation (with template)
@given(
    user_input=st.text(min_size=5, max_size=100),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_requirement_with_template(user_input, data_profile):
    """
    Property 3 (extended): requirement understanding should work with
    templates.

    Writes a fixed Markdown template to a temporary file, passes its path to
    ``understand_requirement`` and checks that the returned spec records the
    same path and carries a dictionary of template requirements.

    Validates: FR-2.3
    """
    # A simple fixed template; the text is kept at column 0 so the file
    # content has no leading whitespace.
    template_content = """# 分析报告
## 数据概览
指标: 行数, 列数
## 趋势分析
图表: 时间序列图
## 分布分析
图表: 分布图
"""
    # delete=False so the file survives being closed and can be reopened by
    # path; cleanup is therefore our responsibility.
    handle = tempfile.NamedTemporaryFile(
        mode='w', suffix='.md', delete=False, encoding='utf-8'
    )
    template_path = handle.name
    try:
        # The write happens inside the try so that a failed write still
        # reaches the unlink below instead of leaking the temp file.
        with handle:
            handle.write(template_content)

        requirement = understand_requirement(user_input, data_profile, template_path)

        # The spec must remember which template it was built from.
        assert requirement.template_path == template_path

        # The parsed template requirements must be present, as a dictionary.
        assert requirement.template_requirements is not None
        assert isinstance(requirement.template_requirements, dict)
    finally:
        # Always remove the temp file, whether or not the assertions passed.
        os.unlink(template_path)