# vibe_data_ana/tests/test_requirement_understanding_properties.py

"""Property-based tests for requirement understanding engine."""

import pytest
from hypothesis import given, strategies as st, settings, assume
import tempfile
import os

from src.engines.requirement_understanding import (
    understand_requirement,
    parse_template,
    check_data_requirement_match
)
from src.models.data_profile import DataProfile, ColumnInfo
from src.models.requirement_spec import RequirementSpec, AnalysisObjective


# Strategies for generating test data
@st.composite
def column_info_strategy(draw):
    """Composite strategy producing a randomised ``ColumnInfo``.

    Draws, in order: a 1-20 character name built from the Unicode letter
    ('L') and number ('N') categories, one of four dtype labels, a missing
    rate in [0.0, 1.0] and a unique count in [1, 1000]. ``sample_values``
    and ``statistics`` are always left empty.
    """
    name_alphabet = st.characters(whitelist_categories=('L', 'N'))
    dtype_choices = ['numeric', 'categorical', 'datetime', 'text']

    # The draw order is kept fixed so generated examples stay reproducible.
    column_name = draw(st.text(min_size=1, max_size=20, alphabet=name_alphabet))
    column_dtype = draw(st.sampled_from(dtype_choices))
    fraction_missing = draw(st.floats(min_value=0.0, max_value=1.0))
    distinct_values = draw(st.integers(min_value=1, max_value=1000))

    return ColumnInfo(
        name=column_name,
        dtype=column_dtype,
        missing_rate=fraction_missing,
        unique_count=distinct_values,
        sample_values=[],
        statistics={}
    )


@st.composite
def data_profile_strategy(draw):
    """Composite strategy producing a randomised ``DataProfile``.

    Draws, in order: a row count in [10, 100000], a list of 2-20 columns
    from ``column_info_strategy``, one of four inferred data types and a
    quality score in [0.0, 100.0]. The file path is always 'test.csv',
    ``key_fields`` is always empty, and ``column_count`` and ``summary``
    are derived from the number of generated columns.
    """
    n_rows = draw(st.integers(min_value=10, max_value=100000))
    column_infos = draw(st.lists(column_info_strategy(), min_size=2, max_size=20))
    data_kind = draw(st.sampled_from(['ticket', 'sales', 'user', 'unknown']))
    score = draw(st.floats(min_value=0.0, max_value=100.0))

    n_columns = len(column_infos)

    return DataProfile(
        file_path='test.csv',
        row_count=n_rows,
        column_count=n_columns,
        columns=column_infos,
        inferred_type=data_kind,
        key_fields={},
        quality_score=score,
        summary=f"Test data with {n_columns} columns"
    )


# Feature: true-ai-agent, Property 3: Abstract requirement transformation
@given(
    user_input=st.sampled_from([
        "分析健康度",
        "我想了解数据质量",
        "帮我分析趋势",
        "查看分布情况",
        "完整分析"
    ]),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_abstract_requirement_transformation(user_input, data_profile):
    """
    Property 3: an abstract user requirement (for example "health" or
    "quality analysis") is turned into a concrete, non-empty list of
    analysis objectives.

    Each objective must carry a non-empty name, a non-empty description,
    a list of metrics and a priority between 1 and 5, and the resulting
    spec must echo the original user input unchanged.

    Validates: Scenario 2 acceptance, items 1 and 2
    """
    spec = understand_requirement(user_input, data_profile)

    # The engine must hand back a RequirementSpec with at least one objective.
    assert isinstance(spec, RequirementSpec)
    assert len(spec.objectives) > 0, "Should generate at least one objective"

    # Every generated objective has to be fully populated.
    for goal in spec.objectives:
        assert isinstance(goal, AnalysisObjective)
        assert len(goal.name) > 0, "Objective name should not be empty"
        assert len(goal.description) > 0, "Objective description should not be empty"
        assert isinstance(goal.metrics, list), "Metrics should be a list"
        assert 1 <= goal.priority <= 5, "Priority should be between 1 and 5"

    # The original request text is carried through untouched.
    assert spec.user_input == user_input


# Feature: true-ai-agent, Property 4: Template parsing
@given(
    template_content=st.text(min_size=10, max_size=500)
)
@settings(max_examples=20, deadline=None)
def test_template_parsing(template_content):
    """
    Property 4: parsing a template file always yields the expected structure.

    Hypothesis supplies arbitrary text of 10-500 characters (not necessarily
    a well-formed template). The text is written to a temporary ``.md`` file
    and passed to ``parse_template``; the result must be a dict whose
    'sections', 'required_metrics' and 'required_charts' entries are lists.

    Validates: Scenario 3 acceptance, item 1
    """
    # The temporary directory owns the template file for the whole test, so
    # the file is removed even if writing it fails before parsing starts.
    with tempfile.TemporaryDirectory() as tmp_dir:
        template_path = os.path.join(tmp_dir, 'template.md')
        with open(template_path, 'w', encoding='utf-8') as f:
            f.write(template_content)

        # Parse template
        template_req = parse_template(template_path)

        # Verify: Should return dictionary with expected keys
        assert isinstance(template_req, dict)
        assert 'sections' in template_req
        assert 'required_metrics' in template_req
        assert 'required_charts' in template_req

        # Verify: All values should be lists
        assert isinstance(template_req['sections'], list)
        assert isinstance(template_req['required_metrics'], list)
        assert isinstance(template_req['required_charts'], list)


# Feature: true-ai-agent, Property 5: Data-requirement match check
@given(
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_data_requirement_matching(data_profile):
    """
    Property 5: matching a requirement spec against a data profile returns
    a well-formed, internally consistent result.

    A fixed two-objective requirement (priorities 5 and 4) is checked
    against a generated data profile. The result must be a dict with the
    expected keys and value types, every objective must be reported as
    either satisfied or unsatisfied, and the summary flags must agree with
    the objective lists.

    Validates: Scenario 3 acceptance, item 2
    """
    # Hand-built requirement: one time-oriented and one status-oriented objective.
    time_objective = AnalysisObjective(
        name="时间分析",
        description="分析时间趋势",
        metrics=["时间序列", "趋势"],
        priority=5
    )
    status_objective = AnalysisObjective(
        name="状态分析",
        description="分析状态分布",
        metrics=["状态分布"],
        priority=4
    )
    requirement = RequirementSpec(
        user_input="测试需求",
        objectives=[time_objective, status_objective]
    )

    outcome = check_data_requirement_match(requirement, data_profile)

    # Shape of the result: a dict exposing all five expected keys.
    assert isinstance(outcome, dict)
    expected_keys = (
        'all_satisfied',
        'satisfied_objectives',
        'unsatisfied_objectives',
        'missing_fields',
        'can_proceed',
    )
    for key in expected_keys:
        assert key in outcome

    # Flag entries are booleans.
    for flag_key in ('all_satisfied', 'can_proceed'):
        assert isinstance(outcome[flag_key], bool)

    # Collection entries are lists.
    for list_key in ('satisfied_objectives', 'unsatisfied_objectives', 'missing_fields'):
        assert isinstance(outcome[list_key], list)

    satisfied = outcome['satisfied_objectives']
    unsatisfied = outcome['unsatisfied_objectives']

    # Every objective is accounted for exactly once across the two lists.
    assert len(satisfied) + len(unsatisfied) == len(requirement.objectives)

    # "All satisfied" leaves nothing unsatisfied and nothing missing.
    if outcome['all_satisfied']:
        assert len(unsatisfied) == 0
        assert len(outcome['missing_fields']) == 0

    # "Can proceed" requires at least one satisfied objective.
    if outcome['can_proceed']:
        assert len(satisfied) > 0


# Feature: true-ai-agent, Property 3: Abstract requirement transformation (with template)
@given(
    user_input=st.text(min_size=5, max_size=100),
    data_profile=data_profile_strategy()
)
@settings(max_examples=20, deadline=None)
def test_requirement_with_template(user_input, data_profile):
    """
    Property 3 (extended): Requirement understanding should work with templates.

    A fixed Markdown template is written to a temporary file and passed to
    ``understand_requirement`` together with arbitrary user input (5-100
    characters) and a generated data profile. The returned spec must record
    the template path and expose the template requirements as a dict.

    Validates: FR-2.3
    """
    # Create a simple template
    template_content = """# 分析报告

## 数据概览
指标: 行数, 列数

## 趋势分析
图表: 时间序列图

## 分布分析
图表: 分布图
"""

    # The temporary directory owns the template file for the whole test, so
    # the file is removed even if writing it fails before the engine runs.
    with tempfile.TemporaryDirectory() as tmp_dir:
        template_path = os.path.join(tmp_dir, 'template.md')
        with open(template_path, 'w', encoding='utf-8') as f:
            f.write(template_content)

        # Execute with template
        requirement = understand_requirement(user_input, data_profile, template_path)

        # Verify: Should have template path
        assert requirement.template_path == template_path

        # Verify: Should have template requirements
        assert requirement.template_requirements is not None
        assert isinstance(requirement.template_requirements, dict)