Files
vibe_data_ana/tests/test_analysis_planning.py

343 lines
9.8 KiB
Python
Raw Normal View History

"""Unit tests for analysis planning engine."""
import pytest
from src.engines.analysis_planning import (
plan_analysis,
validate_task_dependencies,
_fallback_analysis_planning,
_has_circular_dependency
)
from src.models.data_profile import DataProfile, ColumnInfo
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
from src.models.analysis_plan import AnalysisTask
@pytest.fixture
def sample_data_profile():
    """Return a ``DataProfile`` for a 1000-row, five-column ticket dataset.

    The columns cover one datetime, two categorical, one numeric and one
    text field so that planning code sees a mix of dtypes.
    """
    # One row per column: (name, dtype, missing_rate, unique_count).
    column_specs = (
        ('created_at', 'datetime', 0.0, 1000),
        ('status', 'categorical', 0.1, 5),
        ('type', 'categorical', 0.0, 10),
        ('priority', 'numeric', 0.0, 5),
        ('description', 'text', 0.05, 950),
    )
    columns = [
        ColumnInfo(
            name=col_name,
            dtype=col_dtype,
            missing_rate=col_missing,
            unique_count=col_unique,
        )
        for col_name, col_dtype, col_missing, col_unique in column_specs
    ]

    profile = DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=columns,
        inferred_type='ticket',
        key_fields={'time': 'created_at', 'status': 'status'},
        quality_score=85.0,
        summary='Ticket data with 1000 rows',
    )
    return profile
@pytest.fixture
def sample_requirement():
    """Return a ``RequirementSpec`` carrying two analysis objectives.

    The first objective has priority 5 and the second priority 4; the
    objective names, descriptions and metrics are Chinese-language strings.
    """
    # Objective about health, priority 5.
    health_objective = AnalysisObjective(
        name="健康度分析",
        description="评估工单处理的健康状况",
        metrics=["完成率", "处理效率"],
        priority=5,
    )
    # Objective about trends over time, priority 4.
    trend_objective = AnalysisObjective(
        name="趋势分析",
        description="分析工单随时间的变化趋势",
        metrics=["时间序列", "增长率"],
        priority=4,
    )
    return RequirementSpec(
        user_input="分析工单健康度和趋势",
        objectives=[health_objective, trend_objective],
    )
def test_fallback_planning_generates_tasks(sample_data_profile, sample_requirement):
    """Check the fallback plan has tasks, matching objectives and a positive duration."""
    plan = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    # At least one task must be produced.
    task_count = len(plan.tasks)
    assert task_count > 0

    # The plan carries exactly as many objectives as the requirement.
    expected_objective_count = len(sample_requirement.objectives)
    assert len(plan.objectives) == expected_objective_count

    # The estimated duration must be strictly positive.
    assert plan.estimated_duration > 0
def test_fallback_planning_respects_objectives(sample_data_profile, sample_requirement):
    """Check the fallback plan contains health-related and trend-related tasks.

    Tasks are matched by substring on their names.
    """
    plan = _fallback_analysis_planning(sample_data_profile, sample_requirement)
    task_names = [task.name for task in plan.tasks]

    # Health analysis: a name mentions either of the two health keywords.
    health_matches = [
        name for name in task_names if '健康' in name or '质量' in name
    ]
    assert len(health_matches) > 0

    # Trend analysis: a name mentions either of the two trend keywords.
    trend_matches = [
        name for name in task_names if '趋势' in name or '时间' in name
    ]
    assert len(trend_matches) > 0
def test_fallback_planning_with_no_matching_objectives(sample_data_profile):
    """Check a single generic objective with no metrics still yields a task."""
    generic_objective = AnalysisObjective(
        name="综合分析",
        description="全面分析数据",
        metrics=[],
        priority=3,
    )
    requirement = RequirementSpec(
        user_input="分析数据",
        objectives=[generic_objective],
    )

    plan = _fallback_analysis_planning(sample_data_profile, requirement)

    # The planner must still produce at least one task.
    produced = len(plan.tasks)
    assert produced > 0
def test_fallback_planning_with_empty_objectives(sample_data_profile):
    """Check a requirement with an empty objectives list still yields a task."""
    no_objectives = []
    requirement = RequirementSpec(user_input="分析数据", objectives=no_objectives)

    plan = _fallback_analysis_planning(sample_data_profile, requirement)

    # Even with nothing requested, the task list must not be empty.
    produced = len(plan.tasks)
    assert produced > 0
def test_validate_dependencies_valid():
    """Check a three-task chain with only backward references validates cleanly."""
    # One row per task: (id, name, description, priority, dependencies).
    task_specs = [
        ("task_1", "Task 1", "First task", 5, []),
        ("task_2", "Task 2", "Second task", 4, ["task_1"]),
        ("task_3", "Task 3", "Third task", 3, ["task_1", "task_2"]),
    ]
    tasks = [
        AnalysisTask(
            id=task_id,
            name=task_name,
            description=task_desc,
            priority=task_priority,
            dependencies=task_deps,
        )
        for task_id, task_name, task_desc, task_priority, task_deps in task_specs
    ]

    result = validate_task_dependencies(tasks)

    assert result['valid']
    assert result['forms_dag']
    assert not result['has_circular_dependency']
    missing = result['missing_dependencies']
    assert len(missing) == 0
def test_validate_dependencies_with_cycle():
    """Check two mutually dependent tasks are reported as a circular dependency."""
    # task_1 depends on task_2 and task_2 depends on task_1.
    first = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=["task_2"],
    )
    second = AnalysisTask(
        id="task_2",
        name="Task 2",
        description="Second task",
        priority=4,
        dependencies=["task_1"],
    )

    result = validate_task_dependencies([first, second])

    assert not result['valid']
    assert result['has_circular_dependency']
    assert not result['forms_dag']
def test_validate_dependencies_with_missing():
    """Check a dependency on an undefined task id is reported as missing."""
    # "task_999" is not the id of any task passed to the validator.
    dangling = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=["task_999"],
    )

    result = validate_task_dependencies([dangling])

    assert not result['valid']
    missing = result['missing_dependencies']
    assert len(missing) > 0
def test_has_circular_dependency_simple_cycle():
    """Check a two-task cycle (A depends on B, B depends on A) is detected."""
    # Each task id mapped to the ids it depends on.
    dependency_map = {"A": ["B"], "B": ["A"]}
    tasks = [
        AnalysisTask(
            id=task_id,
            name=f"Task {task_id}",
            description=f"Task {task_id}",
            priority=3,
            dependencies=deps,
        )
        for task_id, deps in dependency_map.items()
    ]

    cycle_found = _has_circular_dependency(tasks)
    assert cycle_found
def test_has_circular_dependency_complex_cycle():
    """Check a three-task cycle is detected."""
    # Cycle: A -> B -> C -> A
    dependency_map = {"A": ["B"], "B": ["C"], "C": ["A"]}
    tasks = [
        AnalysisTask(
            id=task_id,
            name=f"Task {task_id}",
            description=f"Task {task_id}",
            priority=3,
            dependencies=deps,
        )
        for task_id, deps in dependency_map.items()
    ]

    cycle_found = _has_circular_dependency(tasks)
    assert cycle_found
def test_has_circular_dependency_no_cycle():
    """Check an acyclic three-task graph is not flagged as circular."""
    # A has no dependencies; B depends on A; C depends on A and B.
    dependency_map = {"A": [], "B": ["A"], "C": ["A", "B"]}
    tasks = [
        AnalysisTask(
            id=task_id,
            name=f"Task {task_id}",
            description=f"Task {task_id}",
            priority=3,
            dependencies=deps,
        )
        for task_id, deps in dependency_map.items()
    ]

    cycle_found = _has_circular_dependency(tasks)
    assert not cycle_found
def test_task_priority_range(sample_data_profile, sample_requirement):
    """Check every task in the fallback plan has a priority between 1 and 5 inclusive."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)
    lowest, highest = 1, 5

    for item in generated.tasks:
        assert lowest <= item.priority <= highest, (
            f"Task {item.id} has invalid priority {item.priority}"
        )
def test_task_unique_ids(sample_data_profile, sample_requirement):
    """Check no two tasks in the fallback plan share an id."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    seen_ids = [item.id for item in generated.tasks]
    # Deduplicating through a set shrinks the count only if some id repeats.
    distinct_count = len(set(seen_ids))
    assert len(seen_ids) == distinct_count, "Task IDs should be unique"
def test_plan_has_timestamps(sample_data_profile, sample_requirement):
    """Check the fallback plan sets both ``created_at`` and ``updated_at``."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    created = generated.created_at
    assert created is not None
    updated = generated.updated_at
    assert updated is not None
def test_task_required_tools_is_list(sample_data_profile, sample_requirement):
    """Check ``required_tools`` is a ``list`` on every task in the fallback plan."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    for item in generated.tasks:
        tools = item.required_tools
        assert isinstance(tools, list), (
            f"Task {item.id} required_tools should be a list"
        )
def test_task_dependencies_is_list(sample_data_profile, sample_requirement):
    """Check ``dependencies`` is a ``list`` on every task in the fallback plan."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    for item in generated.tasks:
        deps = item.dependencies
        assert isinstance(deps, list), (
            f"Task {item.id} dependencies should be a list"
        )