343 lines
9.8 KiB
Python
343 lines
9.8 KiB
Python
|
|
"""Unit tests for analysis planning engine."""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
|
||
|
|
from src.engines.analysis_planning import (
|
||
|
|
plan_analysis,
|
||
|
|
validate_task_dependencies,
|
||
|
|
_fallback_analysis_planning,
|
||
|
|
_has_circular_dependency
|
||
|
|
)
|
||
|
|
from src.models.data_profile import DataProfile, ColumnInfo
|
||
|
|
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
|
||
|
|
from src.models.analysis_plan import AnalysisTask
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture
def sample_data_profile():
    """Provide a five-column, ticket-style ``DataProfile`` for the tests.

    The profile describes a 1000-row ``test.csv`` whose columns cover the
    datetime, categorical, numeric and text dtypes.
    """
    # Build the column descriptions first so the profile call stays short.
    column_specs = [
        ColumnInfo(
            name='created_at',
            dtype='datetime',
            missing_rate=0.0,
            unique_count=1000,
        ),
        ColumnInfo(
            name='status',
            dtype='categorical',
            missing_rate=0.1,
            unique_count=5,
        ),
        ColumnInfo(
            name='type',
            dtype='categorical',
            missing_rate=0.0,
            unique_count=10,
        ),
        ColumnInfo(
            name='priority',
            dtype='numeric',
            missing_rate=0.0,
            unique_count=5,
        ),
        ColumnInfo(
            name='description',
            dtype='text',
            missing_rate=0.05,
            unique_count=950,
        ),
    ]

    return DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=column_specs,
        inferred_type='ticket',
        key_fields={'time': 'created_at', 'status': 'status'},
        quality_score=85.0,
        summary='Ticket data with 1000 rows',
    )
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture
def sample_requirement():
    """Provide a ``RequirementSpec`` with a health and a trend objective."""
    # Health-analysis objective (priority 5).
    health_objective = AnalysisObjective(
        name="健康度分析",
        description="评估工单处理的健康状况",
        metrics=["完成率", "处理效率"],
        priority=5,
    )
    # Trend-analysis objective (priority 4).
    trend_objective = AnalysisObjective(
        name="趋势分析",
        description="分析工单随时间的变化趋势",
        metrics=["时间序列", "增长率"],
        priority=4,
    )

    return RequirementSpec(
        user_input="分析工单健康度和趋势",
        objectives=[health_objective, trend_objective],
    )
|
||
|
|
|
||
|
|
|
||
|
|
def test_fallback_planning_generates_tasks(sample_data_profile, sample_requirement):
    """Fallback planning yields tasks, one objective per input, and a duration."""
    result = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    # At least one task must be produced.
    produced_tasks = len(result.tasks)
    assert produced_tasks > 0

    # The plan carries as many objectives as the requirement supplied.
    produced_objectives = len(result.objectives)
    assert produced_objectives == len(sample_requirement.objectives)

    # A positive duration estimate must be present.
    assert result.estimated_duration > 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_fallback_planning_respects_objectives(sample_data_profile, sample_requirement):
    """Fallback planning emits tasks whose names match each objective's theme."""
    plan = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    def tasks_mentioning(*keywords):
        # Keep every task whose name contains at least one of the keywords.
        return [
            task
            for task in plan.tasks
            if any(keyword in task.name for keyword in keywords)
        ]

    # Health objective: task names mention health or quality.
    health_related = tasks_mentioning('健康', '质量')
    assert len(health_related) > 0

    # Trend objective: task names mention trend or time.
    trend_related = tasks_mentioning('趋势', '时间')
    assert len(trend_related) > 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_fallback_planning_with_no_matching_objectives(sample_data_profile):
    """A single generic objective with no metrics still yields a task."""
    generic_objective = AnalysisObjective(
        name="综合分析",
        description="全面分析数据",
        metrics=[],
        priority=3,
    )
    spec = RequirementSpec(
        user_input="分析数据",
        objectives=[generic_objective],
    )

    result = _fallback_analysis_planning(sample_data_profile, spec)

    # The planner must not come back empty-handed.
    assert len(result.tasks) > 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_fallback_planning_with_empty_objectives(sample_data_profile):
    """A requirement without any objectives still yields a task."""
    spec = RequirementSpec(user_input="分析数据", objectives=[])

    result = _fallback_analysis_planning(sample_data_profile, spec)

    # Some default task is expected even with nothing to plan against.
    task_total = len(result.tasks)
    assert task_total > 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_validate_dependencies_valid():
    """A three-task chain with only backward references validates cleanly."""
    first = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=[],
    )
    second = AnalysisTask(
        id="task_2",
        name="Task 2",
        description="Second task",
        priority=4,
        dependencies=["task_1"],
    )
    third = AnalysisTask(
        id="task_3",
        name="Task 3",
        description="Third task",
        priority=3,
        dependencies=["task_1", "task_2"],
    )

    report = validate_task_dependencies([first, second, third])

    # Every indicator in the report should signal a healthy graph.
    assert report['valid']
    assert report['forms_dag']
    assert not report['has_circular_dependency']
    assert len(report['missing_dependencies']) == 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_validate_dependencies_with_cycle():
    """Two tasks that depend on each other are reported as a cycle."""
    first = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=["task_2"],
    )
    second = AnalysisTask(
        id="task_2",
        name="Task 2",
        description="Second task",
        priority=4,
        dependencies=["task_1"],
    )

    report = validate_task_dependencies([first, second])

    # task_1 <-> task_2 is a cycle, so the graph is invalid and not a DAG.
    assert not report['valid']
    assert report['has_circular_dependency']
    assert not report['forms_dag']
|
||
|
|
|
||
|
|
|
||
|
|
def test_validate_dependencies_with_missing():
    """A dependency on an unknown task id is reported as missing."""
    # "task_999" is never defined, so the reference cannot be resolved.
    orphaned = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=["task_999"],
    )

    report = validate_task_dependencies([orphaned])

    assert not report['valid']
    unresolved = report['missing_dependencies']
    assert len(unresolved) > 0
|
||
|
|
|
||
|
|
|
||
|
|
def test_has_circular_dependency_simple_cycle():
    """A direct two-node loop (A -> B -> A) is detected as circular."""
    task_a = AnalysisTask(
        id="A",
        name="Task A",
        description="Task A",
        priority=3,
        dependencies=["B"],
    )
    task_b = AnalysisTask(
        id="B",
        name="Task B",
        description="Task B",
        priority=3,
        dependencies=["A"],
    )

    cyclic = _has_circular_dependency([task_a, task_b])
    assert cyclic
|
||
|
|
|
||
|
|
|
||
|
|
def test_has_circular_dependency_complex_cycle():
    """A three-node loop (A -> B -> C -> A) is detected as circular."""
    task_a = AnalysisTask(
        id="A",
        name="Task A",
        description="Task A",
        priority=3,
        dependencies=["B"],
    )
    task_b = AnalysisTask(
        id="B",
        name="Task B",
        description="Task B",
        priority=3,
        dependencies=["C"],
    )
    # C points back at A, which closes the loop.
    task_c = AnalysisTask(
        id="C",
        name="Task C",
        description="Task C",
        priority=3,
        dependencies=["A"],
    )

    cyclic = _has_circular_dependency([task_a, task_b, task_c])
    assert cyclic
|
||
|
|
|
||
|
|
|
||
|
|
def test_has_circular_dependency_no_cycle():
    """An acyclic graph (B -> A, C -> A and B) is not flagged as circular."""
    root = AnalysisTask(
        id="A",
        name="Task A",
        description="Task A",
        priority=3,
        dependencies=[],
    )
    middle = AnalysisTask(
        id="B",
        name="Task B",
        description="Task B",
        priority=3,
        dependencies=["A"],
    )
    leaf = AnalysisTask(
        id="C",
        name="Task C",
        description="Task C",
        priority=3,
        dependencies=["A", "B"],
    )

    cyclic = _has_circular_dependency([root, middle, leaf])
    assert not cyclic
|
||
|
|
|
||
|
|
|
||
|
|
def test_task_priority_range(sample_data_profile, sample_requirement):
    """Every task produced by fallback planning has a priority within 1..5.

    The plan is first required to contain at least one task; without that
    guard the per-task loop would pass vacuously on an empty plan.
    """
    plan = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    # Guard against a vacuous pass: an empty task list checks nothing below.
    assert len(plan.tasks) > 0, "Fallback plan produced no tasks to check"

    for task in plan.tasks:
        assert 1 <= task.priority <= 5, \
            f"Task {task.id} has invalid priority {task.priority}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_task_unique_ids(sample_data_profile, sample_requirement):
    """No two tasks in the fallback plan share the same id."""
    result = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    all_ids = [item.id for item in result.tasks]
    distinct_ids = set(all_ids)

    # Any duplicate collapses in the set and makes the two sizes differ.
    assert len(all_ids) == len(distinct_ids), "Task IDs should be unique"
|
||
|
|
|
||
|
|
|
||
|
|
def test_plan_has_timestamps(sample_data_profile, sample_requirement):
    """The fallback plan has both ``created_at`` and ``updated_at`` set."""
    result = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    created = result.created_at
    assert created is not None

    updated = result.updated_at
    assert updated is not None
|
||
|
|
|
||
|
|
|
||
|
|
def test_task_required_tools_is_list(sample_data_profile, sample_requirement):
    """Every task in the fallback plan exposes ``required_tools`` as a list.

    The plan is first required to contain at least one task; without that
    guard the per-task loop would pass vacuously on an empty plan.
    """
    plan = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    # Guard against a vacuous pass: an empty task list checks nothing below.
    assert len(plan.tasks) > 0, "Fallback plan produced no tasks to check"

    for task in plan.tasks:
        assert isinstance(task.required_tools, list), \
            f"Task {task.id} required_tools should be a list"
|
||
|
|
|
||
|
|
|
||
|
|
def test_task_dependencies_is_list(sample_data_profile, sample_requirement):
    """Every task in the fallback plan exposes ``dependencies`` as a list.

    The plan is first required to contain at least one task; without that
    guard the per-task loop would pass vacuously on an empty plan.
    """
    plan = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    # Guard against a vacuous pass: an empty task list checks nothing below.
    assert len(plan.tasks) > 0, "Fallback plan produced no tasks to check"

    for task in plan.tasks:
        assert isinstance(task.dependencies, list), \
            f"Task {task.id} dependencies should be a list"
|