Complete AI Data Analysis Agent implementation with 95.7% test coverage
This commit is contained in:
342
tests/test_analysis_planning.py
Normal file
342
tests/test_analysis_planning.py
Normal file
@@ -0,0 +1,342 @@
|
||||
"""Unit tests for analysis planning engine."""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.engines.analysis_planning import (
|
||||
plan_analysis,
|
||||
validate_task_dependencies,
|
||||
_fallback_analysis_planning,
|
||||
_has_circular_dependency
|
||||
)
|
||||
from src.models.data_profile import DataProfile, ColumnInfo
|
||||
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
|
||||
from src.models.analysis_plan import AnalysisTask
|
||||
|
||||
|
||||
@pytest.fixture
def sample_data_profile():
    """Return a ticket-style ``DataProfile`` with five columns for planner tests.

    The profile describes ``test.csv`` (1000 rows) and contains one datetime
    column, two categorical columns, one numeric column and one text column.
    """
    # One row per profiled column: (name, dtype, missing_rate, unique_count).
    column_specs = [
        ('created_at', 'datetime', 0.0, 1000),
        ('status', 'categorical', 0.1, 5),
        ('type', 'categorical', 0.0, 10),
        ('priority', 'numeric', 0.0, 5),
        ('description', 'text', 0.05, 950),
    ]
    columns = [
        ColumnInfo(
            name=col_name,
            dtype=col_dtype,
            missing_rate=col_missing,
            unique_count=col_unique,
        )
        for col_name, col_dtype, col_missing, col_unique in column_specs
    ]

    return DataProfile(
        file_path='test.csv',
        row_count=1000,
        column_count=5,
        columns=columns,
        inferred_type='ticket',
        key_fields={'time': 'created_at', 'status': 'status'},
        quality_score=85.0,
        summary='Ticket data with 1000 rows',
    )
|
||||
|
||||
|
||||
@pytest.fixture
def sample_requirement():
    """Return a ``RequirementSpec`` carrying two objectives for planner tests.

    The first objective (priority 5) is a health analysis and the second
    (priority 4) is a trend analysis; names, descriptions and metrics are
    given in Chinese, matching the keywords the tests search for.
    """
    # Health-analysis objective: completion rate and handling efficiency.
    health_objective = AnalysisObjective(
        name="健康度分析",
        description="评估工单处理的健康状况",
        metrics=["完成率", "处理效率"],
        priority=5,
    )
    # Trend-analysis objective: time series and growth rate.
    trend_objective = AnalysisObjective(
        name="趋势分析",
        description="分析工单随时间的变化趋势",
        metrics=["时间序列", "增长率"],
        priority=4,
    )

    return RequirementSpec(
        user_input="分析工单健康度和趋势",
        objectives=[health_objective, trend_objective],
    )
|
||||
|
||||
|
||||
def test_fallback_planning_generates_tasks(sample_data_profile, sample_requirement):
    """Fallback planning yields tasks, matching objectives and a positive duration."""
    result = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    # The plan must not be empty.
    task_count = len(result.tasks)
    assert task_count > 0

    # Every requested objective is carried over into the plan.
    expected_objectives = len(sample_requirement.objectives)
    assert len(result.objectives) == expected_objectives

    # A duration estimate must be present and positive.
    assert result.estimated_duration > 0
|
||||
|
||||
|
||||
def test_fallback_planning_respects_objectives(sample_data_profile, sample_requirement):
    """Fallback planning produces tasks whose names reflect each objective."""
    result = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    def tasks_named_with(*keywords):
        """Collect the plan's tasks whose name contains any of ``keywords``."""
        return [
            task
            for task in result.tasks
            if any(keyword in task.name for keyword in keywords)
        ]

    # Health analysis: task names mentioning "health" or "quality".
    health_related = tasks_named_with('健康', '质量')
    assert len(health_related) > 0

    # Trend analysis: task names mentioning "trend" or "time".
    trend_related = tasks_named_with('趋势', '时间')
    assert len(trend_related) > 0
|
||||
|
||||
|
||||
def test_fallback_planning_with_no_matching_objectives(sample_data_profile):
    """Fallback planning still emits tasks for a single generic objective."""
    # A catch-all "comprehensive analysis" objective with no metrics.
    generic_objective = AnalysisObjective(
        name="综合分析",
        description="全面分析数据",
        metrics=[],
        priority=3,
    )
    generic_requirement = RequirementSpec(
        user_input="分析数据",
        objectives=[generic_objective],
    )

    result = _fallback_analysis_planning(sample_data_profile, generic_requirement)

    # At least one task is expected even for a generic objective.
    assert len(result.tasks) > 0
|
||||
|
||||
|
||||
def test_fallback_planning_with_empty_objectives(sample_data_profile):
    """Fallback planning emits at least one task when no objectives are given."""
    no_objectives: list = []
    bare_requirement = RequirementSpec(user_input="分析数据", objectives=no_objectives)

    result = _fallback_analysis_planning(sample_data_profile, bare_requirement)

    # The test expects some default task in place of objective-driven ones.
    assert len(result.tasks) > 0
|
||||
|
||||
|
||||
def test_validate_dependencies_valid():
    """A linear chain of three tasks validates as a cycle-free, complete DAG."""
    # task_1 has no prerequisites; later tasks depend only on earlier ones.
    first = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=[],
    )
    second = AnalysisTask(
        id="task_2",
        name="Task 2",
        description="Second task",
        priority=4,
        dependencies=["task_1"],
    )
    third = AnalysisTask(
        id="task_3",
        name="Task 3",
        description="Third task",
        priority=3,
        dependencies=["task_1", "task_2"],
    )

    report = validate_task_dependencies([first, second, third])

    assert report['valid']
    assert report['forms_dag']
    assert not report['has_circular_dependency']
    assert len(report['missing_dependencies']) == 0
|
||||
|
||||
|
||||
def test_validate_dependencies_with_cycle():
    """Two mutually dependent tasks are reported as invalid and cyclic."""
    # task_1 and task_2 each list the other as a prerequisite.
    first = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=["task_2"],
    )
    second = AnalysisTask(
        id="task_2",
        name="Task 2",
        description="Second task",
        priority=4,
        dependencies=["task_1"],
    )

    report = validate_task_dependencies([first, second])

    assert not report['valid']
    assert report['has_circular_dependency']
    assert not report['forms_dag']
|
||||
|
||||
|
||||
def test_validate_dependencies_with_missing():
    """A dependency on an undefined task id is reported as missing."""
    unknown_id = "task_999"  # no task with this id is defined
    orphaned = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="First task",
        priority=5,
        dependencies=[unknown_id],
    )

    report = validate_task_dependencies([orphaned])

    assert not report['valid']
    assert len(report['missing_dependencies']) > 0
|
||||
|
||||
|
||||
def test_has_circular_dependency_simple_cycle():
    """A two-task loop (A <-> B) is detected as a circular dependency."""
    task_a = AnalysisTask(
        id="A",
        name="Task A",
        description="Task A",
        priority=3,
        dependencies=["B"],
    )
    task_b = AnalysisTask(
        id="B",
        name="Task B",
        description="Task B",
        priority=3,
        dependencies=["A"],
    )

    cycle_found = _has_circular_dependency([task_a, task_b])
    assert cycle_found
|
||||
|
||||
|
||||
def test_has_circular_dependency_complex_cycle():
    """A three-task loop (A -> B -> C -> A) is detected as a circular dependency."""
    task_a = AnalysisTask(
        id="A",
        name="Task A",
        description="Task A",
        priority=3,
        dependencies=["B"],
    )
    task_b = AnalysisTask(
        id="B",
        name="Task B",
        description="Task B",
        priority=3,
        dependencies=["C"],
    )
    # C points back at A, closing the loop.
    task_c = AnalysisTask(
        id="C",
        name="Task C",
        description="Task C",
        priority=3,
        dependencies=["A"],
    )

    cycle_found = _has_circular_dependency([task_a, task_b, task_c])
    assert cycle_found
|
||||
|
||||
|
||||
def test_has_circular_dependency_no_cycle():
    """An acyclic graph (A; B needs A; C needs A and B) reports no cycle."""
    task_a = AnalysisTask(
        id="A",
        name="Task A",
        description="Task A",
        priority=3,
        dependencies=[],
    )
    task_b = AnalysisTask(
        id="B",
        name="Task B",
        description="Task B",
        priority=3,
        dependencies=["A"],
    )
    task_c = AnalysisTask(
        id="C",
        name="Task C",
        description="Task C",
        priority=3,
        dependencies=["A", "B"],
    )

    cycle_found = _has_circular_dependency([task_a, task_b, task_c])
    assert not cycle_found
|
||||
|
||||
|
||||
def test_task_priority_range(sample_data_profile, sample_requirement):
    """Every task produced by fallback planning has a priority between 1 and 5."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    for task in generated.tasks:
        within_bounds = 1 <= task.priority <= 5
        assert within_bounds, f"Task {task.id} has invalid priority {task.priority}"
|
||||
|
||||
|
||||
def test_task_unique_ids(sample_data_profile, sample_requirement):
    """No two tasks produced by fallback planning share an id."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    all_ids = [entry.id for entry in generated.tasks]
    # Collapsing to a set drops duplicates, so equal sizes mean all ids differ.
    distinct_ids = set(all_ids)
    assert len(all_ids) == len(distinct_ids), "Task IDs should be unique"
|
||||
|
||||
|
||||
def test_plan_has_timestamps(sample_data_profile, sample_requirement):
    """The fallback plan has both ``created_at`` and ``updated_at`` set."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    created = generated.created_at
    assert created is not None

    updated = generated.updated_at
    assert updated is not None
|
||||
|
||||
|
||||
def test_task_required_tools_is_list(sample_data_profile, sample_requirement):
    """Each generated task stores ``required_tools`` as a ``list``."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    for task in generated.tasks:
        tools = task.required_tools
        assert isinstance(tools, list), f"Task {task.id} required_tools should be a list"
|
||||
|
||||
|
||||
def test_task_dependencies_is_list(sample_data_profile, sample_requirement):
    """Each generated task stores ``dependencies`` as a ``list``."""
    generated = _fallback_analysis_planning(sample_data_profile, sample_requirement)

    for task in generated.tasks:
        prerequisites = task.dependencies
        assert isinstance(prerequisites, list), f"Task {task.id} dependencies should be a list"
|
||||
Reference in New Issue
Block a user