"""Unit tests for analysis planning engine.""" import pytest from src.engines.analysis_planning import ( plan_analysis, validate_task_dependencies, _fallback_analysis_planning, _has_circular_dependency ) from src.models.data_profile import DataProfile, ColumnInfo from src.models.requirement_spec import RequirementSpec, AnalysisObjective from src.models.analysis_plan import AnalysisTask @pytest.fixture def sample_data_profile(): """Create a sample data profile for testing.""" return DataProfile( file_path='test.csv', row_count=1000, column_count=5, columns=[ ColumnInfo( name='created_at', dtype='datetime', missing_rate=0.0, unique_count=1000 ), ColumnInfo( name='status', dtype='categorical', missing_rate=0.1, unique_count=5 ), ColumnInfo( name='type', dtype='categorical', missing_rate=0.0, unique_count=10 ), ColumnInfo( name='priority', dtype='numeric', missing_rate=0.0, unique_count=5 ), ColumnInfo( name='description', dtype='text', missing_rate=0.05, unique_count=950 ) ], inferred_type='ticket', key_fields={'time': 'created_at', 'status': 'status'}, quality_score=85.0, summary='Ticket data with 1000 rows' ) @pytest.fixture def sample_requirement(): """Create a sample requirement for testing.""" return RequirementSpec( user_input="分析工单健康度和趋势", objectives=[ AnalysisObjective( name="健康度分析", description="评估工单处理的健康状况", metrics=["完成率", "处理效率"], priority=5 ), AnalysisObjective( name="趋势分析", description="分析工单随时间的变化趋势", metrics=["时间序列", "增长率"], priority=4 ) ] ) def test_fallback_planning_generates_tasks(sample_data_profile, sample_requirement): """Test that fallback planning generates tasks.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) # Should have tasks assert len(plan.tasks) > 0 # Should have objectives assert len(plan.objectives) == len(sample_requirement.objectives) # Should have estimated duration assert plan.estimated_duration > 0 def test_fallback_planning_respects_objectives(sample_data_profile, sample_requirement): """Test that fallback planning creates tasks based on objectives.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) # Should have tasks related to health analysis health_tasks = [t for t in plan.tasks if '健康' in t.name or '质量' in t.name] assert len(health_tasks) > 0 # Should have tasks related to trend analysis trend_tasks = [t for t in plan.tasks if '趋势' in t.name or '时间' in t.name] assert len(trend_tasks) > 0 def test_fallback_planning_with_no_matching_objectives(sample_data_profile): """Test fallback planning with generic objectives.""" requirement = RequirementSpec( user_input="分析数据", objectives=[ AnalysisObjective( name="综合分析", description="全面分析数据", metrics=[], priority=3 ) ] ) plan = _fallback_analysis_planning(sample_data_profile, requirement) # Should still generate at least one task assert len(plan.tasks) > 0 def test_fallback_planning_with_empty_objectives(sample_data_profile): """Test fallback planning with no objectives.""" requirement = RequirementSpec( user_input="分析数据", objectives=[] ) plan = _fallback_analysis_planning(sample_data_profile, requirement) # Should generate default task assert len(plan.tasks) > 0 def test_validate_dependencies_valid(): """Test validation with valid dependencies.""" tasks = [ AnalysisTask( id="task_1", name="Task 1", description="First task", priority=5, dependencies=[] ), AnalysisTask( id="task_2", name="Task 2", description="Second task", priority=4, dependencies=["task_1"] ), AnalysisTask( id="task_3", name="Task 3", description="Third task", priority=3, dependencies=["task_1", "task_2"] ) ] validation = validate_task_dependencies(tasks) assert validation['valid'] assert validation['forms_dag'] assert not validation['has_circular_dependency'] assert len(validation['missing_dependencies']) == 0 def test_validate_dependencies_with_cycle(): """Test validation detects circular dependencies.""" tasks = [ AnalysisTask( id="task_1", name="Task 1", description="First task", priority=5, dependencies=["task_2"] ), AnalysisTask( id="task_2", name="Task 2", description="Second task", priority=4, dependencies=["task_1"] ) ] validation = validate_task_dependencies(tasks) assert not validation['valid'] assert validation['has_circular_dependency'] assert not validation['forms_dag'] def test_validate_dependencies_with_missing(): """Test validation detects missing dependencies.""" tasks = [ AnalysisTask( id="task_1", name="Task 1", description="First task", priority=5, dependencies=["task_999"] # Doesn't exist ) ] validation = validate_task_dependencies(tasks) assert not validation['valid'] assert len(validation['missing_dependencies']) > 0 def test_has_circular_dependency_simple_cycle(): """Test circular dependency detection with simple cycle.""" tasks = [ AnalysisTask( id="A", name="Task A", description="Task A", priority=3, dependencies=["B"] ), AnalysisTask( id="B", name="Task B", description="Task B", priority=3, dependencies=["A"] ) ] assert _has_circular_dependency(tasks) def test_has_circular_dependency_complex_cycle(): """Test circular dependency detection with complex cycle.""" tasks = [ AnalysisTask( id="A", name="Task A", description="Task A", priority=3, dependencies=["B"] ), AnalysisTask( id="B", name="Task B", description="Task B", priority=3, dependencies=["C"] ), AnalysisTask( id="C", name="Task C", description="Task C", priority=3, dependencies=["A"] # Cycle: A -> B -> C -> A ) ] assert _has_circular_dependency(tasks) def test_has_circular_dependency_no_cycle(): """Test circular dependency detection with no cycle.""" tasks = [ AnalysisTask( id="A", name="Task A", description="Task A", priority=3, dependencies=[] ), AnalysisTask( id="B", name="Task B", description="Task B", priority=3, dependencies=["A"] ), AnalysisTask( id="C", name="Task C", description="Task C", priority=3, dependencies=["A", "B"] ) ] assert not _has_circular_dependency(tasks) def test_task_priority_range(sample_data_profile, sample_requirement): """Test that all generated tasks have valid priority range.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) for task in plan.tasks: assert 1 <= task.priority <= 5, \ f"Task {task.id} has invalid priority {task.priority}" def test_task_unique_ids(sample_data_profile, sample_requirement): """Test that all tasks have unique IDs.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) task_ids = [task.id for task in plan.tasks] assert len(task_ids) == len(set(task_ids)), "Task IDs should be unique" def test_plan_has_timestamps(sample_data_profile, sample_requirement): """Test that plan has creation and update timestamps.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) assert plan.created_at is not None assert plan.updated_at is not None def test_task_required_tools_is_list(sample_data_profile, sample_requirement): """Test that required_tools is always a list.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) for task in plan.tasks: assert isinstance(task.required_tools, list), \ f"Task {task.id} required_tools should be a list" def test_task_dependencies_is_list(sample_data_profile, sample_requirement): """Test that dependencies is always a list.""" plan = _fallback_analysis_planning(sample_data_profile, sample_requirement) for task in plan.tasks: assert isinstance(task.dependencies, list), \ f"Task {task.id} dependencies should be a list"