"""Property-based tests for analysis planning engine.""" import pytest from hypothesis import given, strategies as st, settings from src.engines.analysis_planning import ( plan_analysis, validate_task_dependencies, _fallback_analysis_planning, _has_circular_dependency ) from src.models.data_profile import DataProfile, ColumnInfo from src.models.requirement_spec import RequirementSpec, AnalysisObjective from src.models.analysis_plan import AnalysisTask # Strategies for generating test data @st.composite def column_info_strategy(draw): """Generate random ColumnInfo.""" name = draw(st.text(min_size=1, max_size=20, alphabet=st.characters(whitelist_categories=('L', 'N')))) dtype = draw(st.sampled_from(['numeric', 'categorical', 'datetime', 'text'])) missing_rate = draw(st.floats(min_value=0.0, max_value=1.0)) unique_count = draw(st.integers(min_value=1, max_value=1000)) return ColumnInfo( name=name, dtype=dtype, missing_rate=missing_rate, unique_count=unique_count, sample_values=[], statistics={} ) @st.composite def data_profile_strategy(draw): """Generate random DataProfile.""" row_count = draw(st.integers(min_value=10, max_value=100000)) columns = draw(st.lists(column_info_strategy(), min_size=2, max_size=20)) inferred_type = draw(st.sampled_from(['ticket', 'sales', 'user', 'unknown'])) quality_score = draw(st.floats(min_value=0.0, max_value=100.0)) return DataProfile( file_path='test.csv', row_count=row_count, column_count=len(columns), columns=columns, inferred_type=inferred_type, key_fields={}, quality_score=quality_score, summary=f"Test data with {len(columns)} columns" ) @st.composite def requirement_spec_strategy(draw): """Generate random RequirementSpec.""" user_input = draw(st.text(min_size=5, max_size=100)) num_objectives = draw(st.integers(min_value=1, max_value=5)) objectives = [] for i in range(num_objectives): obj = AnalysisObjective( name=f"Objective {i+1}", description=draw(st.text(min_size=10, max_size=100)), metrics=draw(st.lists(st.text(min_size=3, max_size=20), min_size=1, max_size=5)), priority=draw(st.integers(min_value=1, max_value=5)) ) objectives.append(obj) return RequirementSpec( user_input=user_input, objectives=objectives ) # Feature: true-ai-agent, Property 6: 动态任务生成 @given( data_profile=data_profile_strategy(), requirement=requirement_spec_strategy() ) @settings(max_examples=20, deadline=None) def test_dynamic_task_generation(data_profile, requirement): """ Property 6: For any data profile and requirement spec, the analysis planning engine should be able to generate a non-empty task list, with each task containing unique ID, description, priority, and required tools. 
    Validates: Scenario 1 Acceptance Criterion 2, FR-3.1
    """
    # Use fallback to avoid API dependency
    plan = _fallback_analysis_planning(data_profile, requirement)

    # Verify: should generate tasks
    assert len(plan.tasks) > 0, "Should generate at least one task"

    # Verify: each task should have the required fields
    task_ids = set()
    for task in plan.tasks:
        # Unique ID
        assert task.id not in task_ids, f"Task ID {task.id} is not unique"
        task_ids.add(task.id)

        # Required fields
        assert len(task.name) > 0, "Task name should not be empty"
        assert len(task.description) > 0, "Task description should not be empty"
        assert 1 <= task.priority <= 5, \
            f"Task priority {task.priority} should be between 1 and 5"
        assert isinstance(task.required_tools, list), "Required tools should be a list"
        assert isinstance(task.dependencies, list), "Dependencies should be a list"
        assert task.status in ['pending', 'running', 'completed', 'failed', 'skipped'], \
            f"Invalid task status: {task.status}"

    # Verify: plan should have objectives
    assert len(plan.objectives) > 0, "Plan should have objectives"

    # Verify: estimated duration should be non-negative
    assert plan.estimated_duration >= 0, "Estimated duration should be non-negative"


# Feature: true-ai-agent, Property 7: task dependency consistency
@given(
    data_profile=data_profile_strategy(),
    requirement=requirement_spec_strategy()
)
@settings(max_examples=20, deadline=None)
def test_task_dependency_consistency(data_profile, requirement):
    """
    Property 7: For any generated analysis plan, all task dependencies should
    form a directed acyclic graph (DAG), with no circular dependencies.

    Validates: FR-3.1
    """
    # Use fallback to avoid API dependency
    plan = _fallback_analysis_planning(data_profile, requirement)

    # Verify: no circular dependencies
    assert not _has_circular_dependency(plan.tasks), \
        "Task dependencies should not form a cycle"

    # Verify: all dependencies exist and no task depends on itself
    task_ids = {task.id for task in plan.tasks}
    for task in plan.tasks:
        for dep_id in task.dependencies:
            assert dep_id in task_ids, \
                f"Task {task.id} depends on non-existent task {dep_id}"
            assert dep_id != task.id, \
                f"Task {task.id} should not depend on itself"

    # Verify: the validation function agrees
    validation = validate_task_dependencies(plan.tasks)
    assert validation['valid'], "Task dependencies should be valid"
    assert validation['forms_dag'], "Task dependencies should form a DAG"
    assert not validation['has_circular_dependency'], "Should not have circular dependencies"
    assert len(validation['missing_dependencies']) == 0, "Should not have missing dependencies"


# Feature: true-ai-agent, Property 6: dynamic task generation (priority ordering)
@given(
    data_profile=data_profile_strategy(),
    requirement=requirement_spec_strategy()
)
@settings(max_examples=20, deadline=None)
def test_task_priority_ordering(data_profile, requirement):
    """
    Property 6 (extended): Tasks should respect objective priorities.
    High-priority objectives should generate high-priority tasks.
    Validates: FR-3.2
    """
    # Use fallback to avoid API dependency
    plan = _fallback_analysis_planning(data_profile, requirement)

    # Verify: all tasks have valid priorities
    for task in plan.tasks:
        assert 1 <= task.priority <= 5, \
            f"Task priority {task.priority} should be between 1 and 5"

    # Verify: if objectives have high priority, at least some tasks should too
    max_obj_priority = max(obj.priority for obj in plan.objectives)
    if max_obj_priority >= 4:
        # Should have at least one high-priority task
        high_priority_tasks = [t for t in plan.tasks if t.priority >= 4]
        # This is a soft requirement, so we only check the structure here
        assert all(1 <= t.priority <= 5 for t in plan.tasks)


# Test circular dependency detection
@given(
    num_tasks=st.integers(min_value=2, max_value=10)
)
@settings(max_examples=10, deadline=None)
def test_circular_dependency_detection(num_tasks):
    """Test that circular dependency detection works correctly."""
    # Create tasks with no dependencies (should be valid)
    tasks = [
        AnalysisTask(
            id=f"task_{i}",
            name=f"Task {i}",
            description=f"Description {i}",
            priority=3,
            dependencies=[]
        )
        for i in range(num_tasks)
    ]

    # Should not have circular dependencies
    assert not _has_circular_dependency(tasks)

    # Create a simple cycle: task_0 -> task_1 -> task_0
    if num_tasks >= 2:
        tasks_with_cycle = [
            AnalysisTask(
                id="task_0",
                name="Task 0",
                description="Description 0",
                priority=3,
                dependencies=["task_1"]
            ),
            AnalysisTask(
                id="task_1",
                name="Task 1",
                description="Description 1",
                priority=3,
                dependencies=["task_0"]
            )
        ]

        # Should detect the cycle
        assert _has_circular_dependency(tasks_with_cycle)


# Test dependency validation
def test_dependency_validation_with_missing_deps():
    """Test that validation detects missing dependencies."""
    tasks = [
        AnalysisTask(
            id="task_1",
            name="Task 1",
            description="Description 1",
            priority=3,
            dependencies=["task_2", "task_999"]  # task_999 doesn't exist
        ),
        AnalysisTask(
            id="task_2",
            name="Task 2",
            description="Description 2",
            priority=3,
            dependencies=[]
        )
    ]

    validation = validate_task_dependencies(tasks)

    # Should not be valid
    assert not validation['valid']

    # Should have missing dependencies
    assert len(validation['missing_dependencies']) > 0

    # Should identify task_999 as missing
    missing_dep_ids = [md['missing_dep'] for md in validation['missing_dependencies']]
    assert 'task_999' in missing_dep_ids
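

# Supplementary sketch (not part of the original suite): the cycle tests above
# only exercise a direct two-node cycle, so this hypothetical check probes
# _has_circular_dependency with a longer task_0 -> task_1 -> task_2 -> task_0
# chain, assuming the detector is also meant to flag indirect cycles.
def test_indirect_circular_dependency_detection_sketch():
    """Sketch: an indirect three-task cycle should also be reported as circular."""
    tasks = [
        AnalysisTask(
            id="task_0",
            name="Task 0",
            description="Depends on task_1",
            priority=3,
            dependencies=["task_1"]
        ),
        AnalysisTask(
            id="task_1",
            name="Task 1",
            description="Depends on task_2",
            priority=3,
            dependencies=["task_2"]
        ),
        AnalysisTask(
            id="task_2",
            name="Task 2",
            description="Depends on task_0, closing the cycle",
            priority=3,
            dependencies=["task_0"]
        )
    ]

    # Assumption: detection covers reachability through intermediate tasks,
    # so the indirect cycle should be flagged just like the direct one above.
    assert _has_circular_dependency(tasks)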