266 lines
9.1 KiB
Python
266 lines
9.1 KiB
Python
"""Property-based tests for analysis planning engine."""
|
|
|
|
import pytest
|
|
from hypothesis import given, strategies as st, settings
|
|
|
|
from src.engines.analysis_planning import (
|
|
plan_analysis,
|
|
validate_task_dependencies,
|
|
_fallback_analysis_planning,
|
|
_has_circular_dependency
|
|
)
|
|
from src.models.data_profile import DataProfile, ColumnInfo
|
|
from src.models.requirement_spec import RequirementSpec, AnalysisObjective
|
|
from src.models.analysis_plan import AnalysisTask
|
|
|
|
|
|
# Strategies for generating test data
|
|
@st.composite
def column_info_strategy(draw):
    """Strategy producing randomized ColumnInfo instances for property tests."""
    # Restrict names to letter/digit categories so generated columns stay readable.
    generated_name = draw(
        st.text(
            min_size=1,
            max_size=20,
            alphabet=st.characters(whitelist_categories=('L', 'N')),
        )
    )
    # Remaining fields are drawn inline, in the same order as before,
    # so example generation and shrinking behave identically.
    return ColumnInfo(
        name=generated_name,
        dtype=draw(st.sampled_from(['numeric', 'categorical', 'datetime', 'text'])),
        missing_rate=draw(st.floats(min_value=0.0, max_value=1.0)),
        unique_count=draw(st.integers(min_value=1, max_value=1000)),
        sample_values=[],
        statistics={},
    )
|
|
|
|
|
|
@st.composite
def data_profile_strategy(draw):
    """Strategy producing randomized DataProfile instances for property tests."""
    rows = draw(st.integers(min_value=10, max_value=100000))
    cols = draw(st.lists(column_info_strategy(), min_size=2, max_size=20))
    kind = draw(st.sampled_from(['ticket', 'sales', 'user', 'unknown']))
    score = draw(st.floats(min_value=0.0, max_value=100.0))

    # column_count is derived from the drawn columns so the two always agree.
    return DataProfile(
        file_path='test.csv',
        row_count=rows,
        column_count=len(cols),
        columns=cols,
        inferred_type=kind,
        key_fields={},
        quality_score=score,
        summary=f"Test data with {len(cols)} columns",
    )
|
|
|
|
|
|
@st.composite
def requirement_spec_strategy(draw):
    """Strategy producing randomized RequirementSpec instances for property tests."""
    text_input = draw(st.text(min_size=5, max_size=100))
    objective_total = draw(st.integers(min_value=1, max_value=5))

    # Draws inside the comprehension happen in the same order as the
    # original explicit loop, keeping generation behavior identical.
    generated = [
        AnalysisObjective(
            name=f"Objective {index + 1}",
            description=draw(st.text(min_size=10, max_size=100)),
            metrics=draw(st.lists(st.text(min_size=3, max_size=20), min_size=1, max_size=5)),
            priority=draw(st.integers(min_value=1, max_value=5)),
        )
        for index in range(objective_total)
    ]

    return RequirementSpec(user_input=text_input, objectives=generated)
|
|
|
|
|
|
# Feature: true-ai-agent, Property 6: dynamic task generation
@given(
    data_profile=data_profile_strategy(),
    requirement=requirement_spec_strategy()
)
@settings(max_examples=20, deadline=None)
def test_dynamic_task_generation(data_profile, requirement):
    """
    Property 6: For any data profile and requirement spec, the analysis
    planning engine should be able to generate a non-empty task list, with
    each task containing unique ID, description, priority, and required tools.

    Validates: scenario 1 acceptance criterion 2, FR-3.1
    """
    # The fallback planner keeps this test independent of any external API.
    plan = _fallback_analysis_planning(data_profile, requirement)

    # At least one task must come out of planning.
    assert len(plan.tasks) > 0, "Should generate at least one task"

    allowed_statuses = {'pending', 'running', 'completed', 'failed', 'skipped'}
    seen_ids = set()
    for task in plan.tasks:
        # IDs must never repeat within a plan.
        assert task.id not in seen_ids, f"Task ID {task.id} is not unique"
        seen_ids.add(task.id)

        # Every task carries the full set of required fields.
        assert len(task.name) > 0, "Task name should not be empty"
        assert len(task.description) > 0, "Task description should not be empty"
        assert 1 <= task.priority <= 5, f"Task priority {task.priority} should be between 1 and 5"
        assert isinstance(task.required_tools, list), "Required tools should be a list"
        assert isinstance(task.dependencies, list), "Dependencies should be a list"
        assert task.status in allowed_statuses, f"Invalid task status: {task.status}"

    # The plan echoes the requirement's objectives.
    assert len(plan.objectives) > 0, "Plan should have objectives"

    # A negative duration estimate would be nonsensical.
    assert plan.estimated_duration >= 0, "Estimated duration should be non-negative"
|
|
|
|
|
|
# Feature: true-ai-agent, Property 7: task dependency consistency
@given(
    data_profile=data_profile_strategy(),
    requirement=requirement_spec_strategy()
)
@settings(max_examples=20, deadline=None)
def test_task_dependency_consistency(data_profile, requirement):
    """
    Property 7: For any generated analysis plan, all task dependencies should
    form a directed acyclic graph (DAG), with no circular dependencies.

    Validates: FR-3.1
    """
    # The fallback planner keeps this test independent of any external API.
    plan = _fallback_analysis_planning(data_profile, requirement)

    # No cycles anywhere in the dependency graph.
    assert not _has_circular_dependency(plan.tasks), \
        "Task dependencies should not form a cycle"

    # Every referenced dependency must point at a real task other than itself.
    known_ids = {item.id for item in plan.tasks}
    for item in plan.tasks:
        for dep in item.dependencies:
            assert dep in known_ids, \
                f"Task {item.id} depends on non-existent task {dep}"
            assert dep != item.id, \
                f"Task {item.id} should not depend on itself"

    # The engine's own validator must agree with the manual checks above.
    report = validate_task_dependencies(plan.tasks)
    assert report['valid'], "Task dependencies should be valid"
    assert report['forms_dag'], "Task dependencies should form a DAG"
    assert not report['has_circular_dependency'], "Should not have circular dependencies"
    assert len(report['missing_dependencies']) == 0, "Should not have missing dependencies"
|
|
|
|
|
|
# Feature: true-ai-agent, Property 6: dynamic task generation (priority ordering)
@given(
    data_profile=data_profile_strategy(),
    requirement=requirement_spec_strategy()
)
@settings(max_examples=20, deadline=None)
def test_task_priority_ordering(data_profile, requirement):
    """
    Property 6 (extended): Tasks should respect objective priorities.
    High-priority objectives should generate high-priority tasks.

    Validates: FR-3.2
    """
    # The fallback planner keeps this test independent of any external API.
    plan = _fallback_analysis_planning(data_profile, requirement)

    # Verify: all tasks carry a priority in the valid 1..5 range.
    for task in plan.tasks:
        assert 1 <= task.priority <= 5, \
            f"Task priority {task.priority} should be between 1 and 5"

    # Priority propagation is a soft requirement: the fallback planner does
    # not guarantee a high-priority objective yields a high-priority task,
    # so we only re-check structural validity in that case.
    # NOTE(review): the original computed an unused `high_priority_tasks`
    # list here; it was dead code and has been removed.
    max_obj_priority = max(obj.priority for obj in plan.objectives)
    if max_obj_priority >= 4:
        assert all(1 <= t.priority <= 5 for t in plan.tasks)
|
|
|
|
|
|
# Test circular dependency detection
@given(
    num_tasks=st.integers(min_value=2, max_value=10)
)
@settings(max_examples=10, deadline=None)
def test_circular_dependency_detection(num_tasks):
    """
    Test that circular dependency detection works correctly.

    Builds a dependency-free task list (no cycle expected), then a
    two-task mutual dependency (cycle expected).
    """
    # Create tasks with no dependencies (should be valid)
    tasks = [
        AnalysisTask(
            id=f"task_{i}",
            name=f"Task {i}",
            description=f"Description {i}",
            priority=3,
            dependencies=[]
        )
        for i in range(num_tasks)
    ]

    # Should not have circular dependencies
    assert not _has_circular_dependency(tasks)

    # Create a simple cycle: task_0 -> task_1 -> task_0.
    # NOTE(review): the strategy draws num_tasks with min_value=2, so the
    # original `if num_tasks >= 2` guard was always true; it has been removed.
    tasks_with_cycle = [
        AnalysisTask(
            id="task_0",
            name="Task 0",
            description="Description 0",
            priority=3,
            dependencies=["task_1"]
        ),
        AnalysisTask(
            id="task_1",
            name="Task 1",
            description="Description 1",
            priority=3,
            dependencies=["task_0"]
        )
    ]

    # Should detect the cycle
    assert _has_circular_dependency(tasks_with_cycle)
|
|
|
|
|
|
# Test dependency validation
def test_dependency_validation_with_missing_deps():
    """Test validation detects missing dependencies."""
    # task_1 references one real task and one phantom task.
    broken = AnalysisTask(
        id="task_1",
        name="Task 1",
        description="Description 1",
        priority=3,
        dependencies=["task_2", "task_999"]  # task_999 doesn't exist
    )
    existing = AnalysisTask(
        id="task_2",
        name="Task 2",
        description="Description 2",
        priority=3,
        dependencies=[]
    )

    outcome = validate_task_dependencies([broken, existing])

    # The plan as a whole must be rejected.
    assert not outcome['valid']

    # At least one missing dependency must be reported...
    assert len(outcome['missing_dependencies']) > 0

    # ...and the phantom task_999 must be among them.
    reported = [entry['missing_dep'] for entry in outcome['missing_dependencies']]
    assert 'task_999' in reported
|