# vibe_data_ana/tests/test_task_execution_properties.py
"""Property-based tests for task execution engine."""
import pytest
import pandas as pd
from hypothesis import given, strategies as st, settings
from src.engines.task_execution import (
execute_task,
call_tool,
extract_insights,
_fallback_task_execution
)
from src.models.analysis_plan import AnalysisTask
from src.data_access import DataAccessLayer
from src.tools.stats_tools import CalculateStatisticsTool
# Feature: true-ai-agent, Property 13: task execution completeness
@given(
    task_name=st.text(min_size=5, max_size=50),
    task_description=st.text(min_size=10, max_size=100)
)
@settings(max_examples=10, deadline=None)
def test_task_execution_completeness(task_name, task_description):
    """
    Property 13: For any valid analysis plan and tool set, the task execution
    engine should be able to execute all non-skipped tasks and generate an
    analysis result (success or failure) for each task.

    Validates: 场景1验收.3, FR-5.1
    """
    # Fixture: ten numeric values alternating evenly across two categories.
    frame = pd.DataFrame({
        'value': list(range(1, 11)),
        'category': ['A', 'B'] * 5,
    })
    toolset = [CalculateStatisticsTool()]
    task = AnalysisTask(
        id="test_task",
        name=task_name,
        description=task_description,
        priority=3,
        required_tools=['calculate_statistics'],
    )
    access = DataAccessLayer(frame)

    # Run through the non-AI fallback path so no external API is needed.
    result = _fallback_task_execution(task, toolset, access)

    # An AnalysisResult must always come back, tied to the task that ran.
    assert result is not None
    assert result.task_id == task.id
    assert result.task_name == task.name
    # Outcome flag and timing must be well-formed.
    assert isinstance(result.success, bool)
    assert result.execution_time >= 0
    # A failed run must carry an error message.
    if not result.success:
        assert result.error is not None
    # Insights are always a list, possibly empty.
    assert isinstance(result.insights, list)
# Feature: true-ai-agent, Property 14: ReAct loop termination
def test_react_loop_termination():
    """
    Property 14: For any analysis task, the ReAct execution loop should
    terminate within a finite number of steps (either complete the task
    or reach maximum iterations), and should not loop infinitely.

    Validates: FR-5.1
    """
    frame = pd.DataFrame({
        'value': list(range(1, 11)),
        'category': ['A', 'B'] * 5,
    })
    toolset = [CalculateStatisticsTool()]
    task = AnalysisTask(
        id="test_task",
        name="Test Task",
        description="Calculate statistics",
        priority=3,
        required_tools=['calculate_statistics'],
    )
    access = DataAccessLayer(frame)

    # The fallback path bounds its own iterations; it must return promptly.
    result = _fallback_task_execution(task, toolset, access)

    # Returning at all shows the loop did not hang.
    assert result is not None
    # Guard against pathological runtimes.
    assert result.execution_time < 60, "Execution should complete within 60 seconds"
# Feature: true-ai-agent, Property 15: anomaly identification
def test_anomaly_identification():
    """
    Property 15: For any data containing obvious anomalies (e.g., a category
    accounting for >80% of data, or values exceeding 3 standard deviations),
    the task execution engine should be able to mark the anomaly in the
    analysis result insights.

    Validates: 场景4验收.1
    """
    # Skewed fixture: category 'A' covers 90% of the rows.
    skewed = pd.DataFrame({
        'value': list(range(100)),
        'category': ['A'] * 90 + ['B'] * 10,
    })
    task = AnalysisTask(
        id="test_task",
        name="Anomaly Detection",
        description="Detect anomalies in data",
        priority=3,
        required_tools=['calculate_statistics'],
    )
    access = DataAccessLayer(skewed)
    toolset = [CalculateStatisticsTool()]

    result = _fallback_task_execution(task, toolset, access)

    # Either the run succeeded or it reported why it failed.
    assert result.success or result.error is not None
    # Insights (where an anomaly would be flagged) are always a list.
    assert isinstance(result.insights, list)
# Tool-call helper: a direct call should yield a result dict with a status flag.
def test_call_tool_success():
    """Test successful tool calling."""
    frame = pd.DataFrame({
        'value': list(range(1, 11)),
        'category': ['A', 'B'] * 5,
    })
    access = DataAccessLayer(frame)

    outcome = call_tool(CalculateStatisticsTool(), access, column='value')

    # call_tool wraps every tool run in a dict reporting success/failure.
    assert isinstance(outcome, dict)
    assert 'success' in outcome
# Insight extraction must work with no AI client attached.
def test_extract_insights_without_ai():
    """Test insight extraction without AI."""
    # A minimal ReAct trace: one thought, one action, one observation.
    trace = [
        {'type': 'thought', 'content': 'Analyzing data'},
        {'type': 'action', 'tool': 'calculate_statistics'},
        {'type': 'observation', 'result': {'data': {'mean': 5.5}}},
    ]

    found = extract_insights(trace, client=None)

    # Even the rule-based path should surface at least one insight.
    assert isinstance(found, list)
    assert len(found) > 0
# Missing tools must produce a graceful failure, not an exception.
def test_execution_with_no_tools():
    """Test execution when no tools are available."""
    frame = pd.DataFrame({
        'value': list(range(1, 11)),
        'category': ['A', 'B'] * 5,
    })
    task = AnalysisTask(
        id="test_task",
        name="Test Task",
        description="Test",
        priority=3,
        required_tools=['nonexistent_tool'],
    )
    access = DataAccessLayer(frame)

    # An empty toolset means the required tool can never be resolved.
    result = _fallback_task_execution(task, [], access)

    # The engine should report failure with an explanatory error.
    assert not result.success
    assert result.error is not None