203 lines
6.0 KiB
Python
203 lines
6.0 KiB
Python
|
|
"""Property-based tests for task execution engine."""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
import pandas as pd
|
||
|
|
from hypothesis import given, strategies as st, settings
|
||
|
|
|
||
|
|
from src.engines.task_execution import (
|
||
|
|
execute_task,
|
||
|
|
call_tool,
|
||
|
|
extract_insights,
|
||
|
|
_fallback_task_execution
|
||
|
|
)
|
||
|
|
from src.models.analysis_plan import AnalysisTask
|
||
|
|
from src.data_access import DataAccessLayer
|
||
|
|
from src.tools.stats_tools import CalculateStatisticsTool
|
||
|
|
|
||
|
|
|
||
|
|
# Feature: true-ai-agent, Property 13: 任务执行完整性
|
||
|
|
@given(
|
||
|
|
task_name=st.text(min_size=5, max_size=50),
|
||
|
|
task_description=st.text(min_size=10, max_size=100)
|
||
|
|
)
|
||
|
|
@settings(max_examples=10, deadline=None)
|
||
|
|
def test_task_execution_completeness(task_name, task_description):
|
||
|
|
"""
|
||
|
|
Property 13: For any valid analysis plan and tool set, the task execution
|
||
|
|
engine should be able to execute all non-skipped tasks and generate an
|
||
|
|
analysis result (success or failure) for each task.
|
||
|
|
|
||
|
|
Validates: 场景1验收.3, FR-5.1
|
||
|
|
"""
|
||
|
|
# Create sample data
|
||
|
|
sample_data = pd.DataFrame({
|
||
|
|
'value': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
||
|
|
'category': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']
|
||
|
|
})
|
||
|
|
|
||
|
|
# Create sample tools
|
||
|
|
sample_tools = [CalculateStatisticsTool()]
|
||
|
|
|
||
|
|
# Create task
|
||
|
|
task = AnalysisTask(
|
||
|
|
id="test_task",
|
||
|
|
name=task_name,
|
||
|
|
description=task_description,
|
||
|
|
priority=3,
|
||
|
|
required_tools=['calculate_statistics']
|
||
|
|
)
|
||
|
|
|
||
|
|
# Create data access
|
||
|
|
data_access = DataAccessLayer(sample_data)
|
||
|
|
|
||
|
|
# Execute task (using fallback to avoid API dependency)
|
||
|
|
result = _fallback_task_execution(task, sample_tools, data_access)
|
||
|
|
|
||
|
|
# Verify: Should return AnalysisResult
|
||
|
|
assert result is not None
|
||
|
|
assert result.task_id == task.id
|
||
|
|
assert result.task_name == task.name
|
||
|
|
|
||
|
|
# Verify: Should have success status
|
||
|
|
assert isinstance(result.success, bool)
|
||
|
|
|
||
|
|
# Verify: Should have execution time
|
||
|
|
assert result.execution_time >= 0
|
||
|
|
|
||
|
|
# Verify: If failed, should have error message
|
||
|
|
if not result.success:
|
||
|
|
assert result.error is not None
|
||
|
|
|
||
|
|
# Verify: Should have insights (even if empty)
|
||
|
|
assert isinstance(result.insights, list)
|
||
|
|
|
||
|
|
|
||
|
|
# Feature: true-ai-agent, Property 14: ReAct 循环终止
|
||
|
|
def test_react_loop_termination():
|
||
|
|
"""
|
||
|
|
Property 14: For any analysis task, the ReAct execution loop should
|
||
|
|
terminate within a finite number of steps (either complete the task
|
||
|
|
or reach maximum iterations), and should not loop infinitely.
|
||
|
|
|
||
|
|
Validates: FR-5.1
|
||
|
|
"""
|
||
|
|
sample_data = pd.DataFrame({
|
||
|
|
'value': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
||
|
|
'category': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']
|
||
|
|
})
|
||
|
|
sample_tools = [CalculateStatisticsTool()]
|
||
|
|
|
||
|
|
task = AnalysisTask(
|
||
|
|
id="test_task",
|
||
|
|
name="Test Task",
|
||
|
|
description="Calculate statistics",
|
||
|
|
priority=3,
|
||
|
|
required_tools=['calculate_statistics']
|
||
|
|
)
|
||
|
|
|
||
|
|
data_access = DataAccessLayer(sample_data)
|
||
|
|
|
||
|
|
# Execute with limited iterations
|
||
|
|
result = _fallback_task_execution(task, sample_tools, data_access)
|
||
|
|
|
||
|
|
# Verify: Should complete (not hang)
|
||
|
|
assert result is not None
|
||
|
|
|
||
|
|
# Verify: Should have finite execution time
|
||
|
|
assert result.execution_time < 60, "Execution should complete within 60 seconds"
|
||
|
|
|
||
|
|
|
||
|
|
# Feature: true-ai-agent, Property 15: 异常识别
|
||
|
|
def test_anomaly_identification():
|
||
|
|
"""
|
||
|
|
Property 15: For any data containing obvious anomalies (e.g., a category
|
||
|
|
accounting for >80% of data, or values exceeding 3 standard deviations),
|
||
|
|
the task execution engine should be able to mark the anomaly in the
|
||
|
|
analysis result insights.
|
||
|
|
|
||
|
|
Validates: 场景4验收.1
|
||
|
|
"""
|
||
|
|
# Create data with anomaly (category A is 90%)
|
||
|
|
anomaly_data = pd.DataFrame({
|
||
|
|
'value': list(range(100)),
|
||
|
|
'category': ['A'] * 90 + ['B'] * 10
|
||
|
|
})
|
||
|
|
|
||
|
|
task = AnalysisTask(
|
||
|
|
id="test_task",
|
||
|
|
name="Anomaly Detection",
|
||
|
|
description="Detect anomalies in data",
|
||
|
|
priority=3,
|
||
|
|
required_tools=['calculate_statistics']
|
||
|
|
)
|
||
|
|
|
||
|
|
data_access = DataAccessLayer(anomaly_data)
|
||
|
|
tools = [CalculateStatisticsTool()]
|
||
|
|
|
||
|
|
result = _fallback_task_execution(task, tools, data_access)
|
||
|
|
|
||
|
|
# Verify: Should complete successfully
|
||
|
|
assert result.success or result.error is not None
|
||
|
|
|
||
|
|
# Verify: Should have insights
|
||
|
|
assert isinstance(result.insights, list)
|
||
|
|
|
||
|
|
|
||
|
|
# Test tool calling
|
||
|
|
def test_call_tool_success():
|
||
|
|
"""Test successful tool calling."""
|
||
|
|
sample_data = pd.DataFrame({
|
||
|
|
'value': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
||
|
|
'category': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']
|
||
|
|
})
|
||
|
|
|
||
|
|
tool = CalculateStatisticsTool()
|
||
|
|
data_access = DataAccessLayer(sample_data)
|
||
|
|
|
||
|
|
result = call_tool(tool, data_access, column='value')
|
||
|
|
|
||
|
|
# Should return result dict
|
||
|
|
assert isinstance(result, dict)
|
||
|
|
assert 'success' in result
|
||
|
|
|
||
|
|
|
||
|
|
# Test insight extraction
|
||
|
|
def test_extract_insights_without_ai():
|
||
|
|
"""Test insight extraction without AI."""
|
||
|
|
history = [
|
||
|
|
{'type': 'thought', 'content': 'Analyzing data'},
|
||
|
|
{'type': 'action', 'tool': 'calculate_statistics'},
|
||
|
|
{'type': 'observation', 'result': {'data': {'mean': 5.5}}}
|
||
|
|
]
|
||
|
|
|
||
|
|
insights = extract_insights(history, client=None)
|
||
|
|
|
||
|
|
# Should return list of insights
|
||
|
|
assert isinstance(insights, list)
|
||
|
|
assert len(insights) > 0
|
||
|
|
|
||
|
|
|
||
|
|
# Test execution with empty tools
|
||
|
|
def test_execution_with_no_tools():
|
||
|
|
"""Test execution when no tools are available."""
|
||
|
|
sample_data = pd.DataFrame({
|
||
|
|
'value': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
||
|
|
'category': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']
|
||
|
|
})
|
||
|
|
|
||
|
|
task = AnalysisTask(
|
||
|
|
id="test_task",
|
||
|
|
name="Test Task",
|
||
|
|
description="Test",
|
||
|
|
priority=3,
|
||
|
|
required_tools=['nonexistent_tool']
|
||
|
|
)
|
||
|
|
|
||
|
|
data_access = DataAccessLayer(sample_data)
|
||
|
|
|
||
|
|
result = _fallback_task_execution(task, [], data_access)
|
||
|
|
|
||
|
|
# Should fail gracefully
|
||
|
|
assert not result.success
|
||
|
|
assert result.error is not None
|