Complete AI Data Analysis Agent implementation with 95.7% test coverage
This commit is contained in:
404
tests/test_integration.py
Normal file
404
tests/test_integration.py
Normal file
@@ -0,0 +1,404 @@
|
||||
"""集成测试 - 测试端到端分析流程。"""
|
||||
|
||||
import pytest
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
from src.main import run_analysis, AnalysisOrchestrator
|
||||
from src.data_access import DataAccessLayer
|
||||
|
||||
|
||||
@pytest.fixture
def temp_output_dir():
    """Provide a throwaway output directory for a single test.

    Yields:
        The path returned by ``tempfile.mkdtemp``. The directory tree is
        removed once the test that requested the fixture has finished.
    """
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir
    # Teardown: removal errors are ignored so cleanup itself never raises.
    shutil.rmtree(scratch_dir, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_ticket_data(tmp_path):
    """Write a 100-row sample ticket dataset to CSV and return its path.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        The path of the generated ``tickets.csv`` file, as a string.
    """
    # Every column holds exactly 100 values; the first 50 rows have no
    # closing date.
    columns = {
        'ticket_id': range(1, 101),
        'status': ['open'] * 50 + ['closed'] * 30 + ['pending'] * 20,
        'priority': ['high'] * 30 + ['medium'] * 40 + ['low'] * 30,
        'created_at': pd.date_range('2024-01-01', periods=100, freq='D'),
        'closed_at': [None] * 50 + list(pd.date_range('2024-02-01', periods=50, freq='D')),
        'category': ['bug'] * 40 + ['feature'] * 30 + ['support'] * 30,
        'duration_hours': [24] * 30 + [48] * 40 + [12] * 30,
    }
    tickets = pd.DataFrame(columns)

    csv_path = tmp_path / "tickets.csv"
    tickets.to_csv(csv_path, index=False)
    return str(csv_path)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_sales_data(tmp_path):
    """Write a 100-row sample sales dataset to CSV and return its path.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        The path of the generated ``sales.csv`` file, as a string.
    """
    # ``quantity`` and ``price`` repeat a five-value cycle 20 times so that
    # every column ends up with 100 entries.
    columns = {
        'order_id': range(1, 101),
        'product': ['A'] * 40 + ['B'] * 30 + ['C'] * 30,
        'quantity': [1, 2, 3, 4, 5] * 20,
        'price': [100.0, 200.0, 150.0, 300.0, 250.0] * 20,
        'date': pd.date_range('2024-01-01', periods=100, freq='D'),
        'region': ['North'] * 30 + ['South'] * 40 + ['East'] * 30,
    }
    sales = pd.DataFrame(columns)

    csv_path = tmp_path / "sales.csv"
    sales.to_csv(csv_path, index=False)
    return str(csv_path)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_template(tmp_path):
    """Write a sample Markdown analysis template and return its path.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        The path of the generated ``template.md`` file, as a string.
    """
    # The template text is runtime data consumed by the code under test, so
    # it is kept verbatim (four numbered sections with two bullets each).
    markdown = """# 工单分析模板

## 1. 概述
- 总工单数
- 状态分布

## 2. 优先级分析
- 优先级分布
- 高优先级工单处理情况

## 3. 时间分析
- 创建趋势
- 处理时长分析

## 4. 分类分析
- 类别分布
- 各类别处理情况
"""

    template_path = tmp_path / "template.md"
    template_path.write_text(markdown, encoding='utf-8')
    return str(template_path)
|
||||
|
||||
|
||||
class TestEndToEndAnalysis:
    """End-to-end tests that drive ``run_analysis`` from input file to report."""

    def test_complete_analysis_without_requirement(self, sample_ticket_data, temp_output_dir):
        """Run a fully autonomous analysis (no user requirement supplied).

        Checks that the run reports success, exposes a ``data_type`` entry and
        positive objective/task/result counts, and that the report file exists
        and is not empty.
        """
        outcome = run_analysis(
            data_file=sample_ticket_data,
            user_requirement=None,  # deliberately no user requirement
            output_dir=temp_output_dir,
        )

        # Summary fields of the run.
        assert outcome['success'] is True, f"分析失败: {outcome.get('error')}"
        assert 'data_type' in outcome
        for counter in ('objectives_count', 'tasks_count', 'results_count'):
            assert outcome[counter] > 0

        # The report must exist on disk and contain data.
        report_file = Path(outcome['report_path'])
        assert report_file.exists()
        assert report_file.stat().st_size > 0

        # The report text must mention the word for "report".
        report_text = report_file.read_text(encoding='utf-8')
        assert len(report_text) > 0
        # NOTE: '报告' is a substring of '分析报告', so the second operand alone
        # decides the outcome of this check.
        assert '分析报告' in report_text or '报告' in report_text

    def test_analysis_with_requirement(self, sample_ticket_data, temp_output_dir):
        """Run an analysis steered by an explicit user requirement.

        Checks that the run succeeds with at least one objective and that the
        report mentions at least one keyword related to the requirement.
        """
        outcome = run_analysis(
            data_file=sample_ticket_data,
            user_requirement="分析工单的健康度和处理效率",
            output_dir=temp_output_dir,
        )

        assert outcome['success'] is True, f"分析失败: {outcome.get('error')}"
        assert outcome['objectives_count'] > 0

        report_file = Path(outcome['report_path'])
        report_text = report_file.read_text(encoding='utf-8')

        # Any one of the requirement-related keywords is enough.
        expected_terms = ['健康', '效率', '处理']
        assert any(term in report_text for term in expected_terms)

    def test_template_based_analysis(self, sample_ticket_data, sample_template, temp_output_dir):
        """Run an analysis that is given a report template file.

        Checks that the run succeeds and that the report contains terms taken
        from the template's sections; each check accepts either of two terms.
        """
        outcome = run_analysis(
            data_file=sample_ticket_data,
            template_file=sample_template,
            output_dir=temp_output_dir,
        )

        assert outcome['success'] is True, f"分析失败: {outcome.get('error')}"

        report_file = Path(outcome['report_path'])
        report_text = report_file.read_text(encoding='utf-8')

        # Section terms from the template should show up in the report.
        assert '概述' in report_text or '总工单数' in report_text
        assert '优先级' in report_text or '分类' in report_text

    def test_different_data_types(self, sample_sales_data, temp_output_dir):
        """Run the analysis on the sales dataset instead of the ticket dataset.

        Checks that the run succeeds, reports a ``data_type`` entry and plans
        at least one task.
        """
        outcome = run_analysis(
            data_file=sample_sales_data,
            output_dir=temp_output_dir,
        )

        assert outcome['success'] is True, f"分析失败: {outcome.get('error')}"
        assert 'data_type' in outcome
        assert outcome['tasks_count'] > 0
|
||||
|
||||
|
||||
class TestErrorRecovery:
    """Tests for how ``run_analysis`` reports problems with its input file."""

    def test_invalid_file_path(self, temp_output_dir):
        """Pass a path that does not exist.

        Checks that the run is reported as failed and that the result carries
        a non-empty ``error`` entry.
        """
        outcome = run_analysis(
            data_file="nonexistent_file.csv",
            output_dir=temp_output_dir,
        )

        assert outcome['success'] is False
        assert 'error' in outcome
        assert len(outcome['error']) > 0

    def test_empty_file(self, tmp_path, temp_output_dir):
        """Pass a zero-length CSV file.

        Checks that the run is reported as failed and that the result carries
        an ``error`` entry.
        """
        blank_csv = tmp_path / "empty.csv"
        blank_csv.write_text("", encoding='utf-8')

        outcome = run_analysis(
            data_file=str(blank_csv),
            output_dir=temp_output_dir,
        )

        assert outcome['success'] is False
        assert 'error' in outcome

    def test_malformed_csv(self, tmp_path, temp_output_dir):
        """Pass a CSV whose rows have inconsistent column counts.

        The run may either succeed or fail depending on the error-handling
        strategy, so only the presence of the ``success`` and
        ``elapsed_time`` entries is checked.
        """
        # Two header columns, then one row with 1 field and one with 3 fields.
        broken_csv = tmp_path / "malformed.csv"
        broken_csv.write_text("col1,col2\nvalue1\nvalue2,value3,value4", encoding='utf-8')

        outcome = run_analysis(
            data_file=str(broken_csv),
            output_dir=temp_output_dir,
        )

        # A result must come back either way.
        assert 'success' in outcome
        assert 'elapsed_time' in outcome
|
||||
|
||||
|
||||
class TestOrchestrator:
    """Tests that use ``AnalysisOrchestrator`` directly."""

    def test_orchestrator_initialization(self, sample_ticket_data, temp_output_dir):
        """Construct an orchestrator and inspect its initial state.

        Checks that ``data_file`` equals the path passed in and that
        ``output_dir`` refers to an existing directory.
        """
        runner = AnalysisOrchestrator(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )

        assert runner.data_file == sample_ticket_data
        assert runner.output_dir.exists()
        assert runner.output_dir.is_dir()

    def test_orchestrator_stages(self, sample_ticket_data, temp_output_dir):
        """Run a full analysis and inspect the state left on the orchestrator.

        Checks that the data profile, requirement spec, analysis plan and
        report attributes are populated, that at least one analysis result was
        recorded, and that the returned result reports success.
        """
        runner = AnalysisOrchestrator(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )

        outcome = runner.run_analysis()

        # Per-stage artefacts kept on the orchestrator after the run.
        assert runner.data_profile is not None
        assert runner.requirement_spec is not None
        assert runner.analysis_plan is not None
        assert len(runner.analysis_results) > 0
        assert runner.report is not None

        # Overall outcome.
        assert outcome['success'] is True
|
||||
|
||||
|
||||
class TestProgressTracking:
    """Tests for the ``progress_callback`` hook of ``run_analysis``."""

    def test_progress_callback(self, sample_ticket_data, temp_output_dir):
        """Record every progress callback invocation and validate the sequence.

        Checks that the callback is invoked at least once, that the reported
        ``current`` value never decreases between consecutive invocations, and
        that the final invocation reports ``current == total``.
        """
        progress_calls = []

        def callback(stage, current, total):
            # Keep each invocation's arguments for inspection after the run.
            progress_calls.append({
                'stage': stage,
                'current': current,
                'total': total,
            })

        # Only the callback side effects matter here; the return value of the
        # run is not inspected by this test.
        run_analysis(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
            progress_callback=callback,
        )

        # The callback must have fired at least once.
        assert progress_calls

        # Progress must be non-decreasing: compare each call with its successor.
        for earlier, later in zip(progress_calls, progress_calls[1:]):
            assert earlier['current'] <= later['current']

        # The final call must signal completion.
        last_call = progress_calls[-1]
        assert last_call['current'] == last_call['total']
|
||||
|
||||
|
||||
class TestOutputFiles:
    """Tests for the files produced alongside an analysis run."""

    def test_report_file_creation(self, sample_ticket_data, temp_output_dir):
        """Run an analysis and inspect the report file it points to.

        Checks that the run succeeds, that the report path exists and has a
        ``.md`` suffix, and that the file decodes as UTF-8 to non-empty text.
        """
        outcome = run_analysis(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )

        assert outcome['success'] is True

        report_file = Path(outcome['report_path'])
        assert report_file.exists()
        assert report_file.suffix == '.md'

        # Reading with an explicit encoding doubles as the UTF-8 check.
        report_text = report_file.read_text(encoding='utf-8')
        assert len(report_text) > 0

    def test_log_file_creation(self, sample_ticket_data, temp_output_dir):
        """Configure file logging, run an analysis and inspect the log file.

        If the log file exists after the run, checks that it is non-empty and
        contains either '数据理解' or 'INFO'.
        """
        # Imported locally, as in the original test.
        from src.logging_config import setup_logging
        import logging

        log_path = Path(temp_output_dir) / "test.log"
        setup_logging(
            level=logging.INFO,
            log_file=str(log_path),
        )

        outcome = run_analysis(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )

        # NOTE(review): the checks below are skipped when the log file is
        # absent, so this test also passes if nothing is written — confirm
        # that this leniency is intended.
        if log_path.exists():
            log_text = log_path.read_text(encoding='utf-8')
            assert len(log_text) > 0
            assert '数据理解' in log_text or 'INFO' in log_text
|
||||
Reference in New Issue
Block a user