Files
vibe_data_ana/tests/test_integration.py

405 lines
12 KiB
Python
Raw Normal View History

"""集成测试 - 测试端到端分析流程。"""
import pytest
import pandas as pd
from pathlib import Path
import tempfile
import shutil
from src.main import run_analysis, AnalysisOrchestrator
from src.data_access import DataAccessLayer
@pytest.fixture
def temp_output_dir():
    """Provide a scratch output directory and delete it after the test.

    Yields:
        str: Path of a directory freshly created by ``tempfile.mkdtemp``.
    """
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir
    # Teardown: best-effort removal; failures while deleting are ignored.
    shutil.rmtree(scratch_dir, ignore_errors=True)
@pytest.fixture
def sample_ticket_data(tmp_path):
    """Write a 100-row sample ticket dataset to CSV and return its path.

    The first 50 rows are ``open`` tickets with no ``closed_at`` value; the
    remaining 50 rows (30 ``closed`` + 20 ``pending``) carry a daily
    ``closed_at`` timestamp starting at 2024-02-01.

    Args:
        tmp_path: Directory (pytest's ``tmp_path`` fixture) that receives
            ``tickets.csv``.

    Returns:
        str: Path of the written CSV file.
    """
    columns = {
        'ticket_id': range(1, 101),
        'status': ['open'] * 50 + ['closed'] * 30 + ['pending'] * 20,
        'priority': ['high'] * 30 + ['medium'] * 40 + ['low'] * 30,
        'created_at': pd.date_range('2024-01-01', periods=100, freq='D'),
        'closed_at': [None] * 50 + list(pd.date_range('2024-02-01', periods=50, freq='D')),
        'category': ['bug'] * 40 + ['feature'] * 30 + ['support'] * 30,
        'duration_hours': [24] * 30 + [48] * 40 + [12] * 30,
    }
    csv_path = tmp_path / "tickets.csv"
    pd.DataFrame(columns).to_csv(csv_path, index=False)
    return str(csv_path)
@pytest.fixture
def sample_sales_data(tmp_path):
    """Write a 100-row sample sales dataset to CSV and return its path.

    Columns: ``order_id`` (1..100), ``product`` (A/B/C), ``quantity`` and
    ``price`` (five-value patterns repeated 20 times), ``date`` (daily from
    2024-01-01) and ``region`` (North/South/East).

    Args:
        tmp_path: Directory (pytest's ``tmp_path`` fixture) that receives
            ``sales.csv``.

    Returns:
        str: Path of the written CSV file.
    """
    columns = {
        'order_id': range(1, 101),
        'product': ['A'] * 40 + ['B'] * 30 + ['C'] * 30,
        'quantity': [1, 2, 3, 4, 5] * 20,
        'price': [100.0, 200.0, 150.0, 300.0, 250.0] * 20,
        'date': pd.date_range('2024-01-01', periods=100, freq='D'),
        'region': ['North'] * 30 + ['South'] * 40 + ['East'] * 30,
    }
    csv_path = tmp_path / "sales.csv"
    pd.DataFrame(columns).to_csv(csv_path, index=False)
    return str(csv_path)
@pytest.fixture
def sample_template(tmp_path):
    """Write a sample Markdown report template and return its path.

    The template has four numbered sections (overview, priority analysis,
    time analysis, category analysis), each with two bullet items.

    Args:
        tmp_path: Directory (pytest's ``tmp_path`` fixture) that receives
            ``template.md``.

    Returns:
        str: Path of the UTF-8 encoded template file.
    """
    markdown = """# 工单分析模板
## 1. 概述
- 总工单数
- 状态分布
## 2. 优先级分析
- 优先级分布
- 高优先级工单处理情况
## 3. 时间分析
- 创建趋势
- 处理时长分析
## 4. 分类分析
- 类别分布
- 各类别处理情况
"""
    template_path = tmp_path / "template.md"
    template_path.write_text(markdown, encoding='utf-8')
    return str(template_path)
class TestEndToEndAnalysis:
    """End-to-end tests that drive ``run_analysis`` on sample datasets."""

    def test_complete_analysis_without_requirement(self, sample_ticket_data, temp_output_dir):
        """Run a fully autonomous analysis (no user requirement supplied).

        Asserts that the run succeeds, reports a ``data_type``, produces at
        least one objective, task and result, and writes a non-empty report
        whose text mentions "报告".
        """
        result = run_analysis(
            data_file=sample_ticket_data,
            user_requirement=None,  # deliberately no user requirement
            output_dir=temp_output_dir,
        )

        # Summary fields of the returned result.
        assert result['success'] is True, f"分析失败: {result.get('error')}"
        assert 'data_type' in result
        for counter in ('objectives_count', 'tasks_count', 'results_count'):
            assert result[counter] > 0

        # The report file must exist on disk and be non-empty.
        report_file = Path(result['report_path'])
        assert report_file.exists()
        assert report_file.stat().st_size > 0

        # The report text must be non-empty and mention a report heading.
        text = report_file.read_text(encoding='utf-8')
        assert len(text) > 0
        assert any(marker in text for marker in ('分析报告', '报告'))

    def test_analysis_with_requirement(self, sample_ticket_data, temp_output_dir):
        """Run an analysis driven by an explicit user requirement.

        Asserts that the run succeeds, yields at least one objective, and
        that the report contains at least one requirement-related keyword.
        """
        result = run_analysis(
            data_file=sample_ticket_data,
            user_requirement="分析工单的健康度和处理效率",
            output_dir=temp_output_dir,
        )

        assert result['success'] is True, f"分析失败: {result.get('error')}"
        assert result['objectives_count'] > 0

        # At least one keyword tied to the requirement must appear.
        text = Path(result['report_path']).read_text(encoding='utf-8')
        expected_keywords = ['健康', '效率', '处理']
        assert any(keyword in text for keyword in expected_keywords)

    def test_template_based_analysis(self, sample_ticket_data, sample_template, temp_output_dir):
        """Run an analysis guided by a Markdown template.

        Asserts that the run succeeds and that the report mentions section
        topics taken from the template.
        """
        result = run_analysis(
            data_file=sample_ticket_data,
            template_file=sample_template,
            output_dir=temp_output_dir,
        )

        assert result['success'] is True, f"分析失败: {result.get('error')}"

        # The report should reflect sections defined in the template.
        text = Path(result['report_path']).read_text(encoding='utf-8')
        assert any(topic in text for topic in ('概述', '总工单数'))
        assert any(topic in text for topic in ('优先级', '分类'))

    def test_different_data_types(self, sample_sales_data, temp_output_dir):
        """Run the analysis on a non-ticket (sales) dataset.

        Asserts that the run succeeds, reports a ``data_type`` and plans at
        least one task.
        """
        result = run_analysis(
            data_file=sample_sales_data,
            output_dir=temp_output_dir,
        )

        assert result['success'] is True, f"分析失败: {result.get('error')}"
        assert 'data_type' in result
        assert result['tasks_count'] > 0
class TestErrorRecovery:
    """Tests for how ``run_analysis`` reports bad input files."""

    def test_invalid_file_path(self, tmp_path, temp_output_dir):
        """A missing data file must yield a failed result with an error text.

        The path is built under ``tmp_path`` so it is guaranteed not to
        exist, independent of the directory the test suite is started from.
        """
        missing_file = tmp_path / "nonexistent_file.csv"
        assert not missing_file.exists()  # sanity check on the precondition

        result = run_analysis(
            data_file=str(missing_file),
            output_dir=temp_output_dir,
        )

        assert result['success'] is False
        assert 'error' in result
        assert len(result['error']) > 0

    def test_empty_file(self, tmp_path, temp_output_dir):
        """A zero-byte CSV must yield a failed result carrying an error."""
        empty_file = tmp_path / "empty.csv"
        empty_file.write_text("", encoding='utf-8')

        result = run_analysis(
            data_file=str(empty_file),
            output_dir=temp_output_dir,
        )

        assert result['success'] is False
        assert 'error' in result

    def test_malformed_csv(self, tmp_path, temp_output_dir):
        """A CSV with inconsistent column counts must still return a result.

        The run may succeed or fail depending on the error-handling
        strategy, so only the presence of the ``success`` and
        ``elapsed_time`` keys is asserted.
        """
        # Header has two columns; the rows have one and three values.
        malformed_file = tmp_path / "malformed.csv"
        malformed_file.write_text("col1,col2\nvalue1\nvalue2,value3,value4", encoding='utf-8')

        result = run_analysis(
            data_file=str(malformed_file),
            output_dir=temp_output_dir,
        )

        assert 'success' in result
        assert 'elapsed_time' in result
class TestOrchestrator:
    """Tests that exercise ``AnalysisOrchestrator`` directly."""

    def test_orchestrator_initialization(self, sample_ticket_data, temp_output_dir):
        """Constructing the orchestrator keeps the data file and output dir.

        Asserts that ``data_file`` equals the value passed in and that
        ``output_dir`` exists as a directory after construction.
        """
        orchestrator = AnalysisOrchestrator(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )

        assert orchestrator.data_file == sample_ticket_data
        out_dir = orchestrator.output_dir
        assert out_dir.exists()
        assert out_dir.is_dir()

    def test_orchestrator_stages(self, sample_ticket_data, temp_output_dir):
        """A full run populates every per-stage attribute and succeeds.

        Asserts, in order, that the data profile, requirement spec, analysis
        plan, analysis results and report are populated, then that the
        returned result reports success.
        """
        orchestrator = AnalysisOrchestrator(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )

        result = orchestrator.run_analysis()

        # Artifacts left on the orchestrator by each stage.
        assert orchestrator.data_profile is not None
        assert orchestrator.requirement_spec is not None
        assert orchestrator.analysis_plan is not None
        assert len(orchestrator.analysis_results) > 0
        assert orchestrator.report is not None

        # Overall outcome.
        assert result['success'] is True
class TestProgressTracking:
    """Tests for the ``progress_callback`` hook of ``run_analysis``."""

    def test_progress_callback(self, sample_ticket_data, temp_output_dir):
        """The callback is invoked with non-decreasing progress that completes.

        Asserts that at least one call was recorded, that ``current`` never
        decreases between consecutive calls, and that the final call has
        ``current == total``.
        """
        recorded = []

        def callback(stage, current, total):
            # Keep every invocation so the sequence can be inspected later.
            recorded.append(dict(stage=stage, current=current, total=total))

        run_analysis(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
            progress_callback=callback,
        )

        assert len(recorded) > 0

        # Each call's progress must be at least that of the previous call.
        for earlier, later in zip(recorded, recorded[1:]):
            assert earlier['current'] <= later['current']

        # The last reported step must be the completed state.
        final = recorded[-1]
        assert final['current'] == final['total']
class TestOutputFiles:
    """Tests for the files written by an analysis run."""

    def test_report_file_creation(self, sample_ticket_data, temp_output_dir):
        """A successful run writes a non-empty ``.md`` report readable as UTF-8."""
        result = run_analysis(
            data_file=sample_ticket_data,
            output_dir=temp_output_dir,
        )
        assert result['success'] is True

        report_path = Path(result['report_path'])
        assert report_path.exists()
        assert report_path.suffix == '.md'

        # read_text raises if the file is not valid UTF-8.
        content = report_path.read_text(encoding='utf-8')
        assert len(content) > 0

    def test_log_file_creation(self, sample_ticket_data, temp_output_dir):
        """When a log file is configured and created, it holds log output.

        The content check is intentionally conditional on the file existing.
        Handlers bound to the log file are always detached afterwards so the
        logging configuration does not leak into later tests and the
        temporary directory can be removed.
        """
        from src.logging_config import setup_logging
        import logging

        log_file = Path(temp_output_dir) / "test.log"
        try:
            setup_logging(
                level=logging.INFO,
                log_file=str(log_file),
            )

            run_analysis(
                data_file=sample_ticket_data,
                output_dir=temp_output_dir,
            )

            if log_file.exists():
                log_content = log_file.read_text(encoding='utf-8')
                assert len(log_content) > 0
                assert '数据理解' in log_content or 'INFO' in log_content
        finally:
            self._detach_file_handlers(log_file)

    @staticmethod
    def _detach_file_handlers(log_file):
        """Remove and close every ``logging.FileHandler`` writing to *log_file*.

        NOTE(review): assumes ``setup_logging`` attaches a standard
        ``logging.FileHandler`` (or subclass) to some logger; if it does
        not, this is a harmless no-op.

        Args:
            log_file: Path of the log file whose handlers should be released.
        """
        import logging

        target = Path(log_file).absolute()
        # Root logger plus every instantiated named logger; the registry may
        # also contain placeholder entries, which are skipped.
        loggers = [logging.getLogger()]
        loggers.extend(
            candidate
            for candidate in list(logging.root.manager.loggerDict.values())
            if isinstance(candidate, logging.Logger)
        )
        for logger in loggers:
            # Iterate over a copy because removeHandler mutates the list.
            for handler in list(logger.handlers):
                if (
                    isinstance(handler, logging.FileHandler)
                    and Path(handler.baseFilename) == target
                ):
                    logger.removeHandler(handler)
                    handler.close()