清理表情

This commit is contained in:
2026-01-31 18:00:05 +08:00
parent 674f48c74b
commit 5eb13324c2
15 changed files with 394 additions and 156 deletions

View File

@@ -42,7 +42,7 @@ class CacheManager:
with open(cache_path, 'rb') as f:
return pickle.load(f)
except Exception as e:
print(f"⚠️ 读取缓存失败: {e}")
print(f"[WARN] 读取缓存失败: {e}")
return None
return None
@@ -56,14 +56,14 @@ class CacheManager:
with open(cache_path, 'wb') as f:
pickle.dump(value, f)
except Exception as e:
print(f"⚠️ 写入缓存失败: {e}")
print(f"[WARN] 写入缓存失败: {e}")
def clear(self) -> None:
"""清空所有缓存"""
if self.cache_dir.exists():
for cache_file in self.cache_dir.glob("*.pkl"):
cache_file.unlink()
print(" 缓存已清空")
print("[OK] 缓存已清空")
def cached(self, key_func: Optional[Callable] = None):
"""缓存装饰器"""
@@ -82,7 +82,7 @@ class CacheManager:
# 尝试从缓存获取
cached_value = self.get(cache_key)
if cached_value is not None:
print(f"💾 使用缓存: {cache_key[:8]}...")
print(f"[CACHE] 使用缓存: {cache_key[:8]}...")
return cached_value
# 执行函数并缓存结果

View File

@@ -410,17 +410,17 @@ from IPython.display import display
try:
# 尝试保存
fig.savefig(auto_filepath, bbox_inches='tight')
print(f"💾 [Auto-Save] 检测到未闭合图表,已安全保存至: {auto_filepath}")
print(f"[CACHE] [Auto-Save] 检测到未闭合图表,已安全保存至: {auto_filepath}")
# 添加到输出中告知Agent
output += f"\n[Auto-Save] ⚠️ 检测到Figure {fig_num}未关闭,系统已自动保存为: {auto_filename}"
output += f"\n[Auto-Save] [WARN] 检测到Figure {fig_num}未关闭,系统已自动保存为: {auto_filename}"
self.image_counter += 1
except Exception as e:
print(f"⚠️ [Auto-Save] 保存失败: {e}")
print(f"[WARN] [Auto-Save] 保存失败: {e}")
finally:
plt.close(fig_num)
except Exception as e:
print(f"⚠️ [Auto-Save Global] 异常: {e}")
print(f"[WARN] [Auto-Save Global] 异常: {e}")
# --- 自动保存机制 end ---
return {

View File

@@ -34,7 +34,7 @@ def load_and_profile_data(file_paths: list) -> str:
profile_summary += f"## 文件: {file_name}\n\n"
if not os.path.exists(file_path):
profile_summary += f"⚠️ 文件不存在: {file_path}\n\n"
profile_summary += f"[WARN] 文件不存在: {file_path}\n\n"
continue
try:
@@ -52,7 +52,7 @@ def load_and_profile_data(file_paths: list) -> str:
elif ext in ['.xlsx', '.xls']:
df = pd.read_excel(file_path)
else:
profile_summary += f"⚠️ 不支持的文件格式: {ext}\n\n"
profile_summary += f"[WARN] 不支持的文件格式: {ext}\n\n"
continue
# 基础信息
@@ -70,7 +70,7 @@ def load_and_profile_data(file_paths: list) -> str:
profile_summary += f"#### {col} ({dtype})\n"
if null_count > 0:
profile_summary += f"- ⚠️ 空值: {null_count} ({null_ratio:.1f}%)\n"
profile_summary += f"- [WARN] 空值: {null_count} ({null_ratio:.1f}%)\n"
# 数值列分析
if pd.api.types.is_numeric_dtype(dtype):
@@ -96,7 +96,7 @@ def load_and_profile_data(file_paths: list) -> str:
profile_summary += "\n"
except Exception as e:
profile_summary += f" 读取或分析文件失败: {str(e)}\n\n"
profile_summary += f"[ERROR] 读取或分析文件失败: {str(e)}\n\n"
return profile_summary
@@ -141,7 +141,7 @@ def load_data_chunked(file_path: str, chunksize: Optional[int] = None) -> Iterat
except UnicodeDecodeError:
continue
except Exception as e:
print(f" 读取CSV文件失败: {e}")
print(f"[ERROR] 读取CSV文件失败: {e}")
break
elif ext in ['.xlsx', '.xls']:
# Excel文件不支持chunksize直接读取
@@ -151,7 +151,7 @@ def load_data_chunked(file_path: str, chunksize: Optional[int] = None) -> Iterat
for i in range(0, len(df), chunksize):
yield df.iloc[i:i+chunksize]
except Exception as e:
print(f" 读取Excel文件失败: {e}")
print(f"[ERROR] 读取Excel文件失败: {e}")
def load_data_with_cache(file_path: str, force_reload: bool = False) -> Optional[pd.DataFrame]:
@@ -166,7 +166,7 @@ def load_data_with_cache(file_path: str, force_reload: bool = False) -> Optional
DataFrame或None
"""
if not os.path.exists(file_path):
print(f"⚠️ 文件不存在: {file_path}")
print(f"[WARN] 文件不存在: {file_path}")
return None
# 检查文件大小
@@ -174,7 +174,7 @@ def load_data_with_cache(file_path: str, force_reload: bool = False) -> Optional
# 对于大文件,建议使用流式处理
if file_size_mb > app_config.max_file_size_mb:
print(f"⚠️ 文件过大 ({file_size_mb:.1f}MB),建议使用 load_data_chunked() 流式处理")
print(f"[WARN] 文件过大 ({file_size_mb:.1f}MB),建议使用 load_data_chunked() 流式处理")
# 生成缓存键
cache_key = get_file_hash(file_path)
@@ -183,7 +183,7 @@ def load_data_with_cache(file_path: str, force_reload: bool = False) -> Optional
if not force_reload and app_config.data_cache_enabled:
cached_data = data_cache.get(cache_key)
if cached_data is not None:
print(f"💾 从缓存加载数据: {os.path.basename(file_path)}")
print(f"[CACHE] 从缓存加载数据: {os.path.basename(file_path)}")
return cached_data
# 加载数据
@@ -202,16 +202,16 @@ def load_data_with_cache(file_path: str, force_reload: bool = False) -> Optional
elif ext in ['.xlsx', '.xls']:
df = pd.read_excel(file_path)
else:
print(f"⚠️ 不支持的文件格式: {ext}")
print(f"[WARN] 不支持的文件格式: {ext}")
return None
# 缓存数据
if df is not None and app_config.data_cache_enabled:
data_cache.set(cache_key, df)
print(f" 数据已缓存: {os.path.basename(file_path)}")
print(f"[OK] 数据已缓存: {os.path.basename(file_path)}")
return df
except Exception as e:
print(f" 加载数据失败: {e}")
print(f"[ERROR] 加载数据失败: {e}")
return None

View File

@@ -192,27 +192,27 @@ class DataQualityChecker:
summary += f"**质量评分**: {self.quality_score:.1f}/100\n\n"
if self.quality_score >= 90:
summary += " **评级**: 优秀 - 数据质量很好\n\n"
summary += "[OK] **评级**: 优秀 - 数据质量很好\n\n"
elif self.quality_score >= 75:
summary += "⚠️ **评级**: 良好 - 存在一些小问题\n\n"
summary += "[WARN] **评级**: 良好 - 存在一些小问题\n\n"
elif self.quality_score >= 60:
summary += "⚠️ **评级**: 一般 - 需要处理多个问题\n\n"
summary += "[WARN] **评级**: 一般 - 需要处理多个问题\n\n"
else:
summary += " **评级**: 差 - 数据质量问题严重\n\n"
summary += "[ERROR] **评级**: 差 - 数据质量问题严重\n\n"
summary += f"**问题统计**: 共 {len(self.issues)} 个质量问题\n"
summary += f"- 🔴 高严重性: {len([i for i in self.issues if i.severity == 'high'])}\n"
summary += f"- 🟡 中严重性: {len([i for i in self.issues if i.severity == 'medium'])}\n"
summary += f"- 🟢 低严重性: {len([i for i in self.issues if i.severity == 'low'])}\n\n"
summary += f"- [RED] 高严重性: {len([i for i in self.issues if i.severity == 'high'])}\n"
summary += f"- [YELLOW] 中严重性: {len([i for i in self.issues if i.severity == 'medium'])}\n"
summary += f"- [GREEN] 低严重性: {len([i for i in self.issues if i.severity == 'low'])}\n\n"
if self.issues:
summary += "### 主要问题:\n\n"
# 只显示高和中严重性的问题
for issue in self.issues:
if issue.severity in ["high", "medium"]:
emoji = "🔴" if issue.severity == "high" else "🟡"
emoji = "[RED]" if issue.severity == "high" else "[YELLOW]"
summary += f"{emoji} **{issue.column}** - {issue.description}\n"
summary += f" 💡 {issue.suggestion}\n\n"
summary += f" [TIP] {issue.suggestion}\n\n"
return summary

View File

@@ -57,7 +57,7 @@ class AsyncFallbackOpenAIClient:
self.fallback_client = AsyncOpenAI(api_key=fallback_api_key, base_url=fallback_base_url, **_fallback_args)
self.fallback_model_name = fallback_model_name
else:
print("⚠️ 警告: 未完全配置备用 API 客户端。如果主 API 失败,将无法进行回退。")
print("[WARN] 警告: 未完全配置备用 API 客户端。如果主 API 失败,将无法进行回退。")
self.content_filter_error_code = content_filter_error_code
self.content_filter_error_field = content_filter_error_field
@@ -90,11 +90,11 @@ class AsyncFallbackOpenAIClient:
return completion
except (APIConnectionError, APITimeoutError) as e: # 通常可以重试的网络错误
last_exception = e
print(f"⚠️ {api_name} API 调用时发生可重试错误 ({type(e).__name__}): {e}. 尝试次数 {attempt + 1}/{max_retries + 1}")
print(f"[WARN] {api_name} API 调用时发生可重试错误 ({type(e).__name__}): {e}. 尝试次数 {attempt + 1}/{max_retries + 1}")
if attempt < max_retries:
await asyncio.sleep(self.retry_delay_seconds * (attempt + 1)) # 增加延迟
else:
print(f" {api_name} API 在达到最大重试次数后仍然失败。")
print(f"[ERROR] {api_name} API 在达到最大重试次数后仍然失败。")
except APIStatusError as e: # API 返回的特定状态码错误
is_content_filter_error = False
retry_after = None
@@ -118,7 +118,7 @@ class AsyncFallbackOpenAIClient:
if delay_str.endswith("s"):
try:
retry_after = float(delay_str[:-1])
print(f" 收到服务器 RetryInfo等待时间: {retry_after}")
print(f"[TIMER] 收到服务器 RetryInfo等待时间: {retry_after}")
except ValueError:
pass
except Exception:
@@ -128,7 +128,7 @@ class AsyncFallbackOpenAIClient:
raise e
last_exception = e
print(f"⚠️ {api_name} API 调用时发生 APIStatusError ({e.status_code}): {e}. 尝试次数 {attempt + 1}/{max_retries + 1}")
print(f"[WARN] {api_name} API 调用时发生 APIStatusError ({e.status_code}): {e}. 尝试次数 {attempt + 1}/{max_retries + 1}")
if attempt < max_retries:
# 如果获取到了明确的 retry_after则使用它否则使用默认的指数退避
@@ -137,13 +137,13 @@ class AsyncFallbackOpenAIClient:
if e.status_code == 429 and retry_after is None:
wait_time = max(wait_time, 5.0 * (attempt + 1)) # 429 默认至少等 5 秒
print(f"💤 将等待 {wait_time:.2f} 秒后重试...")
print(f"[WAIT] 将等待 {wait_time:.2f} 秒后重试...")
await asyncio.sleep(wait_time)
else:
print(f" {api_name} API 在达到最大重试次数后仍然失败 (APIStatusError)。")
print(f"[ERROR] {api_name} API 在达到最大重试次数后仍然失败 (APIStatusError)。")
except APIError as e: # 其他不可轻易重试的 OpenAI 错误
last_exception = e
print(f" {api_name} API 调用时发生不可重试错误 ({type(e).__name__}): {e}")
print(f"[ERROR] {api_name} API 调用时发生不可重试错误 ({type(e).__name__}): {e}")
break # 不再重试此类错误
if last_exception:
@@ -196,7 +196,7 @@ class AsyncFallbackOpenAIClient:
pass
if is_content_filter_error and self.fallback_client and self.fallback_model_name:
print(f" 主 API 内容过滤错误 ({e_primary.status_code})。尝试切换到备用 API ({self.fallback_client.base_url})...")
print(f"[INFO] 主 API 内容过滤错误 ({e_primary.status_code})。尝试切换到备用 API ({self.fallback_client.base_url})...")
try:
fallback_completion = await self._attempt_api_call(
client=self.fallback_client,
@@ -206,20 +206,20 @@ class AsyncFallbackOpenAIClient:
api_name="备用",
**kwargs.copy()
)
print(f" 备用 API 调用成功。")
print(f"[OK] 备用 API 调用成功。")
return fallback_completion
except APIError as e_fallback:
print(f" 备用 API 调用最终失败: {type(e_fallback).__name__} - {e_fallback}")
print(f"[ERROR] 备用 API 调用最终失败: {type(e_fallback).__name__} - {e_fallback}")
raise e_fallback
else:
if not (self.fallback_client and self.fallback_model_name and is_content_filter_error):
# 如果不是内容过滤错误或者没有可用的备用API则记录主API的原始错误
print(f" 主 API 错误 ({type(e_primary).__name__}: {e_primary}), 且不满足备用条件或备用API未配置。")
print(f"[INFO] 主 API 错误 ({type(e_primary).__name__}: {e_primary}), 且不满足备用条件或备用API未配置。")
raise e_primary
except APIError as e_primary_other:
print(f" 主 API 调用最终失败 (非内容过滤,错误类型: {type(e_primary_other).__name__}): {e_primary_other}")
print(f"[ERROR] 主 API 调用最终失败 (非内容过滤,错误类型: {type(e_primary_other).__name__}): {e_primary_other}")
if self.fallback_client and self.fallback_model_name:
print(f" 主 API 失败,尝试切换到备用 API ({self.fallback_client.base_url})...")
print(f"[INFO] 主 API 失败,尝试切换到备用 API ({self.fallback_client.base_url})...")
try:
fallback_completion = await self._attempt_api_call(
client=self.fallback_client,
@@ -229,10 +229,10 @@ class AsyncFallbackOpenAIClient:
api_name="备用",
**kwargs.copy()
)
print(f" 备用 API 调用成功。")
print(f"[OK] 备用 API 调用成功。")
return fallback_completion
except APIError as e_fallback_after_primary_fail:
print(f" 备用 API 在主 API 失败后也调用失败: {type(e_fallback_after_primary_fail).__name__} - {e_fallback_after_primary_fail}")
print(f"[ERROR] 备用 API 在主 API 失败后也调用失败: {type(e_fallback_after_primary_fail).__name__} - {e_fallback_after_primary_fail}")
raise e_fallback_after_primary_fail
else:
raise e_primary_other

View File

@@ -7,17 +7,17 @@ def format_execution_result(result: Dict[str, Any]) -> str:
feedback = []
if result['success']:
feedback.append(" 代码执行成功")
feedback.append("[OK] 代码执行成功")
if result['output']:
feedback.append(f"📊 输出结果:\n{result['output']}")
feedback.append(f"[CHART] 输出结果:\n{result['output']}")
if result.get('variables'):
feedback.append("📋 新生成的变量:")
feedback.append("[LIST] 新生成的变量:")
for var_name, var_info in result['variables'].items():
feedback.append(f" - {var_name}: {var_info}")
else:
feedback.append(" 代码执行失败")
feedback.append("[ERROR] 代码执行失败")
feedback.append(f"错误信息: {result['error']}")
if result['output']:
feedback.append(f"部分输出: {result['output']}")

View File

@@ -117,7 +117,7 @@ class LLMHelper:
if use_cache and app_config.llm_cache_enabled:
cached_response = llm_cache.get(cache_key)
if cached_response:
print("💾 使用LLM缓存响应")
print("[CACHE] 使用LLM缓存响应")
return cached_response
# 调用LLM

215
utils/script_generator.py Normal file
View File

@@ -0,0 +1,215 @@
# -*- coding: utf-8 -*-
"""
可复用脚本生成器
从分析会话的执行历史中提取成功执行的代码,
合并去重后生成可独立运行的 .py 脚本文件。
"""
import os
import re
from datetime import datetime
from typing import List, Dict, Any, Set
# Matches the head of a genuine import statement. It is used to decide whether
# a first line that is syntactically "open" (unbalanced parenthesis or trailing
# backslash) may be followed by continuation lines, so that arbitrary text that
# merely starts with "from " never swallows the lines after it.
_IMPORT_HEAD = re.compile(r'(?:import\s+\S|from\s+\S+\s+import\b)')


def _scan_import_line(stripped: str, depth: int):
    """Update the parenthesis depth for one line of an import statement.

    Args:
        stripped: The line with surrounding whitespace removed.
        depth: Number of parentheses still open before this line.

    Returns:
        A ``(depth, continues)`` pair: the new (non-negative) depth and
        whether the statement carries on to the next physical line.
    """
    # Import statements contain no string literals, so '#' starts a comment.
    code_part = stripped.split('#', 1)[0].rstrip()
    depth += code_part.count('(') - code_part.count(')')
    continues = depth > 0 or code_part.endswith('\\')
    return max(depth, 0), continues


def _split_imports(code: str):
    """Split code into complete import statements and all remaining lines.

    A statement that spans several physical lines (parenthesised name list or
    backslash continuation) is kept together and joined with newlines; its
    first line is stripped, continuation lines keep their indentation.
    Single-line imports are returned stripped.

    Returns:
        A ``(statements, remaining_lines)`` pair of lists of strings.
    """
    statements: List[str] = []
    remaining: List[str] = []
    pending: List[str] = []  # lines of a multi-line import still being read
    depth = 0

    for line in code.split('\n'):
        stripped = line.strip()

        if pending:
            pending.append(line.rstrip())
            depth, continues = _scan_import_line(stripped, depth)
            if not continues:
                statements.append('\n'.join(pending))
                pending = []
                depth = 0
            continue

        if stripped.startswith(('import ', 'from ')):
            depth, continues = _scan_import_line(stripped, 0)
            if continues and _IMPORT_HEAD.match(stripped):
                pending = [stripped]
            else:
                statements.append(stripped)
                depth = 0
            continue

        remaining.append(line)

    # Unterminated statement at end of input: keep what was collected.
    if pending:
        statements.append('\n'.join(pending))
    return statements, remaining


def extract_imports(code: str) -> Set[str]:
    """Collect every import statement found in ``code``.

    Lines whose stripped text starts with ``import `` or ``from `` are treated
    as imports, regardless of indentation. Multi-line imports are returned as
    one complete statement.

    Args:
        code: Source code to scan.

    Returns:
        The set of import statements (duplicates collapsed).
    """
    return set(_split_imports(code)[0])


def remove_imports(code: str) -> str:
    """Return ``code`` with every import statement removed.

    Uses the same detection as ``extract_imports``, so a multi-line import is
    removed in full instead of leaving its continuation lines behind.

    Args:
        code: Source code to filter.

    Returns:
        The remaining lines joined with newlines.
    """
    return '\n'.join(_split_imports(code)[1])
# Configuration lines that the script template already emits once, so they are
# dropped from individual code blocks. Compiled once at import time.
_REDUNDANT_CONFIG_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"plt\.rcParams\['font\.sans-serif'\]",  # font setup handled by the template
        r"plt\.rcParams\['axes\.unicode_minus'\]",
    )
)


def clean_code_block(code: str) -> str:
    """Drop redundant matplotlib configuration lines from a code block.

    Any line whose stripped text matches one of the redundant-configuration
    patterns is removed. All other lines, including blank ones, are kept
    unchanged and in order.

    Args:
        code: Source code of one executed block.

    Returns:
        The filtered code, lines joined with newlines.
    """
    kept_lines = []
    for line in code.split('\n'):
        stripped = line.strip()
        # Blank lines never match a pattern and are always preserved.
        if stripped and any(p.search(stripped) for p in _REDUNDANT_CONFIG_PATTERNS):
            continue
        kept_lines.append(line)
    return '\n'.join(kept_lines)
def _docstring_safe(text: str) -> str:
    """Escape text so it can be embedded in a triple-double-quoted docstring.

    Backslashes are doubled (a raw Windows path such as ``C:\\Users`` would
    otherwise be read as an invalid ``\\U`` escape) and ``\"\"\"`` sequences are
    escaped so they cannot terminate the docstring early.
    """
    return text.replace('\\', '\\\\').replace('"""', '\\"\\"\\"')


def generate_reusable_script(
    analysis_results: List[Dict[str, Any]],
    data_files: List[str],
    session_output_dir: str,
    user_requirement: str = ""
) -> str:
    """Generate a standalone, reusable Python script from analysis results.

    Successfully executed code from the session is collected, its import
    statements are de-duplicated and hoisted to the top, and the result is
    written to ``分析脚本_<timestamp>.py`` inside ``session_output_dir``.

    Args:
        analysis_results: Results recorded during the analysis. Each element
            is read for the keys 'action', 'code', 'result' and 'round'.
        data_files: Paths of the original data files.
        session_output_dir: Directory in which the script file is written.
        user_requirement: The user's original requirement text; truncated to
            200 characters in the script header.

    Returns:
        The path of the generated script, or an empty string when there is no
        successfully executed code or the file could not be written.
    """
    # Collect every successfully executed code block.
    all_imports = set()
    code_blocks = []

    for result in analysis_results:
        # Figure-collection steps carry no code worth replaying.
        if result.get("action") == "collect_figures":
            continue

        code = result.get("code", "")
        exec_result = result.get("result", {})

        # Only code that ran successfully is kept.
        if not (code and exec_result.get("success", False)):
            continue

        all_imports.update(extract_imports(code))

        cleaned_code = clean_code_block(remove_imports(code)).strip()
        if cleaned_code:
            code_blocks.append({
                "round": result.get("round", 0),
                "code": cleaned_code
            })

    if not code_blocks:
        print("[WARN] 没有成功执行的代码块,跳过脚本生成")
        return ""

    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    generated_at = now.strftime("%Y-%m-%d %H:%M:%S")

    # The file name is fixed up front so the usage hint in the header names
    # the file that is actually written.
    script_filename = f"分析脚本_{timestamp}.py"
    script_path = os.path.join(session_output_dir, script_filename)

    # Free text placed inside the generated docstring must be escaped,
    # otherwise the generated script may not even parse.
    files_text = _docstring_safe(', '.join(data_files))
    requirement = user_requirement[:200] + '...' if len(user_requirement) > 200 else user_requirement
    requirement_text = _docstring_safe(requirement)

    # Script header.
    script_header = f'''#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
数据分析脚本 - 自动生成
=====================================
原始数据文件: {files_text}
生成时间: {generated_at}
原始需求: {requirement_text}
=====================================

使用方法:
1. 修改下方 DATA_FILES 列表中的文件路径
2. 修改 OUTPUT_DIR 指定输出目录
3. 运行: python {script_filename}
"""

import os
'''

    # De-duplicated imports: plain "import" statements first, then "from"
    # imports, each group sorted.
    standard_imports = sorted(imp for imp in all_imports if imp.startswith('import '))
    from_imports = sorted(imp for imp in all_imports if imp.startswith('from '))
    imports_section = '\n'.join(standard_imports + from_imports)

    # User-editable configuration plus the font setup shared by all blocks.
    config_section = f'''

# ========== 配置区域 (可修改) ==========

# 数据文件路径 - 修改此处以分析不同的数据
DATA_FILES = {data_files!r}

# 输出目录 - 图片和报告将保存在此目录
OUTPUT_DIR = "./analysis_output"

# 创建输出目录
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ========== 字体配置 (中文显示) ==========
import platform
import matplotlib.pyplot as plt

system_name = platform.system()
if system_name == 'Darwin':
    plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'PingFang SC', 'sans-serif']
elif system_name == 'Windows':
    plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'sans-serif']
else:
    plt.rcParams['font.sans-serif'] = ['WenQuanYi Micro Hei', 'sans-serif']
plt.rcParams['axes.unicode_minus'] = False

# 设置 session_output_dir 变量(兼容原始代码)
session_output_dir = OUTPUT_DIR
'''

    # Concatenate the code blocks, each labelled with its analysis round.
    code_section = "\n# ========== 分析代码 ==========\n\n"
    for block in code_blocks:
        code_section += f"# --- 第 {block['round']} 轮分析 ---\n"
        code_section += block['code'] + "\n\n"

    # Script footer (plain string: the braces belong to the generated f-string).
    script_footer = '''
# ========== 完成 ==========
print("\\n" + "=" * 50)
print("[OK] 分析完成!")
print(f"[OUTPUT] 输出目录: {os.path.abspath(OUTPUT_DIR)}")
print("=" * 50)
'''

    full_script = script_header + imports_section + config_section + code_section + script_footer

    # Writing is best-effort: a failure is reported and signalled with "".
    try:
        with open(script_path, 'w', encoding='utf-8') as f:
            f.write(full_script)
        print(f"[OK] 可复用脚本已生成: {script_path}")
        return script_path
    except Exception as e:
        print(f"[ERROR] 保存脚本失败: {e}")
        return ""