12345678

重构readme
小小优化，不成敬意
2026-03-09 10:37:35 +08:00 · 2026-03-09 10:26:03 +08:00 · 2026-03-09 10:21:33 +08:00 · 2026-03-09 10:06:21 +08:00 · 2026-03-07 00:04:29 +08:00 · 2026-01-07 16:41:38 +08:00
200 changed files with 13546 additions and 2364 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/.env.example
+++ b/.env.example
@@ -1,7 +1,7 @@
 # LLM 接口配置（OpenAI 兼容接口，当前指向小米 MiMo）
 OPENAI_API_KEY=sk-c44i1hy64xgzwox6x08o4zug93frq6rgn84oqugf2pje1tg4
-OPENAI_BASE_URL=https://api.xiaomimimo.com/v1/chat/completions
+OPENAI_BASE_URL=https://api.xiaomimimo.com/v1
 # 文本模型
 OPENAI_MODEL=mimo-v2-flash
 # OPENAI_MODEL=deepseek-r1-250528
--- a/.gitignore
+++ b/.gitignore
@@ -1,173 +0,0 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 pip-wheel-metadata/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # Project specific
 # Output files and generated reports
 outputs/
 *.png
 *.jpg
 *.jpeg
 *.pdf
 *.docx
 *.xlsx
 *.csv
 !贵州茅台利润表.csv
 # 允许assets目录下的图片文件（项目资源）
 !assets/**/*.png
 !assets/**/*.jpg
 !assets/**/*.jpeg
 # IDE and editor files
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 # OS specific files
 .DS_Store
 .DS_Store?
 ._*
 .Spotlight-V100
 .Trashes
 ehthumbs.db
 Thumbs.db
 # API keys and configuration
 config.ini
 .env
 secrets.json
 api_keys.txt
 # Temporary files
 *.tmp
 *.temp
 *.log
--- a/.hypothesis/constants/09fb4673aaf2e760
+++ b/.hypothesis/constants/09fb4673aaf2e760
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\tool_manager.py
 # hypothesis_version: 6.151.9
 ['address', 'calculate_statistics', 'calculate_trend', 'categorical', 'city', 'country', 'create_bar_chart', 'create_heatmap', 'create_line_chart', 'create_pie_chart', 'datetime', 'description', 'detect_outliers', 'get_correlation', 'get_time_series', 'get_value_counts', 'lat', 'latitude', 'location', 'lon', 'longitude', 'name', 'numeric', 'parameters', 'perform_groupby']
--- a/.hypothesis/constants/1489ccdc430439ed
+++ b/.hypothesis/constants/1489ccdc430439ed
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\query_tools.py
 # hypothesis_version: 6.151.9
 [0.0, 100, '%Y-%m-%d', 'D', 'aggregation', 'array', 'boolean', 'column', 'columns', 'correlation_matrix', 'count', 'datetime', 'default', 'description', 'distribution', 'error', 'frequency', 'get_correlation', 'get_time_series', 'get_value_counts', 'integer', 'items', 'max', 'mean', 'method', 'min', 'missing_count', 'normalize', 'normalized', 'numeric', 'object', 'pearson', 'percentage', 'properties', 'required', 'string', 'sum', 'time', 'time_column', 'time_series', 'top_n', 'total_count', 'type', 'unique_count', 'value', 'value_column', 'value_counts', '时间列名', '是否返回百分比而不是计数', '至少需要两个数值列来计算相关性', '获取时间序列数据，按时间聚合指定指标。', '要分析的列名', '要聚合的值列名', '计算数值列之间的相关系数矩阵。', '返回前N个最常见的值']
--- a/.hypothesis/constants/1e51c0dedb326fad
+++ b/.hypothesis/constants/1e51c0dedb326fad
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\performance_optimization.py
 # hypothesis_version: 6.151.9
 [0.5, 1000000, '*.json', 'category', 'count', 'float64', 'gpt-4', 'int64', 'max', 'mean', 'min', 'object', 'r', 'sum', 'utf-8', 'w']
--- a/.hypothesis/constants/21502725e69b1597
+++ b/.hypothesis/constants/21502725e69b1597
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\logging_config.py
 # hypothesis_version: 6.151.9
 [100, '\x1b[0m', '\x1b[31m', '\x1b[32m', '\x1b[33m', '\x1b[35m', '\x1b[36m', '%H:%M:%S', '=', 'CRITICAL', 'DEBUG', 'ERROR', 'INFO', 'WARNING', '[AI 思考]', 'ai_thought', 'completed', 'completed_stages', 'details', 'duration', 'end_time', 'failed', 'failed_stages', 'httpcore', 'httpx', 'openai', 'stages', 'start_time', 'started', 'status', 'total_duration', 'total_stages', 'urllib3', 'utf-8', '✓', '✗', '失败', '开始执行跟踪', '成功', '执行摘要', '进度:']
--- a/.hypothesis/constants/2efec0acf87004bd
+++ b/.hypothesis/constants/2efec0acf87004bd
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\__init__.py
 # hypothesis_version: 6.151.9
 ['adjust_plan', 'execute_task', 'extract_insights', 'extract_key_findings', 'generate_basic_stats', 'generate_report', 'parse_template', 'plan_analysis', 'understand_data']
--- a/.hypothesis/constants/2f06dbc37fd16100
+++ b/.hypothesis/constants/2f06dbc37fd16100
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\query_tools.py
 # hypothesis_version: 6.151.9
 [0.0, 100, '%Y-%m-%d', 'D', 'aggregation', 'array', 'boolean', 'column', 'columns', 'correlation_matrix', 'count', 'datetime', 'default', 'description', 'distribution', 'error', 'frequency', 'get_correlation', 'get_time_series', 'get_value_counts', 'integer', 'items', 'max', 'mean', 'method', 'min', 'missing_count', 'normalize', 'normalized', 'numeric', 'object', 'pearson', 'percentage', 'properties', 'required', 'returned_points', 'string', 'sum', 'time', 'time_column', 'time_series', 'top_n', 'total_count', 'total_points', 'type', 'unique_count', 'value', 'value_column', 'value_counts', '时间列名', '是否返回百分比而不是计数', '至少需要两个数值列来计算相关性', '获取时间序列数据，按时间聚合指定指标。', '要分析的列名', '要聚合的值列名', '计算数值列之间的相关系数矩阵。', '返回前N个最常见的值']
--- a/.hypothesis/constants/2f8710039dd44cee
+++ b/.hypothesis/constants/2f8710039dd44cee
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\plan_adjustment.py
 # hypothesis_version: 6.151.9
 [0.7, 2000, 'New Task', 'OPENAI_API_KEY', '\\{.*\\}', 'abnormal', 'anomaly', 'content', 'critical', 'dependencies', 'description', 'expected_output', 'gpt-4', 'high', 'id', 'insight', 'insights', 'medium', 'name', 'needs_adjustment', 'new_tasks', 'outlier', 'pending', 'priority', 'priority_changes', 'reasoning', 'required_tools', 'role', 'severity', 'skip_tasks', 'skipped', 'success', 'system', 'task', 'task_id', 'task_name', 'unusual', 'user', '不正常', '严重', '异常', '异常值', '离群']
--- a/.hypothesis/constants/364583d4f2b54d8c
+++ b/.hypothesis/constants/364583d4f2b54d8c
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\data_understanding.py
 # hypothesis_version: 6.151.9
 [0.0, 0.01, 0.25, 0.3, 0.5, 0.7, 0.75, 0.9, 100, '%Y-%m-%d %H:%M:%S', 'address', 'age', 'amount', 'assigned', 'avg_length', 'categorical', 'category', 'class', 'closed', 'column_count', 'columns', 'completed', 'cost', 'count', 'created', 'customer', 'date', 'date_range_days', 'datetime', 'days', 'duration', 'email', 'end', 'file_path', 'gender', 'id', 'issue', 'max', 'max_date', 'max_length', 'mean', 'median', 'min', 'min_date', 'modified', 'most_common', 'most_common_count', 'name', 'number', 'numeric', 'order', 'phone', 'price', 'priority', 'problem', 'product', 'q25', 'q75', 'quantity', 'registration', 'revenue', 'row_count', 'sales', 'start', 'state', 'status', 'std', 'text', 'ticket', 'time', 'type', 'unknown', 'updated', 'user', '。', '一般', '优秀', '创建时间', '完成时间', '工单数据', '数量', '时长', '时间字段', '更新时间', '未知类型数据', '标识符', '状态', '用户数据', '类型/分类', '良好', '较差', '金额', '销售数据']
--- a/.hypothesis/constants/3d04b04a17235a7d
+++ b/.hypothesis/constants/3d04b04a17235a7d
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\models\data_profile.py
 # hypothesis_version: 6.151.9
 [0.0, 'ColumnInfo', 'DataProfile', 'column_count', 'columns', 'file_path', 'inferred_type', 'key_fields', 'quality_score', 'row_count', 'summary']
--- a/.hypothesis/constants/3ff7c44e55581836
+++ b/.hypothesis/constants/3ff7c44e55581836
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\stats_tools.py
 # hypothesis_version: 6.151.9
 [0.0, 0.25, 0.75, 1.5, 100, 'aggregation', 'bounds', 'calculate_statistics', 'calculate_trend', 'column', 'count', 'datetime', 'decreasing', 'default', 'description', 'detect_outliers', 'error', 'group', 'group_by', 'groups', 'growth_rate', 'increasing', 'intercept', 'iqr', 'kurtosis', 'lower', 'max', 'mean', 'median', 'method', 'min', 'number', 'numeric', 'object', 'outlier_count', 'outlier_percentage', 'outlier_values', 'p_value', 'perform_groupby', 'properties', 'q25', 'q75', 'r_squared', 'required', 'returned_groups', 'skewness', 'slope', 'stable', 'std', 'string', 'sum', 'threshold', 'time_column', 'total_groups', 'trend', 'type', 'upper', 'value', 'value_column', 'zscore', '值列名', '分组依据的列名', '数据点太少，无法计算趋势', '时间列名', '检测方法：iqr 或 zscore', '要分析的列名', '要检测的列名', '要聚合的值列名，如果为空则计数']
--- a/.hypothesis/constants/4f8eaad4fd421f28
+++ b/.hypothesis/constants/4f8eaad4fd421f28
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\stats_tools.py
 # hypothesis_version: 6.151.9
 [0.0, 0.25, 0.75, 1.5, 100, 'aggregation', 'bounds', 'calculate_statistics', 'calculate_trend', 'column', 'count', 'datetime', 'decreasing', 'default', 'description', 'detect_outliers', 'error', 'group', 'group_by', 'groups', 'growth_rate', 'increasing', 'intercept', 'iqr', 'kurtosis', 'lower', 'max', 'mean', 'median', 'method', 'min', 'number', 'numeric', 'object', 'outlier_count', 'outlier_percentage', 'outlier_values', 'p_value', 'perform_groupby', 'properties', 'q25', 'q75', 'r_squared', 'required', 'skewness', 'slope', 'stable', 'std', 'string', 'sum', 'threshold', 'time_column', 'trend', 'type', 'upper', 'value', 'value_column', 'zscore', '值列名', '分组依据的列名', '数据点太少，无法计算趋势', '时间列名', '检测方法：iqr 或 zscore', '要分析的列名', '要检测的列名', '要聚合的值列名，如果为空则计数']
--- a/.hypothesis/constants/584fdee6e6e18bca
+++ b/.hypothesis/constants/584fdee6e6e18bca
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\models\analysis_result.py
 # hypothesis_version: 6.151.9
 [0.0, 'AnalysisResult']
--- a/.hypothesis/constants/623775b22e6feba9
+++ b/.hypothesis/constants/623775b22e6feba9
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\main.py
 # hypothesis_version: 6.151.9
 [100, '=', 'analysis_report.md', 'columns', 'completed', 'data_type', 'data_understanding', 'elapsed_time', 'error', 'failed', 'objectives_count', 'output', 'performance_stats', 'report_path', 'results_count', 'rows', 'started', 'success', 'tasks_count', 'utf-8', '任务执行', '分析数据特征...', '分析流程失败', '分析规划', '完成', '完整分析', '性能统计', '报告生成', '数据理解', '检查是否需要调整计划...', '生成分析报告...', '生成分析计划...', '解析用户需求...', '跳过', '选择分析工具...', '需求理解']
--- a/.hypothesis/constants/66937db06263c9ce
+++ b/.hypothesis/constants/66937db06263c9ce
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\report_generation.py
 # hypothesis_version: 6.151.9
 [0.7, 3000, '## 分析追溯', '## 执行摘要', '## 数据概览', '## 结论与建议', '## 详细分析', '## 附录：分析任务', '### 其他发现', '### 建议', '### 异常分析', '### 趋势分析', '### 跳过的分析', '%', '---', 'N/A', 'OPENAI_API_KEY', 'abnormal', 'anomaly', 'anomaly_count', 'category', 'change', 'conclusions', 'content', 'critical', 'data', 'decline', 'decrease', 'detailed_analysis', 'error', 'executive_summary', 'failure', 'finding', 'gpt-4', 'growth', 'importance', 'increase', 'insight', 'issue', 'key_findings', 'long', 'pending', 'percent', 'problem', 'recommendations', 'role', 'sales', 'sections', 'severe', 'source_task', 'summary', 'system', 'task_name', 'ticket', 'title', 'trend', 'trend_count', 'unknown', 'urgent', 'use_template', 'user', 'utf-8', 'visualizations', 'w', '| 任务名称 | 状态 | 执行时间 |', '✓', '✓ 成功', '✗', '✗ 失败', '上升', '下降', '严重', '产品分析', '以下分析由于数据限制或错误而被跳过：', '健康', '关键', '关键字段：', '减速', '分布', '分析完成，未发现明显异常。', '加速', '占比低', '占比过高', '占比高', '变化', '增长', '失败', '工单', '建议优先处理积压的待处理项，提高处理效率', '建议优化处理流程，缩短处理时长', '建议关注占比异常高的类别，分析根本原因', '异常', '异常分析', '待处理', '执行摘要', '持续', '故障', '数据', '数据概览', '时长', '显著', '本报告基于以下分析任务：', '波动', '状态分析', '用户', '百分', '稳定', '突出', '类型分析', '紧急', '结论与建议', '详细分析', '超出', '趋势', '趋势分析', '过低', '过高', '重大', '销售', '销售分析', '错误', '长', '问题']
--- a/.hypothesis/constants/681da1efa44634b1
+++ b/.hypothesis/constants/681da1efa44634b1
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\main.py
 # hypothesis_version: 6.151.9
 [100, 'analysis_report.md', 'columns', 'completed', 'data_type', 'elapsed_time', 'error', 'failed', 'objectives_count', 'output', 'report_path', 'results_count', 'rows', 'started', 'success', 'tasks_count', 'utf-8', '任务执行', '分析数据特征...', '分析流程失败', '分析规划', '完成', '完整分析', '报告生成', '数据理解', '检查是否需要调整计划...', '生成分析报告...', '生成分析计划...', '解析用户需求...', '跳过', '选择分析工具...', '需求理解']
--- a/.hypothesis/constants/6bd2157110bb9ad3
+++ b/.hypothesis/constants/6bd2157110bb9ad3
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\__init__.py
 # hypothesis_version: 6.151.9
 ['AnalysisTool', 'ToolRegistry', 'get_applicable_tools', 'get_tool', 'list_tools', 'register_tool']
--- a/.hypothesis/constants/74a3dbebd9e4074a
+++ b/.hypothesis/constants/74a3dbebd9e4074a
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\base.py
 # hypothesis_version: 6.151.9
 ['required']
--- a/.hypothesis/constants/7f1d791fd72c24c1
+++ b/.hypothesis/constants/7f1d791fd72c24c1
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\env_loader.py
 # hypothesis_version: 6.151.9
 [0.0, '"', '#', "'", '.env', '.env.local', '1', '=', 'on', 'r', 'true', 'utf-8', 'yes', '环境变量摘要:']
--- a/.hypothesis/constants/813532efc91b30af
+++ b/.hypothesis/constants/813532efc91b30af
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\__init__.py
 # hypothesis_version: 6.151.9
 ['0.1.0']
--- a/.hypothesis/constants/946ba8c598d14bdd
+++ b/.hypothesis/constants/946ba8c598d14bdd
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\config.py
 # hypothesis_version: 6.151.9
 [0.7, 120, 300, 10000, 1000000, '***', '0.7', '1', '10000', '1000000', '120', '20', '3', '300', '60', 'AGENT_MAX_ROUNDS', 'AGENT_OUTPUT_DIR', 'AGENT_TIMEOUT', 'CHART_DIR', 'Config', 'DATA_MAX_ROWS', 'DEBUG', 'ERROR', 'GEMINI_API_KEY', 'GEMINI_BASE_URL', 'GEMINI_MODEL', 'INFO', 'LLM API key 不能为空', 'LLM API key 未设置', 'LLM_MAX_RETRIES', 'LLM_MAX_TOKENS', 'LLM_PROVIDER', 'LLM_TEMPERATURE', 'LLM_TIMEOUT', 'LOG_DIR', 'LOG_LEVEL', 'LOG_TO_CONSOLE', 'LOG_TO_FILE', 'MAX_CONCURRENT_TASKS', 'OPENAI_API_KEY', 'OPENAI_BASE_URL', 'OPENAI_MODEL', 'REPORT_FILENAME', 'TOOL_MAX_QUERY_ROWS', 'WARNING', 'agent_max_rounds', 'agent_timeout', 'analysis_report.md', 'api_key', 'base_url', 'chart_dir', 'charts', 'data_max_rows', 'gemini', 'gemini-2.0-flash-exp', 'gpt-4', 'llm', 'log_dir', 'log_level', 'log_to_console', 'log_to_file', 'max_concurrent_tasks', 'max_retries', 'max_retries 不能为负数', 'max_tokens', 'model', 'openai', 'output', 'output_dir', 'performance', 'provider', 'r', 'report_filename', 'temperature', 'timeout', 'timeout 必须大于 0', 'tool_max_query_rows', 'true', 'utf-8', 'w']
--- a/.hypothesis/constants/9ad9d84748d09727
+++ b/.hypothesis/constants/9ad9d84748d09727
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\requirement_understanding.py
 # hypothesis_version: 6.151.9
 [0.7, 2000, 'OPENAI_API_KEY', '\\{.*\\}', '^#+\\s+(.+)$', 'all_satisfied', 'can_proceed', 'constraints', 'content', 'datetime', 'description', 'distribution', 'expected_outputs', 'gpt-4', 'health', 'metrics', 'missing_fields', 'name', 'objectives', 'priority', 'r', 'required_charts', 'required_metrics', 'role', 'satisfied_objectives', 'sections', 'status', 'system', 'time', 'trend', 'type', 'user', 'utf-8', '健康度', '健康度分析', '关键发现', '分布', '分布分析', '分析报告', '分析数据的分布特征', '分析数据的整体健康状况', '分析数据随时间的变化趋势', '可视化图表', '基础统计', '增长率', '处理效率', '完成率', '对数据进行全面分析', '数值分布', '时间', '时间序列', '状态', '积压情况', '类别分布', '类型', '综合分析', '趋势', '趋势分析']
--- a/.hypothesis/constants/9b1a7d7d85d72278
+++ b/.hypothesis/constants/9b1a7d7d85d72278
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\models\data_profile.py
 # hypothesis_version: 6.151.9
 [0.0, 'ColumnInfo', 'DataProfile', 'column_count', 'columns', 'dtype', 'file_path', 'inferred_type', 'key_fields', 'missing_rate', 'name', 'quality_score', 'row_count', 'sample_values', 'statistics', 'summary', 'unique_count']
--- a/.hypothesis/constants/9bda871697adefb4
+++ b/.hypothesis/constants/9bda871697adefb4
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\data_access.py
 # hypothesis_version: 6.151.9
 [0.0, 0.05, 100, 1000000, 'DataAccessLayer', 'categorical', 'datetime', 'error', 'gb2312', 'gbk', 'iso-8859-1', 'latin1', 'max', 'mean', 'median', 'min', 'num_categories', 'numeric', 'object', 'records', 'std', 'success', 'text', 'tool', 'top_values', 'unknown', 'utf-8']
--- a/.hypothesis/constants/b9dfba88b7797cd8
+++ b/.hypothesis/constants/b9dfba88b7797cd8
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\error_handling.py
 # hypothesis_version: 6.151.9
 [0.0, 30.0, 1024, 1000000, ',', ';', 'AI 调用失败，使用降级策略', 'AI 调用成功', 'AI 返回 None', 'completed', 'data', 'dependencies', 'error', 'execute', 'failed', 'gb2312', 'gbk', 'id', 'integer', 'iso-8859-1', 'latin1', 'name', 'number', 'parameters', 'properties', 'python', 'required', 'skip', 'skipped', 'status', 'string', 'success', 'task_id', 'task_name', 'tasks', 'tool', 'type', 'unknown', 'utf-8', 'valid', '|', '工具返回 None', '数据为空']
--- a/.hypothesis/constants/ca4f149613285b64
+++ b/.hypothesis/constants/ca4f149613285b64
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\models\requirement_spec.py
 # hypothesis_version: 6.151.9
 ['AnalysisObjective', 'RequirementSpec', 'constraints', 'expected_outputs', 'objectives', 'template_path', 'user_input']
--- a/.hypothesis/constants/ca88f8a3ce954a65
+++ b/.hypothesis/constants/ca88f8a3ce954a65
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\analysis_planning.py
 # hypothesis_version: 6.151.9
 [0.7, 3000, 'OPENAI_API_KEY', '\\{.*\\}', 'calculate_statistics', 'calculate_trend', 'content', 'create_bar_chart', 'create_line_chart', 'dependencies', 'description', 'detect_outliers', 'distribution', 'estimated_duration', 'expected_output', 'forms_dag', 'get_time_series', 'get_value_counts', 'gpt-4', 'health', 'id', 'missing_dep', 'missing_dependencies', 'name', 'overview', 'pending', 'priority', 'quality', 'required_tools', 'role', 'statistics', 'system', 'task_1', 'task_id', 'tasks', 'time', 'tool_config', 'trend', 'user', 'valid', '健康', '分布', '分布图表和统计', '对数据进行全面的探索性分析', '数据分析报告', '时间', '概览', '统计', '统计摘要', '综合数据分析', '质量', '质量评分和问题识别', '趋势', '趋势图表和分析']
--- a/.hypothesis/constants/da0edc6bd16fa2d1
+++ b/.hypothesis/constants/da0edc6bd16fa2d1
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\data_understanding.py
 # hypothesis_version: 6.151.9
 [0.0, 0.01, 0.25, 0.3, 0.5, 0.7, 0.75, 0.9, 100, '%Y-%m-%d %H:%M:%S', 'address', 'age', 'amount', 'assigned', 'avg_length', 'categorical', 'category', 'class', 'closed', 'column_count', 'columns', 'completed', 'cost', 'count', 'created', 'customer', 'date', 'date_range_days', 'datetime', 'days', 'duration', 'email', 'end', 'file_path', 'gender', 'id', 'issue', 'max', 'max_date', 'max_length', 'mean', 'median', 'min', 'min_date', 'modified', 'most_common', 'most_common_count', 'name', 'number', 'numeric', 'order', 'phone', 'price', 'priority', 'problem', 'product', 'q25', 'q75', 'quantity', 'registration', 'revenue', 'row_count', 'sales', 'start', 'state', 'status', 'std', 'text', 'ticket', 'time', 'type', 'unknown', 'updated', 'user', '。', '一般', '优秀', '创建时间', '完成时间', '工单数据', '数量', '时长', '时间字段', '更新时间', '未知类型数据', '标识符', '状态', '用户数据', '类型/分类', '良好', '较差', '金额', '销售数据']
--- a/.hypothesis/constants/de9390680a26147e
+++ b/.hypothesis/constants/de9390680a26147e
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\task_execution.py
 # hypothesis_version: 6.151.9
 [0.7, 500, 1000, 3000, 'OPENAI_API_KEY', '\\[.*\\]', '\\{.*\\}', 'action', 'content', 'data', 'error', 'gpt-4', 'is_completed', 'observation', 'params', 'reasoning', 'result', 'role', 'selected_tool', 'success', 'system', 'thought', 'tool', 'tool_params', 'type', 'user', 'visualization_path']
--- a/.hypothesis/constants/e300194a1061558e
+++ b/.hypothesis/constants/e300194a1061558e
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\data_access.py
 # hypothesis_version: 6.151.9
 [0.0, 0.05, 100, 1024, 1000000, 'DataAccessLayer', 'categorical', 'datetime', 'error', 'gb2312', 'gbk', 'iso-8859-1', 'latin1', 'max', 'mean', 'median', 'min', 'num_categories', 'numeric', 'object', 'records', 'std', 'success', 'text', 'tool', 'top_values', 'unknown', 'utf-8']
--- a/.hypothesis/constants/e9c900e698ec3af4
+++ b/.hypothesis/constants/e9c900e698ec3af4
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\models\__init__.py
 # hypothesis_version: 6.151.9
 ['AnalysisObjective', 'AnalysisPlan', 'AnalysisResult', 'AnalysisTask', 'ColumnInfo', 'DataProfile', 'RequirementSpec']
--- a/.hypothesis/constants/f2abc17af6ccbf95
+++ b/.hypothesis/constants/f2abc17af6ccbf95
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\engines\__init__.py
 # hypothesis_version: 6.151.9
 []
--- a/.hypothesis/constants/f9ddb173be0bc253
+++ b/.hypothesis/constants/f9ddb173be0bc253
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\models\analysis_plan.py
 # hypothesis_version: 6.151.9
 ['AnalysisPlan', 'AnalysisTask', 'created_at', 'estimated_duration', 'objectives', 'pending', 'tasks', 'tool_config', 'updated_at']
--- a/.hypothesis/constants/fb4664b8fcae11c1
+++ b/.hypothesis/constants/fb4664b8fcae11c1
@@ -0,0 +1,4 @@
 # file: D:\code\iov_data_analysis_agent_old\src\tools\viz_tools.py
 # hypothesis_version: 6.151.9
 [0.3, 0.8, 100, 1000, '%1.1f%%', '.2f', 'Agg', 'DejaVu Sans', 'SimHei', 'X轴列名（分类变量）', 'X轴列名（通常是时间）', 'Y轴列名（数值变量）', 'Y轴列名（数值变量），如果为空则计数', 'array', 'auto', 'axes.unicode_minus', 'bar', 'bar_chart.png', 'black', 'bold', 'categories', 'center', 'chart_path', 'chart_type', 'column', 'columns', 'coolwarm', 'create_bar_chart', 'create_heatmap', 'create_line_chart', 'create_pie_chart', 'data_points', 'default', 'description', 'error', 'font.sans-serif', 'heatmap', 'heatmap.png', 'integer', 'items', 'line', 'line_chart.png', 'method', 'numeric', 'o', 'object', 'output_path', 'pearson', 'pie', 'pie_chart.png', 'properties', 'required', 'right', 'shrink', 'string', 'success', 'tight', 'title', 'top_n', 'type', 'white', 'x', 'x_column', 'y_column', '其他', '创建饼图，用于展示各部分占整体的比例。', '只显示前N个类别', "只显示前N个类别，其余归为'其他'", '图表标题', '折线图', '柱状图', '相关性热力图', '至少需要两个数值列来创建热力图', '要分析的列名', '计数', '输出文件路径', '饼图']
--- a/.hypothesis/examples/01b85c636eba6742/1037a70c4c2ed4f4
+++ b/.hypothesis/examples/01b85c636eba6742/1037a70c4c2ed4f4
--- a/.hypothesis/examples/04e6b3400353b141/01b85c636eba6742
+++ b/.hypothesis/examples/04e6b3400353b141/01b85c636eba6742
--- a/.hypothesis/examples/04e6b3400353b141/1cd770e72a9295de
+++ b/.hypothesis/examples/04e6b3400353b141/1cd770e72a9295de
@@ -0,0 +1 @@
 ýû¥ÊhØG2ÔK|Qò5ò™A™s#¬„ä¿%Bw”·ïÅ]ÚhX<>¬.secondary
--- a/.hypothesis/examples/04e6b3400353b141/374c9f5a6c41b2f2
+++ b/.hypothesis/examples/04e6b3400353b141/374c9f5a6c41b2f2
@@ -0,0 +1 @@
 ©όΌc<EFBFBD>ƒ<EFBFBD>Y%Ώο$π†vο<>ί΅Λ/
--- a/.hypothesis/examples/04e6b3400353b141/63d04e6f43cafacd
+++ b/.hypothesis/examples/04e6b3400353b141/63d04e6f43cafacd
@@ -0,0 +1 @@
 ýűĄĘhŘG2ÔK|Qň5ň™A™s#¬„äż%Bw”·ďĹ]ÚhXť¬
--- a/.hypothesis/examples/04e6b3400353b141/6ecb0a52a9d3487e
+++ b/.hypothesis/examples/04e6b3400353b141/6ecb0a52a9d3487e
@@ -0,0 +1 @@
 ©ü¼cŸƒˆY%¿ï$ð†vïšß¡Ë/
--- a/.hypothesis/examples/04e6b3400353b141/dd5302cfa7abab2e
+++ b/.hypothesis/examples/04e6b3400353b141/dd5302cfa7abab2e
@@ -0,0 +1 @@
 Sđ‹2ť<EFBFBD> ëo¬÷Z@±ŹĘŹ°'ôŮ±‰rčX·I–ĐÍÔÍ+Ť˝<>ßôŇ§×‘©›2
--- a/.hypothesis/examples/1cd770e72a9295de/0e6df42f15bb2a32
+++ b/.hypothesis/examples/1cd770e72a9295de/0e6df42f15bb2a32
--- a/.hypothesis/examples/1cd770e72a9295de/0f53ba841b413f09
+++ b/.hypothesis/examples/1cd770e72a9295de/0f53ba841b413f09
--- a/.hypothesis/examples/1cd770e72a9295de/19732b8ef01e505a
+++ b/.hypothesis/examples/1cd770e72a9295de/19732b8ef01e505a
--- a/.hypothesis/examples/1cd770e72a9295de/25d97624a3342811
+++ b/.hypothesis/examples/1cd770e72a9295de/25d97624a3342811
--- a/.hypothesis/examples/1cd770e72a9295de/27eb7b1998751853
+++ b/.hypothesis/examples/1cd770e72a9295de/27eb7b1998751853
--- a/.hypothesis/examples/1cd770e72a9295de/39b7cea2c2d9f257
+++ b/.hypothesis/examples/1cd770e72a9295de/39b7cea2c2d9f257
--- a/.hypothesis/examples/1cd770e72a9295de/490c1f29ec0c2dfd
+++ b/.hypothesis/examples/1cd770e72a9295de/490c1f29ec0c2dfd
--- a/.hypothesis/examples/1cd770e72a9295de/4e73ad2c677d4029
+++ b/.hypothesis/examples/1cd770e72a9295de/4e73ad2c677d4029
--- a/.hypothesis/examples/1cd770e72a9295de/54c86f3d9209752f
+++ b/.hypothesis/examples/1cd770e72a9295de/54c86f3d9209752f
--- a/.hypothesis/examples/1cd770e72a9295de/5d86183260475e7a
+++ b/.hypothesis/examples/1cd770e72a9295de/5d86183260475e7a
--- a/.hypothesis/examples/1cd770e72a9295de/6dcbe1697d947e99
+++ b/.hypothesis/examples/1cd770e72a9295de/6dcbe1697d947e99
--- a/.hypothesis/examples/1cd770e72a9295de/74e9341346415f77
+++ b/.hypothesis/examples/1cd770e72a9295de/74e9341346415f77
--- a/.hypothesis/examples/1cd770e72a9295de/93518e3fd70f7996
+++ b/.hypothesis/examples/1cd770e72a9295de/93518e3fd70f7996
--- a/.hypothesis/examples/1cd770e72a9295de/93c6f1809c820e71
+++ b/.hypothesis/examples/1cd770e72a9295de/93c6f1809c820e71
--- a/.hypothesis/examples/1cd770e72a9295de/949a8b1838e5ead2
+++ b/.hypothesis/examples/1cd770e72a9295de/949a8b1838e5ead2
--- a/.hypothesis/examples/1cd770e72a9295de/99ca8a33d0efc425
+++ b/.hypothesis/examples/1cd770e72a9295de/99ca8a33d0efc425
--- a/.hypothesis/examples/1cd770e72a9295de/9b07bc3cd80884fb
+++ b/.hypothesis/examples/1cd770e72a9295de/9b07bc3cd80884fb
--- a/.hypothesis/examples/1cd770e72a9295de/a3e9300f198f00cb
+++ b/.hypothesis/examples/1cd770e72a9295de/a3e9300f198f00cb
--- a/.hypothesis/examples/1cd770e72a9295de/a55df545ac44ad6f
+++ b/.hypothesis/examples/1cd770e72a9295de/a55df545ac44ad6f
--- a/.hypothesis/examples/1cd770e72a9295de/a623cf434b5dd90c
+++ b/.hypothesis/examples/1cd770e72a9295de/a623cf434b5dd90c
--- a/.hypothesis/examples/1cd770e72a9295de/bab0fb21ed17541e
+++ b/.hypothesis/examples/1cd770e72a9295de/bab0fb21ed17541e
--- a/.hypothesis/examples/1cd770e72a9295de/d7c3bf74cd9835f5
+++ b/.hypothesis/examples/1cd770e72a9295de/d7c3bf74cd9835f5
--- a/.hypothesis/examples/1cd770e72a9295de/e1faaba2498903da
+++ b/.hypothesis/examples/1cd770e72a9295de/e1faaba2498903da
--- a/.hypothesis/examples/374c9f5a6c41b2f2/9317a95d1109835e
+++ b/.hypothesis/examples/374c9f5a6c41b2f2/9317a95d1109835e
--- a/.hypothesis/examples/63d04e6f43cafacd/89509f5523b118f3
+++ b/.hypothesis/examples/63d04e6f43cafacd/89509f5523b118f3
--- a/.hypothesis/examples/6ecb0a52a9d3487e/aaf8e354f9f2298f
+++ b/.hypothesis/examples/6ecb0a52a9d3487e/aaf8e354f9f2298f
--- a/.hypothesis/examples/dd5302cfa7abab2e/f448f54a84e8fd97
+++ b/.hypothesis/examples/dd5302cfa7abab2e/f448f54a84e8fd97
--- a/.hypothesis/tmp/tmp22v0flx7
+++ b/.hypothesis/tmp/tmp22v0flx7
--- a/.hypothesis/tmp/tmp35gexqws
+++ b/.hypothesis/tmp/tmp35gexqws
--- a/.hypothesis/tmp/tmp416ed4us
+++ b/.hypothesis/tmp/tmp416ed4us
--- a/.hypothesis/tmp/tmp5lzv541m
+++ b/.hypothesis/tmp/tmp5lzv541m
--- a/.hypothesis/tmp/tmp5vcs3okn
+++ b/.hypothesis/tmp/tmp5vcs3okn
--- a/.hypothesis/tmp/tmp8btfn_uy
+++ b/.hypothesis/tmp/tmp8btfn_uy
--- a/.hypothesis/tmp/tmp8qchuu3b
+++ b/.hypothesis/tmp/tmp8qchuu3b
--- a/.hypothesis/tmp/tmpddxz1dzy
+++ b/.hypothesis/tmp/tmpddxz1dzy
--- a/.hypothesis/tmp/tmpfswws739
+++ b/.hypothesis/tmp/tmpfswws739
--- a/.hypothesis/tmp/tmpfvexlsh6
+++ b/.hypothesis/tmp/tmpfvexlsh6
--- a/.hypothesis/tmp/tmpg2sxn863
+++ b/.hypothesis/tmp/tmpg2sxn863
--- a/.hypothesis/tmp/tmpg4h1cymr
+++ b/.hypothesis/tmp/tmpg4h1cymr
--- a/.hypothesis/tmp/tmph5w2g0pf
+++ b/.hypothesis/tmp/tmph5w2g0pf
--- a/.hypothesis/tmp/tmplgn__bn1
+++ b/.hypothesis/tmp/tmplgn__bn1
--- a/.hypothesis/tmp/tmpomizu2_b
+++ b/.hypothesis/tmp/tmpomizu2_b
--- a/.hypothesis/tmp/tmpq86_9tua
+++ b/.hypothesis/tmp/tmpq86_9tua
--- a/.hypothesis/tmp/tmps6_o9dd7
+++ b/.hypothesis/tmp/tmps6_o9dd7
--- a/.hypothesis/tmp/tmptr3r_843
+++ b/.hypothesis/tmp/tmptr3r_843
--- a/.hypothesis/tmp/tmpud_es0fv
+++ b/.hypothesis/tmp/tmpud_es0fv
--- a/.hypothesis/tmp/tmpur901c_q
+++ b/.hypothesis/tmp/tmpur901c_q
--- a/.hypothesis/tmp/tmpzbtiep8n
+++ b/.hypothesis/tmp/tmpzbtiep8n
--- a/.hypothesis/unicode_data/14.0.0/charmap.json.gz
+++ b/.hypothesis/unicode_data/14.0.0/charmap.json.gz
--- a/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz
+++ b/.hypothesis/unicode_data/14.0.0/codec-utf-8.json.gz
--- a/.kiro/hooks/code-quality-review.kiro.hook
+++ b/.kiro/hooks/code-quality-review.kiro.hook
@@ -0,0 +1,15 @@
 {
  "enabled": true,
  "name": "Code Quality Review",
  "description": "在 git push 前手动触发，审核刚刚编辑的 Python 文件的代码质量",
  "version": "1",
  "when": {
    "type": "userTriggered"
  },
  "then": {
    "type": "askAgent",
    "prompt": "审核刚刚编辑的 Python 文件，检查以下代码质量问题并给出具体改进建议：\n1. 命名规范（变量、函数、类名是否符合 PEP8）\n2. 函数复杂度（是否过长或逻辑过于复杂）\n3. 错误处理（是否有适当的异常处理）\n4. 代码重复（是否有可以抽取的重复逻辑）\n5. 注释和文档字符串是否完整\n请直接指出问题所在的具体行，并给出修改建议。"
  },
  "workspaceFolderName": "iov_data_analysis_agent_old",
  "shortName": "code-quality-review"
 }
--- a/.kiro/specs/true-ai-agent/design.md
+++ b/.kiro/specs/true-ai-agent/design.md
--- a/.kiro/specs/true-ai-agent/requirements.md
+++ b/.kiro/specs/true-ai-agent/requirements.md
@@ -0,0 +1,447 @@
 # 真正的 AI 数据分析 Agent - 需求文档
 ## 1. 项目背景
 ### 1.1 当前问题
 现有系统是"四不像"：
 - 任务规划：基于模板的规则生成（固定90个任务）
 - 任务执行：AI 驱动的 ReAct 模式
 - 结果：规则 + AI = 不协调、不灵活
 ### 1.2 核心问题
 **用户的真实需求**：
 > "我有数据，帮我分析一下"
 > "我想了解工单的健康度"
 > "按照这个模板分析，但要灵活调整"
 **系统应该做什么**：
 - 像人类分析师一样理解数据
 - 自主决定分析什么
 - 根据发现调整分析计划
 - 生成有洞察力的报告
 **而不是**：
 - 机械地执行固定任务
 - 死板地按模板填空
 ## 2. 用户故事
 ### 2.1 场景1：完全自主分析
 **作为** 数据分析师  
 **我想要** 上传数据文件，让 AI 自动分析  
 **以便** 快速了解数据的关键信息
 **验收标准**：
 - AI 能识别数据类型（工单、销售、用户等）
 - AI 能推断关键字段的业务含义
 - AI 能自主决定分析维度
 - AI 能生成合理的分析计划
 - AI 能执行分析并生成报告
 - 报告包含关键发现和洞察
 **示例**：
 ```
 输入：cleaned_data.csv
 输出：
  - 数据类型：工单数据
  - 关键发现：
    * 待处理工单占比50%（异常高）
    * 某车型问题占比80%
    * 平均处理时长超过标准2倍
  - 建议：优先处理该车型的积压工单
 ```
 ### 2.2 场景2：指定分析方向
 **作为** 业务负责人  
 **我想要** 指定分析方向（如"健康度"）  
 **以便** 获得针对性的分析结果
 **验收标准**：
 - AI 能理解"健康度"的业务含义
 - AI 能将抽象概念转化为具体指标
 - AI 能根据数据特征选择合适的分析方法
 - AI 能生成针对性的报告
 **示例**：
 ```
 输入：
  - 数据：cleaned_data.csv
  - 需求："我想了解工单的健康度"
 AI 理解：
  - 健康度 = 关闭率 + 处理效率 + 积压情况 + 响应及时性
 AI 分析：
  - 关闭率：75%（中等）
  - 平均处理时长：48小时（偏长）
  - 积压工单：50%（严重）
  - 健康度评分：60/100（需改进）
 ```
 ### 2.3 场景3：参考模板分析
 **作为** 数据分析师  
 **我想要** 使用模板作为参考框架  
 **以便** 保持报告结构的一致性，同时保持灵活性
 **验收标准**：
 - AI 能理解模板的结构和要求
 - AI 能检查数据是否满足模板要求
 - 如果数据缺少某些字段，AI 能灵活调整
 - AI 能按模板结构组织报告
 - AI 不会因为数据不完全匹配而失败
 **示例**：
 ```
 输入：
  - 数据：cleaned_data.csv
  - 模板：issue_analysis.md（要求14个图表）
 AI 检查：
  - 模板要求"严重程度分布"，但数据中没有"严重程度"字段
  - 决策：跳过该分析，在报告中说明
 AI 调整：
  - 执行其他13个分析
  - 报告中注明："数据缺少严重程度字段，无法分析该维度"
 ```
 ### 2.4 场景4：迭代深入分析
 **作为** 数据分析师  
 **我想要** AI 能根据发现深入分析  
 **以便** 找到问题的根因
 **验收标准**：
 - AI 能识别异常或关键发现
 - AI 能自主决定是否需要深入分析
 - AI 能动态调整分析计划
 - AI 能追踪问题的根因
 **示例**：
 ```
 初步分析：
  - 发现：待处理工单占比50%（异常高）
 AI 决策：需要深入分析
 深入分析1：
  - 分析待处理工单的特征
  - 发现：某车型占80%
 AI 决策：继续深入
 深入分析2：
  - 分析该车型的问题类型
  - 发现：都是"远程控制"问题
 AI 决策：继续深入
 深入分析3：
  - 分析"远程控制"问题的模块分布
  - 发现：90%是"车门模块"
 结论：车门模块的远程控制功能存在系统性问题
 ```
 ## 3. 功能需求
 ### 3.1 数据理解（Data Understanding）
 **FR-1.1 数据加载**
 - 系统应支持 CSV 格式数据
 - 系统应自动检测编码（UTF-8, GBK等）
 - 系统应处理常见的数据格式问题
 **FR-1.2 数据类型识别**
 - AI 应分析列名、数据类型、值分布
 - AI 应推断数据的业务类型（工单、销售、用户等）
 - AI 应识别关键字段（时间、状态、分类、数值）
 **FR-1.3 字段含义理解**
 - AI 应推断每个字段的业务含义
 - AI 应识别字段之间的关系
 - AI 应识别可能的分析维度
 **FR-1.4 数据质量评估**
 - AI 应检查缺失值
 - AI 应检查异常值
 - AI 应评估数据质量分数
 ### 3.2 需求理解（Requirement Understanding）
 **FR-2.1 自主需求推断**
 - 当用户未指定需求时，AI 应根据数据类型推断常见分析需求
 - AI 应生成默认的分析目标
 **FR-2.2 用户需求理解**
 - AI 应理解用户的自然语言需求
 - AI 应将抽象概念转化为具体指标
 - AI 应判断数据是否支持用户需求
 **FR-2.3 模板理解**
 - AI 应解析模板结构
 - AI 应理解模板要求的指标和图表
 - AI 应检查数据是否满足模板要求
 - AI 应在数据不满足时灵活调整
 ### 3.3 分析规划（Analysis Planning）
 **FR-3.1 动态任务生成**
 - AI 应根据数据特征和需求生成分析任务
 - 任务应是动态的，不是固定的
 - 任务应包含优先级和依赖关系
 **FR-3.2 任务优先级**
 - AI 应根据重要性排序任务
 - 必需的分析应优先执行
 - 可选的分析应后执行
 **FR-3.3 计划调整**
 - AI 应能根据中间结果调整计划
 - AI 应能增加新的深入分析任务
 - AI 应能跳过不适用的任务
 ### 3.4 工具集管理（Tool Management）
 **FR-4.1 预设工具集**
 - 系统应提供基础数据分析工具集
 - 基础工具包括：数据查询、统计分析、可视化、数据清洗
 - 工具应有标准的接口和描述
 **FR-4.2 动态工具调整**
 - AI 应根据数据特征决定需要哪些工具
 - AI 应根据分析需求动态启用/禁用工具
 - AI 应能识别缺少的工具并请求添加
 **FR-4.3 工具适配**
 - AI 应根据数据类型调整工具参数
 - 例如：时间序列数据 → 启用趋势分析工具
 - 例如：分类数据 → 启用分布分析工具
 - 例如：地理数据 → 启用地图可视化工具
 **FR-4.4 自定义工具生成**
 - AI 应能根据特定需求生成临时工具
 - AI 应能组合现有工具创建新功能
 - 自定义工具应在分析结束后可选保留
 **示例**：
 ```
 数据特征：
  - 包含时间字段（created_at, closed_at）
  - 包含分类字段（status, type, model）
  - 包含数值字段（duration）
 AI 决策：
  - 启用工具：时间序列分析、分类分布、数值统计
  - 禁用工具：地理分析（无地理字段）
  - 生成工具：计算处理时长（closed_at - created_at）
 ```
 ### 3.5 分析执行（Analysis Execution）
 **FR-5.1 ReAct 执行模式**
 - 每个任务应使用 ReAct 模式执行
 - AI 应思考 → 行动 → 观察 → 判断
 - AI 应能从错误中学习
 **FR-5.2 工具调用**
 - AI 应从可用工具集中选择合适的工具
 - AI 应能组合多个工具完成复杂任务
 - AI 应能处理工具调用失败的情况
 **FR-5.3 结果验证**
 - AI 应验证每个任务的结果
 - AI 应识别异常结果
 - AI 应决定是否需要重试或调整
 **FR-5.4 迭代深入**
 - AI 应识别关键发现
 - AI 应决定是否需要深入分析
 - AI 应动态增加深入分析任务
 ### 3.6 报告生成（Report Generation）
 **FR-6.1 关键发现提炼**
 - AI 应从所有结果中提炼关键发现
 - AI 应识别异常和趋势
 - AI 应提供洞察而不是简单罗列数据
 **FR-6.2 报告结构组织**
 - AI 应根据分析内容组织报告结构
 - 如果有模板，应参考模板结构
 - 如果没有模板，应生成合理的结构
 **FR-6.3 结论和建议**
 - AI 应基于分析结果得出结论
 - AI 应提供可操作的建议
 - AI 应说明建议的依据
 **FR-6.4 多格式输出**
 - 系统应生成 Markdown 格式报告
 - 系统应支持导出为 Word 文档（可选）
 - 报告应包含所有生成的图表
 ## 4. 非功能需求
 ### 4.1 性能需求
 **NFR-1.1 响应时间**
 - 数据理解阶段：< 30秒
 - 分析规划阶段：< 60秒
 - 单个任务执行：< 120秒
 - 完整分析流程：< 30分钟（取决于数据大小和任务数量）
 **NFR-1.2 数据规模**
 - 支持最大 100MB 的 CSV 文件
 - 支持最大 100万行数据
 - 支持最大 100列
 ### 4.2 可靠性需求
 **NFR-2.1 错误处理**
 - AI 调用失败时应有降级策略
 - 单个任务失败不应影响整体流程
 - 系统应记录详细的错误日志
 **NFR-2.2 数据安全**
 - 数据应在本地处理，不上传到外部服务
 - 生成的报告应保存在用户指定的目录
 - 敏感信息应脱敏处理
 ### 4.3 可用性需求
 **NFR-3.1 易用性**
 - 用户只需提供数据文件即可开始分析
 - 分析过程应显示进度和状态
 - 错误信息应清晰易懂
 **NFR-3.2 可观察性**
 - 系统应显示 AI 的思考过程
 - 系统应显示每个阶段的进度
 - 系统应记录完整的执行日志
 ### 4.4 可扩展性需求
 **NFR-4.1 工具扩展**
 - 应易于添加新的分析工具
 - 工具应有标准接口
 - AI 应能自动发现和使用新工具
 - 工具应支持热加载，无需重启系统
 **NFR-4.2 工具动态性**
 - 工具集应根据数据特征动态调整
 - 工具参数应根据数据类型自适应
 - 系统应支持运行时生成临时工具
 **NFR-4.3 模型扩展**
 - 应支持不同的 LLM 提供商
 - 应支持本地模型和云端模型
 - 应支持模型切换
 ## 5. 约束条件
 ### 5.1 技术约束
 - 使用 Python 3.8+
 - 使用 OpenAI 兼容的 LLM API
 - 使用 pandas 进行数据处理
 - 使用 matplotlib 进行可视化
 ### 5.2 业务约束
 - 系统应在离线环境下工作（除 LLM 调用外）
 - 系统不应依赖特定的数据格式或业务领域
 - 系统应保持通用性，适用于各种数据分析场景
 ### 5.3 隐私和安全约束
 **数据隐私保护**：
 - AI 不能访问完整的原始数据内容
 - AI 只能读取：
  - 表头（列名）
  - 数据类型信息
  - 基本统计摘要（行数、列数、缺失值比例、数据类型分布）
  - 工具执行后的聚合结果（如分组统计结果、图表数据）
 - 所有原始数据处理必须在本地完成，不发送给 LLM
 - AI 通过调用本地工具来分析数据，工具返回摘要结果而非原始数据
 ### 5.3 隐私和安全约束
 **数据隐私保护**：
 - AI 不能访问完整的原始数据内容
 - AI 只能读取：
  - 表头（列名）
  - 数据类型信息
  - 基本统计摘要（行数、列数、缺失值比例、数据类型分布）
  - 工具执行后的聚合结果（如分组统计结果、图表数据）
 - 所有原始数据处理必须在本地完成，不发送给 LLM
 - AI 通过调用本地工具来分析数据，工具返回摘要结果而非原始数据
 ## 6. 验收标准
 ### 6.1 场景1验收
 - [ ] 上传任意 CSV 文件，AI 能识别数据类型
 - [ ] AI 能自主生成分析计划
 - [ ] AI 能执行分析并生成报告
 - [ ] 报告包含关键发现和洞察
 ### 6.2 场景2验收
 - [ ] 指定"健康度"等抽象需求，AI 能理解
 - [ ] AI 能生成相关指标
 - [ ] AI 能执行针对性分析
 - [ ] 报告聚焦于用户需求
 ### 6.3 场景3验收
 - [ ] 提供模板，AI 能理解模板要求
 - [ ] 数据缺少字段时，AI 能灵活调整
 - [ ] 报告按模板结构组织
 - [ ] 报告说明哪些分析被跳过及原因
 ### 6.4 场景4验收
 - [ ] AI 能识别异常发现
 - [ ] AI 能自主决定深入分析
 - [ ] AI 能动态调整分析计划
 - [ ] 报告包含深入分析的结果
 ### 6.5 工具动态性验收
 - [ ] 系统根据数据特征自动启用相关工具
 - [ ] 系统根据数据特征自动禁用无关工具
 - [ ] AI 能识别需要但缺失的工具
 - [ ] AI 能生成临时工具满足特定需求
 - [ ] 工具参数根据数据类型自动调整
 ## 7. 成功指标
 ### 7.1 功能指标
 - 数据类型识别准确率 > 90%
 - 字段含义推断准确率 > 80%
 - 分析计划合理性（人工评估）> 85%
 - 报告质量（人工评估）> 80%
 ### 7.2 性能指标
 - 完整分析流程完成率 > 95%
 - AI 调用成功率 > 90%
 ### 7.3 用户满意度
 - 用户认为分析结果有价值 > 80%
 - 用户愿意再次使用 > 85%
 - 用户推荐给他人 > 75%
 ---
 **版本**: v3.0.0  
 **日期**: 2026-03-06  
 **状态**: 需求定义完成
--- a/.kiro/specs/true-ai-agent/tasks.md
+++ b/.kiro/specs/true-ai-agent/tasks.md
@@ -0,0 +1,458 @@
 # 实施计划：真正的 AI 数据分析 Agent
 ## 概述
 本实施计划将设计转化为具体的编码任务。系统采用五阶段流水线架构，每个阶段由 AI 驱动，具有自主决策能力。实施将按照从核心数据结构到各个引擎组件，最后到集成的顺序进行。
 ## 任务列表
 - [x] 1. 搭建项目结构和核心数据模型
  - 创建项目目录结构（src/models, src/engines, src/tools, tests）
  - 定义核心数据类（DataProfile, ColumnInfo, RequirementSpec, AnalysisObjective, AnalysisPlan, AnalysisTask, AnalysisResult）
  - 实现数据类的序列化和反序列化方法
  - 设置测试框架（pytest, hypothesis）
  - _需求：FR-1.1, FR-1.2_
 - [x] 2. 实现数据访问层和隐私保护机制
  - [x] 2.1 实现 DataAccessLayer 类
    - 实现数据加载功能（支持多种编码）
    - 实现数据画像生成（不暴露原始数据）
    - 实现结果过滤机制（sanitize_result）
    - _需求：约束条件5.3_
  - [x] 2.2 编写属性测试：数据访问限制
    - **属性 18：数据访问限制**
    - **验证需求：约束条件5.3**
  - [x] 2.3 编写单元测试
    - 测试多种编码的数据加载
    - 测试空文件和格式错误的处理
    - 测试结果过滤功能
 - [x] 3. 实现工具系统基础设施
  - [x] 3.1 定义工具接口（AnalysisTool 抽象类）
    - 定义标准接口（name, description, parameters, execute, is_applicable）
    - 实现工具注册机制
    - _需求：FR-4.1_
  - [x] 3.2 实现基础数据查询工具
    - 实现 get_column_distribution 工具
    - 实现 get_value_counts 工具
    - 实现 get_time_series 工具
    - 实现 get_correlation 工具
    - 确保所有工具返回聚合数据而非原始数据
    - _需求：FR-4.1, 约束条件5.3_
  - [x] 3.3 实现基础统计分析工具
    - 实现 calculate_statistics 工具
    - 实现 perform_groupby 工具
    - 实现 detect_outliers 工具
    - 实现 calculate_trend 工具
    - _需求：FR-4.1_
  - [x] 3.4 编写属性测试：工具接口一致性和输出过滤
    - **属性 10：工具接口一致性**
    - **属性 19：工具输出过滤**
    - **验证需求：FR-4.1, 约束条件5.3**
  - [x] 3.5 编写单元测试
    - 测试每个工具的基本功能
    - 测试工具参数验证
    - 测试工具执行错误处理
 - [x] 4. 实现可视化工具
  - [x] 4.1 实现图表生成工具
    - 实现 create_bar_chart 工具
    - 实现 create_line_chart 工具
    - 实现 create_pie_chart 工具
    - 实现 create_heatmap 工具
    - 实现 ai_picture 工具（依据数据特性画图）
    - 使用 matplotlib 生成图表并保存为文件
    - _需求：FR-4.1_
  - [x] 4.2 编写单元测试
    - 测试图表生成功能
    - 测试图表文件保存
 - [x] 5. 检查点 - 确保工具系统测试通过
  - 确保所有测试通过，如有问题请询问用户
 - [x] 6. 实现工具管理器
  - [x] 6.1 实现 ToolManager 类
    - 实现工具选择逻辑（select_tools 方法）
    - 根据数据特征启用/禁用工具
    - 实现工具适用性判断
    - _需求：FR-4.2, FR-4.3_
  - [x] 6.2 实现动态工具调整策略
    - 检查时间字段并启用时间序列工具
    - 检查分类字段并启用分布分析工具
    - 检查数值字段并启用统计工具
    - 检查地理字段并启用地理工具
    - _需求：FR-4.2, FR-4.3_
  - [x] 6.3 编写属性测试：工具选择和适用性
    - **属性 9：工具选择适配性**
    - **属性 11：工具适用性判断**
    - **属性 12：工具需求识别**
    - **验证需求：FR-4.2, FR-4.3, 工具动态性验收.1, .2, .3**
  - [x] 6.4 编写单元测试
    - 测试不同数据特征的工具选择
    - 测试工具适用性判断
 - [x] 7. 实现数据理解引擎
  - [x] 7.1 实现基础统计生成
    - 实现 generate_basic_stats 函数
    - 生成列信息（名称、类型、缺失率、唯一值数量）
    - 生成示例值（每列最多5个）
    - _需求：FR-1.2, FR-1.3_
  - [x] 7.2 实现 AI 驱动的数据理解
    - 实现 understand_data 函数
    - 调用 LLM 推断数据类型
    - 调用 LLM 识别关键字段和业务含义
    - 调用 LLM 评估数据质量
    - 生成 DataProfile 对象
    - _需求：FR-1.2, FR-1.3, FR-1.4_
  - [x] 7.3 编写属性测试：数据理解
    - **属性 1：数据类型识别**
    - **属性 2：数据画像完整性**
    - **验证需求：场景1验收.1, FR-1.2, FR-1.3, FR-1.4**
  - [x] 7.4 编写单元测试
    - 测试工单数据识别
    - 测试销售数据识别
    - 测试数据质量评估
 - [x] 8. 实现需求理解引擎
  - [x] 8.1 实现用户需求解析
    - 实现 understand_requirement 函数
    - 调用 LLM 解析自然语言需求
    - 将抽象概念转化为具体指标
    - 生成 RequirementSpec 对象
    - _需求：FR-2.1, FR-2.2_
  - [x] 8.2 实现模板解析功能
    - 实现 parse_template 函数
    - 解析模板文件结构
    - 提取模板要求的指标和图表
    - _需求：FR-2.3_
  - [x] 8.3 实现数据-需求匹配检查
    - 实现 check_data_requirement_match 函数
    - 检查数据是否满足需求
    - 标记缺失的字段或能力
    - _需求：FR-2.3_
  - [x] 8.4 编写属性测试：需求理解
    - **属性 3：抽象需求转化**
    - **属性 4：模板解析**
    - **属性 5：数据-需求匹配检查**
    - **验证需求：场景2验收.1, .2, 场景3验收.1, .2, FR-2.1, FR-2.2, FR-2.3**
  - [x] 8.5 编写单元测试
    - 测试"健康度"需求的理解
    - 测试模板解析
    - 测试数据不满足需求的情况
 - [x] 9. 检查点 - 确保数据和需求理解测试通过
  - 确保所有测试通过，如有问题请询问用户
 - [x] 10. 实现分析规划引擎
  - [x] 10.1 实现 AI 驱动的任务生成
    - 实现 plan_analysis 函数
    - 调用 LLM 根据数据特征和需求生成任务列表
    - 为每个任务分配优先级
    - 识别任务依赖关系
    - 生成 AnalysisPlan 对象
    - _需求：FR-3.1, FR-3.2_
  - [x] 10.2 实现任务依赖验证
    - 实现 validate_task_dependencies 函数
    - 检查依赖关系是否形成 DAG
    - 检查所有依赖的任务是否存在
    - _需求：FR-3.1_
  - [x] 10.3 编写属性测试：分析规划
    - **属性 6：动态任务生成**
    - **属性 7：任务依赖一致性**
    - **验证需求：场景1验收.2, FR-3.1, FR-3.2**
  - [x] 10.4 编写单元测试
    - 测试任务生成
    - 测试循环依赖检测
    - 测试任务优先级排序
 - [x] 11. 实现任务执行引擎（ReAct 模式）
  - [x] 11.1 实现 ReAct 执行循环
    - 实现 execute_task 函数
    - 实现思考-行动-观察循环
    - 调用 LLM 进行思考和决策
    - 选择并调用工具
    - 记录执行历史
    - 实现循环终止条件（完成或达到最大迭代次数）
    - _需求：FR-5.1_
  - [x] 11.2 实现工具调用和结果处理
    - 实现 call_tool 函数
    - 根据 AI 决策选择工具
    - 传递参数并执行工具
    - 处理工具执行结果
    - _需求：FR-5.2_
  - [x] 11.3 实现洞察提炼
    - 实现 extract_insights 函数
    - 从执行历史中提炼关键发现
    - 识别异常和趋势
    - _需求：FR-5.4_
  - [x] 11.4 编写属性测试：任务执行
    - **属性 13：任务执行完整性**
    - **属性 14：ReAct 循环终止**
    - **属性 15：异常识别**
    - **验证需求：场景1验收.3, 场景4验收.1, FR-5.1**
  - [x] 11.5 编写单元测试
    - 测试 ReAct 循环
    - 测试工具选择和调用
    - 测试异常数据的识别
 - [x] 12. 实现动态计划调整
  - [x] 12.1 实现计划调整逻辑
    - 实现 adjust_plan 函数
    - 分析已完成任务的结果
    - 识别关键发现和异常
    - 决定是否需要深入分析
    - 生成新任务或调整优先级
    - _需求：FR-3.3, FR-5.4_
  - [x] 12.2 编写属性测试：计划调整
    - **属性 8：计划动态调整**
    - **验证需求：场景4验收.2, .3, FR-3.3**
  - [x] 12.3 编写单元测试
    - 测试发现异常后的计划调整
    - 测试新任务的生成
    - 测试任务跳过逻辑
 - [ ] 13. 检查点 - 确保规划和执行引擎测试通过
  - 确保所有测试通过，如有问题请询问用户
 - [x] 14. 实现报告生成引擎
  - [x] 14.1 实现关键发现提炼
    - 实现 extract_key_findings 函数
    - 从所有分析结果中提炼关键发现
    - 识别最重要的异常和趋势
    - 排序和优先级排列
    - _需求：FR-6.1_
  - [x] 14.2 实现报告结构组织
    - 实现 organize_report_structure 函数
    - 根据分析内容组织报告结构
    - 如果有模板，参考模板结构
    - 如果没有模板，生成合理的结构
    - _需求：FR-6.2_
  - [x] 14.3 实现 AI 驱动的报告生成
    - 实现 generate_report 函数
    - 调用 LLM 生成报告内容
    - 包含执行摘要、详细分析、结论和建议
    - 嵌入图表和可视化
    - 格式化为 Markdown
    - _需求：FR-6.1, FR-6.2, FR-6.3_
  - [x] 14.4 实现报告追溯性
    - 确保报告中的所有发现都能追溯到分析结果
    - 说明哪些分析被跳过及原因
    - _需求：FR-6.1_
  - [x] 14.5 编写属性测试：报告生成
    - **属性 16：报告结构完整性**
    - **属性 17：报告内容追溯性**
    - **验证需求：场景3验收.3, .4, 场景4验收.4, FR-6.1, FR-6.2**
  - [x] 14.6 编写单元测试
    - 测试报告结构生成
    - 测试模板结构遵循
    - 测试跳过分析的说明
 - [x] 15. 实现错误处理机制
  - [x] 15.1 实现数据加载错误处理
    - 实现 load_data_with_retry 函数
    - 支持多种编码尝试
    - 处理文件过大的情况（采样）
    - 处理格式错误
    - _需求：NFR-2.1_
  - [x] 15.2 实现 AI 调用错误处理
    - 实现 call_llm_with_fallback 函数
    - 实现重试机制（指数退避）
    - 实现降级策略（规则方法）
    - _需求：NFR-2.1_
  - [x] 15.3 实现工具执行错误处理
    - 实现 execute_tool_safely 函数
    - 验证工具参数
    - 捕获执行异常
    - 返回错误信息而不是崩溃
    - _需求：NFR-2.1_
  - [x] 15.4 实现任务执行错误处理
    - 实现 execute_task_with_recovery 函数
    - 检查依赖任务状态
    - 处理依赖失败的情况
    - 单个任务失败不影响整体流程
    - _需求：NFR-2.1_
  - [x] 15.5 编写单元测试
    - 测试各种错误场景
    - 测试重试机制
    - 测试降级策略
    - 测试错误恢复
 - [x] 16. 实现主流程编排
  - [x] 16.1 实现完整分析流程
    - 实现 run_analysis 主函数
    - 编排五个阶段的执行顺序
    - 处理阶段之间的数据传递
    - 实现进度显示
    - _需求：所有功能需求_
  - [x] 16.2 实现命令行接口
    - 实现 CLI 参数解析
    - 支持指定数据文件
    - 支持指定用户需求
    - 支持指定模板文件
    - 支持指定输出目录
    - _需求：NFR-3.1_
  - [x] 16.3 实现日志和可观察性
    - 配置日志系统
    - 记录每个阶段的执行状态
    - 显示 AI 的思考过程
    - 记录错误和警告
    - _需求：NFR-3.2_
  - [x] 16.4 编写集成测试
    - 测试端到端分析流程
    - 测试基于模板的分析
    - 测试错误恢复流程
 - [x] 17. 实现配置和环境管理
  - [x] 17.1 创建配置文件
    - 定义 LLM API 配置
    - 定义性能参数（超时、重试次数）
    - 定义输出路径配置
    - _需求：约束条件5.1_
  - [x] 17.2 实现环境变量支持
    - 支持从环境变量读取 API 密钥
    - 支持配置文件覆盖
    - _需求：约束条件5.1_
  - [x] 17.3 编写单元测试
    - 测试配置加载
    - 测试环境变量读取
 - [x] 18. 检查点 - 确保所有测试通过
  - 确保所有测试通过，如有问题请询问用户
 - [x] 19. 创建测试数据和示例
  - [x] 19.1 创建测试数据集
    - 创建工单数据示例（ticket_sample.csv）
    - 创建销售数据示例（sales_sample.csv）
    - 创建用户数据示例（user_sample.csv）
    - 创建包含异常的数据集
    - _需求：验收标准_
  - [x] 19.2 创建分析模板
    - 创建工单分析模板（ticket_analysis.md）
    - 创建问题分析模板（problem_analysis.md）
    - 创建基于数据特征的分析模板（data_analysis.md）
    - _需求：场景3验收_
  - [x] 19.3 编写示例脚本
    - 创建完全自主分析示例
    - 创建指定需求分析示例
    - 创建基于模板分析示例
 - [x] 20. 编写文档
  - [x] 20.1 编写 README
    - 项目介绍
    - 安装说明
    - 使用示例
    - 配置说明
    - _需求：NFR-3.1_
  - [x] 20.2 编写 API 文档
    - 核心类和函数的文档字符串
    - 工具接口文档
    - 配置参数文档
    - _需求：NFR-3.1_
  - [x] 20.3 编写开发者指南
    - 如何添加新工具
    - 如何扩展功能
    - 架构说明
    - _需求：NFR-4.1_
 - [x] 21. 性能优化和验证
  - [x] 21.1 运行性能测试
    - 测试数据理解阶段性能（< 30秒）
    - 测试完整分析流程性能（< 30分钟）
    - 测试大数据集处理（100万行）
    - _需求：NFR-1.1, NFR-1.2_
  - [x] 21.2 优化性能瓶颈
    - 优化数据加载
    - 优化 AI 调用（批处理、缓存）
    - 优化工具执行
    - _需求：NFR-1.1_
  - [x] 21.3 编写性能测试
    - 测试各阶段的性能指标
    - 测试内存使用
 - [x] 22. 最终检查点 - 完整系统验证
  - 运行所有测试套件
  - 验证所有验收标准
  - 运行端到端示例
  - 确保所有测试通过，如有问题请询问用户
 ## 注意事项
 - 所有任务都是必需的，确保从一开始就有完整的测试覆盖
 - 每个任务都引用了具体的需求以便追溯
 - 检查点确保增量验证
 - 属性测试验证通用正确性属性
 - 单元测试验证特定示例和边缘情况
 - 所有属性测试应使用 hypothesis 库，最少运行 100 次迭代
 - 每个属性测试必须包含注释标签：`# Feature: true-ai-agent, Property {number}: {property_text}`
 ## 实施顺序说明
 1. **阶段1（任务1-5）**：搭建基础设施，实现工具系统
 2. **阶段2（任务6-9）**：实现工具管理器、数据理解和需求理解引擎
 3. **阶段3（任务10-13）**：实现分析规划和任务执行引擎
 4. **阶段4（任务14-15）**：实现报告生成和错误处理
 5. **阶段5（任务16-18）**：集成和主流程编排
 6. **阶段6（任务19-22）**：测试数据、文档和性能优化
 各阶段之间设有检查点（任务5、9、13、18、22），确保在继续之前验证功能正确性。
 ---
 **版本**: v1.0.0  
 **日期**: 2026-03-06  
 **状态**: 任务计划完成
--- a/21
+++ b/21
@@ -1,21 +0,0 @@
 MIT License
 Copyright (c) 2025 Data Analysis Agent Team
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -1,357 +1,213 @@
-# 数据分析智能体 (Data Analysis Agent)
+# AI-Driven Data Analysis Framework
-🤖 **基于LLM的智能数据分析代理**
+全自动 AI 数据分析框架。给一个 CSV 文件，AI 自主完成从数据理解到报告生成的全流程。
-[![Python Version](https://img.shields.io/badge/python-3.8%2B-blue.svg)](https://python.org)
+## 核心理念
 [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
 [![OpenAI](https://img.shields.io/badge/LLM-OpenAI%20Compatible-orange.svg)](https://openai.com)
-## 📋 项目简介
+**框架只提供引擎和工具，AI 在运行时做所有决策。**
-![alt text](assets/images/40d04b1dc21848cf9eeac4b50551f2a1.png)
+- 没有硬编码的列名规则、数据类型判断或分析策略
-![alt text](assets/images/d24d6dd97279a27fd8c9d652bac1fdb2.png)
+- AI 只能看到元数据（表头、统计摘要、样本值），永远不接触原始数据行
-数据分析智能体是一个功能强大的Python工具，它结合了大语言模型(LLM)的理解能力和Python数据分析库的计算能力，能够：
+- 对任意 CSV 文件自动适配，无需修改代码
- 🎯 **自然语言分析**：接受用户的自然语言需求，自动生成专业的数据分析代码
+## 工作流程
 - 📊 **智能可视化**：自动生成高质量的图表，支持中文显示，输出到专用目录
 - 🔄 **迭代优化**：基于执行结果自动调整分析策略，持续优化分析质量
 - 📝 **报告生成**：自动生成包含图表和分析结论的专业报告(Markdown + Word)
 - 🛡️ **安全执行**：在受限的环境中安全执行代码，支持常用的数据分析库
 ## 🏗️ 项目架构
 ```
-data_analysis_agent/
+CSV 文件
-├── 📁 config/                 # 配置管理
+  │
-│   ├── __init__.py
+  ▼
-│   └── llm_config.py          # LLM配置(API密钥、模型等)
+[1] AI 数据理解 ─── AI 看元数据，推断数据类型、关键字段、质量评分
-├── 📁 utils/                  # 核心工具模块
+  │
-│   ├── code_executor.py       # 安全的代码执行器
+  ▼
-│   ├── llm_helper.py          # LLM调用辅助类
+[2] 需求理解 ─────── 解析自然语言需求 + 可选模板，生成分析目标
-│   ├── fallback_openai_client.py  # 支持故障转移的OpenAI客户端
+  │
-│   ├── extract_code.py        # 代码提取工具
+  ▼
-│   ├── format_execution_result.py  # 执行结果格式化
+[3] AI 分析规划 ──── AI 根据数据特征和工具库，生成具体任务列表
-│   └── create_session_dir.py  # 会话目录管理
+  │
-├── 📄 data_analysis_agent.py  # 主智能体类
+  ▼
-├── 📄 prompts.py              # 系统提示词模板
+[4] AI 任务执行 ──── ReAct 模式：AI 选工具 → 调用 → 观察结果 → 决定下一步
-├── 📄 main.py                 # 使用示例
+  │
-├── 📄 requirements.txt        # 项目依赖
+  ▼
-├── 📄 .env                    # 环境变量配置
+[5] 报告生成 ─────── AI 生成图文结合的 Markdown 报告
 └── 📁 outputs/                # 分析结果输出目录
    └── session_[时间戳]/        # 每次分析的独立会话目录
        ├── *.png             # 生成的图表
        ├── 最终分析报告.md    # Markdown报告
        └── 最终分析报告.docx  # Word报告
 ```
-## 📊 数据分析流程图
+## 快速开始
-使用Mermaid图表展示完整的数据分析流程：
+### 1. 安装依赖
 ```mermaid
 graph TD
    A[用户输入自然语言需求] --> B[初始化智能体]
    B --> C[创建专用会话目录]
    C --> D[LLM理解需求并生成代码]
    D --> E[安全代码执行器执行]
    E --> F{执行是否成功?}
    F -->|失败| G[错误分析与修复]
    G --> D
    F -->|成功| H[结果格式化与存储]
    H --> I{是否需要更多分析?}
    I -->|是| J[基于当前结果继续分析]
    J --> D
    I -->|否| K[收集所有图表]
    K --> L[生成最终分析报告]
    L --> M[输出Markdown和Word报告]
    M --> N[分析完成]
    style A fill:#e1f5fe
    style N fill:#c8e6c9
    style F fill:#fff3e0
    style I fill:#fff3e0
 ```
 ## 🔄 智能体工作流程
 ```mermaid
 sequenceDiagram
    participant User as 用户
    participant Agent as 数据分析智能体
    participant LLM as 语言模型
    participant Executor as 代码执行器
    participant Storage as 文件存储
    User->>Agent: 提供数据文件和分析需求
    Agent->>Storage: 创建专用会话目录
    loop 多轮分析循环
        Agent->>LLM: 发送分析需求和上下文
        LLM->>Agent: 返回分析代码和推理
        Agent->>Executor: 执行Python代码
        Executor->>Storage: 保存图表文件
        Executor->>Agent: 返回执行结果
        alt 需要继续分析
            Agent->>LLM: 基于结果继续分析
        else 分析完成
            Agent->>LLM: 生成最终报告
            LLM->>Agent: 返回分析报告
            Agent->>Storage: 保存报告文件
        end
    end
    Agent->>User: 返回完整分析结果
 ```
 ## ✨ 核心特性
 ### 🧠 智能分析流程
 - **多阶段分析**：数据探索 → 清洗检查 → 分析可视化 → 图片收集 → 报告生成
 - **错误自愈**：自动检测并修复常见错误（编码、列名、数据类型等）
 - **上下文保持**：Notebook环境中变量和状态在分析过程中持续保持
 ### 📋 多格式报告
 - **Markdown报告**：结构化的分析报告，包含图表引用
 - **Word文档**：专业的文档格式，便于分享和打印
 - **图片集成**：报告中自动引用生成的图表
 ## 🚀 快速开始
 ### 1. 环境准备
 ```bash
 # 克隆项目
 git clone https://github.com/li-xiu-qi/data_analysis_agent.git
 cd data_analysis_agent
 # 安装依赖
 pip install -r requirements.txt
 ```
-### 2. 配置API密钥
+### 2. 配置环境变量
 创建 `.env` 文件：
 ```env
 OPENAI_API_KEY=your-api-key
 OPENAI_BASE_URL=https://api.openai.com/v1
 OPENAI_MODEL=gpt-4
 ```
 支持任何 OpenAI 兼容的 API 服务（通过 OPENAI_BASE_URL 指定自定义端点）。
 ### 3. 运行分析
 ```bash
-# OpenAI API配置
+# 最简用法 — AI 自动决定分析什么、怎么分析
-OPENAI_API_KEY=your_api_key_here
+python run_analysis_en.py --data your_data.csv
 # 指定分析需求
 python run_analysis_en.py --data sales.csv --requirement "分析各产品线的销售趋势和异常"
 # 使用报告模板
 python run_analysis_en.py --data tickets.csv --template templates/ticket_analysis.md
 # 指定输出目录
 python run_analysis_en.py --data data.csv --output my_output
 ```
 ### 4. 查看结果
 每次运行会在输出目录下创建带时间戳的子目录：
 ```
 analysis_output/
  └── run_20260309_143025/
        ├── analysis_report.md    ← 图文结合的分析报告
        └── charts/
              ├── bar_chart.png
              ├── pie_chart.png
              └── ...
 ```
 ## 项目结构
 ```
 ├── run_analysis_en.py          # 主入口（5 阶段 pipeline）
 ├── src/
 │   ├── config.py               # 配置管理（环境变量 / JSON / .env）
 │   ├── data_access.py          # 数据访问层（隐私保护，AI 不可见原始数据）
 │   ├── engines/
 │   │   ├── ai_data_understanding.py   # [阶段1] AI 数据理解
 │   │   ├── requirement_understanding.py # [阶段2] 需求解析
 │   │   ├── analysis_planning.py       # [阶段3] AI 分析规划
 │   │   ├── task_execution.py          # [阶段4] ReAct 任务执行
 │   │   └── report_generation.py       # [阶段5] 报告生成
 │   ├── tools/
 │   │   ├── base.py             # 工具抽象基类 + 注册表
 │   │   ├── tool_manager.py     # 工具筛选（按数据特征过滤）
 │   │   ├── query_tools.py      # 查询工具（分布、计数、时间序列、相关性）
 │   │   ├── stats_tools.py      # 统计工具（描述统计、分组聚合、异常检测、趋势）
 │   │   └── viz_tools.py        # 可视化工具（柱状图、折线图、饼图、热力图）
 │   └── models/                 # 数据模型
 │       ├── data_profile.py     # DataProfile, ColumnInfo
 │       ├── requirement_spec.py # RequirementSpec, AnalysisObjective
 │       ├── analysis_plan.py    # AnalysisPlan, AnalysisTask
 │       └── analysis_result.py  # AnalysisResult
 ├── templates/                  # 报告模板（可选）
 ├── test_data/                  # 示例数据
 └── examples/                   # 使用示例
 ```
 ## 内置工具
 框架提供 12 个分析工具，AI 在运行时自主选择和组合：
 | 类别 | 工具 | 说明 |
 |------|------|------|
 | 查询 | `get_column_distribution` | 列分布统计（值计数、百分比） |
 | 查询 | `get_value_counts` | 唯一值计数 |
 | 查询 | `get_time_series` | 时间序列聚合 |
 | 查询 | `get_correlation` | 相关性矩阵 |
 | 统计 | `calculate_statistics` | 描述性统计（均值、中位数、偏度等） |
 | 统计 | `perform_groupby` | 分组聚合 |
 | 统计 | `detect_outliers` | 异常值检测（IQR / Z-score） |
 | 统计 | `calculate_trend` | 趋势分析（线性回归） |
 | 可视化 | `create_bar_chart` | 柱状图 |
 | 可视化 | `create_line_chart` | 折线图 |
 | 可视化 | `create_pie_chart` | 饼图 |
 | 可视化 | `create_heatmap` | 热力图 |
 ## 隐私保护
 数据访问层（`DataAccessLayer`）是核心安全边界：
 - AI **永远看不到**原始数据行
 - AI 只能通过工具获取**聚合结果**（统计值、分布、图表）
 - 数据画像只包含元数据：列名、数据类型、缺失率、唯一值数、样本值（最多 5 个）
 - 工具返回结果自动截断（最多 100 行），防止数据泄露
 ## 配置
 ### 环境变量（推荐）
 通过 `.env` 文件或系统环境变量配置：
 ```env
 # LLM 配置（必填）
 OPENAI_API_KEY=sk-xxx
 OPENAI_BASE_URL=https://api.openai.com/v1
 OPENAI_MODEL=gpt-4
-# 或者使用兼容的API（如火山引擎）
+# 可选配置
-# OPENAI_BASE_URL=https://ark.cn-beijing.volces.com/api/v3
+LLM_TEMPERATURE=0.7
-# OPENAI_MODEL=deepseek-v3-250324
+LLM_TIMEOUT=120
 AGENT_MAX_ROUNDS=20
 LOG_LEVEL=INFO
 ```
-### 3. 基本使用
+### JSON 配置文件
 也可以使用 `config.example.json` 作为模板创建配置文件。
 ## 报告模板
 可以提供 Markdown 模板来控制报告结构。模板中的占位符会被 AI 用实际分析数据填充。
 参考 `templates/` 目录下的示例模板。
 ## 扩展工具
 实现 `AnalysisTool` 抽象类并注册即可：
 ```python
-from data_analysis_agent import DataAnalysisAgent
+from src.tools.base import AnalysisTool, register_tool
 from config.llm_config import LLMConfig
-# 初始化智能体
+class MyCustomTool(AnalysisTool):
-llm_config = LLMConfig()
+    @property
-agent = DataAnalysisAgent(llm_config)
+    def name(self) -> str:
        return "my_custom_tool"
-# 开始分析
+    @property
-files = ["your_data.csv"]
+    def description(self) -> str:
-report = agent.analyze(
+        return "工具描述（AI 会看到这段文字来决定是否使用）"
    user_input="分析销售数据，生成趋势图表和关键指标",
    files=files
 )
-print(report)
+    @property
-```
+    def parameters(self) -> dict:
-
+        return {
-```python
+            "type": "object",
-# 自定义配置
+            "properties": {
-agent = DataAnalysisAgent(
+                "column": {"type": "string", "description": "列名"}
-    llm_config=llm_config,
+            },
-    output_dir="custom_outputs",  # 自定义输出目录
+            "required": ["column"]
    max_rounds=30                 # 增加最大分析轮数
 )
 # 使用便捷函数
 from data_analysis_agent import quick_analysis
 report = quick_analysis(
    query="分析用户行为数据，重点关注转化率",
    files=["user_behavior.csv"],
    max_rounds=15
 )
 ```
 ## 📊 使用示例
 以下是分析贵州茅台财务数据的完整示例：
 ```python
 # 示例：茅台财务分析
 files = ["贵州茅台利润表.csv"]
 report = agent.analyze(
    user_input="基于贵州茅台的数据，输出五个重要的统计指标，并绘制相关图表。最后生成汇报给我。",
    files=files
 )
 ```
 **生成的分析内容包括：**
 - 📈 营业总收入趋势图
 - 💰 净利润率变化分析
 - 📊 利润构成分析图表
 - 💵 每股收益变化趋势
 - 📋 营业成本占比分析
 - 📄 综合分析报告
 ## 🎨 流程可视化
 ### 📊 分析过程状态图
 ```mermaid
 stateDiagram-v2
    [*] --> 数据加载
    数据加载 --> 数据探索: 成功加载
    数据加载 --> 编码修复: 编码错误
    编码修复 --> 数据探索: 修复完成
    数据探索 --> 数据清洗: 探索完成
    数据清洗 --> 统计分析: 清洗完成
    统计分析 --> 可视化生成: 分析完成
    可视化生成 --> 图表保存: 图表生成
    图表保存 --> 结果评估: 保存完成
    结果评估 --> 继续分析: 需要更多分析
    结果评估 --> 报告生成: 分析充分
    继续分析 --> 统计分析
    报告生成 --> [*]: 完成
 ```
 ## 🔧 配置选项
 ### LLM配置
 ```python
@dataclass
 class LLMConfig:
    provider: str = "openai"
    api_key: str = os.environ.get("OPENAI_API_KEY", "")
    base_url: str = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
    model: str = os.environ.get("OPENAI_MODEL", "gpt-4")
    max_tokens: int = 4000
    temperature: float = 0.1
 ```
 ### 执行器配置
 ```python
 # 允许的库列表
 ALLOWED_IMPORTS = {
    'pandas', 'numpy', 'matplotlib', 'duckdb', 
    'scipy', 'sklearn', 'plotly', 'requests',
    'os', 'json', 'datetime', 're', 'pathlib'
        }
    def execute(self, data, **kwargs) -> dict:
        # 实现分析逻辑，返回聚合结果
        return {"result": "..."}
    def is_applicable(self, data_profile) -> bool:
        return True
 register_tool(MyCustomTool())
 ```
-## 🎯 最佳实践
+注册后，AI 会自动在规划和执行阶段发现并使用新工具。
-### 1. 数据准备
+## 依赖
- ✅ 使用CSV格式，支持UTF-8/GBK编码
+- Python 3.10+
- ✅ 确保列名清晰、无特殊字符
+- pandas, numpy, matplotlib, scipy, scikit-learn
- ✅ 数据量适中（建议<100MB）
+- openai（兼容任何 OpenAI API 格式的 LLM 服务）
-
+- python-dotenv
 ### 2. 查询编写
 - ✅ 使用清晰的中文描述分析需求
 - ✅ 指定想要的图表类型和关键指标
 - ✅ 明确分析的目标和重点
 ### 3. 结果解读
 - ✅ 检查生成的图表是否符合预期
 - ✅ 阅读分析报告中的关键发现
 - ✅ 根据需要调整查询重新分析
 ## 🚨 注意事项
 ### 安全限制
 - 🔒 仅支持预定义的数据分析库
 - 🔒 不允许文件系统操作（除图片保存）
 - 🔒 不支持网络请求（除LLM调用）
 ### 性能考虑
 - ⚡ 大数据集可能导致分析时间较长
 - ⚡ 复杂分析任务可能需要多轮交互
 - ⚡ API调用频率受到模型限制
 ### 兼容性
 - 🐍 Python 3.8+
 - 📊 支持pandas兼容的数据格式
 - 🖼️ 需要matplotlib中文字体支持
 ## 🐛 故障排除
 ### 常见问题
 **Q: 图表中文显示为方框？**
 A: 系统会自动检测并使用可用的中文字体（macOS: Hiragino Sans GB, Songti SC等；Windows: SimHei等）。
 **Q: API调用失败？**
 A: 检查`.env`文件中的API密钥和端点配置，确保网络连接正常。
 **Q: 数据加载错误？**
 A: 检查文件路径和编码格式，支持UTF-8、GBK等常见编码。
 **Q: 分析结果不准确？**
 A: 尝试提供更详细的分析需求，或检查原始数据质量。
 **Q: Mermaid流程图无法正常显示？**
 A: 确保在支持Mermaid的环境中查看（如GitHub、Typora、VS Code预览等）。如果在本地查看，推荐使用支持Mermaid的Markdown编辑器。
 **Q: 如何自定义流程图样式？**
 A: 可以在Mermaid代码块中添加样式定义，或使用不同的图表类型（graph、flowchart、sequenceDiagram等）来满足不同的展示需求。
 ### 错误日志
 分析过程中的错误信息会保存在会话目录中，便于调试和优化。
 ## 🤝 贡献指南
 欢迎贡献代码和改进建议！
 1. Fork 项目
 2. 创建功能分支
 3. 提交更改
 4. 推送到分支
 5. 创建Pull Request
 ## 📄 许可证
 本项目基于MIT许可证开源。详见[LICENSE](LICENSE)文件。
 ## 🔄 更新日志
 ### v1.0.0
 - ✨ 初始版本发布
 - 🎯 支持自然语言数据分析
 - 📊 集成matplotlib图表生成
 - 📝 自动报告生成功能
 - 🔒 安全的代码执行环境
 ---
 <div align="center">
 **🚀 让数据分析变得更智能、更简单！**
 </div>
--- a/init.py
+++ b/init.py
@@ -1,54 +0,0 @@
 # -*- coding: utf-8 -*-
 """
 Data Analysis Agent Package
 一个基于LLM的智能数据分析代理，专门为Jupyter Notebook环境设计。
 """
 from .core.notebook_agent import NotebookAgent
 from .config.llm_config import LLMConfig
 from .utils.code_executor import CodeExecutor
 __version__ = "1.0.0"
 __author__ = "Data Analysis Agent Team"
 # 主要导出类
 __all__ = [
    "NotebookAgent",
    "LLMConfig", 
    "CodeExecutor",
 ]
 # 便捷函数
 def create_agent(config=None, output_dir="outputs", max_rounds=20, session_dir=None):
    """
    Build a NotebookAgent, falling back to a default LLMConfig.
    Args:
        config: LLM configuration; a new ``LLMConfig()`` is created when None.
        output_dir: Output directory forwarded to the agent.
        max_rounds: Maximum number of analysis rounds forwarded to the agent.
        session_dir: Optional session directory forwarded to the agent.
    Returns:
        NotebookAgent: The constructed agent instance.
    """
    # Only an explicit None triggers the default; any other value is passed through as-is.
    llm_config = LLMConfig() if config is None else config
    return NotebookAgent(
        config=llm_config,
        output_dir=output_dir,
        max_rounds=max_rounds,
        session_dir=session_dir,
    )
 def quick_analysis(query, files=None, output_dir="outputs", max_rounds=10):
    """
    Run a one-shot analysis using a freshly created agent.
    Args:
        query: Analysis request in natural language.
        files: List of data file paths; handed to the agent unchanged.
        output_dir: Output directory used when creating the agent.
        max_rounds: Maximum number of analysis rounds used when creating the agent.
    Returns:
        The value returned by the agent's ``analyze(query, files)`` call
        (described as a dict of analysis results -- TODO confirm against
        NotebookAgent.analyze).
    """
    # The agent is created with the default LLM configuration (config is not forwarded).
    return create_agent(output_dir=output_dir, max_rounds=max_rounds).analyze(query, files)
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jeason	8fc02944c8	12345678	2026-03-09 10:37:35 +08:00
Jeason	ba9ed95f04	重构readme	2026-03-09 10:26:03 +08:00
Jeason	237c96f629	小小优化，不成敬意	2026-03-09 10:21:33 +08:00
Jeason	dc9e4bd0ef	二次重构，加入预设模板	2026-03-09 10:06:21 +08:00
Jeason	7071b1f730	Complete AI Data Analysis Agent implementation with 95.7% test coverage	2026-03-07 00:04:29 +08:00
Zhaojie	621e546b43	feat: Update core agent logic, code execution utilities, and LLM configuration.	2026-01-07 16:41:38 +08:00
Zhaojie	3a2f90aef5	feat: Adjust LLM temperature and update prompts to refine agent behavior.。。。。初版定稿校验	2026-01-07 13:42:52 +08:00
AI Agent	fae233b10d	refactor: Reorganize prompt definitions and update agent implementation.	2026-01-06 21:19:40 +08:00
AI Agent	8d90f029e1	优化提示词	2026-01-06 19:49:04 +08:00
AI Agent	1f420b1b6e	Merge branch 'main' of http://jeason.online:3000/zhaojie/iov_data_analysis_agent	2026-01-06 19:48:28 +08:00
AI Agent	fcbdec1298	feat: adjust report format and enforce image persistence	2026-01-06 19:44:17 +08:00
Zhaojie	8115abb6d6	feat: update data analysis agent logic and prompts	2026-01-06 18:08:44 +08:00
Zhaojie	ca134e94c8	feat: implement logging and refine agent core, execution, and prompts.	2026-01-06 17:53:19 +08:00
Zhaojie	24870ba497	feat: update data analysis agent logic and refine associated prompts.	2026-01-06 16:10:02 +08:00
		`@@ -0,0 +1 @@`
							`ýû¥ÊhØG2ÔK\|Qò5ò™A™s#¬„ä¿%Bw”·ïÅ]ÚhX<>¬.secondary`
		`@@ -0,0 +1 @@`
							`©όΌc<EFBFBD>ƒ<EFBFBD>Y%Ώο$π†vο<>ί΅Λ/`
		`@@ -0,0 +1 @@`
							`ýűĄĘhŘG2ÔK\|Qň5ň™A™s#¬„äż%Bw”·ďĹ]ÚhXť¬`
		`@@ -0,0 +1 @@`
							`Sđ‹2ť<EFBFBD> ëo¬÷Z@±ŹĘŹ°'ôŮ±‰rčX·I–ĐÍÔÍ+Ť˝<>ßôŇ§×‘©›2`