feat: Update core agent logic, code execution utilities, and LLM configuration.

2026-01-07 16:41:38 +08:00
parent 3a2f90aef5
commit 621e546b43
7 changed files with 73 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@ __pycache__/
 # C extensions
 *.so

+
+
 # Distribution / packaging
 .Python
 build/
--- a/config/llm_config.py
+++ b/config/llm_config.py
@@ -17,13 +17,24 @@ load_dotenv()
 class LLMConfig:
    """LLM配置"""

-    provider: str = "openai"  # openai, anthropic, etc.
-    api_key: str = os.environ.get("OPENAI_API_KEY", "sk-c44i1hy64xgzwox6x08o4zug93frq6rgn84oqugf2pje1tg4")
+    provider: str = os.environ.get("LLM_PROVIDER", "gemini")  # openai, gemini, etc.
+    api_key: str = os.environ.get("OPENAI_API_KEY", "sk---c44i1hy64xgzwox6x08o4zug93frq6rgn84oqugf2pje1tg4")
    base_url: str = os.environ.get("OPENAI_BASE_URL", "https://api.xiaomimimo.com/v1")
    model: str = os.environ.get("OPENAI_MODEL", "mimo-v2-flash")
    temperature: float = 0.5
    max_tokens: int = 131072

+    def __post_init__(self):
+        """配置初始化后的处理"""
+        if self.provider == "gemini":
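+            # For Gemini, load the Gemini settings from the environment, else use the defaults below.
+            # NOTE: api_key is overwritten unconditionally here, so OPENAI_API_KEY is never reused;
+            # without GEMINI_API_KEY the hard-coded fallback key below is used (avoid committing real keys).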
+            self.api_key = os.environ.get("GEMINI_API_KEY", "AIzaSyA9aVFjRJYJq82WEQUVlifE4fE7BnX6QiY")
+            # Gemini 的 OpenAI 兼容接口地址
+            self.base_url = os.environ.get("GEMINI_BASE_URL", "https://gemini.jeason.online")
+            self.model = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
+
    def to_dict(self) -> Dict[str, Any]:
        """转换为字典"""
        return asdict(self)
--- a/data_analysis_agent.py
+++ b/data_analysis_agent.py
@@ -89,7 +89,11 @@ class DataAnalysisAgent:
                return self._handle_generate_code(response, yaml_data)

        except Exception as e:
-            print(f"⚠️ 解析响应失败: {str(e)}，按generate_code处理")
+            print(f"⚠️ 解析响应失败: {str(e)}，尝试提取代码并按generate_code处理")
+            # 即使YAML解析失败，也尝试提取代码
+            extracted_code = extract_code_from_response(response)
+            if extracted_code:
+                 return self._handle_generate_code(response, {"code": extracted_code})
            return self._handle_generate_code(response, {})

    def _handle_analysis_complete(
--- a/main.py
+++ b/main.py
@@ -39,7 +39,7 @@ def setup_logging(log_dir):

 def main():
    llm_config = LLMConfig()
-    files = ["./UB IOV Support_TR.csv"]
+    files = ["./cleaned_data.csv"]
    analysis_requirement = """
 基于所有运维工单，整理一份工单健康度报告，包括但不限于对所有车联网技术支持工单的全面数据分析，
 深入挖掘工单处理过程中的关键问题、效率瓶颈及改进机会。涵盖工单状态、问题类型、模块分布、严重程度、责任人负载、车型分布、来源渠道及处理时长等多个维度。
--- a/utils/code_executor.py
+++ b/utils/code_executor.py
@@ -35,6 +35,7 @@ class CodeExecutor:
        "duckdb",
        "scipy",
        "sklearn",
+        "sklearn.feature_extraction.text",
        "statsmodels",
        "plotly",
        "dash",
--- a/utils/extract_code.py
+++ b/utils/extract_code.py
@@ -29,6 +29,22 @@ def extract_code_from_response(response: str) -> Optional[str]:
            end = response.find('```', start)
            if end != -1:
                return response[start:end].strip()
+        
+        # 尝试提取 code: | 形式的代码块（针对YAML格式错误但结构清晰的情况）
+        import re
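+        # Match the run of lines after "code: |" that are indented by at least two spaces.
+        # Matching stops at the first line without that indent (a blank line inside the code also ends it).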
+        pattern = r'code:\s*\|\s*\n((?: {2,}.*\n?)+)'
+        match = re.search(pattern, response)
+        if match:
+            code_block = match.group(1)
+            # 尝试去除公共缩进
+            try:
+                import textwrap
+                return textwrap.dedent(code_block).strip()
+            except:
+                return code_block.strip()
+
        elif '```' in response:
            start = response.find('```') + 3
            end = response.find('```', start)
--- a/utils/fallback_openai_client.py
+++ b/utils/fallback_openai_client.py
@@ -97,23 +97,48 @@ class AsyncFallbackOpenAIClient:
                    print(f"❌ {api_name} API 在达到最大重试次数后仍然失败。")
            except APIStatusError as e: # API 返回的特定状态码错误
                is_content_filter_error = False
-                if e.status_code == 400:
+                retry_after = None
+                
+                # 尝试解析错误详情以获取更多信息（如 Google RPC RetryInfo）
                try:
                    error_json = e.response.json()
                    error_details = error_json.get("error", {})
+                    
+                    # 检查内容过滤错误（针对特定服务商）
                    if (error_details.get("code") == self.content_filter_error_code and
                        self.content_filter_error_field in error_json):
                        is_content_filter_error = True
+                    
+                    # 检查 Google RPC RetryInfo
+                    # 格式示例: {'error': {'details': [{'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '38s'}]}}
+                    if "details" in error_details:
+                        for detail in error_details["details"]:
+                            if detail.get("@type") == "type.googleapis.com/google.rpc.RetryInfo":
+                                delay_str = detail.get("retryDelay", "")
+                                if delay_str.endswith("s"):
+                                    try:
+                                        retry_after = float(delay_str[:-1])
+                                        print(f"⏳ 收到服务器 RetryInfo，等待时间: {retry_after}秒")
+                                    except ValueError:
+                                        pass
                except Exception:
-                        pass # 解析错误响应失败，不认为是内容过滤错误
+                    pass # 解析错误响应失败，忽略
                
                if is_content_filter_error and api_name == "主": # 如果是主 API 的内容过滤错误，则直接抛出以便回退
                    raise e 
                
                last_exception = e
                print(f"⚠️ {api_name} API 调用时发生 APIStatusError ({e.status_code}): {e}. 尝试次数 {attempt + 1}/{max_retries + 1}")
+                
                if attempt < max_retries:
-                    await asyncio.sleep(self.retry_delay_seconds * (attempt + 1))
+                    # Use the server-provided retry_after when available; otherwise fall back to linear backoff.
+                    wait_time = retry_after if retry_after is not None else (self.retry_delay_seconds * (attempt + 1))
+                    # 如果是 429 Too Many Requests 且没有解析出 retry_after，建议加大等待时间
+                    if e.status_code == 429 and retry_after is None:
+                        wait_time = max(wait_time, 5.0 * (attempt + 1)) # 429 默认至少等 5 秒
+                        
+                    print(f"💤 将等待 {wait_time:.2f} 秒后重试...")
+                    await asyncio.sleep(wait_time)
                else:
                    print(f"❌ {api_name} API 在达到最大重试次数后仍然失败 (APIStatusError)。")
            except APIError as e: # 其他不可轻易重试的 OpenAI 错误