截图识别飞书个人任务清单
This commit is contained in:
11
services/__init__.py
Normal file
11
services/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
服务模块
|
||||
"""
|
||||
|
||||
from .ai_service import AIService
|
||||
from .feishu_service import FeishuService
|
||||
|
||||
__all__ = [
|
||||
'AIService',
|
||||
'FeishuService'
|
||||
]
|
||||
624
services/ai_service.py
Normal file
624
services/ai_service.py
Normal file
@@ -0,0 +1,624 @@
|
||||
import base64
|
||||
import json
|
||||
import datetime
|
||||
import time
|
||||
import re
|
||||
from typing import Dict, Optional, List
|
||||
from openai import OpenAI
|
||||
from openai import APIError, RateLimitError, AuthenticationError
|
||||
|
||||
|
||||
class AIService:
|
||||
def __init__(self, config):
|
||||
"""
|
||||
初始化 AI 服务
|
||||
:param config: 包含 ai 配置的字典 (来自 config.yaml)
|
||||
"""
|
||||
self.api_key = config.get('api_key')
|
||||
self.base_url = config.get('base_url')
|
||||
self.model = config.get('model', 'gpt-4o')
|
||||
self.max_retries = config.get('max_retries', 3)
|
||||
self.retry_delay = config.get('retry_delay', 1.0)
|
||||
|
||||
# 初始化 OpenAI 客户端 (兼容所有支持 OpenAI 格式的 API)
|
||||
self.client = OpenAI(
|
||||
api_key=self.api_key,
|
||||
base_url=self.base_url
|
||||
)
|
||||
|
||||
# 支持的图片格式
|
||||
self.supported_formats = {'.png', '.jpg', '.jpeg', '.bmp', '.gif'}
|
||||
|
||||
# Prompt模板缓存
|
||||
self.prompt_templates = {}
|
||||
|
||||
def _encode_image(self, image_path):
|
||||
"""将本地图片转换为 Base64 编码"""
|
||||
try:
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode('utf-8')
|
||||
except Exception as e:
|
||||
raise Exception(f"图片编码失败: {str(e)}")
|
||||
|
||||
def _encode_image_from_bytes(self, image_bytes: bytes) -> str:
|
||||
"""将图片字节数据转换为 Base64 编码"""
|
||||
try:
|
||||
return base64.b64encode(image_bytes).decode('utf-8')
|
||||
except Exception as e:
|
||||
raise Exception(f"图片字节数据编码失败: {str(e)}")
|
||||
|
||||
def _validate_image_file(self, image_path):
|
||||
"""验证图片文件"""
|
||||
from pathlib import Path
|
||||
|
||||
file_path = Path(image_path)
|
||||
|
||||
# 检查文件是否存在
|
||||
if not file_path.exists():
|
||||
raise FileNotFoundError(f"图片文件不存在: {image_path}")
|
||||
|
||||
# 检查文件大小(限制为10MB)
|
||||
file_size = file_path.stat().st_size
|
||||
if file_size > 10 * 1024 * 1024: # 10MB
|
||||
raise ValueError(f"图片文件过大: {file_size / 1024 / 1024:.2f}MB (最大10MB)")
|
||||
|
||||
# 检查文件格式
|
||||
if file_path.suffix.lower() not in self.supported_formats:
|
||||
raise ValueError(f"不支持的图片格式: {file_path.suffix}")
|
||||
|
||||
return True
|
||||
|
||||
def _build_prompt(self, current_date_str: str) -> str:
|
||||
"""构建AI提示词"""
|
||||
prompt = f"""
|
||||
你是一个专业的项目管理助手。当前系统日期是:{current_date_str}。
|
||||
你的任务是从用户上传的图片(可能是聊天记录、邮件、文档截图)中提取任务信息。
|
||||
|
||||
请严格按照以下 JSON 格式返回结果:
|
||||
{{
|
||||
"task_description": "任务的具体描述,简练概括",
|
||||
"priority": "必须从以下选项中选择一个: ['紧急', '较紧急', '一般', '普通']",
|
||||
"status": "必须从以下选项中选择一个: ['已停滞','待开始', '进行中', '已完成']",
|
||||
"latest_progress": "图片中提到的最新进展,如果没有则留空字符串",
|
||||
"initiator": "任务发起人姓名。请仔细识别图片中的发件人/发送者名字。如果是邮件截图,请识别发件人;如果是聊天记录,请识别发送者。如果没有明确的发起人,则留空字符串。",
|
||||
"department": "发起人部门,如果没有则留空字符串",
|
||||
"start_date": "YYYY-MM-DD 格式的日期字符串。如果提到'今天'就是当前日期,'下周一'请根据当前日期计算。如果没提到则返回 null",
|
||||
"due_date": "YYYY-MM-DD 格式的截止日期字符串。逻辑同上,如果没提到则返回 null"
|
||||
}}
|
||||
|
||||
注意:
|
||||
1. 如果图片中包含多个任务,请只提取最核心的一个。
|
||||
2. 请特别关注图片中的发件人/发送者信息,准确提取姓名。
|
||||
3. 如果识别到的名字可能存在重名,请在任务描述中添加提示信息。
|
||||
4. 不要返回 Markdown 代码块标记(如 ```json),直接返回纯 JSON 字符串。
|
||||
5. 确保返回的 JSON 格式正确,可以被 Python 的 json.loads() 解析。
|
||||
"""
|
||||
return prompt
|
||||
|
||||
def _parse_ai_response(self, content: Optional[str]) -> Optional[Dict]:
|
||||
"""解析AI响应"""
|
||||
if not content:
|
||||
raise ValueError("AI响应内容为空")
|
||||
|
||||
try:
|
||||
# 清理可能的 markdown 标记
|
||||
content = content.replace("```json", "").replace("```", "").strip()
|
||||
|
||||
# 尝试修复常见的JSON格式问题
|
||||
# 1. 处理未闭合的引号
|
||||
content = self._fix_json_string(content)
|
||||
|
||||
# 2. 处理转义字符问题
|
||||
content = self._fix_json_escapes(content)
|
||||
|
||||
# 解析JSON
|
||||
result_dict = json.loads(content)
|
||||
|
||||
# 验证必需的字段
|
||||
required_fields = ['task_description', 'priority', 'status']
|
||||
for field in required_fields:
|
||||
if field not in result_dict:
|
||||
raise ValueError(f"AI响应缺少必需字段: {field}")
|
||||
|
||||
# 验证选项值
|
||||
valid_priorities = ['紧急', '较紧急', '一般', '普通']
|
||||
valid_statuses = ['已停滞','待开始', '进行中', '已完成']
|
||||
|
||||
if result_dict.get('priority') not in valid_priorities:
|
||||
raise ValueError(f"无效的优先级: {result_dict.get('priority')}")
|
||||
|
||||
if result_dict.get('status') not in valid_statuses:
|
||||
raise ValueError(f"无效的状态: {result_dict.get('status')}")
|
||||
|
||||
return result_dict
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
# 尝试更详细的错误修复
|
||||
try:
|
||||
# 如果标准解析失败,尝试使用更宽松的解析
|
||||
content = self._aggressive_json_fix(content)
|
||||
result_dict = json.loads(content)
|
||||
|
||||
# 重新验证字段
|
||||
required_fields = ['task_description', 'priority', 'status']
|
||||
for field in required_fields:
|
||||
if field not in result_dict:
|
||||
raise ValueError(f"AI响应缺少必需字段: {field}")
|
||||
|
||||
# 重新验证选项值
|
||||
valid_priorities = ['紧急', '较紧急', '一般', '普通']
|
||||
valid_statuses = ['已停滞','待开始', '进行中', '已完成']
|
||||
|
||||
if result_dict.get('priority') not in valid_priorities:
|
||||
raise ValueError(f"无效的优先级: {result_dict.get('priority')}")
|
||||
|
||||
if result_dict.get('status') not in valid_statuses:
|
||||
raise ValueError(f"无效的状态: {result_dict.get('status')}")
|
||||
|
||||
return result_dict
|
||||
except Exception as retry_error:
|
||||
raise ValueError(f"AI响应不是有效的JSON: {str(e)} (修复后错误: {str(retry_error)})")
|
||||
except Exception as e:
|
||||
raise ValueError(f"解析AI响应失败: {str(e)}")
|
||||
|
||||
def _fix_json_string(self, content: str) -> str:
|
||||
"""修复JSON字符串中的未闭合引号问题"""
|
||||
import re
|
||||
|
||||
# 查找可能未闭合的字符串
|
||||
# 匹配模式:从引号开始,但没有对应的闭合引号
|
||||
lines = content.split('\n')
|
||||
fixed_lines = []
|
||||
|
||||
for line in lines:
|
||||
# 检查行中是否有未闭合的引号
|
||||
in_string = False
|
||||
escaped = False
|
||||
fixed_line = []
|
||||
|
||||
for char in line:
|
||||
if escaped:
|
||||
fixed_line.append(char)
|
||||
escaped = False
|
||||
continue
|
||||
|
||||
if char == '\\':
|
||||
escaped = True
|
||||
fixed_line.append(char)
|
||||
continue
|
||||
|
||||
if char == '"':
|
||||
if in_string:
|
||||
in_string = False
|
||||
else:
|
||||
in_string = True
|
||||
fixed_line.append(char)
|
||||
else:
|
||||
fixed_line.append(char)
|
||||
|
||||
# 如果行结束时仍在字符串中,添加闭合引号
|
||||
if in_string:
|
||||
fixed_line.append('"')
|
||||
|
||||
fixed_lines.append(''.join(fixed_line))
|
||||
|
||||
return '\n'.join(fixed_lines)
|
||||
|
||||
def _fix_json_escapes(self, content: str) -> str:
|
||||
"""修复JSON转义字符问题"""
|
||||
# 注意:我们不应该转义JSON结构中的引号,只转义字符串内容中的引号
|
||||
# 这个函数暂时不处理换行符,因为JSON中的换行符是有效的
|
||||
# 更复杂的转义修复应该在JSON解析后进行
|
||||
|
||||
return content
|
||||
|
||||
def _aggressive_json_fix(self, content: str) -> str:
|
||||
"""更激进的JSON修复策略"""
|
||||
# 1. 移除可能的非JSON内容
|
||||
content = re.sub(r'^[^{]*', '', content) # 移除JSON前的非JSON内容
|
||||
content = re.sub(r'[^}]*$', '', content) # 移除JSON后的非JSON内容
|
||||
|
||||
# 2. 确保JSON对象闭合
|
||||
if not content.strip().endswith('}'):
|
||||
content = content.strip() + '}'
|
||||
|
||||
# 3. 确保JSON对象开始
|
||||
if not content.strip().startswith('{'):
|
||||
content = '{' + content.strip()
|
||||
|
||||
# 4. 处理常见的AI响应格式问题
|
||||
# 移除可能的Markdown代码块标记
|
||||
content = content.replace('```json', '').replace('```', '')
|
||||
|
||||
# 5. 处理可能的多余空格和换行
|
||||
content = ' '.join(content.split())
|
||||
|
||||
return content
|
||||
|
||||
def _call_ai_with_retry(self, image_path: str, prompt: str) -> Optional[str]:
|
||||
"""调用AI API,带重试机制"""
|
||||
base64_image = self._encode_image(image_path)
|
||||
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
# 尝试使用response_format参数(适用于OpenAI格式的API)
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
max_tokens=2000
|
||||
)
|
||||
except Exception as e:
|
||||
# 如果response_format参数不支持,尝试不使用该参数
|
||||
print(f"⚠️ response_format参数不支持,尝试不使用该参数: {str(e)}")
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
if not response.choices:
|
||||
return None
|
||||
|
||||
content = response.choices[0].message.content
|
||||
return content if content else None
|
||||
|
||||
except RateLimitError as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt) # 指数退避
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"API速率限制: {str(e)}")
|
||||
|
||||
except AuthenticationError as e:
|
||||
raise Exception(f"API认证失败: {str(e)}")
|
||||
|
||||
except APIError as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"API调用失败: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"未知错误: {str(e)}")
|
||||
|
||||
raise Exception(f"AI调用失败,已重试 {self.max_retries} 次")
|
||||
|
||||
def _call_ai_with_retry_from_bytes(self, image_bytes: bytes, prompt: str, image_name: str = "memory_image") -> Optional[str]:
|
||||
"""调用AI API,带重试机制(从内存字节数据)"""
|
||||
base64_image = self._encode_image_from_bytes(image_bytes)
|
||||
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
# 尝试使用response_format参数(适用于OpenAI格式的API)
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
max_tokens=2000
|
||||
)
|
||||
except Exception as e:
|
||||
# 如果response_format参数不支持,尝试不使用该参数
|
||||
print(f"⚠️ response_format参数不支持,尝试不使用该参数: {str(e)}")
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
if not response.choices:
|
||||
return None
|
||||
|
||||
content = response.choices[0].message.content
|
||||
return content if content else None
|
||||
|
||||
except RateLimitError as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt) # 指数退避
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"API速率限制: {str(e)}")
|
||||
|
||||
except AuthenticationError as e:
|
||||
raise Exception(f"API认证失败: {str(e)}")
|
||||
|
||||
except APIError as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"API调用失败: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"未知错误: {str(e)}")
|
||||
|
||||
raise Exception(f"AI调用失败,已重试 {self.max_retries} 次")
|
||||
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
# 尝试使用response_format参数(适用于OpenAI格式的API)
|
||||
try:
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
max_tokens=2000
|
||||
)
|
||||
except Exception as e:
|
||||
# 如果response_format参数不支持,尝试不使用该参数
|
||||
print(f"⚠️ response_format参数不支持,尝试不使用该参数: {str(e)}")
|
||||
response = self.client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": prompt},
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:image/jpeg;base64,{base64_image}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
if not response.choices:
|
||||
return None
|
||||
|
||||
content = response.choices[0].message.content
|
||||
return content if content else None
|
||||
|
||||
except RateLimitError as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt) # 指数退避
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"API速率限制: {str(e)}")
|
||||
|
||||
except AuthenticationError as e:
|
||||
raise Exception(f"API认证失败: {str(e)}")
|
||||
|
||||
except APIError as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"API调用失败: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
if attempt < self.max_retries - 1:
|
||||
wait_time = self.retry_delay * (2 ** attempt)
|
||||
time.sleep(wait_time)
|
||||
continue
|
||||
raise Exception(f"未知错误: {str(e)}")
|
||||
|
||||
raise Exception(f"AI调用失败,已重试 {self.max_retries} 次")
|
||||
|
||||
def analyze_image(self, image_path):
|
||||
"""
|
||||
核心方法:发送图片到 AI 并获取结构化数据
|
||||
:param image_path: 图片文件的路径
|
||||
:return: 解析后的字典 (Dict)
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
file_path = Path(image_path)
|
||||
# 使用sys.stdout.write替代print,避免编码问题
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 正在分析图片: {file_path.name} ...\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
try:
|
||||
# 1. 验证图片文件
|
||||
self._validate_image_file(image_path)
|
||||
|
||||
# 2. 获取当前日期 (用于辅助 AI 推断相对时间)
|
||||
now = datetime.datetime.now()
|
||||
current_date_str = now.strftime("%Y-%m-%d %A") # 例如: 2023-10-27 Sunday
|
||||
|
||||
# 3. 构建 Prompt
|
||||
prompt = self._build_prompt(current_date_str)
|
||||
|
||||
# 4. 调用AI API(带重试机制)
|
||||
content = self._call_ai_with_retry(image_path, prompt)
|
||||
|
||||
# 5. 解析结果
|
||||
if not content:
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] AI返回空内容\n")
|
||||
sys.stdout.flush()
|
||||
return None
|
||||
|
||||
result_dict = self._parse_ai_response(content)
|
||||
|
||||
# 记录成功日志
|
||||
if result_dict:
|
||||
task_desc = result_dict.get('task_description', '')
|
||||
if task_desc and len(task_desc) > 30:
|
||||
task_desc = task_desc[:30] + "..."
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 识别成功: {task_desc}\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
return result_dict
|
||||
|
||||
except Exception as e:
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 分析失败: {str(e)}\n")
|
||||
sys.stdout.flush()
|
||||
return None
|
||||
|
||||
def analyze_image_from_bytes(self, image_bytes: bytes, image_name: str = "memory_image"):
|
||||
"""
|
||||
核心方法:从内存中的图片字节数据发送到 AI 并获取结构化数据
|
||||
:param image_bytes: 图片的字节数据
|
||||
:param image_name: 图片名称(用于日志)
|
||||
:return: 解析后的字典 (Dict)
|
||||
"""
|
||||
# 使用sys.stdout.write替代print,避免编码问题
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 正在分析内存图片: {image_name} ...\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
try:
|
||||
# 1. 验证图片数据大小
|
||||
if len(image_bytes) > 10 * 1024 * 1024: # 10MB
|
||||
raise ValueError(f"图片数据过大: {len(image_bytes) / 1024 / 1024:.2f}MB (最大10MB)")
|
||||
|
||||
# 2. 获取当前日期 (用于辅助 AI 推断相对时间)
|
||||
now = datetime.datetime.now()
|
||||
current_date_str = now.strftime("%Y-%m-%d %A") # 例如: 2023-10-27 Sunday
|
||||
|
||||
# 3. 构建 Prompt
|
||||
prompt = self._build_prompt(current_date_str)
|
||||
|
||||
# 4. 调用AI API(带重试机制)
|
||||
content = self._call_ai_with_retry_from_bytes(image_bytes, prompt, image_name)
|
||||
|
||||
# 5. 解析结果
|
||||
if not content:
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] AI返回空内容\n")
|
||||
sys.stdout.flush()
|
||||
return None
|
||||
|
||||
result_dict = self._parse_ai_response(content)
|
||||
|
||||
# 记录成功日志
|
||||
if result_dict:
|
||||
task_desc = result_dict.get('task_description', '')
|
||||
if task_desc and len(task_desc) > 30:
|
||||
task_desc = task_desc[:30] + "..."
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 识别成功: {task_desc}\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
return result_dict
|
||||
|
||||
except Exception as e:
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 分析失败: {str(e)}\n")
|
||||
sys.stdout.flush()
|
||||
return None
|
||||
|
||||
def analyze_image_batch(self, image_paths: List[str]) -> Dict[str, Optional[Dict]]:
|
||||
"""
|
||||
批量分析图片
|
||||
:param image_paths: 图片文件路径列表
|
||||
:return: 字典,键为图片路径,值为分析结果
|
||||
"""
|
||||
results = {}
|
||||
|
||||
for image_path in image_paths:
|
||||
try:
|
||||
result = self.analyze_image(image_path)
|
||||
results[image_path] = result
|
||||
except Exception as e:
|
||||
import sys
|
||||
sys.stdout.write(f" [AI] 批量处理失败 {image_path}: {str(e)}\n")
|
||||
sys.stdout.flush()
|
||||
results[image_path] = None
|
||||
|
||||
return results
|
||||
|
||||
# ================= 单元测试 =================
|
||||
if __name__ == "__main__":
|
||||
# 在这里填入你的配置进行测试
|
||||
test_config = {
|
||||
"api_key": "sk-xxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"model": "gpt-4o"
|
||||
}
|
||||
|
||||
# 确保目录下有一张名为 test_img.jpg 的图片
|
||||
import os
|
||||
if os.path.exists("test_img.jpg"):
|
||||
ai = AIService(test_config)
|
||||
res = ai.analyze_image("test_img.jpg")
|
||||
import sys
|
||||
sys.stdout.write(json.dumps(res, indent=2, ensure_ascii=False) + "\n")
|
||||
sys.stdout.flush()
|
||||
else:
|
||||
import sys
|
||||
sys.stdout.write("请在同级目录下放一张 test_img.jpg 用于测试\n")
|
||||
sys.stdout.flush()
|
||||
489
services/feishu_service.py
Normal file
489
services/feishu_service.py
Normal file
@@ -0,0 +1,489 @@
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional, List
|
||||
from pathlib import Path
|
||||
from utils.user_cache import UserCacheManager
|
||||
|
||||
|
||||
class FeishuService:
|
||||
def __init__(self, config):
|
||||
"""
|
||||
初始化飞书服务
|
||||
:param config: 包含飞书配置的字典 (来自 config.yaml)
|
||||
"""
|
||||
self.app_id = config.get('app_id')
|
||||
self.app_secret = config.get('app_secret')
|
||||
self.app_token = config.get('app_token') # 多维表格的 Base Token
|
||||
self.table_id = config.get('table_id') # 数据表 ID
|
||||
|
||||
# 内部变量,用于缓存 Token
|
||||
self._tenant_access_token = None
|
||||
self._token_expire_time = 0
|
||||
|
||||
# 重试配置
|
||||
self.max_retries = config.get('max_retries', 3)
|
||||
self.retry_delay = config.get('retry_delay', 1.0)
|
||||
|
||||
# 用户匹配配置
|
||||
user_matching_config = config.get('user_matching', {})
|
||||
self.user_matching_enabled = user_matching_config.get('enabled', True)
|
||||
self.fallback_to_random = user_matching_config.get('fallback_to_random', True)
|
||||
self.cache_enabled = user_matching_config.get('cache_enabled', True)
|
||||
self.cache_file = user_matching_config.get('cache_file', './data/user_cache.json')
|
||||
|
||||
# 用户缓存管理器
|
||||
self.user_cache = UserCacheManager(self.cache_file) if self.cache_enabled else None
|
||||
|
||||
# 支持的字段类型映射
|
||||
self.field_type_map = {
|
||||
'text': 'text',
|
||||
'number': 'number',
|
||||
'date': 'date',
|
||||
'select': 'single_select',
|
||||
'multi_select': 'multiple_select',
|
||||
'checkbox': 'checkbox',
|
||||
'url': 'url',
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'user': 'user' # 用户字段类型
|
||||
}
|
||||
|
||||
# 验证必需的配置
|
||||
self._validate_config()
|
||||
|
||||
def _validate_config(self):
|
||||
"""验证飞书配置"""
|
||||
if not self.app_id:
|
||||
raise ValueError("飞书配置缺少 app_id")
|
||||
if not self.app_secret:
|
||||
raise ValueError("飞书配置缺少 app_secret")
|
||||
if not self.app_token:
|
||||
raise ValueError("飞书配置缺少 app_token")
|
||||
if not self.table_id:
|
||||
raise ValueError("飞书配置缺少 table_id")
|
||||
|
||||
# 验证用户匹配配置
|
||||
if self.user_matching_enabled and not self.cache_enabled:
|
||||
logging.warning("用户匹配已启用但缓存未启用,可能影响性能")
|
||||
|
||||
def _get_token(self, retry_count: int = 0) -> Optional[str]:
|
||||
"""
|
||||
获取 tenant_access_token (带缓存机制和重试)
|
||||
如果 Token 还有效,直接返回;否则重新请求。
|
||||
"""
|
||||
now = time.time()
|
||||
# 如果 token 存在且离过期还有60秒以上,直接复用
|
||||
if self._tenant_access_token and now < self._token_expire_time - 60:
|
||||
return self._tenant_access_token
|
||||
|
||||
url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal"
|
||||
headers = {"Content-Type": "application/json; charset=utf-8"}
|
||||
payload = {
|
||||
"app_id": self.app_id,
|
||||
"app_secret": self.app_secret
|
||||
}
|
||||
|
||||
try:
|
||||
resp = requests.post(url, headers=headers, json=payload, timeout=10)
|
||||
resp_data = resp.json()
|
||||
|
||||
if resp_data.get("code") == 0:
|
||||
self._tenant_access_token = resp_data.get("tenant_access_token")
|
||||
# expire_in 通常是 7200 秒,我们记录下过期的绝对时间戳
|
||||
self._token_expire_time = now + resp_data.get("expire", 7200)
|
||||
logging.info("🔄 [Feishu] Token 已刷新")
|
||||
return self._tenant_access_token
|
||||
else:
|
||||
error_msg = resp_data.get("msg", "未知错误")
|
||||
raise Exception(f"获取飞书 Token 失败: {error_msg}")
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
if retry_count < self.max_retries:
|
||||
wait_time = self.retry_delay * (2 ** retry_count)
|
||||
logging.warning(f"网络错误,{wait_time}秒后重试: {str(e)}")
|
||||
time.sleep(wait_time)
|
||||
return self._get_token(retry_count + 1)
|
||||
logging.error(f" [Feishu] 网络错误,已重试 {self.max_retries} 次: {str(e)}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f" [Feishu] 认证错误: {str(e)}")
|
||||
return None
|
||||
|
||||
def _date_to_timestamp(self, date_str: str) -> Optional[int]:
|
||||
"""
|
||||
将 YYYY-MM-DD 字符串转换为飞书需要的毫秒级时间戳
|
||||
"""
|
||||
if not date_str or date_str == "null":
|
||||
return None
|
||||
try:
|
||||
# 假设 AI 返回的是 YYYY-MM-DD
|
||||
dt = datetime.strptime(date_str, "%Y-%m-%d")
|
||||
# 转换为毫秒时间戳
|
||||
return int(dt.timestamp() * 1000)
|
||||
except ValueError:
|
||||
logging.warning(f"⚠️ [Feishu] 日期格式无法解析: {date_str},将留空。")
|
||||
return None
|
||||
|
||||
def _validate_ai_data(self, ai_data: Dict) -> bool:
|
||||
"""验证AI返回的数据"""
|
||||
required_fields = ['task_description', 'priority', 'status']
|
||||
|
||||
for field in required_fields:
|
||||
if field not in ai_data:
|
||||
logging.error(f" [Feishu] AI数据缺少必需字段: {field}")
|
||||
return False
|
||||
|
||||
# 验证选项值
|
||||
valid_priorities = ['紧急', '较紧急', '一般', '普通']
|
||||
valid_statuses = [' 已停滞', '待开始', '进行中', ',已完成']
|
||||
|
||||
if ai_data.get('priority') not in valid_priorities:
|
||||
logging.error(f" [Feishu] 无效的优先级: {ai_data.get('priority')}")
|
||||
return False
|
||||
|
||||
if ai_data.get('status') not in valid_statuses:
|
||||
logging.error(f" [Feishu] 无效的状态: {ai_data.get('status')}")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _build_fields(self, ai_data: Dict) -> Dict:
|
||||
"""构建飞书字段数据"""
|
||||
# --- 字段映射区域 (Key 必须与飞书表格列名完全一致) ---
|
||||
fields = {
|
||||
"任务描述": ai_data.get("task_description", ""),
|
||||
"重要紧急程度": ai_data.get("priority", "普通"), # 默认值防止报错
|
||||
"进展": ai_data.get("status", "待开始"),
|
||||
"最新进展记录": ai_data.get("latest_progress", "进行中,请及时更新"),
|
||||
# 任务发起方.部门是多选字段,需要列表格式
|
||||
"任务发起方.部门": self._build_multi_select_field(ai_data.get("department", "")) # 修复字段名:添加点号
|
||||
}
|
||||
|
||||
# 处理任务发起方
|
||||
initiator_name = ai_data.get("initiator", "")
|
||||
if initiator_name and self.user_matching_enabled:
|
||||
# 尝试匹配用户
|
||||
user_match = self._match_initiator(initiator_name)
|
||||
if user_match:
|
||||
if user_match.get('matched'):
|
||||
# 成功匹配到用户ID
|
||||
fields["任务发起方"] = self._build_user_field(user_match['user_id'])
|
||||
logging.info(f"成功匹配任务发起方: {initiator_name} -> {user_match['user_name']}")
|
||||
else:
|
||||
# 未匹配到用户,添加待确认标记到任务描述
|
||||
fields["任务描述"] = f"[待确认发起人] {fields['任务描述']}"
|
||||
# 在任务发起方.部门字段中添加待确认信息(作为提示)
|
||||
existing_dept = ai_data.get("department", "")
|
||||
if existing_dept:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"{existing_dept} (待确认: {initiator_name})")
|
||||
else:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"待确认: {initiator_name}")
|
||||
logging.warning(f"未匹配到用户: {initiator_name},标记为待确认")
|
||||
|
||||
# 添加到最近联系人
|
||||
if self.user_cache:
|
||||
self.user_cache.add_recent_contact(initiator_name)
|
||||
else:
|
||||
# 匹配失败,使用随机选择或留空
|
||||
if self.fallback_to_random and self.user_cache:
|
||||
random_contact = self.user_cache.get_random_recent_contact()
|
||||
if random_contact:
|
||||
fields["任务描述"] = f"[随机匹配] {fields['任务描述']}"
|
||||
# 在任务发起方.部门字段中添加随机匹配信息
|
||||
existing_dept = ai_data.get("department", "")
|
||||
if existing_dept:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"{existing_dept} (随机: {random_contact})")
|
||||
else:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"随机: {random_contact}")
|
||||
logging.info(f"随机匹配任务发起方: {random_contact}")
|
||||
else:
|
||||
fields["任务描述"] = f"[待确认发起人] {fields['任务描述']}"
|
||||
# 在任务发起方.部门字段中添加待确认信息
|
||||
existing_dept = ai_data.get("department", "")
|
||||
if existing_dept:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"{existing_dept} (待确认: {initiator_name})")
|
||||
else:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"待确认: {initiator_name}")
|
||||
logging.warning(f"无最近联系人可用,标记为待确认: {initiator_name}")
|
||||
else:
|
||||
fields["任务描述"] = f"[待确认发起人] {fields['任务描述']}"
|
||||
# 在任务发起方.部门字段中添加待确认信息
|
||||
existing_dept = ai_data.get("department", "")
|
||||
if existing_dept:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"{existing_dept} (待确认: {initiator_name})")
|
||||
else:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"待确认: {initiator_name}")
|
||||
logging.warning(f"未匹配到用户且未启用随机匹配: {initiator_name}")
|
||||
elif initiator_name:
|
||||
# 用户匹配未启用,直接使用名字
|
||||
# 在任务发起方.部门字段中添加发起人信息
|
||||
existing_dept = ai_data.get("department", "")
|
||||
if existing_dept:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"{existing_dept} (发起人: {initiator_name})")
|
||||
else:
|
||||
fields["任务发起方.部门"] = self._build_multi_select_field(f"发起人: {initiator_name}")
|
||||
|
||||
# 移除用户字段,避免UserFieldConvFail错误(如果匹配失败)
|
||||
if "任务发起方" not in fields:
|
||||
fields.pop("任务发起方", None)
|
||||
|
||||
# 处理日期字段 (AI 返回的是字符串,飞书写入最好用时间戳)
|
||||
start_date = ai_data.get("start_date")
|
||||
if start_date and isinstance(start_date, str):
|
||||
start_ts = self._date_to_timestamp(start_date)
|
||||
if start_ts:
|
||||
fields["开始日期"] = start_ts
|
||||
|
||||
due_date = ai_data.get("due_date")
|
||||
if due_date and isinstance(due_date, str):
|
||||
due_ts = self._date_to_timestamp(due_date)
|
||||
if due_ts:
|
||||
fields["预计完成日期"] = due_ts
|
||||
|
||||
return fields
|
||||
|
||||
def _build_multi_select_field(self, value: str) -> Optional[List]:
|
||||
"""构建飞书多选字段"""
|
||||
if not value:
|
||||
return None
|
||||
|
||||
# 多选字段需要以列表格式提供
|
||||
# 格式:["选项1", "选项2"]
|
||||
return [value]
|
||||
|
||||
def _call_feishu_api(self, url: str, headers: Dict, payload: Dict, retry_count: int = 0) -> Optional[Dict]:
|
||||
"""调用飞书API,带重试机制"""
|
||||
try:
|
||||
response = requests.post(url, headers=headers, json=payload, timeout=30)
|
||||
res_json = response.json()
|
||||
|
||||
if res_json.get("code") == 0:
|
||||
return res_json
|
||||
else:
|
||||
error_msg = res_json.get("msg", "未知错误")
|
||||
error_code = res_json.get("code", "未知错误码")
|
||||
raise Exception(f"飞书API错误 {error_code}: {error_msg}")
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
if retry_count < self.max_retries:
|
||||
wait_time = self.retry_delay * (2 ** retry_count)
|
||||
logging.warning(f"网络错误,{wait_time}秒后重试: {str(e)}")
|
||||
time.sleep(wait_time)
|
||||
return self._call_feishu_api(url, headers, payload, retry_count + 1)
|
||||
logging.error(f" [Feishu] 网络错误,已重试 {self.max_retries} 次: {str(e)}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f" [Feishu] API调用失败: {str(e)}")
|
||||
return None
|
||||
|
||||
def add_task(self, ai_data: Optional[Dict]) -> bool:
|
||||
"""
|
||||
将 AI 识别的数据写入多维表格
|
||||
:param ai_data: AI 返回的 JSON 字典
|
||||
:return: 是否成功
|
||||
"""
|
||||
# 验证AI数据
|
||||
if not ai_data or not self._validate_ai_data(ai_data):
|
||||
return False
|
||||
|
||||
token = self._get_token()
|
||||
if not token:
|
||||
logging.error(" [Feishu] 无法获取 Token,跳过写入。")
|
||||
return False
|
||||
|
||||
url = f"https://open.feishu.cn/open-apis/bitable/v1/apps/{self.app_token}/tables/{self.table_id}/records"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
}
|
||||
|
||||
# 构建字段数据
|
||||
fields = self._build_fields(ai_data)
|
||||
payload = {"fields": fields}
|
||||
|
||||
# 调用API
|
||||
res_json = self._call_feishu_api(url, headers, payload)
|
||||
|
||||
if res_json and res_json.get("code") == 0:
|
||||
record_id = res_json['data']['record']['record_id']
|
||||
logging.info(f" [Feishu] 成功写入一条记录!(Record ID: {record_id})")
|
||||
return True
|
||||
else:
|
||||
logging.error(f" [Feishu] 写入失败: {json.dumps(res_json, ensure_ascii=False) if res_json else '无响应'}")
|
||||
return False
|
||||
|
||||
def add_task_batch(self, ai_data_list: List[Dict]) -> Dict[str, bool]:
|
||||
"""
|
||||
批量写入任务到飞书
|
||||
:param ai_data_list: AI数据列表
|
||||
:return: 字典,键为数据索引,值为是否成功
|
||||
"""
|
||||
results = {}
|
||||
|
||||
for index, ai_data in enumerate(ai_data_list):
|
||||
try:
|
||||
success = self.add_task(ai_data)
|
||||
results[str(index)] = success
|
||||
except Exception as e:
|
||||
logging.error(f" [Feishu] 批量写入失败 (索引 {index}): {str(e)}")
|
||||
results[str(index)] = False
|
||||
|
||||
return results
|
||||
|
||||
def _match_initiator(self, initiator_name: str) -> Optional[Dict]:
|
||||
"""
|
||||
匹配任务发起人
|
||||
|
||||
Args:
|
||||
initiator_name: AI识别的发起人名字
|
||||
|
||||
Returns:
|
||||
匹配结果字典,包含matched, user_id, user_name等字段
|
||||
"""
|
||||
if not initiator_name or not self.user_matching_enabled:
|
||||
return None
|
||||
|
||||
# 确保用户缓存已初始化
|
||||
if not self.user_cache:
|
||||
return None
|
||||
|
||||
# 检查缓存是否过期,如果过期则重新获取用户列表
|
||||
if self.user_cache.is_cache_expired(max_age_hours=24):
|
||||
logging.info("用户缓存已过期,重新获取用户列表")
|
||||
users = self._get_user_list()
|
||||
if users:
|
||||
self.user_cache.update_users(users)
|
||||
|
||||
# 尝试匹配用户
|
||||
matched_user = self.user_cache.match_user_by_name(initiator_name)
|
||||
|
||||
if matched_user:
|
||||
return {
|
||||
'matched': True,
|
||||
'user_id': matched_user.get('user_id'),
|
||||
'user_name': matched_user.get('name')
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'matched': False,
|
||||
'user_name': initiator_name
|
||||
}
|
||||
|
||||
def _get_user_list(self) -> Optional[List[Dict]]:
|
||||
"""
|
||||
从飞书API获取用户列表
|
||||
|
||||
Returns:
|
||||
用户列表,每个用户包含 name, user_id 等字段
|
||||
"""
|
||||
token = self._get_token()
|
||||
if not token:
|
||||
logging.error(" [Feishu] 无法获取 Token,跳过用户列表获取。")
|
||||
return None
|
||||
|
||||
url = "https://open.feishu.cn/open-apis/contact/v3/users"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {token}",
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
}
|
||||
|
||||
try:
|
||||
# 分页获取用户列表
|
||||
all_users = []
|
||||
page_token = None
|
||||
|
||||
while True:
|
||||
params = {"page_size": 100}
|
||||
if page_token:
|
||||
params["page_token"] = page_token
|
||||
|
||||
response = requests.get(url, headers=headers, params=params, timeout=30)
|
||||
res_json = response.json()
|
||||
|
||||
if res_json.get("code") == 0:
|
||||
users = res_json.get("data", {}).get("items", [])
|
||||
for user in users:
|
||||
# 提取用户信息
|
||||
user_info = {
|
||||
'user_id': user.get('user_id'),
|
||||
'name': user.get('name'),
|
||||
'email': user.get('email'),
|
||||
'mobile': user.get('mobile')
|
||||
}
|
||||
all_users.append(user_info)
|
||||
|
||||
# 检查是否有更多页面
|
||||
page_token = res_json.get("data", {}).get("page_token")
|
||||
if not page_token:
|
||||
break
|
||||
else:
|
||||
error_msg = res_json.get("msg", "未知错误")
|
||||
logging.error(f"获取用户列表失败: {error_msg}")
|
||||
break
|
||||
|
||||
logging.info(f"成功获取 {len(all_users)} 个用户")
|
||||
return all_users
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"获取用户列表时出错: {str(e)}")
|
||||
return None
|
||||
|
||||
def _build_user_field(self, user_id: str) -> Optional[Dict]:
|
||||
"""
|
||||
构建飞书用户字段
|
||||
|
||||
Args:
|
||||
user_id: 用户ID
|
||||
|
||||
Returns:
|
||||
用户字段字典
|
||||
"""
|
||||
if not user_id:
|
||||
return None
|
||||
|
||||
return {
|
||||
"id": user_id
|
||||
}
|
||||
|
||||
# ================= 单元测试代码 =================
|
||||
if __name__ == "__main__":
|
||||
# 设置日志
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
|
||||
# 如果你直接运行这个文件,会执行下面的测试代码
|
||||
# 这里填入真实的配置进行测试
|
||||
test_config = {
|
||||
"app_id": "cli_xxxxxxxxxxxx", # 换成你的 App ID
|
||||
"app_secret": "xxxxxxxxxxxxxx", # 换成你的 App Secret
|
||||
"app_token": "basxxxxxxxxxxxxx", # 换成你的 Base Token
|
||||
"table_id": "tblxxxxxxxxxxxxx" # 换成你的 Table ID
|
||||
}
|
||||
|
||||
# 模拟 AI 返回的数据
|
||||
mock_ai_data = {
|
||||
"task_description": "测试任务:编写 Feishu Service 模块",
|
||||
"priority": "普通", # 确保飞书表格里有这个选项
|
||||
"status": "进行中", # 确保飞书表格里有这个选项
|
||||
"latest_progress": "代码框架已完成,正在调试 API",
|
||||
"initiator": "Python脚本",
|
||||
"department": "研发部",
|
||||
"start_date": "2023-10-27",
|
||||
"due_date": "2023-10-30"
|
||||
}
|
||||
|
||||
try:
|
||||
fs = FeishuService(test_config)
|
||||
success = fs.add_task(mock_ai_data)
|
||||
import sys
|
||||
if success:
|
||||
sys.stdout.write("测试成功!\n")
|
||||
else:
|
||||
sys.stdout.write("测试失败!\n")
|
||||
sys.stdout.flush()
|
||||
except Exception as e:
|
||||
import sys
|
||||
sys.stdout.write(f"测试出错: {str(e)}\n")
|
||||
sys.stdout.flush()
|
||||
Reference in New Issue
Block a user