diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..006f16c --- /dev/null +++ b/.editorconfig @@ -0,0 +1,57 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# 顶级配置文件 +root = true + +# 所有文件 +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +# Python 文件 +[*.py] +charset = utf-8 +indent_style = space +indent_size = 4 + +# JSON 文件 +[*.json] +charset = utf-8 +indent_style = space +indent_size = 2 + +# Markdown 文件 +[*.md] +charset = utf-8 +trim_trailing_whitespace = false + +# YAML 文件 +[*.{yml,yaml}] +charset = utf-8 +indent_style = space +indent_size = 2 + +# JavaScript/TypeScript 文件 +[*.{js,ts,jsx,tsx}] +charset = utf-8 +indent_style = space +indent_size = 2 + +# HTML/CSS 文件 +[*.{html,css}] +charset = utf-8 +indent_style = space +indent_size = 2 + +# Batch 文件 (Windows) +[*.bat] +charset = utf-8 +end_of_line = crlf + +# Shell 脚本 +[*.sh] +charset = utf-8 +end_of_line = lf + diff --git a/.vscode/settings.json b/.vscode/settings.json index e8f80c1..048bfc1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,25 @@ { - "files.autoGuessEncoding": true + "files.autoGuessEncoding": false, + "files.encoding": "utf8", + "files.eol": "\n", + "[python]": { + "files.encoding": "utf8", + "files.eol": "\n" + }, + "[json]": { + "files.encoding": "utf8" + }, + "[javascript]": { + "files.encoding": "utf8" + }, + "[html]": { + "files.encoding": "utf8" + }, + "[css]": { + "files.encoding": "utf8" + }, + "[markdown]": { + "files.encoding": "utf8" + }, + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python.exe" } diff --git a/UTF8_ENCODING_STANDARD.md b/UTF8_ENCODING_STANDARD.md new file mode 100644 index 0000000..57bdcea --- /dev/null +++ b/UTF8_ENCODING_STANDARD.md @@ -0,0 +1,83 @@ +# UTF-8 编码规范 + +## 项目编码标准 + +本项目所有文件必须使用 **UTF-8** 编码格式,以确保中文和特殊字符的正确显示和处理。 + +## 文件编码要求 + +### 1. 
Python 文件 +- **必须** 在文件开头添加编码声明: + ```python + # -*- coding: utf-8 -*- + ``` + 或 + ```python + # coding: utf-8 + ``` + +### 2. 文件保存 +- 所有文件保存时使用 **UTF-8** 编码(无BOM) +- 禁止使用 GBK、GB2312 等其他编码格式 + +### 3. 文件读取/写入 +- 所有文件操作必须明确指定 `encoding='utf-8'`: + ```python + with open('file.txt', 'r', encoding='utf-8') as f: + content = f.read() + + with open('file.txt', 'w', encoding='utf-8') as f: + f.write(content) + ``` + +## Cursor/VS Code 配置 + +项目已配置 `.vscode/settings.json`,确保: +- 默认文件编码:UTF-8 +- 自动检测编码:禁用(避免误判) +- 文件行尾:LF(Unix风格) + +## 控制台输出 + +### Windows 系统 +在 Python 脚本中,需要设置标准输出编码: +```python +import sys +import io + +if sys.platform == 'win32': + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace') +``` + +## 检查脚本 + +使用 `check_encoding.py` 脚本检查所有文件的编码格式: +```bash +python check_encoding.py +``` + +## 常见问题 + +### 1. 控制台输出乱码 +- 确保文件以 UTF-8 保存 +- 在脚本开头设置标准输出编码 +- Windows 系统运行 `chcp 65001` 设置控制台代码页 + +### 2. 文件读取乱码 +- 检查文件实际编码(可用 `check_encoding.py`) +- 确保使用 `encoding='utf-8'` 参数 + +### 3. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
File-encoding checker.

Walks the project tree, verifies that every text file decodes as UTF-8 and
that every Python file carries a PEP 263 encoding declaration, then prints a
report. The process exit code is 0 when everything passes and 1 otherwise.
"""

import os
import re
import sys
from pathlib import Path

# Byte-order mark that some editors prepend to UTF-8 files.
UTF8_BOM = b'\xef\xbb\xbf'

# File extensions that are inspected.
CHECK_EXTENSIONS = frozenset(
    {'.py', '.json', '.md', '.txt', '.html', '.css', '.js', '.sql', '.bat', '.sh'}
)

# Directory names that are never descended into.
EXCLUDE_DIRS = frozenset(
    {'.git', '.venv', '__pycache__', 'node_modules', '.idea', 'logs', 'data', 'dist', 'build'}
)

# PEP 263 declaration pattern, applied to the raw bytes of a line.
_CODING_DECLARATION = re.compile(rb'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')

# A line that is empty or holds only a comment; only such a first line lets
# the interpreter look for the declaration on the second line.
_BLANK_OR_COMMENT = re.compile(rb'^[ \t\f]*(?:[#\r\n]|$)')


def _guess_encoding(raw_data: bytes) -> tuple:
    """Return ``(encoding, confidence)`` guessed for bytes that are not UTF-8.

    chardet is imported lazily because it is an optional third-party package;
    without it the guess is ``('unknown', 0.0)``. chardet may also report
    ``None`` for the encoding, which is normalised to ``'unknown'``.
    """
    try:
        import chardet
    except ImportError:
        return 'unknown', 0.0
    guess = chardet.detect(raw_data)
    return guess.get('encoding') or 'unknown', guess.get('confidence') or 0.0


def check_file_encoding(file_path: Path) -> dict:
    """Inspect one file and describe its encoding.

    Args:
        file_path: Path of the file to read.

    Returns:
        On success a dict with the keys ``file``, ``encoding``,
        ``confidence``, ``has_bom``, ``is_utf8`` and ``size``. When the file
        cannot be read, a dict with the keys ``file`` and ``error``.
    """
    try:
        raw_data = Path(file_path).read_bytes()
    except OSError as e:
        return {
            'file': str(file_path),
            'error': str(e)
        }

    has_bom = raw_data.startswith(UTF8_BOM)
    try:
        # A strict decode is the authoritative answer to "is this UTF-8?";
        # statistical detection misjudges short or empty files.
        raw_data.decode('utf-8')
    except UnicodeDecodeError:
        is_utf8 = False
        encoding, confidence = _guess_encoding(raw_data)
    else:
        is_utf8 = True
        confidence = 1.0
        if has_bom:
            encoding = 'utf-8-sig'
        elif raw_data.isascii():
            encoding = 'ascii'
        else:
            encoding = 'utf-8'

    return {
        'file': str(file_path),
        'encoding': encoding,
        'confidence': confidence,
        'has_bom': has_bom,
        'is_utf8': is_utf8,
        'size': len(raw_data)
    }


def check_python_file_header(file_path: Path) -> bool:
    """Return True when the file has a PEP 263 encoding declaration.

    The declaration must be a comment on the first line, or on the second
    line when the first line is blank or a comment. The file is read as
    bytes so the check also works for files that are not valid UTF-8.
    Unreadable files yield False.
    """
    try:
        with open(file_path, 'rb') as f:
            first_line = f.readline()
            second_line = f.readline()
    except OSError:
        return False

    if _CODING_DECLARATION.match(first_line):
        return True
    if not _BLANK_OR_COMMENT.match(first_line):
        return False
    return _CODING_DECLARATION.match(second_line) is not None


def _iter_candidate_files(project_root: Path):
    """Yield every file under *project_root* that should be checked."""
    for root, dirs, files in os.walk(project_root):
        # Prune excluded directories in place; sort for a stable report order.
        dirs[:] = sorted(d for d in dirs if d not in EXCLUDE_DIRS)
        for file in sorted(files):
            file_path = Path(root) / file
            if file_path.suffix.lower() in CHECK_EXTENSIONS:
                yield file_path


def main(project_root=None):
    """Check every candidate file under the project and print a report.

    Args:
        project_root: Directory to scan. Defaults to the directory that
            contains this script.

    Returns:
        True when every file is readable UTF-8 and every Python file has an
        encoding declaration, False otherwise.
    """
    root = Path(__file__).parent if project_root is None else Path(project_root)

    results = []
    python_files_without_encoding = []

    print("=" * 80)
    print("文件编码检查工具")
    print("=" * 80)
    print()

    for file_path in _iter_candidate_files(root):
        results.append(check_file_encoding(file_path))
        if file_path.suffix.lower() == '.py' and not check_python_file_header(file_path):
            python_files_without_encoding.append(file_path)

    # Unreadable files are reported separately instead of being counted as
    # "non UTF-8"; they still make the overall check fail.
    errors = [r for r in results if 'error' in r]
    non_utf8 = [r for r in results if 'error' not in r and not r['is_utf8']]
    utf8_files = sum(1 for r in results if r.get('is_utf8', False))

    print(f"总计检查文件: {len(results)}")
    print(f"UTF-8 编码文件: {utf8_files}")
    print(f"非 UTF-8 编码文件: {len(non_utf8)}")
    print()

    if non_utf8:
        print("=" * 80)
        print("⚠️ 非 UTF-8 编码文件:")
        print("=" * 80)
        for result in non_utf8:
            print(f" {result['file']}")
            print(f" 编码: {result['encoding']} (置信度: {result['confidence']:.2%})")
            if result.get('has_bom'):
                print(f" ⚠️ 包含 BOM")
        print()

    if python_files_without_encoding:
        print("=" * 80)
        print("⚠️ Python 文件缺少编码声明:")
        print("=" * 80)
        for file_path in python_files_without_encoding:
            print(f" {file_path}")
        print()
        print("建议在这些文件开头添加: # -*- coding: utf-8 -*-")
        print()

    if errors:
        print("=" * 80)
        print("❌ 检查出错的文件:")
        print("=" * 80)
        for result in errors:
            print(f" {result['file']}: {result['error']}")
        print()

    success = not non_utf8 and not errors and not python_files_without_encoding

    print("=" * 80)
    if success:
        print("✅ 所有文件编码检查通过!")
    else:
        print("⚠️ 发现编码问题,请根据上述信息修复")
    print("=" * 80)

    return success


if __name__ == "__main__":
    sys.exit(0 if main() else 1)
# -*- coding: utf-8 -*-
"""
Encoding helpers.

Utilities for forcing UTF-8 console output and for reading and writing
UTF-8 text files.
"""

import io
import os
import sys


def _force_utf8(stream):
    """Return *stream* switched to UTF-8 with ``errors='replace'``.

    ``reconfigure()`` changes the existing wrapper in place. Building a second
    ``TextIOWrapper`` around the same ``.buffer`` is avoided where possible:
    when the previous wrapper is garbage-collected it closes the shared
    buffer, which breaks the stream if this runs more than once.
    """
    reconfigure = getattr(stream, 'reconfigure', None)
    if reconfigure is not None:
        reconfigure(encoding='utf-8', errors='replace', line_buffering=True)
        return stream

    # Fallback for stream objects that expose a buffer but no reconfigure().
    buffer = getattr(stream, 'buffer', None)
    if buffer is None:
        return stream
    return io.TextIOWrapper(
        buffer,
        encoding='utf-8',
        errors='replace',
        line_buffering=True
    )


def setup_utf8_output(force: bool = False):
    """Switch ``sys.stdout`` and ``sys.stderr`` to UTF-8 output.

    By default this only acts on Windows, where it also sets the console
    code page to 65001. The whole operation is best-effort: a stream that
    cannot be reconfigured is left untouched.

    Args:
        force: Reconfigure the streams on every platform, not only Windows.
            The code page command is still issued on Windows only.
    """
    on_windows = sys.platform == 'win32'
    if not (on_windows or force):
        return

    for name in ('stdout', 'stderr'):
        stream = getattr(sys, name, None)
        if stream is None:
            # No stream attached, nothing to configure.
            continue
        try:
            setattr(sys, name, _force_utf8(stream))
        except (AttributeError, OSError, ValueError):
            # Best-effort: keep the stream as it was.
            pass

    if on_windows:
        # Set the console code page to UTF-8.
        os.system('chcp 65001 >nul 2>&1')


def safe_print(*args, **kwargs):
    """Print like ``print()``, but never fail on unencodable characters.

    The full output text is built first and written in one call, so a
    failure cannot leave half of the arguments already written. If the
    target stream cannot encode the text, only the characters it cannot
    represent are replaced with ``?``.

    Accepts the same positional and keyword arguments as ``print()``.
    """
    sep = kwargs.pop('sep', None)
    end = kwargs.pop('end', None)
    if sep is not None and not isinstance(sep, str):
        raise TypeError(f"sep must be None or a string, not {type(sep).__name__}")
    if end is not None and not isinstance(end, str):
        raise TypeError(f"end must be None or a string, not {type(end).__name__}")

    text = (' ' if sep is None else sep).join(str(arg) for arg in args)
    text += '\n' if end is None else end

    try:
        print(text, end='', **kwargs)
    except UnicodeEncodeError:
        stream = kwargs.get('file')
        if stream is None:
            stream = sys.stdout
        encoding = getattr(stream, 'encoding', None) or 'ascii'
        try:
            fallback = text.encode(encoding, 'replace').decode(encoding, 'replace')
        except LookupError:
            # Stream reports a codec name Python does not know.
            fallback = text.encode('ascii', 'replace').decode('ascii')
        print(fallback, end='', **kwargs)


def read_file_utf8(file_path: str) -> str:
    """Return the whole content of *file_path*, decoded as UTF-8."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()


def write_file_utf8(file_path: str, content: str) -> None:
    """Write *content* to *file_path* as UTF-8 with LF line endings.

    Missing parent directories are created first.
    """
    parent = os.path.dirname(file_path) or '.'
    os.makedirs(parent, exist_ok=True)
    # newline='\n' stops '\n' being translated to the platform line separator.
    with open(file_path, 'w', encoding='utf-8', newline='\n') as f:
        f.write(content)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
MySQL connection diagnostic tool.

Runs three checks against the configured database host: ping reachability,
TCP port reachability and a real PyMySQL login. This is a manual diagnostic
script, not a pytest test module.
"""

import os
import socket
import subprocess
import sys
from urllib.parse import unquote, urlparse

# Add the script directory to sys.path so the shared encoding helper resolves.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

try:
    from src.utils.encoding_helper import setup_utf8_output, safe_print
    setup_utf8_output()
except ImportError:
    # Helper not importable: fall back to a minimal local setup.
    if sys.platform == 'win32':
        for _name in ('stdout', 'stderr'):
            _stream = getattr(sys, _name, None)
            try:
                # reconfigure() changes the stream in place and keeps the
                # underlying buffer open.
                _stream.reconfigure(encoding='utf-8', errors='replace')
            except (AttributeError, OSError, ValueError):
                pass
        os.system('chcp 65001 >nul 2>&1')
    safe_print = print

# NOTE(review): credentials are hard-coded here, mirroring the project
# configuration; consider loading the URL from the environment instead.
DEFAULT_DB_URL = "mysql+pymysql://tsp_assistant:123456@jeason.online/tsp_assistant?charset=utf8mb4"


def _parse_db_url(db_url):
    """Split a database URL into the keyword values PyMySQL needs."""
    parsed = urlparse(db_url)
    return {
        'host': parsed.hostname,
        'port': parsed.port or 3306,
        'user': unquote(parsed.username or ''),
        'password': unquote(parsed.password or ''),
        'database': parsed.path.lstrip('/'),
    }


def _host_answers_ping(host):
    """Return True when two ping probes to *host* produce a reply line."""
    # The probe-count flag is -n on Windows and -c elsewhere.
    count_flag = '-n' if sys.platform == 'win32' else '-c'
    result = subprocess.run(
        ["ping", count_flag, "2", host],
        capture_output=True,
        text=True,
        errors='replace',
        timeout=10
    )
    # Reply lines contain "TTL=" on Windows and "ttl=" on Unix-like systems.
    return "ttl" in (result.stdout + result.stderr).lower()


def test_port(host, port, timeout=5):
    """Return True when a TCP connection to ``host:port`` can be opened.

    Args:
        host: Host name or IP address.
        port: TCP port number.
        timeout: Connection timeout in seconds.

    Returns:
        True on a successful connection. False when the connection is
        refused, unreachable or timed out, and also False (after printing
        the reason) when the address cannot be resolved or is invalid.
    """
    try:
        # create_connection tries every resolved address (IPv4 and IPv6);
        # the with-block closes the socket on every path.
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except socket.gaierror as e:
        safe_print(f"端口测试异常: {e}")
        return False
    except OSError:
        return False
    except (OverflowError, TypeError, ValueError) as e:
        safe_print(f"端口测试异常: {e}")
        return False


def test_mysql_connection(db_url=None):
    """Run the ping, port and login diagnostics and print the findings.

    Args:
        db_url: Database URL to diagnose. Defaults to ``DEFAULT_DB_URL``.
    """
    target = _parse_db_url(DEFAULT_DB_URL if db_url is None else db_url)
    host = target['host']
    port = target['port']

    safe_print("=" * 60)
    safe_print("MySQL连接诊断工具")
    safe_print("=" * 60)
    safe_print(f"主机: {host}")
    safe_print(f"端口: {port}")
    safe_print()

    # 1. Network reachability
    safe_print("[1] 测试网络连通性 (Ping)...")
    try:
        if _host_answers_ping(host):
            safe_print("[OK] 网络连通正常")
        else:
            safe_print("[X] 网络不通")
    except Exception as e:
        # Diagnostic boundary: report the problem and continue.
        safe_print(f"[!] Ping测试失败: {e}")
    safe_print()

    # 2. TCP port reachability
    safe_print(f"[2] 测试端口 {port} 是否开放...")
    if test_port(host, port, timeout=10):
        safe_print(f"[OK] 端口 {port} 开放")
    else:
        safe_print(f"[X] 端口 {port} 无法连接")
        safe_print()
        safe_print("可能的原因:")
        safe_print(" 1. MySQL服务器防火墙未开放3306端口")
        safe_print(" 2. MySQL配置只允许localhost连接")
        safe_print(" 3. 云服务商安全组规则阻止了3306端口")
        safe_print(" 4. MySQL服务未启动")
        safe_print()
        safe_print("解决方案:")
        safe_print(" 1. 检查MySQL服务器防火墙配置:")
        safe_print(" - Linux: sudo ufw allow 3306/tcp")
        safe_print(" - Windows: 在防火墙中添加入站规则")
        safe_print(" 2. 检查MySQL配置文件 (my.cnf 或 my.ini):")
        safe_print(" - 确保 bind-address = 0.0.0.0 (不是127.0.0.1)")
        safe_print(" 3. 检查云服务商安全组:")
        safe_print(" - 添加规则允许3306端口入站")
        safe_print(" 4. 使用SSH隧道连接:")
        safe_print(f" ssh -L {port}:localhost:{port} user@{host}")
    safe_print()

    # 3. Real login through PyMySQL
    safe_print("[3] 尝试使用PyMySQL连接...")
    try:
        import pymysql
        connection = pymysql.connect(
            host=host,
            port=port,
            user=target['user'],
            password=target['password'],
            database=target['database'],
            connect_timeout=10,
            read_timeout=10,
            write_timeout=10
        )
        safe_print("[OK] MySQL连接成功!")
        connection.close()
    except Exception as e:
        # Diagnostic boundary: a missing driver or any connection error is
        # reported instead of raised.
        safe_print(f"[X] MySQL连接失败: {e}")
        safe_print()
        reason = str(e).lower()
        if "timed out" in reason or "can't connect" in reason:
            safe_print("这是连接超时错误,说明端口无法访问。")
            safe_print("请按照上面的解决方案检查服务器配置。")
    safe_print()

    safe_print("=" * 60)


# The names match pytest's discovery pattern, but these are diagnostics that
# need network access and arguments; opt out of test collection.
test_port.__test__ = False
test_mysql_connection.__test__ = False


if __name__ == "__main__":
    test_mysql_connection()