feat:测试验证

This commit is contained in:
2025-11-05 10:43:36 +08:00
parent c9d5c80f42
commit 148a2fc9d6
13 changed files with 524 additions and 6 deletions

57
.editorconfig Normal file
View File

@@ -0,0 +1,57 @@
# EditorConfig is awesome: https://EditorConfig.org
# 顶级配置文件
root = true
# 所有文件
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
# Python 文件
[*.py]
charset = utf-8
indent_style = space
indent_size = 4
# JSON 文件
[*.json]
charset = utf-8
indent_style = space
indent_size = 2
# Markdown 文件
[*.md]
charset = utf-8
trim_trailing_whitespace = false
# YAML 文件
[*.{yml,yaml}]
charset = utf-8
indent_style = space
indent_size = 2
# JavaScript/TypeScript 文件
[*.{js,ts,jsx,tsx}]
charset = utf-8
indent_style = space
indent_size = 2
# HTML/CSS 文件
[*.{html,css}]
charset = utf-8
indent_style = space
indent_size = 2
# Batch 文件 (Windows)
[*.bat]
charset = utf-8
end_of_line = crlf
# Shell 脚本
[*.sh]
charset = utf-8
end_of_line = lf

24
.vscode/settings.json vendored
View File

@@ -1,3 +1,25 @@
{
"files.autoGuessEncoding": true
"files.autoGuessEncoding": false,
"files.encoding": "utf8",
"files.eol": "\n",
"[python]": {
"files.encoding": "utf8",
"files.eol": "\n"
},
"[json]": {
"files.encoding": "utf8"
},
"[javascript]": {
"files.encoding": "utf8"
},
"[html]": {
"files.encoding": "utf8"
},
"[css]": {
"files.encoding": "utf8"
},
"[markdown]": {
"files.encoding": "utf8"
},
"python.defaultInterpreterPath": "${workspaceFolder}/.venv/Scripts/python.exe"
}

83
UTF8_ENCODING_STANDARD.md Normal file
View File

@@ -0,0 +1,83 @@
# UTF-8 编码规范
## 项目编码标准
本项目所有文件必须使用 **UTF-8** 编码格式,以确保中文和特殊字符的正确显示和处理。
## 文件编码要求
### 1. Python 文件
- **必须** 在文件开头添加编码声明:
```python
# -*- coding: utf-8 -*-
```
或使用等价的简写形式:
```python
# coding: utf-8
```
### 2. 文件保存
- 所有文件保存时使用 **UTF-8** 编码(无 BOM)
- 禁止使用 GBK、GB2312 等其他编码格式
### 3. 文件读取/写入
- 所有文件操作必须明确指定 `encoding='utf-8'`
```python
with open('file.txt', 'r', encoding='utf-8') as f:
content = f.read()
with open('file.txt', 'w', encoding='utf-8') as f:
f.write(content)
```
## Cursor/VS Code 配置
项目已配置 `.vscode/settings.json`,确保:
- 默认文件编码:UTF-8
- 自动检测编码:禁用(避免误判)
- 文件行尾:LF(Unix 风格)
## 控制台输出
### Windows 系统
在 Python 脚本中,需要设置标准输出编码:
```python
import sys
import io
if sys.platform == 'win32':
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
```
## 检查脚本
使用 `check_encoding.py` 脚本检查所有文件的编码格式:
```bash
python check_encoding.py
```
## 常见问题
### 1. 控制台输出乱码
- 确保文件以 UTF-8 保存
- 在脚本开头设置标准输出编码
- Windows 系统运行 `chcp 65001` 设置控制台代码页
### 2. 文件读取乱码
- 检查文件实际编码(可用 `check_encoding.py`)
- 确保使用 `encoding='utf-8'` 参数
### 3. 文件保存乱码
- 检查编辑器编码设置
- 确保 Cursor/VS Code 设置为 UTF-8
## 验证清单
创建新文件时,请确认:
- [ ] 文件以 UTF-8 编码保存
- [ ] Python 文件包含编码声明
- [ ] 文件读写操作指定 `encoding='utf-8'`
- [ ] 控制台输出脚本设置了 UTF-8 编码
- [ ] 测试输出中文显示正常

157
check_encoding.py Normal file
View File

@@ -0,0 +1,157 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
文件编码检查工具
检查项目中所有文件是否使用UTF-8编码
"""
import os
import sys
import chardet
from pathlib import Path
def check_file_encoding(file_path: Path) -> dict:
    """Inspect a file's bytes and report whether they are valid UTF-8.

    UTF-8 validity is decided by strictly decoding the raw bytes, which is
    exact and also handles empty files. ``chardet`` is consulted only for
    files that fail to decode, to give a best-guess name for the encoding
    actually in use.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        On success, a dict with the keys ``file``, ``encoding``,
        ``confidence``, ``has_bom``, ``is_utf8`` and ``size``. If the file
        cannot be read or analysed, a dict with only ``file`` and ``error``.
    """
    try:
        with open(file_path, 'rb') as f:
            raw_data = f.read()

        # A UTF-8 byte-order mark is the three bytes EF BB BF at offset 0.
        has_bom = raw_data.startswith(b'\xef\xbb\xbf')

        # Try the narrower codec first so pure-ASCII files keep the
        # 'ascii' label; every ASCII file is also valid UTF-8.
        encoding = None
        for candidate in ('ascii', 'utf-8'):
            try:
                raw_data.decode(candidate)
            except UnicodeDecodeError:
                continue
            encoding = candidate
            break

        if encoding is not None:
            is_utf8 = True
            confidence = 1.0
            if has_bom:
                encoding = 'utf-8-sig'
        else:
            is_utf8 = False
            result = chardet.detect(raw_data)
            # The detector may report None for either field; normalise so
            # callers can always format them.
            encoding = result.get('encoding') or 'unknown'
            confidence = result.get('confidence') or 0.0

        return {
            'file': str(file_path),
            'encoding': encoding,
            'confidence': confidence,
            'has_bom': has_bom,
            'is_utf8': is_utf8,
            'size': len(raw_data)
        }
    except Exception as e:
        # Report the failure to the caller instead of aborting the scan.
        return {
            'file': str(file_path),
            'error': str(e)
        }
def check_python_file_header(file_path: Path) -> bool:
    """Return whether a Python file starts with an encoding declaration.

    Only a comment of the PEP 263 form (for example
    ``# -*- coding: utf-8 -*-`` or ``# coding: utf-8``) on the first or
    second line counts. Ordinary text that merely contains the word
    "coding" or "encoding" does not.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        True if a declaration is found; False if it is missing or the file
        cannot be read as UTF-8.
    """
    import re  # local import keeps this function self-contained

    # Pattern given by PEP 263 for recognising the declaration comment.
    declaration = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')

    try:
        # 'utf-8-sig' drops a leading BOM so it cannot hide the comment.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            first_lines = [f.readline() for _ in range(2)]
    except (OSError, UnicodeError):
        return False

    return any(declaration.match(line) for line in first_lines)
def _print_section(title):
    """Print a section title framed by two 80-character separator rules."""
    print("=" * 80)
    print(title)
    print("=" * 80)


def main():
    """Scan the project tree and report files that break the encoding rules.

    Walks the directory containing this script, skipping the excluded
    directories, and checks every file with a listed extension. Python files
    are additionally checked for an encoding declaration. Results are printed
    as a report with separate sections for non-UTF-8 files, files with a BOM,
    Python files lacking a declaration, and files that could not be checked.

    Returns:
        bool: True when no file is non-UTF-8, no file failed to be checked
        and no Python file lacks an encoding declaration. Files with a BOM
        are reported as a warning only and do not affect the result.
    """
    project_root = Path(__file__).parent

    # File extensions that are subject to the check.
    check_extensions = {'.py', '.json', '.md', '.txt', '.html', '.css', '.js', '.sql', '.bat', '.sh'}
    # Directories that are never descended into.
    exclude_dirs = {'.git', '.venv', '__pycache__', 'node_modules', '.idea', 'logs', 'data', 'dist', 'build'}

    results = []
    python_files_without_encoding = []

    _print_section("文件编码检查工具")
    print()

    for root, dirs, files in os.walk(project_root):
        # Prune in place so os.walk does not enter excluded directories.
        dirs[:] = [d for d in dirs if d not in exclude_dirs]
        for file in files:
            file_path = Path(root) / file
            suffix = file_path.suffix.lower()
            if suffix not in check_extensions:
                continue
            results.append(check_file_encoding(file_path))
            # Compare the lower-cased suffix so the check agrees with the
            # case-insensitive extension filter above.
            if suffix == '.py' and not check_python_file_header(file_path):
                python_files_without_encoding.append(file_path)

    # Keep failed checks apart from genuine non-UTF-8 findings so each
    # section lists only what its title announces.
    errors = [r for r in results if 'error' in r]
    non_utf8 = [r for r in results if 'error' not in r and not r.get('is_utf8', False)]
    bom_files = [r for r in results if r.get('has_bom')]
    utf8_count = len(results) - len(errors) - len(non_utf8)

    print(f"总计检查文件: {len(results)}")
    print(f"UTF-8 编码文件: {utf8_count}")
    print(f"非 UTF-8 编码文件: {len(non_utf8)}")
    print(f"检查出错文件: {len(errors)}")
    print()

    if non_utf8:
        _print_section("⚠️ 非 UTF-8 编码文件:")
        for result in non_utf8:
            print(f" {result['file']}")
            print(f" 编码: {result['encoding']} (置信度: {result['confidence']:.2%})")
        print()

    if bom_files:
        _print_section("⚠️ 包含 BOM 的文件:")
        for result in bom_files:
            print(f" {result['file']}")
        print()
        print("建议将这些文件另存为不带 BOM 的 UTF-8 编码")
        print()

    if python_files_without_encoding:
        _print_section("⚠️ Python 文件缺少编码声明:")
        for file_path in python_files_without_encoding:
            print(f" {file_path}")
        print()
        print("建议在这些文件开头添加: # -*- coding: utf-8 -*-")
        print()

    if errors:
        _print_section("❌ 检查出错的文件:")
        for result in errors:
            print(f" {result['file']}: {result['error']}")
        print()

    passed = not (non_utf8 or errors or python_files_without_encoding)
    if passed:
        _print_section("✅ 所有文件编码检查通过!")
    else:
        _print_section("⚠️ 发现编码问题,请根据上述信息修复")
    return passed
if __name__ == "__main__":
    # Verify the third-party detector is installed before starting the scan.
    # NOTE(review): this file also imports chardet at module level, so a
    # missing package would fail there before this guard runs; confirm
    # which of the two imports is meant to stay.
    try:
        import chardet  # noqa: F401
    except ImportError:
        print("错误: 需要安装 chardet 库")
        print("运行: pip install chardet")
        sys.exit(1)

    # Exit status 0 when every check passed, 1 otherwise.
    sys.exit(0 if main() else 1)

View File

@@ -1,6 +1,6 @@
{
"database": {
"url": "mysql+pymysql://tsp_assistant:password@43.134.68.207/tsp_assistant?charset=utf8mb4",
"url": "mysql+pymysql://tsp_assistant:password@jeason.online/tsp_assistant?charset=utf8mb4",
"pool_size": 10,
"max_overflow": 20,
"pool_timeout": 30,

View File

@@ -10,7 +10,7 @@ class Config:
ALIBABA_MODEL_NAME = "qwen-plus-latest"
# 数据库配置
DATABASE_URL = "mysql+pymysql://tsp_assistant:123456@43.134.68.207/tsp_assistant?charset=utf8mb4"
DATABASE_URL = "mysql+pymysql://tsp_assistant:123456@jeason.online/tsp_assistant?charset=utf8mb4"
# 知识库配置
KNOWLEDGE_BASE_PATH = "data/knowledge_base"

View File

@@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
@dataclass
class DatabaseConfig:
"""数据库配置"""
url: str = "mysql+pymysql://tsp_assistant:password@43.134.68.207/tsp_assistant?charset=utf8mb4"
url: str = "mysql+pymysql://tsp_assistant:password@jeason.online/tsp_assistant?charset=utf8mb4"
pool_size: int = 10
max_overflow: int = 20
pool_timeout: int = 30

View File

@@ -34,10 +34,13 @@ class DatabaseManager:
max_overflow=30, # 增加溢出连接数
pool_pre_ping=True,
pool_recycle=1800, # 减少回收时间
pool_timeout=10, # 连接超时
pool_timeout=30, # 连接超时(秒)
connect_args={
"charset": "utf8mb4",
"autocommit": False
"autocommit": False,
"connect_timeout": 30, # 连接超时(秒)- 适用于网络延迟较大的情况
"read_timeout": 30, # 读取超时(秒)
"write_timeout": 30, # 写入超时(秒)
}
)
else:

Binary file not shown.

View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
"""
编码辅助工具
提供UTF-8编码相关的辅助函数
"""
import sys
import io
import os
def _utf8_stream(stream):
    """Return *stream* set to UTF-8 with ``errors='replace'`` where possible."""
    reconfigure = getattr(stream, 'reconfigure', None)
    if reconfigure is not None:
        # Changing the existing stream in place keeps repeated calls from
        # stacking several wrappers on top of the same underlying buffer.
        reconfigure(encoding='utf-8', errors='replace', line_buffering=True)
        return stream
    if hasattr(stream, 'buffer'):
        return io.TextIOWrapper(
            stream.buffer,
            encoding='utf-8',
            errors='replace',
            line_buffering=True
        )
    # Nothing to wrap (e.g. a replaced or missing stream): leave it alone.
    return stream


def setup_utf8_output():
    """Switch stdout and stderr to UTF-8 on Windows; do nothing elsewhere.

    Both streams are set to UTF-8 with undecodable characters replaced, and
    the console code page is then set to 65001. The whole operation is best
    effort: if a stream or the console cannot be changed, the function
    returns without raising.
    """
    if sys.platform != 'win32':
        return
    try:
        sys.stdout = _utf8_stream(sys.stdout)
        sys.stderr = _utf8_stream(sys.stderr)
        # Ask the console itself to use the UTF-8 code page.
        os.system('chcp 65001 >nul 2>&1')
    except (AttributeError, ValueError, OSError):
        # Output set-up must never prevent the calling program from running.
        pass
def safe_print(*args, **kwargs):
    """Print like ``print()``, degrading to ASCII if the stream rejects the text.

    The arguments are first passed to ``print()`` unchanged. If that raises
    ``UnicodeEncodeError``, every positional argument (of any type) and any
    string ``sep``/``end`` is converted to text with non-ASCII characters
    replaced by ``?``, and the call is retried.

    Args:
        *args: Values to print.
        **kwargs: Keyword arguments accepted by ``print()``.
    """
    try:
        print(*args, **kwargs)
    except UnicodeEncodeError:
        def to_ascii(value):
            # Stringify first so lists, exceptions, etc. are sanitised too.
            return str(value).encode('ascii', 'replace').decode('ascii')

        safe_kwargs = dict(kwargs)
        for key in ('sep', 'end'):
            if isinstance(safe_kwargs.get(key), str):
                safe_kwargs[key] = to_ascii(safe_kwargs[key])
        print(*(to_ascii(arg) for arg in args), **safe_kwargs)
def read_file_utf8(file_path: str) -> str:
    """Return the full text of *file_path*, decoded as UTF-8.

    Args:
        file_path: Path of the file to read.

    Returns:
        The complete decoded contents of the file.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        text = handle.read()
    return text
def write_file_utf8(file_path: str, content: str):
    """Write *content* to *file_path* as UTF-8 with LF line endings.

    The parent directory is created first if it does not exist yet.

    Args:
        file_path: Destination path; any existing file is overwritten.
        content: Text to write.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare file name has no directory part; fall back to the current one.
    os.makedirs(parent_dir or '.', exist_ok=True)
    # newline='\n' writes line endings exactly as given, on every platform.
    with open(file_path, mode='w', encoding='utf-8', newline='\n') as handle:
        handle.write(content)

130
test_mysql_connection.py Normal file
View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
MySQL连接诊断工具
"""
import socket
import sys
import os
from urllib.parse import urlparse
# Put this script's directory on the import path so the project's encoding
# helpers can be imported.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

try:
    from src.utils.encoding_helper import setup_utf8_output, safe_print
    setup_utf8_output()
except ImportError:
    # Helper module not available: fall back to a minimal local set-up.
    import io

    if sys.platform == 'win32':
        try:
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace')
            sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace')
            os.system('chcp 65001 >nul 2>&1')
        except Exception:
            # Best effort only. Catching Exception rather than everything
            # lets Ctrl-C and interpreter exit still propagate.
            pass
    safe_print = print
def test_port(host, port, timeout=5):
    """Check whether a TCP connection to ``host:port`` can be established.

    Args:
        host: Host name or IPv4 address to connect to.
        port: TCP port number.
        timeout: Socket timeout in seconds.

    Returns:
        True if the connection attempt succeeded; False if it was refused,
        timed out, or failed with an exception (which is printed).
    """
    try:
        # The context manager closes the socket even when connect_ex raises,
        # for example on a host name that cannot be resolved.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(timeout)
            # connect_ex returns 0 on success and an error code otherwise.
            return sock.connect_ex((host, port)) == 0
    except Exception as e:
        safe_print(f"端口测试异常: {e}")
        return False
def _ping_replied(host):
    """Ping *host* twice and return whether any echo reply was printed."""
    import subprocess

    # The packet-count option is "-n" on Windows and "-c" on other systems.
    count_flag = "-n" if sys.platform == "win32" else "-c"
    result = subprocess.run(
        ["ping", count_flag, "2", host],
        capture_output=True,
        text=True,
        timeout=10
    )
    # Replies are recognised by their TTL field, which is printed as "TTL="
    # or "ttl=" depending on the platform.
    return "ttl" in (result.stdout + result.stderr).lower()


def _print_port_help(host, port):
    """Print likely causes and remedies for an unreachable MySQL port."""
    safe_print("可能的原因:")
    safe_print(f" 1. MySQL服务器防火墙未开放{port}端口")
    safe_print(" 2. MySQL配置只允许localhost连接")
    safe_print(f" 3. 云服务商安全组规则阻止了{port}端口")
    safe_print(" 4. MySQL服务未启动")
    safe_print()
    safe_print("解决方案:")
    safe_print(" 1. 检查MySQL服务器防火墙配置:")
    safe_print(f" - Linux: sudo ufw allow {port}/tcp")
    safe_print(" - Windows: 在防火墙中添加入站规则")
    safe_print(" 2. 检查MySQL配置文件 (my.cnf 或 my.ini):")
    safe_print(" - 确保 bind-address = 0.0.0.0 (不是127.0.0.1)")
    safe_print(" 3. 检查云服务商安全组:")
    safe_print(f" - 添加规则允许{port}端口入站")
    safe_print(" 4. 使用SSH隧道连接:")
    safe_print(f" ssh -L {port}:localhost:{port} user@{host}")


def test_mysql_connection():
    """Run a three-step diagnosis of the MySQL server connection.

    The steps are: (1) ping the host, (2) check that the TCP port accepts
    connections, (3) open and close a real PyMySQL connection. Each step
    prints its outcome; nothing is returned and no exception is raised for
    a failed step.
    """
    from urllib.parse import unquote

    # NOTE(review): the connection URL, including credentials, is embedded
    # in source; consider loading it from the project configuration.
    db_url = "mysql+pymysql://tsp_assistant:123456@jeason.online/tsp_assistant?charset=utf8mb4"
    parsed = urlparse(db_url.replace("mysql+pymysql://", "http://"))

    # Every connection parameter comes from the one URL above, so the
    # diagnosis always targets exactly what the URL describes.
    host = parsed.hostname
    port = parsed.port or 3306
    user = unquote(parsed.username or "")
    password = unquote(parsed.password or "")
    database = parsed.path.lstrip("/")

    safe_print("=" * 60)
    safe_print("MySQL连接诊断工具")
    safe_print("=" * 60)
    safe_print(f"主机: {host}")
    safe_print(f"端口: {port}")
    safe_print()

    # Step 1: basic network reachability.
    safe_print("[1] 测试网络连通性 (Ping)...")
    try:
        if _ping_replied(host):
            safe_print("[OK] 网络连通正常")
        else:
            safe_print("[X] 网络不通")
    except Exception as e:
        safe_print(f"[!] Ping测试失败: {e}")
    safe_print()

    # Step 2: is the MySQL port accepting TCP connections?
    safe_print(f"[2] 测试端口 {port} 是否开放...")
    if test_port(host, port, timeout=10):
        safe_print(f"[OK] 端口 {port} 开放")
    else:
        safe_print(f"[X] 端口 {port} 无法连接")
        safe_print()
        _print_port_help(host, port)
    safe_print()

    # Step 3: a real client connection. pymysql is imported here so that a
    # missing package is reported as a failed step instead of preventing
    # the first two steps from running.
    safe_print("[3] 尝试使用PyMySQL连接...")
    try:
        import pymysql
        connection = pymysql.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database or None,
            connect_timeout=10,
            read_timeout=10,
            write_timeout=10
        )
        safe_print("[OK] MySQL连接成功!")
        connection.close()
    except Exception as e:
        safe_print(f"[X] MySQL连接失败: {e}")
        safe_print()
        message = str(e).lower()
        if "timed out" in message or "can't connect" in message:
            safe_print("这是连接超时错误,说明端口无法访问。")
            safe_print("请按照上面的解决方案检查服务器配置。")
    safe_print()
    safe_print("=" * 60)
if __name__ == "__main__":
    # Run the diagnosis only when executed as a script, not on import.
    test_mysql_connection()