feat:测试验证
This commit is contained in:
157
check_encoding.py
Normal file
157
check_encoding.py
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
文件编码检查工具
|
||||
检查项目中所有文件是否使用UTF-8编码
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import chardet
|
||||
from pathlib import Path
|
||||
|
||||
def check_file_encoding(file_path: Path) -> dict:
    """Detect a file's encoding and report whether it is UTF-8 compatible.

    The file is read in binary mode and passed to ``chardet.detect``. A UTF-8
    byte-order mark at the start of the data overrides the detected name with
    ``'utf-8-sig'``. When chardet reports no encoding at all, the data is
    strictly decoded as UTF-8 to decide between ``'utf-8'`` and ``'unknown'``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        On success, a dict with the keys ``file``, ``encoding``,
        ``confidence``, ``has_bom``, ``is_utf8`` and ``size`` (byte count).
        If anything raises while reading or detecting, a dict with only
        ``file`` and ``error`` (the exception message).
    """
    try:
        with open(file_path, 'rb') as f:
            raw_data = f.read()

        result = chardet.detect(raw_data)
        # chardet can store None under these keys (e.g. for empty input), and
        # dict.get() only applies its default when the key is missing, so the
        # None case has to be handled explicitly.
        encoding = result.get('encoding')
        confidence = result.get('confidence') or 0

        # A UTF-8 BOM is decisive regardless of what the detector guessed.
        has_bom = raw_data.startswith(b'\xef\xbb\xbf')
        if has_bom:
            encoding = 'utf-8-sig'
        elif not encoding:
            # No guess from the detector: a strict decode settles whether the
            # bytes are valid UTF-8 (an empty file trivially is).
            try:
                raw_data.decode('utf-8')
            except UnicodeDecodeError:
                encoding = 'unknown'
            else:
                encoding = 'utf-8'

        return {
            'file': str(file_path),
            'encoding': encoding,
            'confidence': confidence,
            'has_bom': has_bom,
            'is_utf8': encoding.lower() in ['utf-8', 'utf-8-sig', 'ascii'],
            'size': len(raw_data)
        }
    except Exception as e:
        # Deliberate catch-all: callers expect an error record per file
        # instead of an exception aborting the whole scan.
        return {
            'file': str(file_path),
            'error': str(e)
        }
|
||||
|
||||
def check_python_file_header(file_path: Path) -> bool:
    """Return True if a Python file has a PEP 263 encoding declaration.

    Only the first two lines are inspected, because that is where PEP 263
    requires the declaration to be. A line counts only if it is a comment
    matching the PEP 263 pattern (``# -*- coding: utf-8 -*-``,
    ``# coding=utf-8``, ...); the word "coding" appearing in ordinary code or
    in a docstring does not count.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        True when a declaration is found; False when none is found or when
        the file cannot be opened or decoded as UTF-8.
    """
    import re  # local import so this block does not rely on the file's import section

    # Pattern taken from PEP 263.
    declaration = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')

    try:
        # 'utf-8-sig' drops a leading BOM so it cannot hide a first-line comment.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            first_lines = [f.readline() for _ in range(2)]
    except (OSError, ValueError):
        # OSError: unreadable or missing file. ValueError covers
        # UnicodeDecodeError (not UTF-8) and malformed path arguments.
        return False

    return any(declaration.match(line) for line in first_lines)
|
||||
|
||||
def main():
    """Scan the directory containing this script and report encoding problems.

    Walks the tree rooted at this file's parent directory, skipping the
    excluded directory names, and checks every file whose lower-cased suffix
    is in the extension set with ``check_file_encoding``. Python files are
    additionally checked with ``check_python_file_header``. A report is
    printed to stdout.

    Returns:
        bool: True when no file was counted as non-UTF-8 and no Python file
        lacks an encoding declaration; False otherwise. Files whose check
        returned an error record have no ``is_utf8`` key and are therefore
        counted as non-UTF-8. Files carrying a BOM are listed as a warning
        but do not affect the result.
    """
    project_root = Path(__file__).parent

    # File extensions that are checked.
    check_extensions = {'.py', '.json', '.md', '.txt', '.html', '.css', '.js', '.sql', '.bat', '.sh'}

    # Directory names that are never descended into.
    exclude_dirs = {'.git', '.venv', '__pycache__', 'node_modules', '.idea', 'logs', 'data', 'dist', 'build'}

    results = []
    python_files_without_encoding = []

    print("=" * 80)
    print("文件编码检查工具")
    print("=" * 80)
    print()

    for root, dirs, files in os.walk(project_root):
        # In-place slice assignment is what makes os.walk skip these directories.
        dirs[:] = [d for d in dirs if d not in exclude_dirs]

        for file in files:
            file_path = Path(root) / file

            # Lower-case once and reuse, so '.PY' is treated like '.py' both
            # for the extension filter and for the header check below.
            suffix = file_path.suffix.lower()
            if suffix not in check_extensions:
                continue

            results.append(check_file_encoding(file_path))

            if suffix == '.py' and not check_python_file_header(file_path):
                python_files_without_encoding.append(file_path)

    # Summary counts.
    total_files = len(results)
    utf8_files = sum(1 for r in results if r.get('is_utf8', False))
    non_utf8_files = total_files - utf8_files

    print(f"总计检查文件: {total_files}")
    print(f"UTF-8 编码文件: {utf8_files}")
    print(f"非 UTF-8 编码文件: {non_utf8_files}")
    print()

    # Files detected as something other than UTF-8 (error records are listed
    # in their own section further down).
    if non_utf8_files > 0:
        print("=" * 80)
        print("⚠️ 非 UTF-8 编码文件:")
        print("=" * 80)
        for result in results:
            if not result.get('is_utf8', False) and 'error' not in result:
                print(f" {result['file']}")
                print(f" 编码: {result['encoding']} (置信度: {result['confidence']:.2%})")
                print()

    # Files with a BOM are reported as 'utf-8-sig', which counts as UTF-8, so
    # they never appear in the non-UTF-8 listing above and need a section of
    # their own for the BOM warning to be shown at all.
    bom_files = [r for r in results if r.get('has_bom')]
    if bom_files:
        print("=" * 80)
        print("⚠️ 包含 BOM 的文件:")
        print("=" * 80)
        for result in bom_files:
            print(f" {result['file']}")
        print()

    # Python files without an encoding declaration.
    if python_files_without_encoding:
        print("=" * 80)
        print("⚠️ Python 文件缺少编码声明:")
        print("=" * 80)
        for file_path in python_files_without_encoding:
            print(f" {file_path}")
        print()
        print("建议在这些文件开头添加: # -*- coding: utf-8 -*-")
        print()

    # Files whose check failed.
    errors = [r for r in results if 'error' in r]
    if errors:
        print("=" * 80)
        print("❌ 检查出错的文件:")
        print("=" * 80)
        for result in errors:
            print(f" {result['file']}: {result['error']}")
        print()

    # Final verdict.
    success = non_utf8_files == 0 and not python_files_without_encoding
    print("=" * 80)
    if success:
        print("✅ 所有文件编码检查通过!")
    else:
        print("⚠️ 发现编码问题,请根据上述信息修复")
    print("=" * 80)

    return success
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: verify chardet is importable, run the scan, and map
    # its boolean result to the process exit status (0 = pass, 1 = problems).
    # NOTE(review): chardet is also imported at module level near the top of
    # this file, so a missing package would presumably fail there before this
    # guard is reached -- confirm whether the friendly message can ever show.
    try:
        import chardet
    except ImportError:
        for message in ("错误: 需要安装 chardet 库", "运行: pip install chardet"):
            print(message)
        sys.exit(1)
    else:
        exit_code = 0 if main() else 1
        sys.exit(exit_code)
|
||||
|
||||
Reference in New Issue
Block a user