43 lines
1.1 KiB
Python
43 lines
1.1 KiB
Python
|
|
# -*- coding: utf-8 -*-
|
||
|
|
"""
|
||
|
|
Configuration for the data preprocessing module.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
from dataclasses import dataclass
|
||
|
|
|
||
|
|
# Project root: the parent of the directory that contains this file.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class Config:
    """Settings for the data preprocessing module.

    Every field has a default, so ``Config()`` can be built with no
    arguments. The two directory defaults are computed from the
    module-level ``PROJECT_ROOT`` when the class is defined.
    """

    # Directory holding the raw input data.
    raw_data_dir: str = os.path.join(PROJECT_ROOT, "raw_data")

    # Directory receiving the cleaned output data.
    cleaned_data_dir: str = os.path.join(PROJECT_ROOT, "cleaned_data")

    # Default name of the time column.
    default_time_column: str = "SendTime"

    # Supported file extensions.
    supported_extensions: tuple = (".csv", ".xlsx", ".xls")

    # CSV encoding.
    csv_encoding: str = "utf-8-sig"

    def ensure_dirs(self) -> None:
        """Make sure the raw and cleaned data directories exist.

        Both directories are created with ``exist_ok=True``, so calling
        this when they already exist is not an error. The two paths are
        then printed to stdout. Any ``OSError`` raised by
        ``os.makedirs`` is not caught here.
        """
        os.makedirs(self.raw_data_dir, exist_ok=True)
        os.makedirs(self.cleaned_data_dir, exist_ok=True)
        # Plain literal: the message has no placeholders, so no f-prefix.
        print("[OK] 目录已就绪:")
        print(f" 原始数据: {self.raw_data_dir}")
        print(f" 清洗输出: {self.cleaned_data_dir}")
|
||
|
|
|
||
|
|
|
||
|
|
# Default configuration instance, built with every field at its default.
default_config = Config()
|