# -*- coding: utf-8 -*-
"""
Command-line interface for data preprocessing.

Usage examples:
    # Merge Excel files
    python -m data_preprocessing.cli merge --source raw_data/remotecontrol --output cleaned_data/merged.csv

    # Merge and sort
    python -m data_preprocessing.cli merge --source raw_data/remotecontrol --sort-by SendTime

    # Sort an existing CSV
    python -m data_preprocessing.cli sort --input data.csv --output sorted.csv --time-col SendTime

    # Initialise the directory structure
    python -m data_preprocessing.cli init
"""

import argparse
import sys
from typing import Optional, Sequence

from .config import default_config
from .sorter import sort_by_time
from .merger import merge_files


def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser with the ``merge``, ``sort`` and ``init`` sub-commands."""
    parser = argparse.ArgumentParser(
        prog="data_preprocessing",
        description="数据预处理工具:排序、合并",
        # The raw formatter keeps the epilog's line breaks exactly as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  %(prog)s merge --source raw_data/remotecontrol --sort-by SendTime
  %(prog)s sort --input data.csv --time-col CreateTime
  %(prog)s init
        """
    )

    # The chosen sub-command name is stored in ``args.command``; it stays
    # ``None`` when the user supplies no sub-command.
    subparsers = parser.add_subparsers(dest="command", help="可用命令")

    # ========== merge command ==========
    merge_parser = subparsers.add_parser("merge", help="合并同类文件")
    merge_parser.add_argument(
        "--source", "-s",
        required=True,
        help="源数据目录路径"
    )
    merge_parser.add_argument(
        "--output", "-o",
        default=None,
        help="输出文件路径 (默认: cleaned_data/<目录名>_merged.csv)"
    )
    merge_parser.add_argument(
        "--pattern", "-p",
        default="*.xlsx",
        help="文件匹配模式 (默认: *.xlsx)"
    )
    merge_parser.add_argument(
        "--sort-by",
        default=None,
        dest="time_column",
        help="合并后按此时间列排序"
    )
    merge_parser.add_argument(
        "--no-source-col",
        action="store_true",
        help="不添加来源文件列"
    )

    # ========== sort command ==========
    sort_parser = subparsers.add_parser("sort", help="按时间排序 CSV")
    sort_parser.add_argument(
        "--input", "-i",
        required=True,
        help="输入 CSV 文件路径"
    )
    sort_parser.add_argument(
        "--output", "-o",
        default=None,
        help="输出文件路径 (默认: cleaned_data/<文件名>_sorted.csv)"
    )
    sort_parser.add_argument(
        "--time-col", "-t",
        default=None,
        dest="time_column",
        help=f"时间列名 (默认: {default_config.default_time_column})"
    )
    sort_parser.add_argument(
        "--inplace",
        action="store_true",
        help="原地覆盖输入文件"
    )

    # ========== init command ==========
    # ``init`` takes no options, so the returned sub-parser is not kept.
    subparsers.add_parser("init", help="初始化目录结构")

    return parser


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse command-line arguments and run the selected sub-command.

    Args:
        argv: Argument list to parse, without the program name. When
            ``None`` (the default) argparse reads ``sys.argv[1:]``.

    Behaviour:
        * No sub-command: prints the help text and exits with status 0.
        * ``merge``: calls ``merge_files`` with the parsed options.
        * ``sort``: calls ``sort_by_time`` with the parsed options.
        * ``init``: calls ``default_config.ensure_dirs()``.

    Raises:
        SystemExit: with status 0 when no sub-command is given, with
            status 1 when the selected command raises any ``Exception``,
            and from argparse itself on invalid arguments.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.command is None:
        parser.print_help()
        sys.exit(0)

    try:
        if args.command == "merge":
            result = merge_files(
                source_dir=args.source,
                output_file=args.output,
                pattern=args.pattern,
                time_column=args.time_column,
                # The CLI flag is negative, the keyword argument is positive.
                add_source_column=not args.no_source_col
            )
            print(f"\n✅ 合并成功: {result}")

        elif args.command == "sort":
            result = sort_by_time(
                input_path=args.input,
                output_path=args.output,
                time_column=args.time_column,
                inplace=args.inplace
            )
            print(f"\n✅ 排序成功: {result}")

        elif args.command == "init":
            default_config.ensure_dirs()
            print("\n✅ 目录初始化完成")

    except (FileNotFoundError, KeyError) as e:
        # These two exception types are reported as a plain error; anything
        # else falls through to the "unknown error" handler below.
        # Diagnostics go to stderr so they do not mix with normal output.
        print(f"\n❌ 错误: {e}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any other failure and exit non-zero
        # instead of showing a traceback.
        print(f"\n❌ 未知错误: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()