import pandas as pd
import os
def sort_csv_by_time(file_path="remotecontrol_merged.csv", time_col="SendTime",
                     ascending=True):
    """Read a CSV file, sort it by a time column, and save it in place.

    Parameters
    ----------
    file_path : str
        Path to the CSV file; it is overwritten with the sorted data.
    time_col : str
        Name of the column holding timestamps.
    ascending : bool
        Sort direction; True (the default) preserves the original behavior.

    Returns
    -------
    pandas.DataFrame or None
        The sorted DataFrame on success; None when the file or the time
        column is missing, or when processing fails.
    """
    if not os.path.exists(file_path):
        print(f"[ERROR] 文件不存在: {file_path}")
        return None

    print(f"[READ] 正在读取 {file_path} ...")
    try:
        # low_memory=False avoids mixed-dtype chunked inference on large files.
        df = pd.read_csv(file_path, low_memory=False)
        print(f" [CHART] 数据行数: {len(df)}")

        if time_col not in df.columns:
            print(f"[ERROR] 未找到时间列: {time_col}")
            print(f" 可用列: {list(df.columns)}")
            return None

        print(f"[LOOP] 正在解析时间列 '{time_col}' ...")
        # Coerce unparseable values to NaT instead of raising.
        df[time_col] = pd.to_datetime(df[time_col], errors='coerce')

        # Report rows whose timestamp could not be parsed.
        nat_count = df[time_col].isna().sum()
        if nat_count > 0:
            print(f"[WARN] 发现 {nat_count} 行无效时间数据,排序时将排在最后")

        print("[LOOP] 正在按时间排序...")
        # NaT rows are placed last by default (na_position='last').
        df_sorted = df.sort_values(by=time_col, ascending=ascending)

        print(f"[CACHE] 正在保存及覆盖文件: {file_path} ...")
        # utf-8-sig writes a BOM so Excel opens the file with the right encoding.
        df_sorted.to_csv(file_path, index=False, encoding="utf-8-sig")

        print("[OK] 排序并保存完成!")
        return df_sorted
    except Exception as e:
        # Top-level script boundary: report the failure and return None
        # rather than crash the caller.
        print(f"[ERROR]处理失败: {e}")
        return None
def _main():
    """Script entry point: sort the default merged CSV in place."""
    sort_csv_by_time()


if __name__ == "__main__":
    _main()