Files
iov_data_analysis_agent/sort_csv.py

46 lines
1.5 KiB
Python
Raw Normal View History

2026-01-09 16:52:45 +08:00
import pandas as pd
import os
def sort_csv_by_time(
    file_path: str = "remotecontrol_merged.csv",
    time_col: str = "SendTime",
) -> None:
    """Sort the rows of a CSV file by a timestamp column and overwrite the file.

    The file is read with pandas, ``time_col`` is parsed with
    ``pd.to_datetime(..., errors="coerce")`` and the rows are reordered in
    ascending time order. Rows whose timestamp cannot be parsed are placed
    at the end. The result is written back to ``file_path`` (UTF-8 with
    BOM, no index column).

    The parsed timestamps are used only as the sort key. The cells of
    ``time_col`` are written back as they were read, so an unparseable
    value is kept in the file instead of being replaced by an empty cell.
    The sort is stable: rows that share a timestamp, and the unparseable
    rows, keep their original relative order.

    Args:
        file_path: Path of the CSV file to sort in place.
        time_col: Name of the column holding the timestamps.

    Returns:
        None. Problems (missing file, missing column, read/parse/write
        failures) are reported on stdout and are not raised to the caller.
    """
    if not os.path.exists(file_path):
        print(f"[ERROR] 文件不存在: {file_path}")
        return

    print(f"[READ] 正在读取 {file_path} ...")

    try:
        df = pd.read_csv(file_path, low_memory=False)
        print(f" [CHART] 数据行数: {len(df)}")

        if time_col not in df.columns:
            print(f"[ERROR] 未找到时间列: {time_col}")
            print(f" 可用列: {list(df.columns)}")
            return

        print(f"[LOOP] 正在解析时间列 '{time_col}' ...")

        # Parse into a separate Series instead of assigning back to the
        # frame: the file is overwritten below, and coercing in place would
        # permanently blank out every value that failed to parse.
        parsed = pd.to_datetime(df[time_col], errors="coerce")

        nat_count = parsed.isna().sum()
        if nat_count > 0:
            print(f"[WARN] 发现 {nat_count} 行无效时间数据,排序时将排在最后")

        print("[LOOP] 正在按时间排序...")

        # mergesort is stable, so rows with identical timestamps are not
        # shuffled; NaT entries are collected at the end.
        order = parsed.sort_values(kind="mergesort", na_position="last").index
        df_sorted = df.loc[order]

        print(f"[CACHE] 正在保存及覆盖文件: {file_path} ...")
        df_sorted.to_csv(file_path, index=False, encoding="utf-8-sig")

        print("[OK] 排序并保存完成!")

    except Exception as e:
        # Deliberate catch-all: this function reports failures on stdout
        # rather than raising, and callers rely on that.
        print(f"[ERROR]处理失败: {e}")
2026-01-09 16:52:45 +08:00
if __name__ == "__main__":
    # Script entry point: sort the default CSV by its default time column.
    sort_csv_by_time()