46 lines
1.4 KiB
Python
46 lines
1.4 KiB
Python
|
|
|
|||
|
|
import pandas as pd
|
|||
|
|
import os
|
|||
|
|
|
|||
|
|
def sort_csv_by_time(file_path="remotecontrol_merged.csv", time_col="SendTime"):
    """Sort a CSV file by a time column and overwrite the file in place.

    The time column is parsed only to build a sort key. The values written
    back are exactly the ones read from the file, so rows whose time cannot
    be parsed keep their original text instead of being blanked out.

    Args:
        file_path: Path of the CSV file to read and then overwrite.
        time_col: Name of the column whose values are parsed as timestamps
            and used as the sort key.

    Returns:
        None. Progress and problems are reported with ``print``. If the
        file or the column is missing, the function returns without
        writing anything. Any exception raised while reading, sorting or
        writing is caught and printed rather than propagated.
    """
    if not os.path.exists(file_path):
        print(f"❌ 文件不存在: {file_path}")
        return

    print(f"📖 正在读取 {file_path} ...")
    try:
        # Load the whole CSV in one pass.
        df = pd.read_csv(file_path, low_memory=False)
        print(f" 📊 数据行数: {len(df)}")

        if time_col not in df.columns:
            print(f"❌ 未找到时间列: {time_col}")
            print(f" 可用列: {list(df.columns)}")
            return

        print(f"🔄 正在解析时间列 '{time_col}' ...")
        # Parse into a separate key; unparseable values become NaT. The
        # DataFrame column itself is left untouched so no cell is lost or
        # reformatted when the file is written back.
        sort_key = pd.to_datetime(df[time_col], errors="coerce")

        # Report rows that could not be interpreted as a timestamp.
        nat_count = int(sort_key.isna().sum())
        if nat_count > 0:
            print(f"⚠️ 发现 {nat_count} 行无效时间数据,排序时将排在最后")

        print("🔄 正在按时间排序...")
        # A stable sort keeps rows with identical timestamps in their
        # original file order; NaT keys are placed after all valid ones.
        ordered_index = sort_key.sort_values(
            kind="mergesort", na_position="last"
        ).index
        df_sorted = df.loc[ordered_index]

        print(f"💾 正在保存及覆盖文件: {file_path} ...")
        df_sorted.to_csv(file_path, index=False, encoding="utf-8-sig")

        print("✅ 排序并保存完成!")

    except Exception as e:
        # Script-level boundary: report the failure instead of crashing.
        print(f"❌处理失败: {e}")
|
|||
|
|
|
|||
|
|
# Script entry point: when this file is executed directly, sort the default
# CSV file by the default time column.
if __name__ == "__main__":
    sort_csv_by_time()
|