Change date format to string for compatibility with Windows systems
This commit is contained in:
@@ -213,7 +213,7 @@ def main():
|
|||||||
"Sample Flow Controller Read (vccm)": pd.Series(
|
"Sample Flow Controller Read (vccm)": pd.Series(
|
||||||
dtype="float64"
|
dtype="float64"
|
||||||
),
|
),
|
||||||
"date": pd.Series(dtype="datetime64[ns]"),
|
"date": pd.Series(dtype="str"),
|
||||||
"hour": pd.Series(dtype="int64"),
|
"hour": pd.Series(dtype="int64"),
|
||||||
},
|
},
|
||||||
index=pd.DatetimeIndex([]),
|
index=pd.DatetimeIndex([]),
|
||||||
@@ -340,7 +340,7 @@ def main():
|
|||||||
delete_partition_if_exists(
|
delete_partition_if_exists(
|
||||||
output_path=f"{run_config['output']}/pbp_calibrated",
|
output_path=f"{run_config['output']}/pbp_calibrated",
|
||||||
partition_values={
|
partition_values={
|
||||||
"date": chunk_start.strftime("%Y-%m-%d 00:00:00"),
|
"date": chunk_start.strftime("%Y-%m-%d"),
|
||||||
"hour": chunk_start.hour,
|
"hour": chunk_start.hour,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -411,9 +411,8 @@ def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 2) cast partition columns *before* Dask strips them off
|
# 2) cast partition columns *before* Dask strips them off
|
||||||
ddf_conc["date"] = dd.to_datetime(ddf_conc["date"]).astype(
|
# Keep date as string to avoid Windows path issues with datetime partitions
|
||||||
"datetime64[ns]"
|
ddf_conc["date"] = ddf_conc["date"].astype("str")
|
||||||
)
|
|
||||||
ddf_conc["hour"] = ddf_conc["hour"].astype("int64")
|
ddf_conc["hour"] = ddf_conc["hour"].astype("int64")
|
||||||
|
|
||||||
conc_future = ddf_conc.to_parquet(
|
conc_future = ddf_conc.to_parquet(
|
||||||
|
|||||||
@@ -182,7 +182,9 @@ def calibrate_particle_data(
|
|||||||
curve_type=scatt_conf.get("curve_type"),
|
curve_type=scatt_conf.get("curve_type"),
|
||||||
params=scatt_conf.get("parameters", []),
|
params=scatt_conf.get("parameters", []),
|
||||||
)
|
)
|
||||||
df["date"] = df.index.date.astype("datetime64[ns]")
|
# Use string format for date to avoid Windows path issues with datetime partitions
|
||||||
|
# PyArrow creates partition dirs like "date=2019-05-08 00:00:00" which has colons (invalid on Windows)
|
||||||
|
df["date"] = df.index.strftime("%Y-%m-%d")
|
||||||
df["hour"] = df.index.hour.astype("int8")
|
df["hour"] = df.index.hour.astype("int8")
|
||||||
return df
|
return df
|
||||||
|
|
||||||
@@ -218,8 +220,7 @@ def calibrate_single_particle(
|
|||||||
meta_cal = ddf_raw._meta.copy()
|
meta_cal = ddf_raw._meta.copy()
|
||||||
meta_cal["BC mass"] = pd.Series([], dtype="float64")
|
meta_cal["BC mass"] = pd.Series([], dtype="float64")
|
||||||
meta_cal["Opt diam"] = pd.Series([], dtype="float64")
|
meta_cal["Opt diam"] = pd.Series([], dtype="float64")
|
||||||
meta_cal["time_lag"] = pd.Series([], dtype="int8")
|
meta_cal["date"] = pd.Series([], dtype="str")
|
||||||
meta_cal["date"] = pd.Series([], dtype="datetime64[ns]")
|
|
||||||
meta_cal["hour"] = pd.Series([], dtype="int8")
|
meta_cal["hour"] = pd.Series([], dtype="int8")
|
||||||
|
|
||||||
ddf_cal = ddf_raw.map_partitions(
|
ddf_cal = ddf_raw.map_partitions(
|
||||||
|
|||||||
@@ -427,7 +427,8 @@ def process_histograms(
|
|||||||
# merged_ddf["date"] = dd.to_datetime(merged_ddf.index.to_series()).dt.normalize()
|
# merged_ddf["date"] = dd.to_datetime(merged_ddf.index.to_series()).dt.normalize()
|
||||||
# merged_ddf["hour"] = merged_ddf["hour"].astype("int64")
|
# merged_ddf["hour"] = merged_ddf["hour"].astype("int64")
|
||||||
time_index = dd.to_datetime(merged_ddf.index.to_series())
|
time_index = dd.to_datetime(merged_ddf.index.to_series())
|
||||||
merged_ddf["date"] = time_index.dt.normalize() # works on Series
|
# Use string format to avoid Windows path issues with datetime partitions
|
||||||
|
merged_ddf["date"] = time_index.dt.strftime("%Y-%m-%d")
|
||||||
merged_ddf["hour"] = time_index.dt.hour.astype("int64")
|
merged_ddf["hour"] = time_index.dt.hour.astype("int64")
|
||||||
|
|
||||||
# --- Save hists to parquet
|
# --- Save hists to parquet
|
||||||
|
|||||||
@@ -92,7 +92,8 @@ def build_dt_summary(pdf: pd.DataFrame, dt_s: int = 1) -> pd.DataFrame:
|
|||||||
out = out[out["original_idx"] != 0].drop(columns="original_idx")
|
out = out[out["original_idx"] != 0].drop(columns="original_idx")
|
||||||
|
|
||||||
# add date / hour helper cols
|
# add date / hour helper cols
|
||||||
out["date"] = out.index.normalize()
|
# Use string format to avoid Windows path issues with datetime partitions
|
||||||
|
out["date"] = out.index.strftime("%Y-%m-%d")
|
||||||
out["hour"] = out.index.hour.astype("int64")
|
out["hour"] = out.index.hour.astype("int64")
|
||||||
|
|
||||||
return out
|
return out
|
||||||
@@ -130,7 +131,8 @@ def resample_hk_partition(pdf: pd.DataFrame, dt="1s") -> pd.DataFrame:
|
|||||||
out = out.asfreq(dt).ffill(limit=1)
|
out = out.asfreq(dt).ffill(limit=1)
|
||||||
|
|
||||||
# --- add date/hour columns ---
|
# --- add date/hour columns ---
|
||||||
out["date"] = out.index.normalize()
|
# Use string format to avoid Windows path issues with datetime partitions
|
||||||
|
out["date"] = out.index.strftime("%Y-%m-%d")
|
||||||
out["hour"] = out.index.hour.astype("int64")
|
out["hour"] = out.index.hour.astype("int64")
|
||||||
|
|
||||||
return out
|
return out
|
||||||
@@ -176,7 +178,8 @@ def aggregate_dt(ddf_pbp_dt, ddf_hk_dt, run_config):
|
|||||||
)
|
)
|
||||||
time_index = dd.to_datetime(ddf_pbp_hk_dt.index.to_series())
|
time_index = dd.to_datetime(ddf_pbp_hk_dt.index.to_series())
|
||||||
|
|
||||||
ddf_pbp_hk_dt["date"] = time_index.dt.normalize() # works on Series
|
# Use string format to avoid Windows path issues with datetime partitions
|
||||||
|
ddf_pbp_hk_dt["date"] = time_index.dt.strftime("%Y-%m-%d")
|
||||||
ddf_pbp_hk_dt["hour"] = time_index.dt.hour.astype("int64")
|
ddf_pbp_hk_dt["hour"] = time_index.dt.hour.astype("int64")
|
||||||
|
|
||||||
# Optionally drop the old columns
|
# Optionally drop the old columns
|
||||||
|
|||||||
@@ -6,7 +6,8 @@ from typing import Mapping
|
|||||||
# schema.py ── central truth table ───────────────────────────────────────────
|
# schema.py ── central truth table ───────────────────────────────────────────
|
||||||
CANONICAL_DTYPES = {
|
CANONICAL_DTYPES = {
|
||||||
# identifiers / file bookkeeping
|
# identifiers / file bookkeeping
|
||||||
"date": "datetime64[ns]",
|
# Use string for date to avoid Windows path issues with datetime partitions
|
||||||
|
"date": "string[pyarrow]",
|
||||||
"hour": pd.Int64Dtype(),
|
"hour": pd.Int64Dtype(),
|
||||||
"file": "string[pyarrow]",
|
"file": "string[pyarrow]",
|
||||||
"folder_name": "string[pyarrow]",
|
"folder_name": "string[pyarrow]",
|
||||||
|
|||||||
Reference in New Issue
Block a user