Cleanup: remove commented-out dead code from src/sp2xr/schema.py

This commit is contained in:
2025-08-22 16:38:06 +02:00
parent 4696c2cbb9
commit d2a0533a12

View File

@@ -20,7 +20,6 @@ CANONICAL_DTYPES = {
"Scatt numb within range": pd.Float64Dtype(),
"BC mass": pd.Float64Dtype(),
"BC mass within range": pd.Float64Dtype(),
# "BC mass bin": pd.Int64Dtype(),
"BC numb": pd.Int64Dtype(),
"BC numb from file": pd.Int64Dtype(),
"BC numb within range": pd.Float64Dtype(),
@@ -42,20 +41,6 @@ CANONICAL_DTYPES = {
"BC_massConc_within_range_vol": pd.Float64Dtype(),
"BC_massConc_std": pd.Float64Dtype(),
"BC_massConc_vol": pd.Float64Dtype(),
# counters / QC flags
# "cnts_thin": pd.Int64Dtype(),
# "cnts_thick": pd.Int64Dtype(),
# "cnts_unclassified": pd.Int64Dtype(),
# "cnts_thin_noScatt": pd.Int64Dtype(),
# "cnts_thick_sat": pd.Int64Dtype(),
# "cnts_thin_sat": pd.Int64Dtype(),
# "cnts_ntl_sat": pd.Int64Dtype(),
# "cnts_ntl": pd.Int64Dtype(),
# "cnts_extreme_positive_timelag": pd.Int64Dtype(),
# "cnts_thin_low_inc_scatt_ratio": pd.Int64Dtype(),
# "cnts_particles_for_tl_dist": pd.Int64Dtype(),
# "cnts_thin_total": pd.Int64Dtype(),
# "cnts_thick_total": pd.Int64Dtype(),
**{
c: pd.Int64Dtype()
for c in [
@@ -77,17 +62,6 @@ CANONICAL_DTYPES = {
"temporary_col",
]
},
# "flag_valid_inc_signal_in_range": 'boolean',
# "flag_valid_inc_signal": 'boolean',
# "flag_inc_not_sat": 'boolean',
# "flag_valid_scatt_signal": 'boolean',
# "flag_extreme_positive_timelag": 'boolean',
# "flag_scatt_not_sat": 'boolean',
# "flag_valid_scatt_signal_in_range": 'boolean',
# "flag_negative_timelag": 'boolean',
# "flag_valid_timelag_thin": 'boolean',
# "flag_valid_timelag_thick":'boolean',
# "flag_low_ratio_inc_scatt": 'boolean',
**{
c: "boolean"
for c in [
@@ -107,7 +81,6 @@ CANONICAL_DTYPES = {
# timing
"calculated_time": "datetime64[us]",
"time_lag": pd.Float64Dtype(),
# "temporary_col": pd.Float64Dtype(),
"Sample Flow Controller Read (vccm)": pd.Float64Dtype(),
"Sample Flow Controller Read (sccm)": pd.Float64Dtype(),
"Dropped Records": pd.Float64Dtype(),
@@ -116,27 +89,6 @@ CANONICAL_DTYPES = {
DEFAULT_FLOAT = pd.Float64Dtype()
'''def enforce_schema(ddf: dd.DataFrame) -> dd.DataFrame:
"""Return a dataframe that matches the hard-coded CANONICAL_DTYPES."""
# 1. add columns that are missing in this partition
"""for col, dtype in CANONICAL_DTYPES.items():
if col not in ddf.columns:
ddf[col] = pd.Series(pd.NA, dtype=dtype)"""
# 2. cast *only* the columns whose dtype differs
mismatched = {
col: dtype
for col, dtype in CANONICAL_DTYPES.items()
if ddf[col].dtype != dtype
}
if mismatched:
ddf = ddf.astype(mismatched)
# 3. drop unexpected columns (optional but safest)
ddf = ddf[[*CANONICAL_DTYPES]]
return ddf'''
def enforce_schema(
ddf: dd.DataFrame,