Cleanup: remove dead (commented-out) code from src/sp2xr/schema.py
This commit is contained in:
@@ -20,7 +20,6 @@ CANONICAL_DTYPES = {
|
||||
"Scatt numb within range": pd.Float64Dtype(),
|
||||
"BC mass": pd.Float64Dtype(),
|
||||
"BC mass within range": pd.Float64Dtype(),
|
||||
# "BC mass bin": pd.Int64Dtype(),
|
||||
"BC numb": pd.Int64Dtype(),
|
||||
"BC numb from file": pd.Int64Dtype(),
|
||||
"BC numb within range": pd.Float64Dtype(),
|
||||
@@ -42,20 +41,6 @@ CANONICAL_DTYPES = {
|
||||
"BC_massConc_within_range_vol": pd.Float64Dtype(),
|
||||
"BC_massConc_std": pd.Float64Dtype(),
|
||||
"BC_massConc_vol": pd.Float64Dtype(),
|
||||
# counters / QC flags
|
||||
# "cnts_thin": pd.Int64Dtype(),
|
||||
# "cnts_thick": pd.Int64Dtype(),
|
||||
# "cnts_unclassified": pd.Int64Dtype(),
|
||||
# "cnts_thin_noScatt": pd.Int64Dtype(),
|
||||
# "cnts_thick_sat": pd.Int64Dtype(),
|
||||
# "cnts_thin_sat": pd.Int64Dtype(),
|
||||
# "cnts_ntl_sat": pd.Int64Dtype(),
|
||||
# "cnts_ntl": pd.Int64Dtype(),
|
||||
# "cnts_extreme_positive_timelag": pd.Int64Dtype(),
|
||||
# "cnts_thin_low_inc_scatt_ratio": pd.Int64Dtype(),
|
||||
# "cnts_particles_for_tl_dist": pd.Int64Dtype(),
|
||||
# "cnts_thin_total": pd.Int64Dtype(),
|
||||
# "cnts_thick_total": pd.Int64Dtype(),
|
||||
**{
|
||||
c: pd.Int64Dtype()
|
||||
for c in [
|
||||
@@ -77,17 +62,6 @@ CANONICAL_DTYPES = {
|
||||
"temporary_col",
|
||||
]
|
||||
},
|
||||
# "flag_valid_inc_signal_in_range": 'boolean',
|
||||
# "flag_valid_inc_signal": 'boolean',
|
||||
# "flag_inc_not_sat": 'boolean',
|
||||
# "flag_valid_scatt_signal": 'boolean',
|
||||
# "flag_extreme_positive_timelag": 'boolean',
|
||||
# "flag_scatt_not_sat": 'boolean',
|
||||
# "flag_valid_scatt_signal_in_range": 'boolean',
|
||||
# "flag_negative_timelag": 'boolean',
|
||||
# "flag_valid_timelag_thin": 'boolean',
|
||||
# "flag_valid_timelag_thick":'boolean',
|
||||
# "flag_low_ratio_inc_scatt": 'boolean',
|
||||
**{
|
||||
c: "boolean"
|
||||
for c in [
|
||||
@@ -107,7 +81,6 @@ CANONICAL_DTYPES = {
|
||||
# timing
|
||||
"calculated_time": "datetime64[us]",
|
||||
"time_lag": pd.Float64Dtype(),
|
||||
# "temporary_col": pd.Float64Dtype(),
|
||||
"Sample Flow Controller Read (vccm)": pd.Float64Dtype(),
|
||||
"Sample Flow Controller Read (sccm)": pd.Float64Dtype(),
|
||||
"Dropped Records": pd.Float64Dtype(),
|
||||
@@ -116,27 +89,6 @@ CANONICAL_DTYPES = {
|
||||
|
||||
DEFAULT_FLOAT = pd.Float64Dtype()
|
||||
|
||||
'''def enforce_schema(ddf: dd.DataFrame) -> dd.DataFrame:
|
||||
"""Return a dataframe that matches the hard-coded CANONICAL_DTYPES."""
|
||||
# 1. add columns that are missing in this partition
|
||||
"""for col, dtype in CANONICAL_DTYPES.items():
|
||||
if col not in ddf.columns:
|
||||
ddf[col] = pd.Series(pd.NA, dtype=dtype)"""
|
||||
|
||||
# 2. cast *only* the columns whose dtype differs
|
||||
mismatched = {
|
||||
col: dtype
|
||||
for col, dtype in CANONICAL_DTYPES.items()
|
||||
if ddf[col].dtype != dtype
|
||||
}
|
||||
if mismatched:
|
||||
ddf = ddf.astype(mismatched)
|
||||
|
||||
# 3. drop unexpected columns (optional but safest)
|
||||
ddf = ddf[[*CANONICAL_DTYPES]]
|
||||
|
||||
return ddf'''
|
||||
|
||||
|
||||
def enforce_schema(
|
||||
ddf: dd.DataFrame,
|
||||
|
||||
Reference in New Issue
Block a user