def _cast_and_arrow(pdf: pd.DataFrame) -> pd.DataFrame:
    """Cast one partition to the canonical dtypes and make it pyarrow-backed.

    Every column is cast to its entry in ``CANONICAL_DTYPES``; a column that
    has no entry there is cast to ``DEFAULT_FLOAT`` instead.  The cast frame
    is then run through ``convert_dtypes(dtype_backend="pyarrow")``.

    Intended for both the concentration and the histogram dataframes.

    Args:
        pdf: A single partition, as a pandas DataFrame.

    Returns:
        The DataFrame produced by the cast followed by the pyarrow dtype
        conversion.
    """
    target_dtypes = {}
    for column_name in pdf.columns:
        # Columns without a canonical entry fall back to the default float.
        target_dtypes[column_name] = CANONICAL_DTYPES.get(
            column_name, DEFAULT_FLOAT
        )

    casted = pdf.astype(target_dtypes, copy=False)

    # NOTE(review): convert_dtypes infers the "best" dtype from the values it
    # sees, so the resulting dtype may depend on the data in this partition
    # (e.g. floats that are all whole numbers) -- confirm that partitions
    # cannot end up with differing dtypes for the same column.
    return casted.convert_dtypes(dtype_backend="pyarrow")