# Provenance: commit 40bae0e5d2d5b18499c884e63155d252f6767d22,
# "Refactor: move function _cast_and_arrow to schema.py".
# The patch appends this helper to src/sp2xr/schema.py, after enforce_schema().
def _cast_and_arrow(pdf: pd.DataFrame) -> pd.DataFrame:
    """
    Bring a single partition onto the canonical schema, pyarrow-backed.

    Each column is cast to the dtype registered for its name in
    CANONICAL_DTYPES; a column with no entry there is cast to DEFAULT_FLOAT.
    The casted frame is then passed through
    ``convert_dtypes(dtype_backend="pyarrow")`` and that result is returned.
    Intended for both concentration and histogram dataframes.
    """
    target_dtypes = {}
    for name in pdf.columns:
        # Unknown columns fall back to DEFAULT_FLOAT.
        target_dtypes[name] = CANONICAL_DTYPES.get(name, DEFAULT_FLOAT)

    casted = pdf.astype(target_dtypes, copy=False)

    # NOTE(review): convert_dtypes also applies its default dtype inference,
    # so the final dtypes may not match target_dtypes one-to-one -- confirm
    # this is intended.
    return casted.convert_dtypes(dtype_backend="pyarrow")