Refactor: move function _cast_and_arrow to schema.py

This commit is contained in:
2025-08-22 16:19:42 +02:00
parent 2704128e8d
commit 40bae0e5d2

View File

@@ -174,3 +174,16 @@ def enforce_schema(
]
return ddf
def _cast_and_arrow(pdf: pd.DataFrame) -> pd.DataFrame:
    """Normalise one partition's dtypes and move it to the pyarrow backend.

    Each column is cast to the dtype registered for its name in
    ``CANONICAL_DTYPES``; columns without an entry are cast to
    ``DEFAULT_FLOAT``. The result is then passed through
    ``convert_dtypes(dtype_backend="pyarrow")``.

    Intended for both concentration and histogram dataframes.

    NOTE(review): ``convert_dtypes`` infers the final dtype from the data
    (e.g. floats that are all whole numbers may be inferred as integers),
    so the resulting dtypes could differ from the canonical ones -- confirm
    this is acceptable for the callers.

    Args:
        pdf: A single pandas partition.

    Returns:
        The partition with pyarrow-backed dtypes.
    """
    target_dtypes = {}
    for column in pdf.columns:
        # Columns unknown to the schema fall back to the default float dtype.
        target_dtypes[column] = CANONICAL_DTYPES.get(column, DEFAULT_FLOAT)

    # copy=False lets pandas skip copying where it can.
    casted = pdf.astype(target_dtypes, copy=False)
    return casted.convert_dtypes(dtype_backend="pyarrow")