From 2da9eb6089cca76691d51051a6e9a1f88199a211 Mon Sep 17 00:00:00 2001
From: Barbara Bertozzi
Date: Fri, 22 Aug 2025 16:40:26 +0200
Subject: [PATCH] Cleanup: remove wreck code from src/sp2xr/helpers.py

---
 src/sp2xr/helpers.py | 45 --------------------------------------------
 1 file changed, 45 deletions(-)

diff --git a/src/sp2xr/helpers.py b/src/sp2xr/helpers.py
index 9e9fb16..5f823a9 100644
--- a/src/sp2xr/helpers.py
+++ b/src/sp2xr/helpers.py
@@ -437,48 +437,3 @@ def partition_rowcount(ddf: dd.DataFrame) -> int:
         meta=pd.Series(dtype="int64"),
     )
     return int(row_series.sum().compute())
-
-
-'''def normalize_dtypes(
-    df: Union[pd.DataFrame, dd.DataFrame],
-) -> Union[pd.DataFrame, dd.DataFrame]:
-    """
-    Enforce consistent dtypes across all columns:
-    - Strings -> string[pyarrow]
-    - Booleans -> boolean (nullable)
-    - Integers -> Int64 (nullable)
-    - Floats -> float64
-    - Datetimes -> datetime64[ns]
-    """
-    from pandas import StringDtype, BooleanDtype, Int64Dtype
-
-    new_columns = {}
-
-    for col in df.columns:
-        dtype = df[col].dtype
-
-        try:
-            if is_string_dtype(dtype) or is_object_dtype(dtype):
-                new_columns[col] = df[col].astype(StringDtype(storage="pyarrow"))
-
-            elif is_bool_dtype(dtype):
-                new_columns[col] = df[col].astype(BooleanDtype())
-
-            elif is_integer_dtype(dtype):
-                new_columns[col] = df[col].astype(Int64Dtype())
-
-            elif is_numeric_dtype(dtype):  # includes floats
-                new_columns[col] = df[col].astype("float64")
-
-            elif is_datetime64_any_dtype(dtype):
-                new_columns[col] = df[col].map_partitions(
-                    lambda s: pd.to_datetime(s).astype("datetime64[ns]")
-                )
-
-        except Exception as e:
-            print(f"Warning: failed to normalize column '{col}' ({dtype}): {e}")
-
-    if new_columns:
-        df = df.assign(**new_columns)
-
-    return df'''