Cleanup: remove dead (commented-out) code from src/sp2xr/helpers.py

This commit is contained in:
2025-08-22 16:40:26 +02:00
parent d2a0533a12
commit 2da9eb6089

View File

@@ -437,48 +437,3 @@ def partition_rowcount(ddf: dd.DataFrame) -> int:
meta=pd.Series(dtype="int64"),
)
return int(row_series.sum().compute())
def normalize_dtypes(
    df: "Union[pd.DataFrame, dd.DataFrame]",
) -> "Union[pd.DataFrame, dd.DataFrame]":
    """
    Enforce consistent dtypes across all columns:
      - Strings / objects -> string[pyarrow]
      - Booleans          -> boolean (nullable)
      - Integers          -> Int64 (nullable)
      - Other numerics    -> float64
      - Datetimes         -> datetime64[ns]

    Conversion is best-effort: if casting a column raises, a RuntimeWarning
    is emitted and that column is left with its original dtype. Columns
    whose dtype matches none of the categories above are left untouched.

    Parameters
    ----------
    df
        A pandas or dask DataFrame. Converted columns are passed to
        ``df.assign`` as keyword arguments, so the labels of converted
        columns must be strings.

    Returns
    -------
    The result of ``df.assign(...)`` with the converted columns, or ``df``
    itself when no column was converted.
    """
    # Local imports keep the block self-contained; the annotations are
    # quoted so they are not evaluated when the function is defined.
    import warnings

    from pandas import BooleanDtype, Int64Dtype, StringDtype
    from pandas.api.types import (
        is_bool_dtype,
        is_datetime64_any_dtype,
        is_integer_dtype,
        is_numeric_dtype,
        is_object_dtype,
        is_string_dtype,
    )

    def _to_datetime_ns(values):
        """Convert one pandas Series (or one dask partition) to datetime64[ns]."""
        return pd.to_datetime(values).astype("datetime64[ns]")

    new_columns = {}
    for col in df.columns:
        series = df[col]
        dtype = series.dtype
        try:
            # Order matters: bool and integer dtypes also count as numeric,
            # so they must be tested before the generic numeric branch.
            if is_string_dtype(dtype) or is_object_dtype(dtype):
                new_columns[col] = series.astype(StringDtype(storage="pyarrow"))
            elif is_bool_dtype(dtype):
                new_columns[col] = series.astype(BooleanDtype())
            elif is_integer_dtype(dtype):
                new_columns[col] = series.astype(Int64Dtype())
            elif is_numeric_dtype(dtype):  # remaining numerics, i.e. floats
                new_columns[col] = series.astype("float64")
            elif is_datetime64_any_dtype(dtype):
                if hasattr(series, "map_partitions"):
                    # Series types exposing map_partitions (dask) are
                    # converted partition by partition.
                    new_columns[col] = series.map_partitions(_to_datetime_ns)
                else:
                    # A plain pandas Series has no map_partitions.
                    new_columns[col] = _to_datetime_ns(series)
        except Exception as e:  # deliberate best-effort: warn and keep the column
            warnings.warn(
                f"Warning: failed to normalize column '{col}' ({dtype}): {e}",
                RuntimeWarning,
                stacklevel=2,
            )

    if new_columns:
        df = df.assign(**new_columns)
    return df