Cleanup: remove dead code from src/sp2xr/helpers.py
This commit is contained in:
@@ -437,48 +437,3 @@ def partition_rowcount(ddf: dd.DataFrame) -> int:
|
||||
meta=pd.Series(dtype="int64"),
|
||||
)
|
||||
return int(row_series.sum().compute())
|
||||
|
||||
|
||||
def normalize_dtypes(
    df: "Union[pd.DataFrame, dd.DataFrame]",
) -> "Union[pd.DataFrame, dd.DataFrame]":
    """
    Enforce consistent dtypes across all columns.

    Each column is cast according to its current dtype, checked in this
    order (the first match wins):

    - Strings / object -> string[pyarrow]
    - Booleans         -> boolean (nullable)
    - Integers         -> Int64 (nullable)
    - Other numerics   -> float64
    - Datetimes        -> datetime64[ns]

    Columns matching none of these are left untouched.

    Parameters
    ----------
    df
        A pandas or Dask DataFrame. Converted columns are passed to
        ``df.assign`` as keyword arguments, keyed by column name.

    Returns
    -------
    The DataFrame returned by ``df.assign`` with the converted columns, or
    ``df`` itself when no column was converted.

    Notes
    -----
    Conversion is best-effort: if casting a column raises, a warning line
    is printed and that column keeps its original dtype.
    """
    from pandas import BooleanDtype, Int64Dtype, StringDtype
    from pandas.api.types import (
        is_bool_dtype,
        is_datetime64_any_dtype,
        is_integer_dtype,
        is_numeric_dtype,
        is_object_dtype,
        is_string_dtype,
    )

    def _to_datetime_ns(s):
        # Shared by the pandas path (called directly) and the Dask path
        # (called once per partition).
        return pd.to_datetime(s).astype("datetime64[ns]")

    new_columns = {}

    for col in df.columns:
        column = df[col]
        dtype = column.dtype

        try:
            if is_string_dtype(dtype) or is_object_dtype(dtype):
                new_columns[col] = column.astype(StringDtype(storage="pyarrow"))

            elif is_bool_dtype(dtype):
                # Must precede the numeric check: bool also counts as numeric.
                new_columns[col] = column.astype(BooleanDtype())

            elif is_integer_dtype(dtype):
                new_columns[col] = column.astype(Int64Dtype())

            elif is_numeric_dtype(dtype):  # includes floats
                new_columns[col] = column.astype("float64")

            elif is_datetime64_any_dtype(dtype):
                if hasattr(column, "map_partitions"):
                    # Dask Series: apply the conversion partition by partition.
                    new_columns[col] = column.map_partitions(_to_datetime_ns)
                else:
                    # pandas Series has no map_partitions; convert directly.
                    new_columns[col] = _to_datetime_ns(column)

        except Exception as e:
            # Deliberate best-effort: report and keep the original column.
            print(f"Warning: failed to normalize column '{col}' ({dtype}): {e}")

    if new_columns:
        df = df.assign(**new_columns)

    return df
|
||||
|
||||
Reference in New Issue
Block a user