Moved is_structured_array() and to_serializable_dtype() to utils, renamed a few functions and propagated changes to dependent modules.
This commit is contained in:
@ -109,7 +109,7 @@ def created_at():
|
||||
created_at = now_tz_aware.strftime('%Y-%m-%d_%H-%M-%S') + '_UTC-OFST_' + tz
|
||||
return created_at
|
||||
|
||||
def dataframe_to_np_structured_array(df: pd.DataFrame):
|
||||
def convert_dataframe_to_np_structured_array(df: pd.DataFrame):
|
||||
|
||||
# Define the dtype for the structured array, ensuring compatibility with h5py
|
||||
dtype = []
|
||||
@ -153,6 +153,47 @@ def convert_string_to_bytes(input_list: list):
|
||||
|
||||
return input_array_bytes
|
||||
|
||||
def convert_attrdict_to_np_structured_array(attr_value: dict):
    """
    Convert a dictionary of attributes into a one-row numpy structured array
    for HDF5 compound type compatibility.

    Every dictionary key except 'rename_as' becomes a field of the structured
    array. Values are stored as UTF-8 encoded byte strings, and all fields
    share a single fixed-width dtype (``S<n>``), where ``n`` is the byte
    length of the longest encoded value.

    Parameters
    ----------
    attr_value : dict
        Dictionary containing the attributes to be converted. Example:
        attr_value = {
            'name': 'Temperature',
            'unit': 'Celsius',
            'value': 23.5,
            'timestamp': '2023-09-26 10:00'
        }

    Returns
    -------
    new_attr_value : ndarray or str
        Structured array of shape (1,) with one byte-string field per kept
        key. Returns the string 'missing' if the dictionary is empty or
        contains nothing but 'rename_as'.
    """
    encoded_fields = {}
    for key, value in attr_value.items():
        # The 'rename_as' entry is never written to the structured array.
        if key == 'rename_as':
            continue
        # Encode up front so the field width is measured in bytes (what the
        # 'S' dtype counts) and non-ASCII text does not fail on conversion.
        if isinstance(value, bytes):
            encoded_fields[key] = value
        else:
            encoded_fields[key] = str(value).encode('utf-8')

    # Nothing to store: bail out before max() is called on an empty sequence.
    if not encoded_fields:
        return 'missing'

    max_length = max(len(item) for item in encoded_fields.values())
    dtype = [(key, f'S{max_length}') for key in encoded_fields]

    return np.array([tuple(encoded_fields.values())], dtype=dtype)
|
||||
|
||||
|
||||
def infer_units(column_name):
|
||||
# TODO: complete or remove
|
||||
|
||||
@ -165,23 +206,6 @@ def infer_units(column_name):
|
||||
|
||||
return match
|
||||
|
||||
def parse_attribute(attr_value : dict):
    """Parse a dictionary attribute into an equivalent numpy structured array,
    which is compatible with the compound HDF5 type.

    Every key except 'rename_as' becomes a field. Values are stored as UTF-8
    encoded byte strings in fields of one common width: the byte length of
    the longest encoded value.

    Parameters
    ----------
    attr_value : dict
        Attribute names mapped to their values.

    Returns
    -------
    new_attr_value : ndarray or str
        Structured array of shape (1,), or the string 'missing' when there is
        no key left to store (empty dictionary, or only 'rename_as').
    """
    names = []
    values_list = []
    for key, item in attr_value.items():
        # The 'rename_as' entry is never written to the structured array.
        if key == 'rename_as':
            continue
        names.append(key)
        # Encode first so widths are counted in bytes, matching the 'S' dtype,
        # and non-ASCII text is stored as UTF-8 instead of raising.
        values_list.append(item if isinstance(item, bytes) else str(item).encode('utf-8'))

    # Guard before max(): an empty sequence would raise ValueError.
    if not values_list:
        return 'missing'

    max_length = max(len(item) for item in values_list)
    dtype = [(name, f'S{max_length}') for name in names]
    new_attr_value = np.array([tuple(values_list)], dtype=dtype)

    return new_attr_value
|
||||
|
||||
def progressBar(count_value, total, suffix=''):
|
||||
bar_length = 100
|
||||
filled_up_Length = int(round(bar_length* count_value / float(total)))
|
||||
@ -270,4 +294,59 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
|
||||
except Exception as e:
|
||||
logging.error("Failed to copy %s: %s", src_file_path, e)
|
||||
|
||||
return path_to_files_dict
|
||||
return path_to_files_dict
|
||||
|
||||
def to_serializable_dtype(value):
    """Transform a numpy value into a YAML/JSON compatible Python value.

    Parameters
    ----------
    value : np.generic, np.ndarray or any other object
        Value to convert. Objects that are neither numpy scalars nor numpy
        arrays are returned unchanged.

    Returns
    -------
    str, int, float, list, dict or the unchanged input
        * numpy scalar: bytes are decoded as UTF-8, strings become ``str``
          and numbers become ``float``.
        * structured array: a dict mapping each field name to the converted
          content of that field.
        * regular array of bytes, strings, integers or floats: a list of
          converted items when it holds more than one item, otherwise the
          single converted item.
        * ``np.nan`` when no compatible type is found or when the conversion
          raises; a message is printed in both cases.
    """
    try:
        if isinstance(value, np.generic):
            if np.issubdtype(value.dtype, np.bytes_):
                value = value.decode('utf-8')
            # np.str_ instead of np.unicode_: the latter was removed in
            # NumPy 2.0, and the resulting AttributeError was swallowed by the
            # except below, turning every non-bytes value into NaN.
            elif np.issubdtype(value.dtype, np.str_):
                value = str(value)
            elif np.issubdtype(value.dtype, np.number):
                # NOTE(review): integer scalars also become float here, while
                # integer arrays below yield int. Kept as-is for callers.
                value = float(value)
            else:
                print('Yaml-compatible data-type was not found. Value has been set to NaN.')
                value = np.nan
        elif isinstance(value, np.ndarray):
            # Handling structured array types (with fields)
            if value.dtype.names:
                value = {field: to_serializable_dtype(value[field]) for field in value.dtype.names}
            else:
                # Handling regular array NumPy types
                if np.issubdtype(value.dtype, np.bytes_):
                    value = [item.decode('utf-8') for item in value] if len(value) > 1 else value[0].decode('utf-8')
                elif np.issubdtype(value.dtype, np.str_):
                    value = [str(item) for item in value] if len(value) > 1 else str(value[0])
                elif np.issubdtype(value.dtype, np.integer):
                    value = [int(item) for item in value] if len(value) > 1 else int(value[0])
                elif np.issubdtype(value.dtype, np.floating):
                    value = [float(item) for item in value] if len(value) > 1 else float(value[0])
                else:
                    print('Yaml-compatible data-type was not found. Value has been set to NaN.')
                    value = np.nan

    except Exception as e:
        # Best-effort conversion: report the failure and fall back to NaN
        # rather than aborting the caller's serialization.
        print(f'Error converting value: {e}. Value has been set to NaN.')
        value = np.nan

    return value
|
||||
|
||||
def is_structured_array(attr_val):
    """Tell whether ``attr_val`` is a numpy structured array.

    Returns True only when ``attr_val`` is an ``np.ndarray`` whose dtype
    defines named fields; any other object yields False.
    """
    # Short-circuit keeps the dtype lookup away from non-array inputs.
    return isinstance(attr_val, np.ndarray) and attr_val.dtype.names is not None
|
Reference in New Issue
Block a user