Extracted a code snippet from create_hdf5_file_from_filesystem_path(..) into a new function, transfer_file_dict_to_hdf5(), so that it can be reused.

This commit is contained in:
2024-06-13 15:44:01 +02:00
parent 9b70493fbf
commit 622661d4d3

View File

@ -308,7 +308,54 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path, select_dir_k
return path_to_files_dict
def transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
    """
    Transfers data from a file_dict to an HDF5 file.

    A new subgroup named ``file_dict['name']`` is created under
    ``h5file[group_name]``; the group attributes and every dataset listed in
    ``file_dict['datasets']`` (with its optional attributes) are written to it.

    The transfer is best-effort: any exception raised while writing is logged
    (together with the file_dict name and the target group) and swallowed, so
    a single bad file does not abort the caller's loop. Anything written before
    the failure is left in place.

    Parameters:
        h5file (h5py.File): HDF5 file object where the data will be written.
            ``h5file[group_name]`` must already exist.
        group_name (str): Name of the HDF5 group where data will be stored.
        file_dict (dict): Dictionary containing file data to be transferred.
            A falsy value (None, empty dict) is ignored.
            Required structure:
            {
                'name': str,
                'attributes_dict': dict,
                'datasets': [
                    {
                        'name': str,
                        'data': array-like,
                        'shape': tuple,
                        'attributes': dict (optional)
                    },
                    ...
                ]
            }

    Returns:
        None
    """
    if not file_dict:
        return

    try:
        # Create the group that will hold this file's content.
        group = h5file[group_name].create_group(name=file_dict['name'])
        # Add group attributes
        group.attrs.update(file_dict['attributes_dict'])

        # Add datasets to the just created group
        for dataset in file_dict['datasets']:
            dataset_obj = group.create_dataset(
                name=dataset['name'],
                data=dataset['data'],
                shape=dataset['shape'],
            )
            # 'attributes' is optional; `or {}` also covers an explicit None,
            # which attrs.update() would reject.
            dataset_obj.attrs.update(dataset.get('attributes') or {})
    except Exception as inst:
        # Look the name up defensively: a missing 'name' key (or a non-dict
        # file_dict) may be the very reason we ended up here.
        if isinstance(file_dict, dict):
            entry_name = file_dict.get('name', '<unknown>')
        else:
            entry_name = '<unknown>'
        logging.error(
            'Failed to transfer %s into HDF5 group %s: %s',
            entry_name, group_name, inst,
        )
def create_hdf5_file_from_filesystem_path(output_filename : str,
input_file_system_path : str,
@ -407,28 +454,7 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
#file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
if not file_dict:
continue
try:
# Create group and add their attributes
h5file[group_name].create_group(name=file_dict['name'])
# Add group attributes
h5file[group_name][file_dict['name']].attrs.update(file_dict['attributes_dict'])
# Add datasets to just created group
for dataset in file_dict['datasets']:
h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'],
data = dataset['data'],
#dtype = file_dict['dtype'],
shape = dataset['shape'])
# Add dataset's attributes
attributes = dataset.get('attributes', {})
h5file[group_name][file_dict['name']][dataset['name']].attrs.update(attributes)
except Exception as inst:
print(inst)
logging.error('Fail to transfer %s into HDF5: %s',os.path.join(dirpath,filename),inst)
transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
else:
source_file_path = os.path.join(dirpath,filename)