def transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
    """
    Transfers data from a file_dict to an HDF5 file.

    A subgroup named ``file_dict['name']`` is created under
    ``h5file[group_name]``; the entry's attributes are attached to that
    subgroup and every listed dataset is written into it together with its
    own attributes.

    The transfer is best effort: any exception raised along the way
    (including a failed ``h5file[group_name]`` lookup) is printed and logged
    with the entry and group names, and the function returns normally.
    Whatever was written before the failure is left in place.

    Parameters:
        h5file (h5py.File): HDF5 file object where the data will be written.
        group_name (str): Name of the HDF5 group where data will be stored.
        file_dict (dict): Dictionary containing file data to be transferred.
            An empty or None value is a no-op.
            Expected structure:
            {
                'name': str,
                'attributes_dict': dict (optional, may be None),
                'datasets': [            (optional, may be None)
                    {
                        'name': str,
                        'data': array-like,
                        'shape': tuple (optional; passed as None if absent),
                        'attributes': dict (optional, may be None)
                    },
                    ...
                ]
            }

    Returns:
        None
    """
    if not file_dict:
        return

    # Kept outside the try so the error message can name the entry even when
    # the failure happens before the name could be read.
    entry_name = None
    try:
        entry_name = file_dict['name']

        # Create the entry's group and attach its attributes.
        group = h5file[group_name].create_group(name=entry_name)
        group.attrs.update(file_dict.get('attributes_dict') or {})

        # Add datasets to the just created group.
        for dataset in file_dict.get('datasets') or ():
            dataset_obj = group.create_dataset(
                name=dataset['name'],
                data=dataset['data'],
                # 'shape' is redundant when 'data' is given, so do not let a
                # missing key abort the remaining datasets.
                shape=dataset.get('shape'),
            )

            # 'attributes' may be absent or explicitly None.
            dataset_obj.attrs.update(dataset.get('attributes') or {})
    except Exception as inst:
        # Best effort: report and return so the caller can carry on.
        print(inst)
        logging.error(
            'Failed to transfer %s into HDF5 group %s: %s',
            entry_name, group_name, inst,
        )
h5file[group_name][file_dict['name']].attrs.update(file_dict['attributes_dict']) - - # Add datasets to just created group - for dataset in file_dict['datasets']: - h5file[group_name][file_dict['name']].create_dataset(name = dataset['name'], - data = dataset['data'], - #dtype = file_dict['dtype'], - shape = dataset['shape']) - - # Add dataset's attributes - attributes = dataset.get('attributes', {}) - h5file[group_name][file_dict['name']][dataset['name']].attrs.update(attributes) - except Exception as inst: - print(inst) - logging.error('Fail to transfer %s into HDF5: %s',os.path.join(dirpath,filename),inst) + transfer_file_dict_to_hdf5(h5file, group_name, file_dict) else: source_file_path = os.path.join(dirpath,filename)