diff --git a/src/hdf5_writer.py b/src/hdf5_writer.py
index 5d38910..86326b9 100644
--- a/src/hdf5_writer.py
+++ b/src/hdf5_writer.py
@@ -49,7 +49,8 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
 
     try:
         # Create group and add their attributes
-        group = h5file[group_name].create_group(name=file_dict['name'])
+        filename = file_dict['name']
+        group = h5file[group_name].create_group(name=filename)
         # Add group attributes
         group.attrs.update(file_dict['attributes_dict'])
 
@@ -65,10 +66,15 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
             attributes = dataset.get('attributes', {})
             dataset_obj.attrs.update(attributes)
         group.attrs['last_update_date'] = utils.created_at().encode('utf-8')
+
+        stdout = f'Completed transfer for /{group_name}/{filename}'
+
     except Exception as inst:
-        print(inst)
+        stdout = f'Failed to transfer data into HDF5: {inst}'
         logging.error('Failed to transfer data into HDF5: %s', inst)
 
+    return stdout
+
 def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
 
     # Create copy of original file to avoid possible file corruption and work with it.
@@ -84,6 +90,9 @@ def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group
         if 'tmp_files' in tmp_file_path:
             os.remove(tmp_file_path)
 
+    stdout = f'Completed transfer for /{dest_group_name}'
+    return stdout
+
 def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
                                           path_to_filenames_dict: dict = None,
                                           select_dir_keywords : list = [],
@@ -147,19 +156,19 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
     # Set input_directory as copied input directory
     root_dir = path_to_input_directory
     path_to_output_file = path_to_input_directory.rstrip(os.path.sep) + '.h5'
+
+    start_message = f'\n[Start] Data integration:\nSource: {path_to_input_directory}\nDestination: {path_to_output_file}\n'
+
+    print(start_message)
+    logging.info(start_message)
+
     with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
 
         number_of_dirs = len(path_to_filenames_dict.keys())
         dir_number = 1
-        for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
-
-            start_message = f'Starting to transfer files in directory: {dirpath}'
-            end_message = f'\nCompleted transferring files in directory: {dirpath}'
-            # Print and log the start message
-            print(start_message)
-            logging.info(start_message)
-
+        for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
+
             # Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
             if not filtered_filenames_list:
                 continue
 
@@ -176,14 +185,16 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
             if len(tmp_list) > offset+1:
                 group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
 
-            # Group hierarchy is implicitly defined by the forward slashes
+            # Create group "group_name"; the hierarchy of nested groups is implicitly defined by the forward slashes
             if not group_name in h5file.keys():
                 h5file.create_group(group_name)
                 h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
                 #h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
                 #h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
-            else:
-                print(group_name,' was already created.')
+            #else:
+                #print(group_name,' was already created.')
+            instFoldermsgStart = f'Starting data transfer from instFolder: {group_name}'
+            print(instFoldermsgStart)
 
             for filenumber, filename in enumerate(filtered_filenames_list):
 
@@ -198,7 +209,7 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
                     #file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
                     file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
 
-                    __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
+                    stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
 
                 else:
                     source_file_path = os.path.join(dirpath,filename)
@@ -206,14 +217,17 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
                     #group_name +'/'+filename
                     #ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
                     #g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
-                    __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
+                    stdout = __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
 
             # Update the progress bar and log the end message
-            utils.progressBar(dir_number, number_of_dirs, end_message)
-            logging.info(end_message)
+            instFoldermsgEnd = f'\nCompleted data transfer for instFolder: {group_name}\n'
+            # Print and log the end message
+            utils.progressBar(dir_number, number_of_dirs, instFoldermsgEnd)
+            logging.info(instFoldermsgEnd)
             dir_number = dir_number + 1
 
-
+        print('[End] Data integration')
+        logging.info('[End] Data integration')
 
     if len(root_metadata_dict.keys())>0:
         for key, value in root_metadata_dict.items():
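
For reference, a minimal usage sketch of the patched entry point. The import path (src.hdf5_writer), the directory paths, and the contents of path_to_filenames_dict are hypothetical, and only the parameters visible in the signature above are passed; the remaining keyword arguments (e.g. mode, root_metadata_dict) are assumed to have defaults.

    import logging

    from src import hdf5_writer  # assumed import path; adjust to the project layout

    logging.basicConfig(level=logging.INFO)

    # Hypothetical mapping of source directories to the files that should be transferred.
    path_to_filenames_dict = {
        '/data/campaign/instFolderA': ['run001.txt', 'run002.txt'],
        '/data/campaign/instFolderB': ['spectra.h5'],
    }

    # Per the hunk above, the output file is derived from the input directory
    # (here /data/campaign.h5), and the '[Start]' / '[End] Data integration'
    # messages are printed and logged around the transfer loop.
    hdf5_writer.create_hdf5_file_from_filesystem_path(
        path_to_input_directory='/data/campaign',
        path_to_filenames_dict=path_to_filenames_dict,
        select_dir_keywords=[],
    )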