Improved progress description stdout

This commit is contained in:
2024-11-10 18:21:00 +01:00
parent ca2c98eebc
commit 1be4b8493a

View File

@ -49,7 +49,8 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
try:
# Create the group and add its attributes
group = h5file[group_name].create_group(name=file_dict['name'])
filename = file_dict['name']
group = h5file[group_name].create_group(name=filename)
# Add group attributes
group.attrs.update(file_dict['attributes_dict'])
@ -65,10 +66,15 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
attributes = dataset.get('attributes', {})
dataset_obj.attrs.update(attributes)
group.attrs['last_update_date'] = utils.created_at().encode('utf-8')
stdout = f'Completed transfer for /{group_name}/{filename}'
except Exception as inst:
print(inst)
stdout = inst
logging.error('Failed to transfer data into HDF5: %s', inst)
return stdout
def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
# Create a copy of the original file and work with the copy, to avoid possible corruption of the original.
@ -84,6 +90,9 @@ def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group
if 'tmp_files' in tmp_file_path:
os.remove(tmp_file_path)
stdout = f'Completed transfer for /{dest_group_name}'
return stdout
def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
path_to_filenames_dict: dict = None,
select_dir_keywords : list = [],
@ -147,19 +156,19 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
# Set input_directory as copied input directory
root_dir = path_to_input_directory
path_to_output_file = path_to_input_directory.rstrip(os.path.sep) + '.h5'
start_message = f'\n[Start] Data integration :\nSource: {path_to_input_directory}\nDestination: {path_to_output_file}\n'
print(start_message)
logging.info(start_message)
with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
number_of_dirs = len(path_to_filenames_dict.keys())
dir_number = 1
for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
start_message = f'Starting to transfer files in directory: {dirpath}'
end_message = f'\nCompleted transferring files in directory: {dirpath}'
# Print and log the start message
print(start_message)
logging.info(start_message)
for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
# Skip directories whose filtered_filenames_list is empty. TODO: This check may be redundant given how path_to_filenames_dict is built.
if not filtered_filenames_list:
continue
@ -176,14 +185,16 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
if len(tmp_list) > offset+1:
group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
# Group hierarchy is implicitly defined by the forward slashes
# Create a group called "group_name". A hierarchy of nested groups can be defined implicitly by the forward slashes in the name.
if not group_name in h5file.keys():
h5file.create_group(group_name)
h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
#h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
#h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
else:
print(group_name,' was already created.')
#else:
#print(group_name,' was already created.')
instFoldermsgStart = f'Starting data transfer from instFolder: {group_name}'
print(instFoldermsgStart)
for filenumber, filename in enumerate(filtered_filenames_list):
@ -198,7 +209,7 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
__transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
else:
source_file_path = os.path.join(dirpath,filename)
@ -206,14 +217,17 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
#group_name +'/'+filename
#ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
#g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
__copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
stdout = __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
# Update the progress bar and log the end message
utils.progressBar(dir_number, number_of_dirs, end_message)
logging.info(end_message)
instFoldermsdEnd = f'\nCompleted data transfer for instFolder: {group_name}\n'
# Update the progress bar and log the completion message
utils.progressBar(dir_number, number_of_dirs, instFoldermsdEnd)
logging.info(instFoldermsdEnd )
dir_number = dir_number + 1
print('[End] Data integration')
logging.info('[End] Data integration')
if len(root_metadata_dict.keys())>0:
for key, value in root_metadata_dict.items():