Improved progress descriptions printed to stdout
This commit is contained in:
@ -49,7 +49,8 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
|
||||
|
||||
try:
|
||||
# Create group and add their attributes
|
||||
group = h5file[group_name].create_group(name=file_dict['name'])
|
||||
filename = file_dict['name']
|
||||
group = h5file[group_name].create_group(name=filename)
|
||||
# Add group attributes
|
||||
group.attrs.update(file_dict['attributes_dict'])
|
||||
|
||||
@ -65,10 +66,15 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
|
||||
attributes = dataset.get('attributes', {})
|
||||
dataset_obj.attrs.update(attributes)
|
||||
group.attrs['last_update_date'] = utils.created_at().encode('utf-8')
|
||||
|
||||
stdout = f'Completed transfer for /{group_name}/{filename}'
|
||||
|
||||
except Exception as inst:
|
||||
print(inst)
|
||||
stdout = inst
|
||||
logging.error('Failed to transfer data into HDF5: %s', inst)
|
||||
|
||||
return stdout
|
||||
|
||||
def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
|
||||
# Create copy of original file to avoid possible file corruption and work with it.
|
||||
|
||||
@ -84,6 +90,9 @@ def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group
|
||||
if 'tmp_files' in tmp_file_path:
|
||||
os.remove(tmp_file_path)
|
||||
|
||||
stdout = f'Completed transfer for /{dest_group_name}'
|
||||
return stdout
|
||||
|
||||
def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
path_to_filenames_dict: dict = None,
|
||||
select_dir_keywords : list = [],
|
||||
@ -147,19 +156,19 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
# Set input_directory as copied input directory
|
||||
root_dir = path_to_input_directory
|
||||
path_to_output_file = path_to_input_directory.rstrip(os.path.sep) + '.h5'
|
||||
|
||||
start_message = f'\n[Start] Data integration :\nSource: {path_to_input_directory}\nDestination: {path_to_output_file}\n'
|
||||
|
||||
print(start_message)
|
||||
logging.info(start_message)
|
||||
|
||||
|
||||
with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
|
||||
|
||||
number_of_dirs = len(path_to_filenames_dict.keys())
|
||||
dir_number = 1
|
||||
for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
|
||||
|
||||
start_message = f'Starting to transfer files in directory: {dirpath}'
|
||||
end_message = f'\nCompleted transferring files in directory: {dirpath}'
|
||||
# Print and log the start message
|
||||
print(start_message)
|
||||
logging.info(start_message)
|
||||
|
||||
for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
|
||||
|
||||
# Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
|
||||
if not filtered_filenames_list:
|
||||
continue
|
||||
@ -176,14 +185,16 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
if len(tmp_list) > offset+1:
|
||||
group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
|
||||
|
||||
# Group hierarchy is implicitly defined by the forward slashes
|
||||
# Create group called "group_name". Hierarchy of nested groups can be implicitly defined by the forward slashes
|
||||
if not group_name in h5file.keys():
|
||||
h5file.create_group(group_name)
|
||||
h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
|
||||
#h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
|
||||
#h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
|
||||
else:
|
||||
print(group_name,' was already created.')
|
||||
#else:
|
||||
#print(group_name,' was already created.')
|
||||
instFoldermsgStart = f'Starting data transfer from instFolder: {group_name}'
|
||||
print(instFoldermsgStart)
|
||||
|
||||
for filenumber, filename in enumerate(filtered_filenames_list):
|
||||
|
||||
@ -198,7 +209,7 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
||||
file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
|
||||
|
||||
__transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
|
||||
stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
|
||||
|
||||
else:
|
||||
source_file_path = os.path.join(dirpath,filename)
|
||||
@ -206,14 +217,17 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
#group_name +'/'+filename
|
||||
#ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
|
||||
#g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
|
||||
__copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
|
||||
stdout = __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
|
||||
|
||||
# Update the progress bar and log the end message
|
||||
utils.progressBar(dir_number, number_of_dirs, end_message)
|
||||
logging.info(end_message)
|
||||
instFoldermsdEnd = f'\nCompleted data transfer for instFolder: {group_name}\n'
|
||||
# Update the progress bar and log the end message
|
||||
utils.progressBar(dir_number, number_of_dirs, instFoldermsdEnd)
|
||||
logging.info(instFoldermsdEnd )
|
||||
dir_number = dir_number + 1
|
||||
|
||||
|
||||
print('[End] Data integration')
|
||||
logging.info('[End] Data integration')
|
||||
|
||||
if len(root_metadata_dict.keys())>0:
|
||||
for key, value in root_metadata_dict.items():
|
||||
|
Reference in New Issue
Block a user