Improved progress description stdout
This commit is contained in:
@ -49,7 +49,8 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# Create group and add their attributes
|
# Create group and add their attributes
|
||||||
group = h5file[group_name].create_group(name=file_dict['name'])
|
filename = file_dict['name']
|
||||||
|
group = h5file[group_name].create_group(name=filename)
|
||||||
# Add group attributes
|
# Add group attributes
|
||||||
group.attrs.update(file_dict['attributes_dict'])
|
group.attrs.update(file_dict['attributes_dict'])
|
||||||
|
|
||||||
@ -65,10 +66,15 @@ def __transfer_file_dict_to_hdf5(h5file, group_name, file_dict):
|
|||||||
attributes = dataset.get('attributes', {})
|
attributes = dataset.get('attributes', {})
|
||||||
dataset_obj.attrs.update(attributes)
|
dataset_obj.attrs.update(attributes)
|
||||||
group.attrs['last_update_date'] = utils.created_at().encode('utf-8')
|
group.attrs['last_update_date'] = utils.created_at().encode('utf-8')
|
||||||
|
|
||||||
|
stdout = f'Completed transfer for /{group_name}/{filename}'
|
||||||
|
|
||||||
except Exception as inst:
|
except Exception as inst:
|
||||||
print(inst)
|
stdout = inst
|
||||||
logging.error('Failed to transfer data into HDF5: %s', inst)
|
logging.error('Failed to transfer data into HDF5: %s', inst)
|
||||||
|
|
||||||
|
return stdout
|
||||||
|
|
||||||
def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
|
def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_name, work_with_copy : bool = True):
|
||||||
# Create copy of original file to avoid possible file corruption and work with it.
|
# Create copy of original file to avoid possible file corruption and work with it.
|
||||||
|
|
||||||
@ -84,6 +90,9 @@ def __copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group
|
|||||||
if 'tmp_files' in tmp_file_path:
|
if 'tmp_files' in tmp_file_path:
|
||||||
os.remove(tmp_file_path)
|
os.remove(tmp_file_path)
|
||||||
|
|
||||||
|
stdout = f'Completed transfer for /{dest_group_name}'
|
||||||
|
return stdout
|
||||||
|
|
||||||
def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||||
path_to_filenames_dict: dict = None,
|
path_to_filenames_dict: dict = None,
|
||||||
select_dir_keywords : list = [],
|
select_dir_keywords : list = [],
|
||||||
@ -148,18 +157,18 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
|||||||
root_dir = path_to_input_directory
|
root_dir = path_to_input_directory
|
||||||
path_to_output_file = path_to_input_directory.rstrip(os.path.sep) + '.h5'
|
path_to_output_file = path_to_input_directory.rstrip(os.path.sep) + '.h5'
|
||||||
|
|
||||||
|
start_message = f'\n[Start] Data integration :\nSource: {path_to_input_directory}\nDestination: {path_to_output_file}\n'
|
||||||
|
|
||||||
|
print(start_message)
|
||||||
|
logging.info(start_message)
|
||||||
|
|
||||||
|
|
||||||
with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
|
with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
|
||||||
|
|
||||||
number_of_dirs = len(path_to_filenames_dict.keys())
|
number_of_dirs = len(path_to_filenames_dict.keys())
|
||||||
dir_number = 1
|
dir_number = 1
|
||||||
for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
|
for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
|
||||||
|
|
||||||
start_message = f'Starting to transfer files in directory: {dirpath}'
|
|
||||||
end_message = f'\nCompleted transferring files in directory: {dirpath}'
|
|
||||||
# Print and log the start message
|
|
||||||
print(start_message)
|
|
||||||
logging.info(start_message)
|
|
||||||
|
|
||||||
# Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
|
# Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
|
||||||
if not filtered_filenames_list:
|
if not filtered_filenames_list:
|
||||||
continue
|
continue
|
||||||
@ -176,14 +185,16 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
|||||||
if len(tmp_list) > offset+1:
|
if len(tmp_list) > offset+1:
|
||||||
group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
|
group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
|
||||||
|
|
||||||
# Group hierarchy is implicitly defined by the forward slashes
|
# Create group called "group_name". Hierarchy of nested groups can be implicitly defined by the forward slashes
|
||||||
if not group_name in h5file.keys():
|
if not group_name in h5file.keys():
|
||||||
h5file.create_group(group_name)
|
h5file.create_group(group_name)
|
||||||
h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
|
h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
|
||||||
#h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
|
#h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
|
||||||
#h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
|
#h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
|
||||||
else:
|
#else:
|
||||||
print(group_name,' was already created.')
|
#print(group_name,' was already created.')
|
||||||
|
instFoldermsgStart = f'Starting data transfer from instFolder: {group_name}'
|
||||||
|
print(instFoldermsgStart)
|
||||||
|
|
||||||
for filenumber, filename in enumerate(filtered_filenames_list):
|
for filenumber, filename in enumerate(filtered_filenames_list):
|
||||||
|
|
||||||
@ -198,7 +209,7 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
|||||||
#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
||||||
file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
|
file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
|
||||||
|
|
||||||
__transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
|
stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
source_file_path = os.path.join(dirpath,filename)
|
source_file_path = os.path.join(dirpath,filename)
|
||||||
@ -206,14 +217,17 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
|||||||
#group_name +'/'+filename
|
#group_name +'/'+filename
|
||||||
#ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
|
#ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
|
||||||
#g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
|
#g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
|
||||||
__copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
|
stdout = __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
|
||||||
|
|
||||||
# Update the progress bar and log the end message
|
# Update the progress bar and log the end message
|
||||||
utils.progressBar(dir_number, number_of_dirs, end_message)
|
instFoldermsdEnd = f'\nCompleted data transfer for instFolder: {group_name}\n'
|
||||||
logging.info(end_message)
|
# Print and log the end message
|
||||||
|
utils.progressBar(dir_number, number_of_dirs, instFoldermsdEnd)
|
||||||
|
logging.info(instFoldermsdEnd )
|
||||||
dir_number = dir_number + 1
|
dir_number = dir_number + 1
|
||||||
|
|
||||||
|
print('[End] Data integration')
|
||||||
|
logging.info('[End] Data integration')
|
||||||
|
|
||||||
if len(root_metadata_dict.keys())>0:
|
if len(root_metadata_dict.keys())>0:
|
||||||
for key, value in root_metadata_dict.items():
|
for key, value in root_metadata_dict.items():
|
||||||
|
Reference in New Issue
Block a user