Check whether the .h5 output file already exists. If so, do not overwrite it, since it may be undergoing refinement, changes, or updates for archiving, sharing, or publishing.
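For context, the new guard follows the standard check-before-write pattern. The sketch below is a minimal standalone illustration; the helper name and the example path are hypothetical, not project code:

import logging
import os

def refuse_overwrite(path_to_output_file: str) -> bool:
    # Return True when it is safe to write; refuse when the file already exists.
    if os.path.exists(path_to_output_file):
        message = (
            f"[Notice] The file '{path_to_output_file}' already exists and will not be overwritten.\n"
            "If you wish to replace it, please delete the existing file first and rerun the program."
        )
        print(message)
        logging.error(message)
        return False
    return True

# Example: refuse_overwrite('output.h5') returns False if output.h5 is already on disk.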
@@ -162,79 +162,87 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
     print(start_message)
     logging.info(start_message)
 
-    with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
-
-        number_of_dirs = len(path_to_filenames_dict.keys())
-        dir_number = 1
-        for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
-
-            # Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
-            if not filtered_filenames_list:
-                continue
-
-            group_name = dirpath.replace(os.sep,'/')
-            group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
-
-            # Flatten group name to one level
-            if select_dir_keywords:
-                offset = sum([len(i.split(os.sep)) if i in dirpath else 0 for i in select_dir_keywords])
-            else:
-                offset = 1
-            tmp_list = group_name.split('/')
-            if len(tmp_list) > offset+1:
-                group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
-
-            # Create group called "group_name". Hierarchy of nested groups can be implicitly defined by the forward slashes
-            if not group_name in h5file.keys():
-                h5file.create_group(group_name)
-                h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
-                #h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
-                #h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
-            #else:
-                #print(group_name,' was already created.')
-            instFoldermsgStart = f'Starting data transfer from instFolder: {group_name}'
-            print(instFoldermsgStart)
-
-            for filenumber, filename in enumerate(filtered_filenames_list):
-
-                #file_ext = os.path.splitext(filename)[1]
-                #try:
-
-                # hdf5 path to filename group
-                dest_group_name = f'{group_name}/{filename}'
-
-                if not 'h5' in filename:
-                    #file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
-                    #file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
-                    file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
-
-                    stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
-
-                else:
-                    source_file_path = os.path.join(dirpath,filename)
-                    dest_file_obj = h5file
-                    #group_name +'/'+filename
-                    #ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
-                    #g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
-                    stdout = __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
-
-            # Update the progress bar and log the end message
-            instFoldermsdEnd = f'\nCompleted data transfer for instFolder: {group_name}\n'
-            utils.progressBar(dir_number, number_of_dirs, instFoldermsdEnd)
-            logging.info(instFoldermsdEnd)
-            dir_number = dir_number + 1
-
-        print('[End] Data integration')
-        logging.info('[End] Data integration')
-
-        if len(root_metadata_dict.keys())>0:
-            for key, value in root_metadata_dict.items():
-                #if key in h5file.attrs:
-                #    del h5file.attrs[key]
-                h5file.attrs.create(key, value)
-            #annotate_root_dir(output_filename,root_metadata_dict)
+    # Check if the .h5 file already exists
+    if os.path.exists(path_to_output_file) and mode in ['w']:
+        message = (
+            f"[Notice] The file '{path_to_output_file}' already exists and will not be overwritten.\n"
+            "If you wish to replace it, please delete the existing file first and rerun the program."
+        )
+        print(message)
+        logging.error(message)
+    else:
+        with h5py.File(path_to_output_file, mode=mode, track_order=True) as h5file:
+
+            number_of_dirs = len(path_to_filenames_dict.keys())
+            dir_number = 1
+            for dirpath, filtered_filenames_list in path_to_filenames_dict.items():
+
+                # Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
+                if not filtered_filenames_list:
+                    continue
+
+                group_name = dirpath.replace(os.sep,'/')
+                group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
+
+                # Flatten group name to one level
+                if select_dir_keywords:
+                    offset = sum([len(i.split(os.sep)) if i in dirpath else 0 for i in select_dir_keywords])
+                else:
+                    offset = 1
+                tmp_list = group_name.split('/')
+                if len(tmp_list) > offset+1:
+                    group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
+
+                # Create group called "group_name". Hierarchy of nested groups can be implicitly defined by the forward slashes
+                if not group_name in h5file.keys():
+                    h5file.create_group(group_name)
+                    h5file[group_name].attrs['creation_date'] = utils.created_at().encode('utf-8')
+                    #h5file[group_name].attrs.create(name='filtered_file_list',data=convert_string_to_bytes(filtered_filename_list))
+                    #h5file[group_name].attrs.create(name='file_list',data=convert_string_to_bytes(filenames_list))
+                #else:
+                    #print(group_name,' was already created.')
+                instFoldermsgStart = f'Starting data transfer from instFolder: {group_name}'
+                print(instFoldermsgStart)
+
+                for filenumber, filename in enumerate(filtered_filenames_list):
+
+                    #file_ext = os.path.splitext(filename)[1]
+                    #try:
+
+                    # hdf5 path to filename group
+                    dest_group_name = f'{group_name}/{filename}'
+
+                    if not 'h5' in filename:
+                        #file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
+                        #file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
+                        file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
+
+                        stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
+
+                    else:
+                        source_file_path = os.path.join(dirpath,filename)
+                        dest_file_obj = h5file
+                        #group_name +'/'+filename
+                        #ext_to_reader_dict[file_ext](source_file_path, dest_file_obj, dest_group_name)
+                        #g5505f_reader.select_file_reader(dest_group_name)(source_file_path, dest_file_obj, dest_group_name)
+                        stdout = __copy_file_in_group(source_file_path, dest_file_obj, dest_group_name, False)
+
+                # Update the progress bar and log the end message
+                instFoldermsdEnd = f'\nCompleted data transfer for instFolder: {group_name}\n'
+                utils.progressBar(dir_number, number_of_dirs, instFoldermsdEnd)
+                logging.info(instFoldermsdEnd)
+                dir_number = dir_number + 1
+
+            print('[End] Data integration')
+            logging.info('[End] Data integration')
+
+            if len(root_metadata_dict.keys())>0:
+                for key, value in root_metadata_dict.items():
+                    #if key in h5file.attrs:
+                    #    del h5file.attrs[key]
+                    h5file.attrs.create(key, value)
+                #annotate_root_dir(output_filename,root_metadata_dict)
 
     #output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename)
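A note on the pre-existing flattening step visible in the diff: the derived group path is truncated to offset + 1 '/'-separated components, where offset counts the path depth of any keyword found in dirpath. A self-contained sketch of that logic, with made-up paths and keywords rather than project data:

import os

def flatten_group_name(group_name, dirpath, select_dir_keywords):
    # offset counts the path depth of any keyword present in dirpath; default is 1.
    if select_dir_keywords:
        offset = sum(len(k.split(os.sep)) if k in dirpath else 0 for k in select_dir_keywords)
    else:
        offset = 1
    parts = group_name.split('/')
    if len(parts) > offset + 1:
        group_name = '/'.join(parts[:offset + 1])
    return group_name

# On POSIX, '/instA/day1/raw' with keyword 'instA' present in dirpath gives offset 1,
# so the group name collapses to '/instA'.
print(flatten_group_name('/instA/day1/raw', '/data/instA/day1/raw', ['instA']))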
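Similarly, the closing root-metadata loop records each key/value pair as an attribute of the HDF5 root group. A minimal h5py sketch, with an illustrative file name and metadata values:

import h5py

root_metadata_dict = {'project': 'demo-campaign', 'contact': 'data-team'}  # illustrative values
with h5py.File('example.h5', mode='w', track_order=True) as h5file:
    for key, value in root_metadata_dict.items():
        h5file.attrs.create(key, value)  # attributes are stored on the root group '/'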