Major update. Remove the file filtering option and the output_filename input argument. The output name is now path_to_input_directory + '.h5'. By default, the HDF5 writer preserves second-level subdirectories and flattens the rest. Directory filtering is outsourced to copy_directory_with_contraints from utils.
This commit is contained in:
@ -238,11 +238,9 @@ def copy_file_in_group(source_file_path, dest_file_obj : h5py.File, dest_group_n
|
||||
if 'tmp_files' in tmp_file_path:
|
||||
os.remove(tmp_file_path)
|
||||
|
||||
def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
path_to_input_directory: str,
|
||||
select_dir_keywords = [],
|
||||
select_file_keywords =[],
|
||||
top_sub_dir_mask : bool = True,
|
||||
def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
path_to_filenames_dict: dict = None,
|
||||
select_dir_keywords : list = [],
|
||||
root_metadata_dict : dict = {}):
|
||||
|
||||
"""
|
||||
@ -257,17 +255,17 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
----------
|
||||
output_filename : str
|
||||
Name of the output HDF5 file.
|
||||
path_to_input_directory: str
|
||||
path_to_input_directory : str
|
||||
Path to root directory, specified with forward slashes, e.g., path/to/root.
|
||||
|
||||
path_to_filenames_dict : dict, optional
|
||||
A pre-processed dictionary where keys are directory paths on the input directory's tree and values are lists of files.
|
||||
If provided, 'input_file_system_path' is ignored.
|
||||
|
||||
select_dir_keywords : list
|
||||
List of string elements to consider or select only directory paths that contain
|
||||
a word in 'select_dir_keywords'. When empty, all directory paths are considered
|
||||
to be included in the HDF5 file group hierarchy.
|
||||
select_file_keywords : list
|
||||
List of string elements to consider or select only files that contain a word in
|
||||
'select_file_keywords'. When empty, all files are considered to be stored in the HDF5 file.
|
||||
top_sub_dir_mask : bool
|
||||
Mask for top-level subdirectories.
|
||||
root_metadata_dict : dict
|
||||
Metadata to include at the root level of the HDF5 file.
|
||||
|
||||
@ -277,7 +275,9 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
Path to the created HDF5 file.
|
||||
"""
|
||||
|
||||
allowed_file_extensions = filereader_registry.file_extensions
|
||||
output_filename = path_to_input_directory + '.h5'
|
||||
|
||||
|
||||
if '/' in path_to_input_directory:
|
||||
path_to_input_directory= path_to_input_directory.replace('/',os.sep)
|
||||
else:
|
||||
@ -287,12 +287,12 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
select_dir_keywords[i] = keyword.replace('/',os.sep)
|
||||
|
||||
# Copy input_directory into the output_dir_path, and work with it from now on
|
||||
output_dir_path = os.path.splitext(output_filename)[0].replace('/',os.sep)
|
||||
path_to_filenames_dict = utils.copy_directory_with_contraints(path_to_input_directory,
|
||||
output_dir_path,
|
||||
select_dir_keywords,
|
||||
select_file_keywords,
|
||||
allowed_file_extensions)
|
||||
output_dir_path = os.path.splitext(output_filename)[0].replace('/',os.sep)
|
||||
|
||||
if not path_to_filenames_dict:
|
||||
path_to_filenames_dict = utils.copy_directory_with_contraints(path_to_input_directory,
|
||||
output_dir_path,
|
||||
dry_run=True)
|
||||
# Set input_directory as copied input directory
|
||||
root_dir = output_dir_path
|
||||
|
||||
@ -307,10 +307,7 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
# Print and log the start message
|
||||
print(start_message)
|
||||
logging.info(start_message)
|
||||
|
||||
# Check if dirpath is valid. TODO: This is perhaps redundant by design of path_to_filenames_dict.
|
||||
if not is_valid_directory_path(dirpath,select_dir_keywords):
|
||||
continue
|
||||
|
||||
# Check if filtered_filenames_list is nonempty. TODO: This is perhaps redundant by design of path_to_filenames_dict.
|
||||
if not filtered_filenames_list:
|
||||
continue
|
||||
@ -319,7 +316,10 @@ def create_hdf5_file_from_filesystem_path(output_filename : str,
|
||||
group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
|
||||
|
||||
# Flatten group name to one level
|
||||
offset = sum([len(i.split(os.sep)) if i in dirpath else 0 for i in select_dir_keywords])
|
||||
if select_dir_keywords:
|
||||
offset = sum([len(i.split(os.sep)) if i in dirpath else 0 for i in select_dir_keywords])
|
||||
else:
|
||||
offset = 1
|
||||
tmp_list = group_name.split('/')
|
||||
if len(tmp_list) > offset+1:
|
||||
group_name = '/'.join([tmp_list[i] for i in range(offset+1)])
|
||||
|
Reference in New Issue
Block a user