def is_valid_directory_path(dirpath: str, select_dir_keywords) -> bool:
    """Return True if ``dirpath`` matches at least one directory keyword.

    Two matching rules are applied, depending on the keyword's shape:

    * A keyword containing ``os.sep`` (e.g. ``"instA/2023"``) is split into
      its components, and every component must be equal to one of the
      components of ``dirpath``. The components may occur in any order and
      need not be adjacent in ``dirpath``.
    * A keyword without ``os.sep`` matches when it is a plain substring of
      ``dirpath``.

    Parameters
    ----------
    dirpath : str
        Directory path whose components are separated by ``os.sep``.
    select_dir_keywords : iterable of str
        Keywords to test against ``dirpath``.

    Returns
    -------
    bool
        True if any keyword matches; False otherwise, including when
        ``select_dir_keywords`` is empty.
    """
    # Split the path once; a set gives O(1) whole-component membership tests.
    path_components = set(dirpath.split(os.sep))

    for keyword in select_dir_keywords:
        keyword_components = keyword.split(os.sep)
        if len(keyword_components) > 1:
            # Multi-level keyword: each component must be a path component.
            if all(part in path_components for part in keyword_components):
                return True
        elif keyword in dirpath:
            # Single-level keyword: plain substring match on the full path.
            return True

    return False
= group_name.replace(root_dir.replace(os.sep,'/') + '/', '/') # flatten group name to one level + offset = sum([len(i.split(os.sep)) if i in dirpath else 0 for i in select_dir_keywords]) tmp_list = group_name.split('/') - if len(tmp_list)>2: - group_name = '/'.join([tmp_list[0],tmp_list[1]]) + if len(tmp_list) > offset+1: + group_name = '/'.join([tmp_list[i] for i in range(offset+1)]) # Group hierarchy is implicitly defined by the forward slashes if not group_name in h5file.keys():