From 1537633b1a74b9d8ec3e3e449038d1e8cbd71a72 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Fri, 24 May 2024 09:06:07 +0200 Subject: [PATCH] Made a few optimizations to code and documentation. Expressions relying on list comprehensions were simplified with generator expressions. e.g., any([keyword in filename for keyword in select_file_keywords]) was simplified to any(keyword in filename for keyword in select_file_keywords). --- src/hdf5_lib.py | 53 +++++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/src/hdf5_lib.py b/src/hdf5_lib.py index a546df9..225433f 100644 --- a/src/hdf5_lib.py +++ b/src/hdf5_lib.py @@ -225,14 +225,6 @@ def annotate_root_dir(filename,annotation_dict: dict): file.attrs.create('metadata_'+key, annotation_dict[key]) -import shutil - -#def create_hdf5_file_from_filesystem_path(config_param : dict , -# input_file_system_path : str, -# select_dir_keywords = [], -# select_file_keywords =[], -# top_sub_dir_mask : bool = True): - def is_valid_directory_path(dirpath,select_dir_keywords): activated_keywords = [] @@ -256,9 +248,12 @@ def create_hdf5_file_from_filesystem_path(output_filename : str, """ Creates an .h5 file with name "output_filename" that preserves the directory tree (or folder structure) of given a filesystem path. - When file and directory keywords are non-empty, the keywords enable filtering of directories and files that do not contain the specified keywords. + When file and directory keywords are non-empty, the keywords enable filtering of directory paths and file paths that do not contain the specified keywords. - In the .h5 file, only files that are admissible file formats will be stored in the form of datasets and attributes. + The data integration capabilities are limited by our file reader, which can only access data from a list of admissible file formats. + These however can be extended. + Directories are groups in the resulting hdf5 file. 
+ Files are formatted as a composite object consisting of a group, file, and attributes. Parameters: @@ -317,26 +312,28 @@ def create_hdf5_file_from_filesystem_path(output_filename : str, # (directory_path, suitable files) relationships in a dictionary. file_paths_dict = {} - check_file_ext = lambda filename: any([ext in filename for ext in admissible_file_ext_list]) + #check_file_ext = lambda filename: any([ext in filename for ext in admissible_file_ext_list]) + check_file_ext = lambda filename: os.path.splitext(filename)[1] in admissible_file_ext_list for dirpath, _, filenames in os.walk(item,topdown=False): file_paths_dict[dirpath] = [] - # Check files that have an admissible extension and store them in admissible_filenames list + # Keep files that have an admissible extension and store them in admissible_filenames list admissible_filenames = [] for fn in filenames: if check_file_ext(fn): admissible_filenames.append(fn) if select_file_keywords: # when select_file_keywords = [], all files are considered - for filename in admissible_filenames: - # Do not consider files with types for which there is still no file_reader. TODO: extend file_reader library. - #if not any([ext in filename for ext in admissible_file_ext_list]): - # continue + #for filename in admissible_filenames: + for i in range(len(admissible_filenames) - 1, -1, -1): + filename = admissible_filenames[i] - # Add files with name, that contains any of the file_keywords - if any([keyword in filename for keyword in select_file_keywords]): - file_paths_dict[dirpath].append(filename) + # Remove files whose filename does not adhere to the file keyword constraints. 
+ if not any(keyword in filename for keyword in select_file_keywords): + admissible_filenames.pop(i) + + file_paths_dict[dirpath] = admissible_filenames else: file_paths_dict[dirpath] = admissible_filenames @@ -344,21 +341,6 @@ def create_hdf5_file_from_filesystem_path(output_filename : str, dirpath, dirnames, filenames_list = node - #if node_number == 0: - # offset = dirpath.count(os.sep) - - # Filter out files with filenames not containing a keyword specified in the parameter 'select_file_keywords'. - # When select_file_keywords is an empty, i.e., [], do not apply any filter on the filenames. - - - #filtered_filename_list = [] - #if select_file_keywords: - # for filename in filenames_list: - # if any([keyword in filename for keyword in select_file_keywords]): - # filtered_filename_list.append(filename) - #else: - # filtered_filename_list = filenames_list.copy() - filtered_filename_list = file_paths_dict.get(dirpath,filenames_list.copy()) @@ -389,9 +371,6 @@ def create_hdf5_file_from_filesystem_path(output_filename : str, print(group_name,' was already created.') - - # TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory) - for filenumber, filename in enumerate(filtered_filename_list): # Get file extension (or file type)