Implemented a filtering mechanism in function create_hdf5_file_from_filesystem_path, which allows only directory paths containing a keyword from include_list to be stored in the HDF5 file group structure.

This commit is contained in:
2024-02-02 16:30:29 +01:00
parent be43367bc0
commit 8f757ca68c

View File

@ -234,7 +234,13 @@ def annotate_root_dir(filename,annotation_dict: dict):
def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path):
def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path, include_list = []):
"""
include_list (optional)(list): list of string elements, which keeps all directory_paths containing any of the words (string values) in include_list.
When left empty, all directory paths are considered to be included in the hdf5 file group hierarchy.
"""
with h5py.File(ofilename, 'w') as h5file:
@ -242,7 +248,15 @@ def create_hdf5_file_from_filesystem_path(ofilename,input_file_system_path):
# loops over (i.e., visits) each subdirectory, from the root directory defined by input_file_system_path down to the
# lower-level subfolders
for dirpath, dirnames, filenames_list in os.walk(input_file_system_path,topdown=True):
for node_number, node in enumerate(os.walk(input_file_system_path,topdown=True)):
dirpath, dirnames, filenames_list = node
# if include_list is nonempty, filter out any directory path that does not contain at least one of the keywords in include_list.
# TODO: explain better in function documentation
if (node_number > 0) and (len(include_list) > 0):
if not any([item in dirpath for item in include_list]):
continue
group_name = dirpath.replace(os.sep,'/')
@ -430,8 +444,14 @@ def main1():
file_dict = g5505_file_reader.read_xps_ibw_file_as_dict(inputfile_dir+'\\SES\\0069069_N1s_495eV.ibw')
group_by_type = lambda x : group_by_df_column(x,'filetype')
#create_hdf5_file_from_filesystem_path('test2.h5',inputfile_dir)
display_group_hierarchy_on_a_treemap('test2.h5')
include_list = ['NEXAFS', 'Notes', 'Photos', 'Pressure', 'RGA', 'SES']
#inputfile_dir = '\\\\fs03\\Iron_Sulphate'
#include_list = ['htof','ams', 'ptr', 'gas','smps']
create_hdf5_file_from_filesystem_path('test3.h5',inputfile_dir,include_list)
display_group_hierarchy_on_a_treemap('test3.h5')
#create_hdf5_file('test', inputfile_dir, 'Topdown', [group_by_type], extract_attrs_func = None)
def main2():