From a1c88fdb5a98c4bd4a5c8b349f4b445a2e36053c Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe
Date: Wed, 17 Apr 2024 15:20:26 +0200
Subject: [PATCH] Added lines to flatten (shorten) original directory paths in
 the resulting hdf5 file.

---
 src/hdf5_lib.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/hdf5_lib.py b/src/hdf5_lib.py
index 386d5a4..7b905c4 100644
--- a/src/hdf5_lib.py
+++ b/src/hdf5_lib.py
@@ -165,7 +165,7 @@ def get_parent_child_relationships(file: h5py.File):
     values = [len(file.keys())]
 
     def node_visitor(name,obj):
-        #if isinstance(obj,h5py.Group):
+        if name.count('/') <=2:
             nodes.append(obj.name)
             parent.append(obj.parent.name)
             #nodes.append(os.path.split(obj.name)[1])
@@ -174,8 +174,12 @@ def get_parent_child_relationships(file: h5py.File):
             if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys():
                 values.append(1)
             else:
-                values.append(len(obj.keys()))
-                #values.append(len(obj.attrs['file_list']))
+                print(obj.name)
+                try:
+                    values.append(len(obj.keys()))
+                except:
+                    values.append(0)
+
     file.visititems(node_visitor)
 
     return nodes, parent, values
@@ -353,10 +357,20 @@ def create_hdf5_file_from_filesystem_path(config_param : dict ,
 
         group_name = dirpath.replace(os.sep,'/')
         group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
+
+        # flatten group name to one level
+        tmp_list = group_name.split('/')
+        if len(tmp_list)>2:
+            group_name = '/'.join([tmp_list[0],tmp_list[1]])
+
         # Group hierarchy is implicitly defined by the forward slashes
-        h5file.create_group(group_name)
-        h5file[group_name].attrs.create(name='filtered_file_list',data=filtered_filename_list)
-        h5file[group_name].attrs.create(name='file_list',data=filenames_list)
+        if not group_name in h5file.keys():
+            h5file.create_group(group_name)
+            h5file[group_name].attrs.create(name='filtered_file_list',data=filtered_filename_list)
+            h5file[group_name].attrs.create(name='file_list',data=filenames_list)
+        else:
+            print(group_name,' was already created.')
+
         # TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)