Added lines to flatten (shorten) the original directory paths to single-level group names in the resulting HDF5 file.

This commit is contained in:
2024-04-17 15:20:26 +02:00
parent 8005b60579
commit a1c88fdb5a

View File

@@ -165,7 +165,7 @@ def get_parent_child_relationships(file: h5py.File):
values = [len(file.keys())]
def node_visitor(name,obj):
#if isinstance(obj,h5py.Group):
if name.count('/') <=2:
nodes.append(obj.name)
parent.append(obj.parent.name)
#nodes.append(os.path.split(obj.name)[1])
@@ -174,8 +174,12 @@ def get_parent_child_relationships(file: h5py.File):
if isinstance(obj,h5py.Dataset):# or not 'file_list' in obj.attrs.keys():
values.append(1)
else:
values.append(len(obj.keys()))
#values.append(len(obj.attrs['file_list']))
print(obj.name)
try:
values.append(len(obj.keys()))
except:
values.append(0)
file.visititems(node_visitor)
return nodes, parent, values
@@ -353,10 +357,20 @@ def create_hdf5_file_from_filesystem_path(config_param : dict ,
group_name = dirpath.replace(os.sep,'/')
group_name = group_name.replace(root_dir.replace(os.sep,'/') + '/', '/')
# flatten group name to one level
tmp_list = group_name.split('/')
if len(tmp_list)>2:
group_name = '/'.join([tmp_list[0],tmp_list[1]])
# Group hierarchy is implicitly defined by the forward slashes
h5file.create_group(group_name)
h5file[group_name].attrs.create(name='filtered_file_list',data=filtered_filename_list)
h5file[group_name].attrs.create(name='file_list',data=filenames_list)
if not group_name in h5file.keys():
h5file.create_group(group_name)
h5file[group_name].attrs.create(name='filtered_file_list',data=filtered_filename_list)
h5file[group_name].attrs.create(name='file_list',data=filenames_list)
else:
print(group_name,' was already created.')
# TODO: for each "admissible" file in filenames, create an associated dataset in the corresponding group (subdirectory)