Removed the config_file-based output file naming; the user now passes the desired output filename directly. Also added an input argument for supplying root-level metadata.

This commit is contained in:
2024-04-18 19:14:06 +02:00
parent 1ed37920c2
commit 074d2e3954

View File

@ -12,10 +12,18 @@ import numpy as np
import src.g5505_utils as utils
import input_files.config_file as config_file
import src.hdf5_vis as hdf5_vis
import src.g5505_file_reader as g5505f_reader
import h5py
import yaml
# Lookup table: file extension -> reader callable used to ingest that file type.
# The plain-text style extensions ('.txt', '.TXT', '.dat') all share one reader,
# so they are generated from a single tuple. Insertion order is kept the same
# as before because the key list is later used as the admissible-extension list.
ext_to_reader_dict = {
    '.ibw': g5505f_reader.read_xps_ibw_file_as_dict,
    **dict.fromkeys(('.txt', '.TXT', '.dat'),
                    g5505f_reader.read_txt_files_as_dict),
    '.h5': g5505f_reader.copy_file_in_group,
}
def progressBar(count_value, total, suffix=''):
bar_length = 100
@ -219,16 +227,17 @@ def annotate_root_dir(filename,annotation_dict: dict):
import shutil
def create_hdf5_file_from_filesystem_path(config_param : dict ,
input_file_system_path : str,
select_dir_keywords = [],
select_file_keywords =[],
top_sub_dir_mask : bool = True):
#def create_hdf5_file_from_filesystem_path(output_filename : str,
#def create_hdf5_file_from_filesystem_path(config_param : dict ,
# input_file_system_path : str,
# select_dir_keywords = [],
# select_file_keywords =[],
# top_sub_dir_mask : bool = True):
def create_hdf5_file_from_filesystem_path(output_filename : str,
input_file_system_path : str,
select_dir_keywords = [],
select_file_keywords =[],
top_sub_dir_mask : bool = True,
root_metadata_dict : dict = {}):
"""
Creates an .h5 file with name "output_filename" that preserves the directory tree (or folder structure) of a given filesystem path.
@ -255,16 +264,7 @@ def create_hdf5_file_from_filesystem_path(config_param : dict ,
"""
# Ensure OS compliant paths and keywords
# TODO: validate config_param dict, make sure output_filename is a valid file_path
group_id = config_param['group_id']
user_initials = config_param['user_initials']
created_at = config_file.created_at()
output_dir = config_param['output_dir']
output_filename = output_dir + config_file.output_filename_tempate(group_id,created_at,user_initials)
admissible_file_ext_list = list(config_file.select_file_readers(group_id).keys())
admissible_file_ext_list = list(ext_to_reader_dict.keys()) # list(config_file.select_file_readers(group_id).keys())
if '/' in input_file_system_path:
input_file_system_path = input_file_system_path.replace('/',os.sep)
@ -384,7 +384,8 @@ def create_hdf5_file_from_filesystem_path(config_param : dict ,
#try:
if not 'h5' in filename:
file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
#file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
if not file_dict:
continue
@ -418,14 +419,17 @@ def create_hdf5_file_from_filesystem_path(config_param : dict ,
print(inst)
else:
config_file.select_file_readers(group_id)[file_ext](source_file_path = os.path.join(dirpath,filename),
ext_to_reader_dict[file_ext](source_file_path = os.path.join(dirpath,filename),
dest_file_obj = h5file,
dest_group_name = group_name +'/'+filename)
#print(filename,file_ext, ':)')
progressBar(filenumber,len(filtered_filename_list), 'Uploading files in ' + dirpath)
if len(root_metadata_dict.keys())>0:
annotate_root_dir(output_filename,root_metadata_dict)
output_yml_filename_path = hdf5_vis.take_yml_snapshot_of_hdf5_file(output_filename)