diff --git a/src/hdf5_writer.py b/src/hdf5_writer.py index 3006b7e..0cb3602 100644 --- a/src/hdf5_writer.py +++ b/src/hdf5_writer.py @@ -7,6 +7,7 @@ import pandas as pd import numpy as np import h5py import logging +import json import utils.g5505_utils as utils import instruments.readers.filereader_registry as filereader_registry @@ -209,11 +210,22 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str, stdout = inst logging.error('Failed to create group %s into HDF5: %s', group_name, inst) + if 'data_lineage_metadata.json' in filtered_filenames_list: + idx = filtered_filenames_list.index('data_lineage_metadata.json') + data_lineage_file = filtered_filenames_list[idx] + try: + with open('/'.join([dirpath,data_lineage_file]),'r') as dlf: + data_lineage_dict = json.load(dlf) + filtered_filenames_list.pop(idx) + except json.JSONDecodeError: + data_lineage_dict = {} # Start fresh if file is invalid + + else: + data_lineage_dict = {} + + for filenumber, filename in enumerate(filtered_filenames_list): - #file_ext = os.path.splitext(filename)[1] - #try: - # hdf5 path to filename group dest_group_name = f'{group_name}/{filename}' @@ -221,6 +233,10 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str, #file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename)) #file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename)) file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename)) + # Check whether there is an available file reader + if file_dict is not None and isinstance(file_dict, dict): + if 'attributes_dict' in file_dict: + file_dict['attributes_dict'].update(data_lineage_dict.get(filename,{})) stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)