Implement detection of data_lineage_metadata.json and use it to annotate the associated files.
This commit is contained in:
@ -7,6 +7,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import h5py
|
||||
import logging
|
||||
import json
|
||||
|
||||
import utils.g5505_utils as utils
|
||||
import instruments.readers.filereader_registry as filereader_registry
|
||||
@ -209,10 +210,21 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
stdout = inst
|
||||
logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
|
||||
|
||||
for filenumber, filename in enumerate(filtered_filenames_list):
|
||||
if 'data_lineage_metadata.json' in filtered_filenames_list:
|
||||
idx = filtered_filenames_list.index('data_lineage_metadata.json')
|
||||
data_lineage_file = filtered_filenames_list[idx]
|
||||
try:
|
||||
with open('/'.join([dirpath,data_lineage_file]),'r') as dlf:
|
||||
data_lineage_dict = json.load(dlf)
|
||||
filtered_filenames_list.pop(idx)
|
||||
except json.JSONDecodeError:
|
||||
data_lineage_dict = {} # Start fresh if file is invalid
|
||||
|
||||
#file_ext = os.path.splitext(filename)[1]
|
||||
#try:
|
||||
else:
|
||||
data_lineage_dict = {}
|
||||
|
||||
|
||||
for filenumber, filename in enumerate(filtered_filenames_list):
|
||||
|
||||
# hdf5 path to filename group
|
||||
dest_group_name = f'{group_name}/{filename}'
|
||||
@ -221,6 +233,10 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
#file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
|
||||
#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
|
||||
file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
|
||||
# Check whether there is an available file reader
|
||||
if file_dict is not None and isinstance(file_dict, dict):
|
||||
if 'attributes_dict' in file_dict:
|
||||
file_dict['attributes_dict'].update(data_lineage_dict.get(filename,{}))
|
||||
|
||||
stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)
|
||||
|
||||
|
Reference in New Issue
Block a user