This commit is contained in:
2025-02-22 18:02:45 +01:00
2 changed files with 21 additions and 3 deletions

View File

@@ -7,6 +7,7 @@ import pandas as pd
import numpy as np
import h5py
import logging
import json
import utils.g5505_utils as utils
import instruments.filereader_registry as filereader_registry
@@ -209,11 +210,22 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
stdout = inst
logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
if 'data_lineage_metadata.json' in filtered_filenames_list:
idx = filtered_filenames_list.index('data_lineage_metadata.json')
data_lineage_file = filtered_filenames_list[idx]
try:
with open('/'.join([dirpath,data_lineage_file]),'r') as dlf:
data_lineage_dict = json.load(dlf)
filtered_filenames_list.pop(idx)
except json.JSONDecodeError:
data_lineage_dict = {} # Start fresh if file is invalid
else:
data_lineage_dict = {}
for filenumber, filename in enumerate(filtered_filenames_list):
#file_ext = os.path.splitext(filename)[1]
#try:
# hdf5 path to filename group
dest_group_name = f'{group_name}/{filename}'
@@ -221,6 +233,10 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
#file_dict = config_file.select_file_readers(group_id)[file_ext](os.path.join(dirpath,filename))
#file_dict = ext_to_reader_dict[file_ext](os.path.join(dirpath,filename))
file_dict = filereader_registry.select_file_reader(dest_group_name)(os.path.join(dirpath,filename))
# Check whether there is an available file reader
if file_dict is not None and isinstance(file_dict, dict):
if 'attributes_dict' in file_dict:
file_dict['attributes_dict'].update(data_lineage_dict.get(filename,{}))
stdout = __transfer_file_dict_to_hdf5(h5file, group_name, file_dict)

View File

@@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame):
dtype.append((col, 'i4')) # Assuming 32-bit integer
elif pd.api.types.is_float_dtype(col_dtype):
dtype.append((col, 'f4')) # Assuming 32-bit float
elif pd.api.types.is_bool_dtype(col_dtype):
dtype.append((col,bool))
else:
# Handle unsupported data types
print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")