diff --git a/src/hdf5_writer.py b/src/hdf5_writer.py index 3113bea..30e3ae2 100644 --- a/src/hdf5_writer.py +++ b/src/hdf5_writer.py @@ -7,6 +7,7 @@ import pandas as pd import numpy as np import h5py import logging +import json #try: # from dima.utils import g5505_utils as utils @@ -158,11 +159,22 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str, stdout = inst logging.error('Failed to create group %s into HDF5: %s', group_name, inst) + if 'data_lineage_metadata.json' in filtered_filenames_list: + idx = filtered_filenames_list.index('data_lineage_metadata.json') + data_lineage_file = filtered_filenames_list[idx] + try: + with open('/'.join([dirpath,data_lineage_file]),'r') as dlf: + data_lineage_dict = json.load(dlf) + filtered_filenames_list.pop(idx) + except json.JSONDecodeError: + data_lineage_dict = {} # Start fresh if file is invalid + + else: + data_lineage_dict = {} + + for filenumber, filename in enumerate(filtered_filenames_list): - #file_ext = os.path.splitext(filename)[1] - #try: - # hdf5 path to filename group dest_group_name = f'{group_name}/{filename}' source_file_path = os.path.join(dirpath,filename) diff --git a/utils/g5505_utils.py b/utils/g5505_utils.py index a145df1..b413271 100644 --- a/utils/g5505_utils.py +++ b/utils/g5505_utils.py @@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame): dtype.append((col, 'i4')) # Assuming 32-bit integer elif pd.api.types.is_float_dtype(col_dtype): dtype.append((col, 'f4')) # Assuming 32-bit float + elif pd.api.types.is_bool_dtype(col_dtype): + dtype.append((col,bool)) else: # Handle unsupported data types print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")