Merge branch 'main' into 'feature/DB_for_FileReader_Repo'

# Conflicts:
#   instruments/filereader_registry.py
#   pipelines/data_integration.py
#   src/hdf5_writer.py
This commit is contained in:
2025-02-25 10:41:02 +01:00
2 changed files with 17 additions and 3 deletions

View File

@@ -7,6 +7,7 @@ import pandas as pd
import numpy as np
import h5py
import logging
import json
#try:
# from dima.utils import g5505_utils as utils
@@ -158,10 +159,21 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
stdout = inst
logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
for filenumber, filename in enumerate(filtered_filenames_list):
if 'data_lineage_metadata.json' in filtered_filenames_list:
idx = filtered_filenames_list.index('data_lineage_metadata.json')
data_lineage_file = filtered_filenames_list[idx]
try:
with open('/'.join([dirpath,data_lineage_file]),'r') as dlf:
data_lineage_dict = json.load(dlf)
filtered_filenames_list.pop(idx)
except json.JSONDecodeError:
data_lineage_dict = {} # Start fresh if file is invalid
#file_ext = os.path.splitext(filename)[1]
#try:
else:
data_lineage_dict = {}
for filenumber, filename in enumerate(filtered_filenames_list):
# hdf5 path to filename group
dest_group_name = f'{group_name}/{filename}'

View File

@@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame):
dtype.append((col, 'i4')) # Assuming 32-bit integer
elif pd.api.types.is_float_dtype(col_dtype):
dtype.append((col, 'f4')) # Assuming 32-bit float
elif pd.api.types.is_bool_dtype(col_dtype):
dtype.append((col,bool))
else:
# Handle unsupported data types
print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")