Merge branch 'main' into 'feature/DB_for_FileReader_Repo'
# Conflicts: # instruments/filereader_registry.py # pipelines/data_integration.py # src/hdf5_writer.py
This commit is contained in:
@ -7,6 +7,7 @@ import pandas as pd
|
||||
import numpy as np
|
||||
import h5py
|
||||
import logging
|
||||
import json
|
||||
|
||||
#try:
|
||||
# from dima.utils import g5505_utils as utils
|
||||
@ -158,11 +159,22 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
||||
stdout = inst
|
||||
logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
|
||||
|
||||
if 'data_lineage_metadata.json' in filtered_filenames_list:
|
||||
idx = filtered_filenames_list.index('data_lineage_metadata.json')
|
||||
data_lineage_file = filtered_filenames_list[idx]
|
||||
try:
|
||||
with open('/'.join([dirpath,data_lineage_file]),'r') as dlf:
|
||||
data_lineage_dict = json.load(dlf)
|
||||
filtered_filenames_list.pop(idx)
|
||||
except json.JSONDecodeError:
|
||||
data_lineage_dict = {} # Start fresh if file is invalid
|
||||
|
||||
else:
|
||||
data_lineage_dict = {}
|
||||
|
||||
|
||||
for filenumber, filename in enumerate(filtered_filenames_list):
|
||||
|
||||
#file_ext = os.path.splitext(filename)[1]
|
||||
#try:
|
||||
|
||||
# hdf5 path to filename group
|
||||
dest_group_name = f'{group_name}/{filename}'
|
||||
source_file_path = os.path.join(dirpath,filename)
|
||||
|
@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame):
|
||||
dtype.append((col, 'i4')) # Assuming 32-bit integer
|
||||
elif pd.api.types.is_float_dtype(col_dtype):
|
||||
dtype.append((col, 'f4')) # Assuming 32-bit float
|
||||
elif pd.api.types.is_bool_dtype(col_dtype):
|
||||
dtype.append((col,bool))
|
||||
else:
|
||||
# Handle unsupported data types
|
||||
print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")
|
||||
|
Reference in New Issue
Block a user