Merge branch 'main' into 'feature/DB_for_FileReader_Repo'
# Conflicts:
#   instruments/filereader_registry.py
#   pipelines/data_integration.py
#   src/hdf5_writer.py
This commit is contained in:
@ -7,6 +7,7 @@ import pandas as pd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import h5py
|
import h5py
|
||||||
import logging
|
import logging
|
||||||
|
import json
|
||||||
|
|
||||||
#try:
|
#try:
|
||||||
# from dima.utils import g5505_utils as utils
|
# from dima.utils import g5505_utils as utils
|
||||||
@ -158,10 +159,21 @@ def create_hdf5_file_from_filesystem_path(path_to_input_directory: str,
|
|||||||
stdout = inst
|
stdout = inst
|
||||||
logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
|
logging.error('Failed to create group %s into HDF5: %s', group_name, inst)
|
||||||
|
|
||||||
for filenumber, filename in enumerate(filtered_filenames_list):
|
if 'data_lineage_metadata.json' in filtered_filenames_list:
|
||||||
|
idx = filtered_filenames_list.index('data_lineage_metadata.json')
|
||||||
|
data_lineage_file = filtered_filenames_list[idx]
|
||||||
|
try:
|
||||||
|
with open('/'.join([dirpath,data_lineage_file]),'r') as dlf:
|
||||||
|
data_lineage_dict = json.load(dlf)
|
||||||
|
filtered_filenames_list.pop(idx)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
data_lineage_dict = {} # Start fresh if file is invalid
|
||||||
|
|
||||||
#file_ext = os.path.splitext(filename)[1]
|
else:
|
||||||
#try:
|
data_lineage_dict = {}
|
||||||
|
|
||||||
|
|
||||||
|
for filenumber, filename in enumerate(filtered_filenames_list):
|
||||||
|
|
||||||
# hdf5 path to filename group
|
# hdf5 path to filename group
|
||||||
dest_group_name = f'{group_name}/{filename}'
|
dest_group_name = f'{group_name}/{filename}'
|
||||||
|
@ -161,6 +161,8 @@ def convert_dataframe_to_np_structured_array(df: pd.DataFrame):
|
|||||||
dtype.append((col, 'i4')) # Assuming 32-bit integer
|
dtype.append((col, 'i4')) # Assuming 32-bit integer
|
||||||
elif pd.api.types.is_float_dtype(col_dtype):
|
elif pd.api.types.is_float_dtype(col_dtype):
|
||||||
dtype.append((col, 'f4')) # Assuming 32-bit float
|
dtype.append((col, 'f4')) # Assuming 32-bit float
|
||||||
|
elif pd.api.types.is_bool_dtype(col_dtype):
|
||||||
|
dtype.append((col,bool))
|
||||||
else:
|
else:
|
||||||
# Handle unsupported data types
|
# Handle unsupported data types
|
||||||
print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")
|
print(f"Unsupported dtype found in column '{col}': {col_data.dtype}")
|
||||||
|
Reference in New Issue
Block a user