48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
import h5py
|
|
import pandas as pd
|
|
import numpy as np
|
|
import os
|
|
import src.hdf5_vis as hdf5_vis
|
|
|
|
|
|
def read_dataset_from_hdf5file(hdf5_file_path, dataset_path):
    """Load one HDF5 dataset into a pandas DataFrame.

    The dataset is copied into an in-memory NumPy array with the dataset's
    own shape and dtype, wrapped in a DataFrame, and every ``bytes`` value
    found in an object column is decoded as UTF-8.

    Args:
        hdf5_file_path: Path of the HDF5 file; it is opened read-only.
        dataset_path: Path of the dataset inside the file.

    Returns:
        A DataFrame built from the dataset contents. The dataset's HDF5
        attributes (metadata) are not read and not included.
    """
    with h5py.File(hdf5_file_path, 'r') as hdf:
        dataset = hdf[dataset_path]
        # Copy into a preallocated array so the data stays usable after the
        # file is closed.
        data = np.empty(dataset.shape, dtype=dataset.dtype)
        dataset.read_direct(data)

    df = pd.DataFrame(data)

    def _decode_if_bytes(value):
        # Leave anything that is not a byte string untouched.
        return value.decode('utf-8') if isinstance(value, bytes) else value

    # Only object columns can hold byte strings. Going through the ``.str``
    # accessor on every non-numeric column would raise on bool/datetime
    # columns and would turn non-bytes values into NaN, so decode
    # element-wise instead.
    for col_name in df.select_dtypes(include='object'):
        df[col_name] = df[col_name].map(_decode_if_bytes)

    return df
|
|
|
|
def read_metadata_from_hdf5obj(hdf5_file_path, obj_path):
    """Return the metadata of an HDF5 object as a DataFrame (not implemented).

    This is a placeholder: neither argument is used yet and the file is not
    opened. An empty DataFrame is returned so callers get a valid object.

    Args:
        hdf5_file_path: Path of the HDF5 file (currently unused).
        obj_path: Path of the object inside the file (currently unused).

    Returns:
        An empty pandas DataFrame.
    """
    # TODO: Complete this function
    # ``DataFrame.empty`` is a property, not a constructor; calling it on the
    # class raises TypeError. Build an empty frame explicitly instead.
    metadata_df = pd.DataFrame()
    return metadata_df
|
|
|
|
def list_datasets_in_hdf5file(hdf5_file_path):
    """List every dataset in an HDF5 file together with its two parent groups.

    The file is opened read-only and walked with ``visititems``; the name of
    each ``h5py.Dataset`` encountered is recorded.

    Args:
        hdf5_file_path: Path of the HDF5 file to inspect.

    Returns:
        A DataFrame with one row per dataset and the columns:

        * ``dataset_name``: the name reported by ``visititems``.
        * ``parent_instrument``: the '/'-separated component two levels
          above the dataset, or None when the dataset is not nested that
          deep.
        * ``parent_file``: the component directly above the dataset, or
          None for a dataset with no parent component.

        NOTE(review): the column names suggest an
        ``<instrument>/<file>/<dataset>`` layout -- confirm with the
        producers of these files.
    """
    dataset_names = []

    def collect_dataset(name, obj):
        # Returns None implicitly, which tells visititems to keep walking.
        if isinstance(obj, h5py.Dataset):
            dataset_names.append(name)

    with h5py.File(hdf5_file_path, 'r') as file:
        file.visititems(collect_dataset)

    def ancestor(name, levels_up):
        # Negative indexing past the start would raise IndexError for
        # shallow datasets, so report a missing ancestor as None instead.
        parts = name.split('/')
        return parts[-1 - levels_up] if len(parts) > levels_up else None

    dataset_df = pd.DataFrame({'dataset_name': dataset_names})
    dataset_df['parent_instrument'] = [ancestor(name, 2) for name in dataset_names]
    dataset_df['parent_file'] = [ancestor(name, 1) for name in dataset_names]

    return dataset_df
|