import pandas as pd
import h5py
#import os
import sys

# Default HDF5 file name. The function below takes its own ``filename``
# parameter, which shadows this module-level value inside the function.
filename = 'FileList.h5'


def _column_names(template_group, attr_keys, dataset_keys):
    """Build the DataFrame column labels from one group's layout.

    Attribute columns are labelled with the part of the attribute key that
    follows the first underscore (the whole key when there is none).
    Dataset columns are labelled with each dataset's ``'column_name'``
    attribute.
    """
    attr_columns = [key.split('_', 1)[-1] for key in attr_keys]
    dataset_columns = [
        template_group[key].attrs['column_name'] for key in dataset_keys
    ]
    return attr_columns + dataset_columns


def _read_row(group, attr_keys, dataset_keys):
    """Return one group's attribute values followed by its dataset values."""
    values = [group.attrs[key] for key in attr_keys]
    # ``[()]`` reads the complete content of the dataset.
    values.extend(group[key][()] for key in dataset_keys)
    return values


def read_hdf5_as_dataframe(filename) -> pd.DataFrame:
    """Read an HDF5 file into a DataFrame with one row per top-level member.

    The attributes and datasets of the first top-level member define the
    columns; every other member is read using those same keys.

    Args:
        filename: Passed unchanged to ``h5py.File`` and opened read-only.

    Returns:
        A DataFrame indexed by the top-level member names, with the
        attribute columns first and the dataset columns after them. An
        empty DataFrame is returned when the file has no top-level members.

    Raises:
        KeyError: Presumably raised by h5py when a member lacks one of the
            keys found on the first member, or when a dataset has no
            ``'column_name'`` attribute -- confirm against h5py behaviour.
    """
    with h5py.File(filename, 'r') as file:
        group_names = list(file.keys())
        if not group_names:
            # Nothing to use as a layout template; previously this case
            # failed with IndexError on ``group_list[0]``.
            return pd.DataFrame()

        # The first member defines the layout assumed for all members.
        # TODO: implement verification and noncompliance error if needed.
        template = file[group_names[0]]
        attr_keys = list(template.attrs.keys())
        dataset_keys = list(template.keys())

        # NOTE(review): column order follows h5py's key iteration order, so
        # with default HDF5 ordering 'ds10' would come before 'ds2' --
        # confirm whether numeric ordering is wanted.
        columns = _column_names(template, attr_keys, dataset_keys)

        output_dataframe = pd.DataFrame(columns=columns, index=group_names)

        for group_name in group_names:
            # Resolve the member once instead of once per value read.
            group = file[group_name]
            row = pd.Series(
                data=_read_row(group, attr_keys, dataset_keys),
                index=columns,
                name=group_name,
            )
            output_dataframe.loc[group_name, :] = row

    return output_dataframe