Moved read_mtable_as_dataframe(filename) to src/hdf5_ops.py
@@ -12,65 +12,7 @@ import utils.g5505_utils as utils
import instruments.readers.filereader_registry as filereader_registry
import src.hdf5_ops as hdf5_ops

def read_mtable_as_dataframe(filename):

    """Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame.

    The input .h5 file contains as many groups as rows in the Matlab Table. Each group stores
    the dataset-like variables of the Table as Datasets, while categorical and numerical
    variables of the Table are represented as attributes of each group.

    Note: the DataFrame is constructed column-wise to ensure homogeneous data columns.

    Parameters:
        filename (str): the .h5 file's name. It may include location-path information.

    Returns:
        output_dataframe (pd.DataFrame): the Matlab Table as a Pandas DataFrame.
    """

    # Construct the DataFrame by filling out entries column-wise to ensure homogeneous data columns.
    with h5py.File(filename, 'r') as file:

        # Define the group's attributes and datasets. This should hold for all groups.
        # TODO: implement verification and a noncompliance error if needed.
        group_list = list(file.keys())
        group_attrs = list(file[group_list[0]].attrs.keys())

        # The column name is the text after the first '_' of each attribute name;
        # the characters between the 4-character prefix and the '_' encode the column index.
        column_attr_names = [item[item.find('_') + 1:] for item in group_attrs]
        column_attr_names_idx = [int(item[4:item.find('_')]) for item in group_attrs]

        group_datasets = list(file[group_list[0]].keys()) if 'DS_EMPTY' not in file[group_list[0]].keys() else []

        # Each dataset stores its column name in a 'column_name' attribute;
        # the dataset key encodes the column index after a 2-character prefix.
        column_dataset_names = [file[group_list[0]][item].attrs['column_name'] for item in group_datasets]
        column_dataset_names_idx = [int(item[2:]) for item in group_datasets]

        # Define the DataFrame columns as the group attributes plus the group datasets.
        pd_series_index = column_attr_names + column_dataset_names

        output_dataframe = pd.DataFrame(columns=pd_series_index, index=group_list)

        for meas_prop in group_attrs + group_datasets:
            if meas_prop in group_attrs:
                # Create a numerical or categorical column from the group's attributes.
                column_label = meas_prop[meas_prop.find('_') + 1:]
                tmp_col = [file[group_key].attrs[meas_prop][()][0] for group_key in group_list]
            else:
                # Create a dataset column from the group's datasets.
                column_label = file[group_list[0] + '/' + meas_prop].attrs['column_name']
                tmp_col = [file[group_key + '/' + meas_prop][()] for group_key in group_list]

            output_dataframe.loc[:, column_label] = tmp_col

        return output_dataframe


def create_group_hierarchy(obj, df, columns):

    """
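
For reference, the reader above expects each HDF5 group to represent one row of the Matlab Table, with scalar or categorical columns stored as one-element array attributes and array-valued columns stored as datasets carrying a 'column_name' attribute. The sketch below writes such a file with h5py; the group names and the 'attr<i>_<column>' / 'DS<i>' naming are assumptions inferred from the index-parsing code above, not confirmed elsewhere in the repository.

# Sketch only: builds a small .h5 file in the layout read_mtable_as_dataframe appears to expect.
# The 'attr<i>_<column>' and 'DS<i>' naming is inferred from the parsing code above (an assumption).
import h5py
import numpy as np

with h5py.File('mtable_example.h5', 'w') as f:    # placeholder file name
    for i in range(3):                            # one group per Matlab Table row
        g = f.create_group(f'row_{i}')
        # Scalar/categorical columns: one-element array attributes named 'attr<idx>_<column>'
        g.attrs['attr1_temperature'] = np.array([20.0 + i])
        g.attrs['attr2_label'] = np.array([f'sample_{i}'.encode()])
        # Array-valued columns: datasets named 'DS<idx>' tagged with the column name
        ds = g.create_dataset('DS1', data=np.arange(5) + i)
        ds.attrs['column_name'] = 'spectrum'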
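
With read_mtable_as_dataframe now living in src/hdf5_ops.py, callers go through that module instead, roughly as sketched below; the file path is a placeholder and the snippet assumes it runs from the project root so that the src package is importable.

# Hypothetical caller after this commit; 'output/experiment_log.h5' is a placeholder path.
import src.hdf5_ops as hdf5_ops

df = hdf5_ops.read_mtable_as_dataframe('output/experiment_log.h5')
print(df.head())    # one row per HDF5 group, one column per Table variable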