diff --git a/src/hdf5_lib_part2.py b/src/hdf5_lib_part2.py index 663f33f..29c528c 100644 --- a/src/hdf5_lib_part2.py +++ b/src/hdf5_lib_part2.py @@ -12,65 +12,7 @@ import utils.g5505_utils as utils import instruments.readers.filereader_registry as filereader_registry import src.hdf5_ops as hdf5_ops -def read_mtable_as_dataframe(filename): - - """ Reconstruct a Matlab Table encoded in a .h5 file as a Pandas DataFrame. The input .h5 file - contains as many groups as rows in the Matlab Table, and each group stores dataset-like variables in the Table as - Datasets while categorical and numerical variables in the table are represented as attributes of each group. - - Note: DataFrame is constructed columnwise to ensure homogenous data columns. - - Parameters: - - filename (str): .h5 file's name. It may include location-path information. - - Returns: - - output_dataframe (pd.DataFrame): Matlab's Table as a Pandas DataFrame - - """ - - #contructs dataframe by filling out entries columnwise. This way we can ensure homogenous data columns""" - - with h5py.File(filename,'r') as file: - - # Define group's attributes and datasets. This should hold - # for all groups. TODO: implement verification and noncompliance error if needed. - group_list = list(file.keys()) - group_attrs = list(file[group_list[0]].attrs.keys()) - # - column_attr_names = [item[item.find('_')+1::] for item in group_attrs] - column_attr_names_idx = [int(item[4:(item.find('_'))]) for item in group_attrs] - - group_datasets = list(file[group_list[0]].keys()) if not 'DS_EMPTY' in file[group_list[0]].keys() else [] - # - column_dataset_names = [file[group_list[0]][item].attrs['column_name'] for item in group_datasets] - column_dataset_names_idx = [int(item[2:]) for item in group_datasets] - - - # Define data_frame as group_attrs + group_datasets - #pd_series_index = group_attrs + group_datasets - pd_series_index = column_attr_names + column_dataset_names - - output_dataframe = pd.DataFrame(columns=pd_series_index,index=group_list) - - tmp_col = [] - - for meas_prop in group_attrs + group_datasets: - if meas_prop in group_attrs: - column_label = meas_prop[meas_prop.find('_')+1:] - # Create numerical or categorical column from group's attributes - tmp_col = [file[group_key].attrs[meas_prop][()][0] for group_key in group_list] - else: - # Create dataset column from group's datasets - column_label = file[group_list[0] + '/' + meas_prop].attrs['column_name'] - #tmp_col = [file[group_key + '/' + meas_prop][()][0] for group_key in group_list] - tmp_col = [file[group_key + '/' + meas_prop][()] for group_key in group_list] - - output_dataframe.loc[:,column_label] = tmp_col - - return output_dataframe - + def create_group_hierarchy(obj, df, columns): """