import pandas as pd
import h5py
#import os
import sys

# Module-level HDF5 file name. read_hdf5_as_dataframe() does not read this
# global: its own `filename` parameter shadows it inside the function.
# NOTE(review): presumably the default input for callers/scripts -- confirm.
filename = 'FileList.h5'

def read_hdf5_as_dataframe(filename):
    """Read an HDF5 file into a DataFrame with one row per top-level group.

    The first group of the file is used as the template for all groups:
    its attribute names and dataset names define the columns. Attribute
    columns come first and are labelled with the part of the attribute
    name that follows its first underscore (the whole name when it has no
    underscore). Dataset columns follow and are labelled with the
    ``column_name`` attribute stored on each dataset of the first group.

    Args:
        filename: Path of the HDF5 file; it is opened read-only.

    Returns:
        A ``pandas.DataFrame`` indexed by group name, holding each group's
        attribute values followed by its dataset values. A file without
        any top-level group yields an empty DataFrame.

    Raises:
        KeyError: If a dataset of the first group has no ``column_name``
            attribute, or if a group lacks an attribute or dataset that
            the first group has.
    """
    with h5py.File(filename, 'r') as file:
        group_list = list(file.keys())
        if not group_list:
            # Without a first group there is nothing to derive columns from.
            return pd.DataFrame()

        # Layout is taken from the first group only and assumed to hold for
        # every group. TODO: implement verification and a noncompliance
        # error if needed.
        template = file[group_list[0]]
        group_attrs = list(template.attrs.keys())
        group_datasets = list(template.keys())

        # Names are not parsed for a numeric index: columns keep the order
        # in which h5py lists the keys.
        # NOTE(review): confirm whether ordering by the number embedded in
        # the attribute/dataset names was intended.
        column_attr_names = [item[item.find('_') + 1:] for item in group_attrs]
        column_dataset_names = [
            template[item].attrs['column_name'] for item in group_datasets
        ]
        pd_series_index = column_attr_names + column_dataset_names

        output_dataframe = pd.DataFrame(columns=pd_series_index, index=group_list)

        for group_key in group_list:
            group = file[group_key]
            # Attribute values first, then the full content of each dataset,
            # matching the column order built above.
            tmp_row = [group.attrs[attr_key] for attr_key in group_attrs]
            tmp_row.extend(group[ds_key][()] for ds_key in group_datasets)

            row = pd.Series(data=tmp_row, index=pd_series_index, name=group_key)
            output_dataframe.loc[group_key, :] = row

    return output_dataframe