In [4]:
import os, sys
sys.path.append(os.path.abspath('src'))

import src.hdf5_lib as h5lib
import src.g5505_utils as utils


# Locations of the input HDF5 file and of the directory that receives generated files.
# Forward slashes are used for both paths so they resolve identically on Windows
# and POSIX systems (a backslash is an ordinary file-name character on POSIX).
input_file_path = './input_files/BeamTimeMetaData.h5'
output_dir_path = './output_files'

# exist_ok=True keeps this cell safe to re-run and avoids a check-then-create race.
os.makedirs(output_dir_path, exist_ok=True)

Read the above specified input_file_path as a dataframe. 

Since we know this file was created from Thorsten's MATLAB table format, we can use h5lib.read_mtable_as_dataframe() to read it.

Then, we rename the 'name' column to 'filename', as this is the column name used to identify files in subsequent functions.
We also augment the dataframe with a few categorical columns to be used as grouping variables when creating the HDF5 file's group hierarchy. 

In [5]:
# Load BeamTimeMetaData.h5 (Thorsten's Matlab table) and preprocess it so that it
# can be used to create a newer .h5 file under certain grouping specifications:
#   1. rename the 'name' column to 'filename',
#   2. add the columns produced by the augment_with_* helpers,
#   3. split the sample column into sample and data-quality columns.
input_data_df = (
    h5lib.read_mtable_as_dataframe(input_file_path)
    .rename(columns={'name': 'filename'})
    .pipe(utils.augment_with_filenumber)
    .pipe(utils.augment_with_filetype)
    .pipe(utils.split_sample_col_into_sample_and_data_quality_cols)
)

# Cast the 'lastModifiedDatestr' column to datetime64[s] so that min()/max()
# can later be taken over it.
input_data_df['lastModifiedDatestr'] = input_data_df['lastModifiedDatestr'].astype('datetime64[s]')


We now create an HDF5 file with a 3-level group hierarchy based on `input_data_df` and three grouping functions. Then
we visualize the group hierarchy of the created file as a treemap.

In [6]:
# Grouping criteria handed to create_hdf5_file_from_dataframe, one per hierarchy level.
# Each criterion is either a callable applied to the dataframe or a string
# referring to a categorical column in input_data_df.

test_grouping_funcs = True
if not test_grouping_funcs:
    # Column-name form.
    group_by_sample, group_by_type, group_by_filenumber = 'sample', 'filetype', 'filenumber'
else:
    # Callable form.
    group_by_sample = lambda x: utils.group_by_df_column(x, 'sample')
    group_by_type = lambda x: utils.group_by_df_column(x, 'filetype')
    group_by_filenumber = lambda x: utils.group_by_df_column(x, 'filenumber')

# Destination of the generated file inside the output directory.
output_filename = 'test.h5'
ofilepath = os.path.join(output_dir_path, output_filename)

# Build the file; the criteria are listed in the order sample, filetype, filenumber.
group_hierarchy = [group_by_sample, group_by_type, group_by_filenumber]
h5lib.create_hdf5_file_from_dataframe(
    ofilepath,
    input_data_df,
    'top-down',
    group_by_funcs=group_hierarchy,
)

# Annotate the file with campaign-level metadata; the start and end dates are the
# earliest and latest values found in the 'lastModifiedDatestr' column.
last_modified = input_data_df['lastModifiedDatestr']
annotation_dict = {
    'Campaign name': 'SLS-Campaign-2023',
    'Producers': 'Thorsten, Luca, Zoe',
    'Startdate': str(last_modified.min()),
    'Enddate': str(last_modified.max()),
}
h5lib.annotate_root_dir(ofilepath, annotation_dict)

# Show the resulting group hierarchy as a treemap.
h5lib.display_group_hierarchy_on_a_treemap(ofilepath)

# Marker printed once every step above has completed.
print(':)')


:)


:)
