From bf4d03f3693aa90c99e9048cde7a417bd056fb77 Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Thu, 2 Nov 2023 15:49:35 +0100 Subject: [PATCH] Added output_files' folder, environment.yml file, and jupyter notebook demo on how to create and visualize hdf5 files. --- demo_create_and_visualize_hdf5_file.ipynb | 113 ++++++++++++++++++++++ environment.yml | 13 +++ 2 files changed, 126 insertions(+) create mode 100644 demo_create_and_visualize_hdf5_file.ipynb create mode 100644 environment.yml diff --git a/demo_create_and_visualize_hdf5_file.ipynb b/demo_create_and_visualize_hdf5_file.ipynb new file mode 100644 index 0000000..0ce8a0f --- /dev/null +++ b/demo_create_and_visualize_hdf5_file.ipynb @@ -0,0 +1,113 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hdf5_lib as h5lib\n", + "import os\n", + "\n", + "# define input file path and output directory\n", + "\n", + "input_file_path = './input_files/BeamTimeMetaData.h5'\n", + "output_dir_path = './output_files'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read the above specified input_file_path as a dataframe. \n", + "\n", + "Since we know this file was created from a Thorsten Table's format, we can use h5lib.read_mtable_as_dataframe() to read it.\n", + "\n", + "Then, we rename the name column as filename, as this is the column name used to identify files in subsequent functions.\n", + "Also, we add to the dataframe a few categorical columns to be used as grouping variables when creating the hdf5 file's group hierarchy. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table\n", + "input_data_df = h5lib.read_mtable_as_dataframe(input_file_path)\n", + "\n", + "# Preprocess Thorsten's input_data dataframe so that it can be used to create a newer .h5 file\n", + "# under certain grouping specifications.\n", + "input_data_df = input_data_df.rename(columns = {'name':'filename'})\n", + "input_data_df = h5lib.augment_with_filenumber(input_data_df)\n", + "input_data_df = h5lib.augment_with_filetype(input_data_df)\n", + "input_data_df = h5lib.split_sample_col_into_sample_and_data_quality_cols(input_data_df)\n", + "input_data_df['lastModifiedDatestr'] = input_data_df['lastModifiedDatestr'].astype('datetime64[s]')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define grouping functions to be passed into create_hdf5_file function. 
These can also be set\n", + "# as strings referring to categorical columns in input_data_df.\n", + "\n", + "test_grouping_funcs = True\n", + "if test_grouping_funcs:\n", + " group_by_sample = lambda x : h5lib.group_by_df_column(x,'sample')\n", + " group_by_type = lambda x : h5lib.group_by_df_column(x,'filetype')\n", + " group_by_filenumber = lambda x : h5lib.group_by_df_column(x,'filenumber')\n", + "else:\n", + " group_by_sample = 'sample'\n", + " group_by_type = 'filetype'\n", + " group_by_filenumber = 'filenumber'\n", + "\n", + "output_filename = 'test.h5'\n", + "output_file_path = os.path.join(output_dir_path, output_filename)\n", + "h5lib.create_hdf5_file(output_file_path,\n", + " input_data_df, 'top-down', \n", + " group_by_funcs = [group_by_sample, group_by_type, group_by_filenumber]\n", + " )\n", + "\n", + "annotation_dict = {'Campaign name': 'SLS-Campaign-2023',\n", + " 'Users':'Thorsten, Luca, Zoe',\n", + " 'Startdate': str(input_data_df['lastModifiedDatestr'].min()),\n", + " 'Enddate': str(input_data_df['lastModifiedDatestr'].max())\n", + " }\n", + "h5lib.annotate_root_dir(output_file_path,annotation_dict)\n", + "\n", + "h5lib.display_group_hierarchy_on_a_treemap(output_file_path)\n", + "\n", + "print(':)')\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "test_atmos_chem_env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..c9009cc --- /dev/null +++ b/environment.yml @@ -0,0 +1,13 @@ +name: test_atmos_chem_env +channels: + - conda-forge + - defaults +dependencies: + - python=3.11 + - jupyter + - numpy + - pandas + - matplotlib + - plotly=5.18 + - h5py=3.10 + - pybis=1.35