Synch with remote repo

2025-02-03 10:31:48 +01:00
parent a3ccff4079
commit 32bba4239a
102 changed files with 19584 additions and 19584 deletions

View File

@@ -1,151 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"try:\n",
" import src.hdf5_writer as hdf5_writer\n",
" import src.hdf5_ops as hdf5_ops\n",
" import visualization.hdf5_vis as h5vis\n",
" import visualization.napp_plotlib as napp\n",
"\n",
" import utils.g5505_utils as utils\n",
" #import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the above specified input_file_path as a dataframe. \n",
"\n",
"Since we know this file was created from a Thorsten Table's format, we can use h5lib.read_mtable_as_dataframe() to read it.\n",
"\n",
"Then, we rename the 'name' column as 'filename', as this is the column's name use to idenfify files in subsequent functions.\n",
"Also, we augment the dataframe with a few categorical columns to be used as grouping variables when creating the hdf5 file's group hierarchy. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define input file directory\n",
"\n",
"input_file_path = '../input_files/BeamTimeMetaData.h5'\n",
"output_dir_path = '../output_files'\n",
"if not os.path.exists(output_dir_path):\n",
" os.makedirs(output_dir_path)\n",
"\n",
"# Read BeamTimeMetaData.h5, containing Thorsten's Matlab Table\n",
"input_data_df = hdf5_ops.read_mtable_as_dataframe(input_file_path)\n",
"\n",
"# Preprocess Thorsten's input_data dataframe so that i can be used to create a newer .h5 file\n",
"# under certain grouping specificiations.\n",
"input_data_df = input_data_df.rename(columns = {'name':'filename'})\n",
"input_data_df = utils.augment_with_filenumber(input_data_df)\n",
"input_data_df = utils.augment_with_filetype(input_data_df)\n",
"input_data_df = utils.split_sample_col_into_sample_and_data_quality_cols(input_data_df)\n",
"input_data_df['lastModifiedDatestr'] = input_data_df['lastModifiedDatestr'].astype('datetime64[s]')\n",
"\n",
"input_data_df.columns\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We now create a hdf5 file with a 3-level group hierarchy based on the input_data and three grouping functions. Then\n",
"we visualize the group hierarchy of the created file as a treemap."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define grouping functions to be passed into create_hdf5_file function. These can also be set\n",
"# as strings refering to categorical columns in input_data_df.\n",
"\n",
"test_grouping_funcs = True\n",
"if test_grouping_funcs:\n",
" group_by_sample = lambda x : utils.group_by_df_column(x,'sample')\n",
" group_by_type = lambda x : utils.group_by_df_column(x,'filetype')\n",
" group_by_filenumber = lambda x : utils.group_by_df_column(x,'filenumber')\n",
"else:\n",
" group_by_sample = 'sample'\n",
" group_by_type = 'filetype'\n",
" group_by_filenumber = 'filenumber'\n",
"\n",
"import pandas as pd\n",
"import h5py\n",
"\n",
"path_to_output_filename = os.path.normpath(os.path.join(output_dir_path, 'test.h5'))\n",
"\n",
"grouping_by_vars = ['sample', 'filenumber']\n",
"\n",
"path_to_output_filename = hdf5_writer.create_hdf5_file_from_dataframe(path_to_output_filename, \n",
" input_data_df, \n",
" grouping_by_vars\n",
" )\n",
"\n",
"annotation_dict = {'Campaign name': 'SLS-Campaign-2023',\n",
" 'Producers':'Thorsten, Luca, Zoe',\n",
" 'Startdate': str(input_data_df['lastModifiedDatestr'].min()),\n",
" 'Enddate': str(input_data_df['lastModifiedDatestr'].max())\n",
" }\n",
"\n",
"dataOpsObj = hdf5_ops.HDF5DataOpsManager(path_to_output_filename)\n",
"dataOpsObj.load_file_obj()\n",
"# Annotate root folder with annotation_dict\n",
"dataOpsObj.append_metadata('/',annotation_dict)\n",
"dataOpsObj.unload_file_obj()\n",
"\n",
"\n",
"\n",
"h5vis.display_group_hierarchy_on_a_treemap(path_to_output_filename)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
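The grouping idea in the notebook above, turning categorical columns such as 'sample' and 'filenumber' into an HDF5 group hierarchy, can be illustrated without the project code. The following is a minimal, hypothetical sketch using plain pandas and h5py; it is not the hdf5_writer.create_hdf5_file_from_dataframe implementation, and the example dataframe values are invented, but the column names match the notebook.

import h5py
import pandas as pd

# Illustrative stand-in for input_data_df (values are invented; column names follow the notebook)
df = pd.DataFrame({
    'filename':   ['a.ibw', 'b.ibw', 'c.ibw'],
    'sample':     ['NaCl', 'NaCl', 'KCl'],
    'filenumber': ['001', '002', '003'],
})

def write_grouped_hierarchy(path, df, grouping_vars):
    """Create one HDF5 group per combination of the grouping columns, e.g. /NaCl/001."""
    with h5py.File(path, 'w') as f:
        for _, row in df.iterrows():
            group_path = '/'.join(str(row[col]) for col in grouping_vars)
            grp = f.require_group(group_path)
            grp.attrs['filename'] = row['filename']
    return path

write_grouped_hierarchy('grouping_illustration.h5', df, ['sample', 'filenumber'])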

View File

@@ -1,182 +1,182 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data integration workflow of experimental campaign\n",
"\n",
"In this notebook, we will go through a our data integration workflow. This involves the following steps:\n",
"\n",
"1. Specify data integration file through YAML configuration file.\n",
"2. Create an integrated HDF5 file of experimental campaign from configuration file.\n",
"3. Display the created HDF5 file using a treemap\n",
"\n",
"## Import libraries and modules\n",
"\n",
"* Excecute (or Run) the Cell below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"try:\n",
" import visualization.hdf5_vis as hdf5_vis\n",
" import pipelines.data_integration as data_integration\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Specify data integration task through YAML configuration file\n",
"\n",
"* Create your configuration file (i.e., *.yaml file) adhering to the example yaml file in the input folder.\n",
"* Set up input directory and output directory paths and Excecute Cell.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#output_filename_path = 'output_files/unified_file_smog_chamber_2024-04-07_UTC-OFST_+0200_NG.h5'\n",
"yaml_config_file_path = '../input_files/data_integr_config_file_TBR.yaml'\n",
"\n",
"#path_to_input_directory = 'output_files/kinetic_flowtube_study_2022-01-31_LuciaI'\n",
"#path_to_hdf5_file = hdf5_lib.create_hdf5_file_from_filesystem_path(path_to_input_directory)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create an integrated HDF5 file of experimental campaign.\n",
"\n",
"* Excecute Cell. Here we run the function `integrate_data_sources` with input argument as the previously specified YAML config file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Display integrated HDF5 file using a treemap\n",
"\n",
"* Excecute Cell. A visual representation in html format of the integrated file should be displayed and stored in the output directory folder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"if isinstance(hdf5_file_path ,list):\n",
" for path_item in hdf5_file_path :\n",
" hdf5_vis.display_group_hierarchy_on_a_treemap(path_item)\n",
"else:\n",
" hdf5_vis.display_group_hierarchy_on_a_treemap(hdf5_file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import src.hdf5_ops as h5de \n",
"h5de.serialize_metadata(hdf5_file_path[0],folder_depth=3,output_format='yaml')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import src.hdf5_ops as h5de \n",
"print(hdf5_file_path)\n",
"DataOpsAPI = h5de.HDF5DataOpsManager(hdf5_file_path[0])\n",
"\n",
"DataOpsAPI.load_file_obj()\n",
"\n",
"#DataOpsAPI.reformat_datetime_column('ICAD/HONO/2022_11_22_Channel1_Data.dat/data_table',\n",
"# 'Start Date/Time (UTC)',\n",
"# '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')\n",
"DataOpsAPI.extract_and_load_dataset_metadata()\n",
"df = DataOpsAPI.dataset_metadata_df\n",
"print(df.head())\n",
"\n",
"DataOpsAPI.unload_file_obj()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"DataOpsAPI.load_file_obj()\n",
"\n",
"DataOpsAPI.append_metadata('/',{'test_attr':'this is a test value'})\n",
"\n",
"DataOpsAPI.unload_file_obj()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
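As a rough illustration of the treemap step above, the sketch below shows how an HDF5 group hierarchy could be rendered as a treemap with h5py and plotly.express. It is only an assumed, simplified stand-in for hdf5_vis.display_group_hierarchy_on_a_treemap, not the project's implementation.

import h5py
import plotly.express as px

def treemap_of_groups(hdf5_path):
    """Collect all group paths in the file and plot their parent/child relations as a treemap."""
    names, parents = ['/'], ['']
    with h5py.File(hdf5_path, 'r') as f:
        def visit(name, obj):
            if isinstance(obj, h5py.Group):
                names.append('/' + name)
                parents.append('/' + name.rsplit('/', 1)[0] if '/' in name else '/')
        f.visititems(visit)  # datasets are skipped; only the group hierarchy is shown
    fig = px.treemap(names=names, parents=parents)
    fig.show()

# Example usage with the path(s) returned by run_pipeline:
# treemap_of_groups(hdf5_file_path[0] if isinstance(hdf5_file_path, list) else hdf5_file_path)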

View File

@@ -1,79 +1,79 @@
import os
from nbutils import add_project_path_to_sys_path

# Add project root to sys.path
add_project_path_to_sys_path()

import datetime
import logging

try:
    import src.openbis_lib as openbis_lib
    import src.hdf5_ops as hdf5_ops
    #import pipelines.metadata_revision as metadata_revision
    print("Imports successful!")
except ImportError as e:
    print(f"Import error: {e}")


def main():
    #df_h5 = hdf5_lib.read_hdf5_as_dataframe_v2('BeamTimeMetaData.h5')
    #df_h5['lastModifiedDatestr'] = df_h5['lastModifiedDatestr'].astype('datetime64[ns]')
    #df_h5 = df_h5.sort_values(by='lastModifiedDatestr')

    openbis_obj = openbis_lib.initialize_openbis_obj()

    # Create df with sample measurements of type 'ISS_MEASUREMENT'
    samples = openbis_obj.get_samples(type='ISS_MEASUREMENT', props=['FILENUMBER'])
    for sample in samples:
        print(type(sample))
        print(sample.identifier)

    df_openbis = samples.df.copy(deep=True)

    h5_file_path = os.path.join(os.path.curdir, 'input_files\\BeamTimeMetaData.h5')
    df_h5 = hdf5_ops.read_mtable_as_dataframe(h5_file_path)

    # dataframe preprocessing steps
    df_h5, df_openbis = openbis_lib.align_datetime_observation_windows(df_h5, df_openbis)
    df_openbis = openbis_lib.pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')

    current_date = datetime.date.today()
    log_filename = 'logs\\computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
    logging_flag = True

    #logger = logging.getLogger(__name__)
    #logger.setLevel(logging.DEBUG)
    log_file_path = os.path.join(os.path.curdir, log_filename)
    logging.basicConfig(filename=log_file_path,
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        )

    for sample_idx in df_openbis.index:
        # logging.basicConfig(log_filename)
        #print(formatted_dict)
        sample_props_dict = openbis_lib.compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)
        formatted_dict = [f"{key}:{value}" for key, value in sample_props_dict.items()]
        formatted_dict = "\n".join(formatted_dict)
        logging.debug('\n' + formatted_dict)
        #print(props_dict)

    openbis_obj.logout()

    # Choose samples and specific properties to update: create a log


if __name__ == "__main__":
    main()
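One practical caveat in the script above: logging.basicConfig opens the log file immediately, so it fails if the logs directory does not yet exist. A small, hedged variant of the logging setup (same relative layout assumed) that creates the directory first:

import os
import logging

log_dir = os.path.join(os.path.curdir, 'logs')
os.makedirs(log_dir, exist_ok=True)  # make sure logs/ exists before basicConfig opens the file

logging.basicConfig(
    filename=os.path.join(log_dir, 'computed_openbis_props_logs.log'),
    level=logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
)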

View File

@@ -1,96 +1,96 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"try:\n",
" import src.hdf5_ops as hdf5_ops\n",
" import visualization.napp_plotlib as napp\n",
" #import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define h5 file name and make sure file is located at the current working dir\n",
"filename = '../input_files/FileList_v2.h5'\n",
"\n",
"# Read h5 file into dataframe\n",
"dataframe = hdf5_ops.read_mtable_as_dataframe(filename)\n",
"\n",
"\n",
"dataframe['lastModifiedDatestr']\n",
"print(dataframe.columns)\n",
"\n",
"dataframe.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataframe['image'][0].shape\n",
"\n",
"name_filter = (dataframe['name'] == '0116116_Cl2p_750eV.ibw').to_numpy()\n",
"date_filter = np.array(['Jun-2023' in date for date in dataframe['lastModifiedDatestr']])\n",
"\n",
"filter = np.logical_and(name_filter.flatten(),date_filter.flatten()) \n",
"\n",
"napp.plot_image(dataframe,filter)\n",
"napp.plot_spectra(dataframe,filter)\n",
"\n",
"name_filter = np.array(['merge' in name for name in dataframe['name'] ])\n",
"date_filter = np.array(['Jun-2023' in date for date in dataframe['lastModifiedDatestr']])\n",
"filter = np.logical_and(name_filter.flatten(),date_filter.flatten()) \n",
"\n",
"\n",
"napp.plot_spectra(dataframe,filter)\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
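The boolean masks in the last cell are built with Python list comprehensions and np.logical_and. If the 'name' and 'lastModifiedDatestr' columns hold plain strings, the same filters can be written with pandas' vectorized string methods; a small equivalent sketch with made-up data:

import pandas as pd

# Invented example rows; only the column names match the notebook
df = pd.DataFrame({
    'name': ['0116116_Cl2p_750eV.ibw', 'scan_merge.ibw'],
    'lastModifiedDatestr': ['01-Jun-2023 10:00:00', '15-Jul-2023 12:00:00'],
})

name_filter = df['name'].str.contains('merge', regex=False)
date_filter = df['lastModifiedDatestr'].str.contains('Jun-2023', regex=False)
mask = name_filter & date_filter  # equivalent to np.logical_and on the flattened arrays

print(df[mask])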

View File

@@ -1,98 +1,98 @@
import os
from nbutils import add_project_path_to_sys_path

# Add project root to sys.path
add_project_path_to_sys_path()

import datetime
import logging

try:
    import src.openbis_lib as openbis_lib
    import src.hdf5_ops as hdf5_ops
    #import pipelines.metadata_revision as metadata_revision
    print("Imports successful!")
except ImportError as e:
    print(f"Import error: {e}")


def main():
    #df_h5 = hdf5_lib.read_hdf5_as_dataframe_v2('BeamTimeMetaData.h5')
    #df_h5['lastModifiedDatestr'] = df_h5['lastModifiedDatestr'].astype('datetime64[ns]')
    #df_h5 = df_h5.sort_values(by='lastModifiedDatestr')

    openbis_obj = openbis_lib.initialize_openbis_obj()

    # Create df with sample measurements of type 'ISS_MEASUREMENT'
    samples = openbis_obj.get_samples(type='ISS_MEASUREMENT', props=['FILENUMBER'])
    for sample in samples:
        print(type(sample))
        print(sample.identifier)

    df_openbis = samples.df.copy(deep=True)

    h5_file_path = os.path.join(os.path.curdir, 'input_files\\BeamTimeMetaData.h5')
    df_h5 = hdf5_ops.read_mtable_as_dataframe(h5_file_path)

    # dataframe preprocessing steps
    df_h5, df_openbis = openbis_lib.align_datetime_observation_windows(df_h5, df_openbis)
    df_openbis = openbis_lib.pair_openbis_and_h5_dataframes(df_openbis, df_h5, 'REFORMATED_FILENUMBER', 'name')

    current_date = datetime.date.today()
    log_filename = 'logs\\computed_openbis_props_logs_' + current_date.strftime('%d-%m-%Y') + '.log'
    logging_flag = True

    #logger = logging.getLogger(__name__)
    #logger.setLevel(logging.DEBUG)
    log_file_path = os.path.join(os.path.curdir, log_filename)
    logging.basicConfig(filename=log_file_path,
                        level=logging.DEBUG,
                        format="%(asctime)s %(levelname)s %(message)s",
                        datefmt="%d-%m-%Y %H:%M:%S",
                        )

    # update sample properties in openbis database only if they are labeled as bad
    # Note: only the last of these assignments takes effect; the earlier lists are kept
    # here as alternative property selections.
    props_include_list = ['sample_name', 'temp', 'cell_pressure', 'method_name', 'region', 'lens_mode', 'acq_mode', 'dwell_time']
    props_include_list = ['ke_range_center', 'ke_range_step']
    props_include_list = ['temp', 'cell_pressure', 'photon_energy', 'dwell_time', 'passenergy', 'ke_range_center', 'ke_step_center', 'position_x', 'position_y', 'position_z']
    props_include_list = ['position_x', 'position_y', 'position_z']
    props_include_list = ['temp', 'cell_pressure', 'photon_energy', 'dwell_time', 'passenergy', 'ke_range_center', 'ke_step_center']

    for sample_idx in df_openbis.index:
        # logging.basicConfig(log_filename)
        #print(formatted_dict)
        sample_props_dict = openbis_lib.compute_openbis_sample_props_from_h5(df_openbis, df_h5, sample_idx)
        #sample_props_dict[ke_range_center]
        formatted_dict = [f"{key}:{value}" for key, value in sample_props_dict.items()]
        formatted_dict = "\n".join(formatted_dict)
        logging.debug('\n' + formatted_dict)
        try:
            filenumber = -1 if sample_props_dict['FILENUMBER'] == '' else int(sample_props_dict['FILENUMBER'])
            if filenumber >= 85:
                print(filenumber)
                #if 'bad' in sample_props_dict['sample_name']:
                logging.info('The above sample is to be updated in openbis:')
                openbis_lib.single_sample_update(sample_props_dict, samples, props_include_list)
        except KeyError:
            logging.error(KeyError)
        #print(props_dict)

    openbis_obj.logout()

    # Choose samples and specific properties to update: create a log


if __name__ == "__main__":
    main()
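Because only the last props_include_list assignment above takes effect, the earlier selections are silently ignored. A hedged alternative (the selection names are illustrative, not part of the project) is to keep the candidate selections in a dictionary and pick the active one explicitly:

# Hypothetical refactor of the repeated assignments: name each selection, choose one deliberately.
PROP_SELECTIONS = {
    'acquisition': ['temp', 'cell_pressure', 'photon_energy', 'dwell_time',
                    'passenergy', 'ke_range_center', 'ke_step_center'],
    'positions':   ['position_x', 'position_y', 'position_z'],
    'kinetic':     ['ke_range_center', 'ke_range_step'],
}

props_include_list = PROP_SELECTIONS['acquisition']  # the selection actually used below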

View File

@@ -1,172 +1,172 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Metadata Annotation Process\n",
"\n",
"In this notebook, we will go through a simple metadata annotation process. This involves the following steps:\n",
"\n",
"1. Define an HDF5 file.\n",
"2. Create a YAML representation of the HDF5 file.\n",
"3. Edit and augment the YAML with metadata.\n",
"4. Update the original file based on the edited YAML.\n",
"\n",
"\n",
"## Import libraries and modules\n",
"\n",
"* Excecute (or Run) the Cell below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Imports successful!\n"
]
}
],
"source": [
"import os\n",
"from nbutils import add_project_path_to_sys_path\n",
"\n",
"\n",
"# Add project root to sys.path\n",
"add_project_path_to_sys_path()\n",
"\n",
"try:\n",
" import src.hdf5_ops as hdf5_ops\n",
" import pipelines.metadata_revision as metadata_revision\n",
" print(\"Imports successful!\")\n",
"except ImportError as e:\n",
" print(f\"Import error: {e}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Define an HDF5 file\n",
"\n",
"* Set up the string variable `hdf5_file_path` with the path to the HDF5 file of interest.\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path = \"../output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.h5\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Create a YAML Representation of the File\n",
"\n",
"We now convert HDF5 file structure and existing metadata into a YAML format. This will be used to add and edit metadata attributes.\n",
"\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The YAML file representation output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.json of the HDF5 file output_files/collection_kinetic_flowtube_study_LuciaI_2022-01-31_2023-06-29/kinetic_flowtube_study_LuciaI_2023-06-29.h5 was created successfully.\n"
]
}
],
"source": [
"yaml_file_path = hdf5_ops.serialize_metadata(hdf5_file_path,output_format='json')\n",
"\n",
"if os.path.exists(yaml_file_path):\n",
" print(f'The YAML file representation {yaml_file_path} of the HDF5 file {hdf5_file_path} was created successfully.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 3: Edit and Augment YAML with Metadata\n",
"\n",
"We can now manually edit the YAML file to add metadata.\n",
"* (Optional) automate your metadata annotation process by creating a program that takes the YAMl file and returns the modified version of it.\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def metadata_annotation_process(yaml_file_path):\n",
"\n",
" # Include metadata annotation logic, e.g., load yaml file and modify its content accordingly\n",
"\n",
" print(f'Ensure your edits to {yaml_file_path} have been properly incorporated and saved.')\n",
"\n",
" return yaml_file_path\n",
"\n",
"yaml_file_path = metadata_annotation_process(yaml_file_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 4: Update the Original File Based on the Edited YAML\n",
"\n",
"Lastly, we will update the original file with the metadata from the YAML file.\n",
"\n",
"* Excecute Cell."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"metadata_revision.update_hdf5_file_with_review(hdf5_file_path,yaml_file_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "multiphase_chemistry_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
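For Step 3, an automated metadata_annotation_process could look roughly like the sketch below. It assumes the serialized file is YAML, that PyYAML is installed, and that the top level is a mapping into which extra attributes can be merged; the exact structure produced by hdf5_ops.serialize_metadata may differ, so treat this as a template rather than the project's implementation.

import yaml

def metadata_annotation_process(yaml_file_path, extra_attributes=None):
    """Load the serialized metadata, merge in extra attributes, and write the file back."""
    # Example values only; replace with the metadata you actually want to add
    extra_attributes = extra_attributes or {'Campaign name': 'SLS-Campaign-2023'}

    with open(yaml_file_path, 'r') as f:
        metadata = yaml.safe_load(f) or {}

    metadata.update(extra_attributes)  # assumes a top-level mapping

    with open(yaml_file_path, 'w') as f:
        yaml.safe_dump(metadata, f, sort_keys=False)

    return yaml_file_path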

View File

@@ -1,16 +1,16 @@
import sys
import os


def add_project_path_to_sys_path():
    """
    Adds the project path (root directory containing the package) to sys.path.
    """
    # Determine the root directory (project_root, which contains 'dima')
    notebook_dir = os.getcwd()  # Current working directory (assumes running from notebooks/)
    project_path = os.path.normpath(os.path.join(notebook_dir, ".."))  # Move up to project root

    if project_path not in sys.path:  # Avoid duplicate entries
        sys.path.append(project_path)


if __name__ == "__main__":
    add_project_path_to_sys_path()
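A pathlib-based variant of the same helper, shown only as an alternative sketch under the same assumption that notebooks run one directory below the project root:

import sys
from pathlib import Path

def add_project_path_to_sys_path():
    """Prepend the project root (parent of the current working directory) to sys.path."""
    project_path = str(Path.cwd().parent.resolve())
    if project_path not in sys.path:
        sys.path.insert(0, project_path)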