acsm-fairifier/notebooks/demo_visualize_diagnostic_flags_from_hdf5_file.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "# Resolve the project layout relative to the current working directory.\n",
    "# This assumes the notebook is launched from the notebooks/ folder.\n",
    "notebook_dir = os.getcwd()\n",
    "project_path = os.path.normpath(os.path.join(notebook_dir, \"..\"))  # Project root (one level up)\n",
    "dima_path = os.path.normpath(os.path.join(project_path, \"dima\"))  # dima/ directory inside the project root\n",
    "\n",
    "# Register the project root once; the membership check keeps re-runs of this\n",
    "# cell from adding duplicate entries. Newly added paths are printed.\n",
    "if project_path not in sys.path:\n",
    "    sys.path.append(project_path)\n",
    "    print(project_path)\n",
    "\n",
    "# dima_path goes to the front of sys.path so it is searched before other entries.\n",
    "if dima_path not in sys.path:\n",
    "    sys.path.insert(0, dima_path)\n",
    "    print(dima_path)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore Datasets in HDF5 File\n",
    "\n",
    "* Use the HDF5 data manager object to load the metadata of the HDF5 file's datasets.\n",
    "* Display metadata and identify dataset of interest for next step.\n",
    "* Execute the cell.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dima.src.hdf5_ops as dataOps\n",
    "\n",
    "CAMPAIGN_DATA_FILE = \"../data/collection_JFJ_2024_2025-04-11_2025-04-11.h5\"\n",
    "APPEND_DIR = os.path.splitext(CAMPAIGN_DATA_FILE)[0]  # Data file path without the .h5 extension\n",
    "\n",
    "path_to_data_file = CAMPAIGN_DATA_FILE\n",
    "dataManager = dataOps.HDF5DataOpsManager(path_to_data_file)\n",
    "\n",
    "# Load the file object, read the dataset metadata, and always unload the file\n",
    "# object afterwards, even if metadata extraction or display raises.\n",
    "dataManager.load_file_obj()\n",
    "try:\n",
    "    dataManager.extract_and_load_dataset_metadata()\n",
    "    dataset_metadata_df = dataManager.dataset_metadata_df\n",
    "    print(dataset_metadata_df.head(n=15))\n",
    "finally:\n",
    "    dataManager.unload_file_obj()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify Dataset and Flags Dataset to Be Visualized Based on Dataset Index\n",
    "\n",
    "* Specify the dataset index based on previous step.\n",
    "* Execute the cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Choose the dataset to inspect by its index in dataset_metadata_df\n",
    "dataset_idx = 0\n",
    "dataset_name = dataset_metadata_df['dataset_name'][dataset_idx]\n",
    "parent_instrument = dataset_metadata_df['parent_instrument'][dataset_idx]\n",
    "\n",
    "# Derive the expected flags dataset name: the first '/'-separated component\n",
    "# of dataset_name gets a '_flags' suffix; the rest of the path is unchanged.\n",
    "name_head, name_sep, name_tail = dataset_name.partition('/')\n",
    "flags_dataset_name = f'{name_head}_flags{name_sep}{name_tail}'\n",
    "\n",
    "print(dataset_name, flags_dataset_name, sep='\\n')\n",
    "\n",
    "# Show the dataset's column names so the time variable and the y-variable\n",
    "# channels can be identified for the next step.\n",
    "try:\n",
    "    dataManager.load_file_obj()\n",
    "    dataset_df = dataManager.extract_dataset_as_dataframe(dataset_name)\n",
    "    print(dataset_df.columns)\n",
    "except Exception as e:\n",
    "    print(f\"Exception occurred while loading dataset: {e}\")\n",
    "finally:\n",
    "    # Unload the file object whether or not loading succeeded\n",
    "    dataManager.unload_file_obj()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize Diagnostic Variables Alongside the Associated Flags\n",
    "\n",
    "* Ensure that `dataset_name` and `flags_dataset_name` are properly defined in the previous step.\n",
    "* Build a list `diagnostic_variables` with the variable names you would like to visualize, based on the previously displayed options.\n",
    "* Define `time_var` with the time variable name from the previously displayed variable names.\n",
    "* Execute the cell.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pipelines.steps.visualize_datatable_vars as vis\n",
    "\n",
    "# Name of the time column used as the x variable\n",
    "time_var = 't_base'\n",
    "\n",
    "# Diagnostic channels passed as the y variables\n",
    "diagnostic_variables = [\n",
    "    'VaporizerTemp_C',\n",
    "    'HeaterBias_V',\n",
    "    'FlowRefWave',\n",
    "    'FlowRate_mb',\n",
    "    'FlowRate_ccs',\n",
    "    'FilamentEmission_mA',\n",
    "    'Detector_V',\n",
    "    'AnalogInput06_V',\n",
    "    'ABRefWave',\n",
    "    'ABsamp',\n",
    "    'ABCorrFact',\n",
    "]\n",
    "\n",
    "# Hand the data file, the dataset, and its flags dataset to the visualization helper\n",
    "figs = vis.visualize_table_variables(\n",
    "    path_to_data_file,\n",
    "    dataset_name,\n",
    "    flags_dataset_name,\n",
    "    x_var=time_var,\n",
    "    y_vars=diagnostic_variables,\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dash_multi_chem_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}