{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "\n",
    "# Set up project root directory and make the project packages importable\n",
    "notebook_dir = os.getcwd()  # Current working directory (assumes running from notebooks/)\n",
    "project_path = os.path.normpath(os.path.join(notebook_dir, \"..\"))  # Move up to project root\n",
    "dima_path = os.path.normpath(os.path.join(project_path, \"dima\"))  # `dima` subdirectory of the project root\n",
    "\n",
    "# Show the module search path before it is modified\n",
    "for item in sys.path:\n",
    "    print(item)\n",
    "\n",
    "if project_path not in sys.path:  # Avoid duplicate entries\n",
    "    sys.path.append(project_path)\n",
    "    print(project_path)\n",
    "if dima_path not in sys.path:\n",
    "    sys.path.insert(0, dima_path)\n",
    "    print(dima_path)\n",
    "\n",
    "# Imported only after sys.path has been extended with the project root\n",
    "from pipelines.steps.utils import load_project_yaml_files\n",
    "\n",
    "campaign_descriptor = load_project_yaml_files(project_path, \"campaignDescriptor.yaml\")\n",
    "YEAR = campaign_descriptor['year']\n",
    "STATION_ABBR = campaign_descriptor['station_abbr']\n",
    "\n",
    "# Workflow name later passed to the visualization step\n",
    "workflow_fname = f'workflow_acsm_data_{STATION_ABBR}_{YEAR}'\n",
    "\n",
    "print(workflow_fname)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Explore Datasets in HDF5 File\n",
    "\n",
    "* Use the HDF5 data manager object to load the metadata of the HDF5 file's datasets.\n",
    "* Display metadata and identify dataset of interest for next step.\n",
    "* Execute the cell.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dima.src.hdf5_ops as dataOps\n",
    "\n",
    "CAMPAIGN_DATA_FILE = \"../data/collection_PAY_2024_2025-06-05_2025-06-05.h5\"\n",
    "APPEND_DIR = os.path.splitext(CAMPAIGN_DATA_FILE)[0]  # Data file path without its extension\n",
    "\n",
    "path_to_data_file = CAMPAIGN_DATA_FILE\n",
    "dataManager = dataOps.HDF5DataOpsManager(path_to_data_file)\n",
    "\n",
    "# Release the file object even if metadata extraction or display fails,\n",
    "# so a failed run does not leave the HDF5 file open in the kernel.\n",
    "dataManager.load_file_obj()\n",
    "try:\n",
    "    dataManager.extract_and_load_dataset_metadata()\n",
    "    dataset_metadata_df = dataManager.dataset_metadata_df\n",
    "\n",
    "    print(dataset_metadata_df.head(n=15))\n",
    "finally:\n",
    "    dataManager.unload_file_obj()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify Dataset and Flags Dataset to Be Visualized Based on Dataset Index\n",
    "\n",
    "* Specify the dataset index based on previous step.\n",
    "* Execute the cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Select the dataset by its index in the metadata table displayed in the previous step\n",
    "dataset_idx = 0\n",
    "dataset_name = dataset_metadata_df['dataset_name'][dataset_idx]\n",
    "parent_instrument = dataset_metadata_df['parent_instrument'][dataset_idx]\n",
    "\n",
    "# Infer expected flags dataset name, which is associated with the above defined dataset:\n",
    "# the first path component of the dataset name gets a '_flags' suffix\n",
    "flags_dataset_name = dataset_name.split(sep='/')\n",
    "flags_dataset_name[0] = f'{flags_dataset_name[0]}_flags'\n",
    "flags_dataset_name = '/'.join(flags_dataset_name)\n",
    "\n",
    "print(dataset_name)\n",
    "print(flags_dataset_name)\n",
    "\n",
    "# Visualize dataset column names and identify the time variable and y variable channels for next step\n",
    "try:\n",
    "    dataManager.load_file_obj()\n",
    "    dataset_df = dataManager.extract_dataset_as_dataframe(dataset_name)\n",
    "    print(dataset_df.columns)\n",
    "except Exception as e:\n",
    "    print(f\"Exception occurred while loading dataset: {e}\")\n",
    "finally:\n",
    "    dataManager.unload_file_obj()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualize Diagnostic Variables Alongside the Associated Flags\n",
    "\n",
    "* Ensure that `dataset_name` and `flags_dataset_name` are properly defined in the previous step.\n",
    "* Build a list `diagnostic_variables` with the variable names you would like to visualize, based on the previously displayed options.\n",
    "* Define `time_var` with the time variable name from the previously displayed variable names.\n",
    "* Execute the cell.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
}, {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pipelines.steps.visualize_datatable_vars as vis\n",
    "\n",
    "# Named groups of y-variables, each paired with the time column used as x-axis\n",
    "variable_sets = {\n",
    "    \"diagnostic\": {\n",
    "        \"variables\": ['VaporizerTemp_C', 'FlowRate_ccs', 'FilamentEmission_mA', 'ABsamp'],\n",
    "        \"time_var\": \"t_base\",\n",
    "    },\n",
    "    \"cpc\": {\n",
    "        \"variables\": [\"conc\"],\n",
    "        \"time_var\": \"end_time\",\n",
    "    },\n",
    "    \"species\": {\n",
    "        \"variables\": ['Chl_11000', 'NH4_11000', 'SO4_11000', 'NO3_11000', 'Org_11000'],\n",
    "        \"time_var\": \"t_start_Buf\",\n",
    "    },\n",
    "}\n",
    "\n",
    "# Choose one: \"diagnostic\", \"cpc\", or \"species\"\n",
    "selected_set = \"diagnostic\"\n",
    "\n",
    "selected_config = variable_sets[selected_set]\n",
    "variables = selected_config[\"variables\"]\n",
    "time_var = selected_config[\"time_var\"]\n",
    "\n",
    "# Fixed y-axis limits, keyed by variable name\n",
    "yaxis_range_dict = {\n",
    "    'FlowRate_ccs': [0, 100],\n",
    "    'VaporizerTemp_C': [590, 610],\n",
    "}\n",
    "\n",
    "# Plot the selected variables of the dataset together with its flags dataset\n",
    "vis.visualize_table_variables(\n",
    "    path_to_data_file,\n",
    "    dataset_name,\n",
    "    flags_dataset_name,\n",
    "    x_var=time_var,\n",
    "    y_vars=variables,\n",
    "    yaxis_range_dict=yaxis_range_dict,\n",
    "    capture_renku_metadata=True,\n",
    "    workflow_name=workflow_fname,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}