mirror of
https://gitea.psi.ch/APOG/acsmnode.git
synced 2025-06-26 01:01:09 +02:00
205 lines
6.5 KiB
Plaintext
205 lines
6.5 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import sys\n",
|
|
"import os\n",
|
|
"# Set up project root directory\n",
|
|
"\n",
|
|
"\n",
|
|
"notebook_dir = os.getcwd() # Current working directory (assumes running from notebooks/)\n",
|
|
"project_path = os.path.normpath(os.path.join(notebook_dir, \"..\")) # Move up to project root\n",
|
|
"dima_path = os.path.normpath(os.path.join(project_path, \"dima\")) # Path to the dima directory under the project root\n",
|
|
"\n",
|
|
"for item in sys.path:\n",
|
|
" print(item)\n",
|
|
"\n",
|
|
"\n",
|
|
"if project_path not in sys.path: # Avoid duplicate entries\n",
|
|
" sys.path.append(project_path)\n",
|
|
" print(project_path)\n",
|
|
"if dima_path not in sys.path:\n",
|
|
" sys.path.insert(0,dima_path)\n",
|
|
" print(dima_path)\n",
|
|
" \n",
|
|
"from pipelines.steps.utils import load_project_yaml_files\n",
|
|
"campaign_descriptor = load_project_yaml_files(project_path, \"campaignDescriptor.yaml\")\n",
|
|
"YEAR = campaign_descriptor['year']\n",
|
|
"STATION_ABBR = campaign_descriptor['station_abbr']\n",
|
|
"\n",
|
|
"workflow_fname = f'workflow_acsm_data_{STATION_ABBR}_{YEAR}'\n",
|
|
"\n",
|
|
"print(workflow_fname)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Explore Datasets in HDF5 File\n",
|
|
"\n",
|
|
"* Use the HDF5 data manager object to load the metadata of the HDF5 file's datasets.\n",
|
|
"* Display the metadata and identify the dataset of interest for the next step.\n",
|
|
"* Execute the cell.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import dima.src.hdf5_ops as dataOps\n",
|
|
"\n",
|
|
"CAMPAIGN_DATA_FILE = \"../data/collection_PAY_2024_2025-06-05_2025-06-05.h5\"\n",
|
|
"APPEND_DIR = os.path.splitext(CAMPAIGN_DATA_FILE)[0]\n",
|
|
"\n",
|
|
"path_to_data_file = CAMPAIGN_DATA_FILE\n",
|
|
"dataManager = dataOps.HDF5DataOpsManager(path_to_data_file)\n",
|
|
"\n",
|
|
"dataManager.load_file_obj()\n",
|
|
"dataManager.extract_and_load_dataset_metadata()\n",
|
|
"dataset_metadata_df = dataManager.dataset_metadata_df\n",
|
|
"\n",
|
|
"print(dataset_metadata_df.head(n=15))\n",
|
|
"dataManager.unload_file_obj()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Specify Dataset and Flags Dataset to Be Visualized Based on Dataset Index\n",
|
|
"\n",
|
|
"* Specify the dataset index based on the previous step.\n",
|
|
"* Execute the cell."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"# Specify diagnostic variables and the associated flags \n",
|
|
"dataset_idx = 0\n",
|
|
"dataset_name = dataset_metadata_df['dataset_name'][dataset_idx]\n",
|
|
"parent_instrument = dataset_metadata_df['parent_instrument'][dataset_idx]\n",
|
|
"\n",
|
|
"# Infer expected flags dataset name, which is associated with the above defined dataset\n",
|
|
"\n",
|
|
"flags_dataset_name = dataset_name.split(sep='/')\n",
|
|
"flags_dataset_name[0] = f'{flags_dataset_name[0]}_flags'\n",
|
|
"flags_dataset_name = '/'.join(flags_dataset_name)\n",
|
|
"\n",
|
|
"print(dataset_name)\n",
|
|
"print(flags_dataset_name)\n",
|
|
"\n",
|
|
"# Visualize dataset column names and identify the time variable and y variable channels for next step\n",
|
|
"try:\n",
|
|
" dataManager.load_file_obj()\n",
|
|
" dataset_df = dataManager.extract_dataset_as_dataframe(dataset_name)\n",
|
|
" print(dataset_df.columns)\n",
|
|
"except Exception as e:\n",
|
|
" print(f\"Exception occurred while loading dataset: {e}\")\n",
|
|
"finally:\n",
|
|
" dataManager.unload_file_obj()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Visualize Diagnostic Variables Alongside the Associated Flags\n",
|
|
"\n",
|
|
"* Ensure that `dataset_name` and `flags_dataset_name` are properly defined in the previous step.\n",
|
|
"* Set `selected_set` to one of the keys of `variable_sets`, or edit that entry's `variables` list with the variable names you would like to visualize, based on the previously displayed options.\n",
|
|
"* Set that entry's `time_var` to the time variable name from the previously displayed variable names.\n",
|
|
"* Execute the cell.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import pipelines.steps.visualize_datatable_vars as vis\n",
|
|
"\n",
|
|
"\n",
|
|
"variable_sets = {\n",
|
|
" \"diagnostic\": {\n",
|
|
" \"variables\": [\n",
|
|
" 'VaporizerTemp_C', 'FlowRate_ccs', 'FilamentEmission_mA', 'ABsamp'\n",
|
|
" ],\n",
|
|
" \"time_var\": \"t_base\"\n",
|
|
" },\n",
|
|
" \"cpc\": {\n",
|
|
" \"variables\": [\"conc\"],\n",
|
|
" \"time_var\": \"end_time\"\n",
|
|
" },\n",
|
|
" \"species\": {\n",
|
|
" \"variables\": ['Chl_11000', 'NH4_11000', 'SO4_11000', 'NO3_11000', 'Org_11000'],\n",
|
|
" \"time_var\": \"t_start_Buf\"\n",
|
|
" }\n",
|
|
"}\n",
|
|
"\n",
|
|
"# Choose one: \"diagnostic\", \"cpc\", or \"species\"\n",
|
|
"selected_set = \"diagnostic\"\n",
|
|
"\n",
|
|
"variables = variable_sets[selected_set][\"variables\"]\n",
|
|
"time_var = variable_sets[selected_set][\"time_var\"]\n",
|
|
"\n",
|
|
"yaxis_range_dict = {'FlowRate_ccs' : [0,100],\n",
|
|
" 'VaporizerTemp_C': [590,610]}\n",
|
|
"vis.visualize_table_variables(path_to_data_file, \n",
|
|
" dataset_name, \n",
|
|
" flags_dataset_name,\n",
|
|
" x_var = time_var,\n",
|
|
" y_vars = variables,\n",
|
|
" yaxis_range_dict = yaxis_range_dict,\n",
|
|
" capture_renku_metadata=True,workflow_name=workflow_fname)\n",
|
|
"\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|