mirror of
https://gitea.psi.ch/APOG/acsmnode.git
synced 2025-06-24 21:21:08 +02:00
Add new demo for acsm pipeline and clear out the other notebooks
This commit is contained in:
294
notebooks/demo_acsm_pipeline.ipynb
Normal file
294
notebooks/demo_acsm_pipeline.ipynb
Normal file
@ -0,0 +1,294 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\dima\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\python310.zip\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\DLLs\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\n",
|
||||
"\n",
|
||||
"C:\\Users\\florez_j\\AppData\\Roaming\\Python\\Python310\\site-packages\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\win32\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\win32\\lib\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\Pythonwin\n",
|
||||
"c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\setuptools\\_vendor\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"# Set up project root directory\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"notebook_dir = os.getcwd() # Current working directory (assumes the kernel is started from the project root)\n",
|
||||
"#project_path = os.path.normpath(os.path.join(notebook_dir, \"..\")) # Move up to project root\n",
|
||||
"dima_path = os.path.normpath(os.path.join(notebook_dir, \"dima\")) # Path to the dima directory inside the working directory\n",
|
||||
"\n",
|
||||
"#if project_path not in sys.path: # Avoid duplicate entries\n",
|
||||
"# sys.path.append(project_path)\n",
|
||||
"if dima_path not in sys.path:\n",
|
||||
" sys.path.insert(0,dima_path)\n",
|
||||
"#sys.path.append(os.path.join(root_dir,'dima','instruments'))\n",
|
||||
"#sys.path.append(os.path.join(root_dir,'dima','src'))\n",
|
||||
"#sys.path.append(os.path.join(root_dir,'dima','utils'))\n",
|
||||
"\n",
|
||||
"#import dima.visualization.hdf5_vis as hdf5_vis\n",
|
||||
"#import dima.pipelines.data_integration as data_integration\n",
|
||||
"import subprocess\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for item in sys.path:\n",
|
||||
" print(item)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File path: c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\pipelines\\steps\\apply_calibration_factors.py\n",
|
||||
"data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5\n",
|
||||
" dataset_name ... parent_file\n",
|
||||
"0 ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_... ... ACSM_JFJ_2024_meta.txt\n",
|
||||
"1 ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt... ... ACSM_JFJ_2024_timeseries.txt\n",
|
||||
"2 ACSM_TOFWARE/2024/Org_data_valid.csv/data_table ... Org_data_valid.csv\n",
|
||||
"3 ACSM_TOFWARE/2024/Org_err_valid.csv/data_table ... Org_err_valid.csv\n",
|
||||
"4 ACSM_TOFWARE/2024/Org_mz_valid.csv/data_table ... Org_mz_valid.csv\n",
|
||||
"\n",
|
||||
"[5 rows x 3 columns]\n",
|
||||
"ACSM_JFJ_2024_timeseries.txt\n",
|
||||
"pipelines/params/calibration_factors.yaml\n",
|
||||
"Path to output directory : data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n",
|
||||
"Processing script : pipelines\\steps\\apply_calibration_factors.py\n",
|
||||
"Output directory : data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024\n",
|
||||
"NO3_11000\n",
|
||||
"SO4_11000\n",
|
||||
"NH4_11000\n",
|
||||
"Org_11000\n",
|
||||
"Chl_11000\n",
|
||||
"Org_44_11000\n",
|
||||
"Org_43_11000\n",
|
||||
"Org_60_11000\n",
|
||||
"NO3_30_11000\n",
|
||||
"SO4_98_11000\n",
|
||||
"SO4_81_11000\n",
|
||||
"SO4_82_11000\n",
|
||||
"SO4_62_11000\n",
|
||||
"SO4_48_11000\n",
|
||||
"Metadata for calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/data_lineage_metadata.json\n",
|
||||
"Metadata for calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/data_lineage_metadata.json\n",
|
||||
"Calibration factors saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/ACSM_JFJ_2024_timeseries_calibration_factors.csv\n",
|
||||
"Calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/ACSM_JFJ_2024_timeseries_calibrated.csv\n",
|
||||
"Data lineage saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"path_to_data_file = 'data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5'\n",
|
||||
"dataset_name = 'ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt/data_table'\n",
|
||||
"path_to_calibration_file = 'pipelines/params/calibration_factors.yaml'\n",
|
||||
"command = ['python', 'pipelines/steps/apply_calibration_factors.py', path_to_data_file, dataset_name, path_to_calibration_file]\n",
|
||||
"status = subprocess.run(command, capture_output=True, check=True)\n",
|
||||
"\n",
|
||||
"print(status.stdout.decode())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\pipelines\\steps\\compute_automated_flags.py\n",
|
||||
"data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5\n",
|
||||
" dataset_name ... parent_file\n",
|
||||
"0 ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_... ... ACSM_JFJ_2024_meta.txt\n",
|
||||
"1 ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt... ... ACSM_JFJ_2024_timeseries.txt\n",
|
||||
"2 ACSM_TOFWARE/2024/Org_data_valid.csv/data_table ... Org_data_valid.csv\n",
|
||||
"3 ACSM_TOFWARE/2024/Org_err_valid.csv/data_table ... Org_err_valid.csv\n",
|
||||
"4 ACSM_TOFWARE/2024/Org_mz_valid.csv/data_table ... Org_mz_valid.csv\n",
|
||||
"\n",
|
||||
"[5 rows x 3 columns]\n",
|
||||
"pipelines/params/validity_thresholds.yaml\n",
|
||||
"Path to output directory : data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n",
|
||||
"Processing script %s: pipelines\\steps\\compute_automated_flags.py\n",
|
||||
"Output directory: %s data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_flags/2024\n",
|
||||
"t_base\n",
|
||||
"Diagnostic variable t_base has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"VaporizerTemp_C\n",
|
||||
"HeaterBias_V\n",
|
||||
"Diagnostic variable HeaterBias_V has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"FlowRefWave\n",
|
||||
"Diagnostic variable FlowRefWave has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"FlowRate_mb\n",
|
||||
"Diagnostic variable FlowRate_mb has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"FlowRate_ccs\n",
|
||||
"FilamentEmission_mA\n",
|
||||
"Detector_V\n",
|
||||
"Diagnostic variable Detector_V has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"AnalogInput06_V\n",
|
||||
"Diagnostic variable AnalogInput06_V has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"ABRefWave\n",
|
||||
"Diagnostic variable ABRefWave has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"ABsamp\n",
|
||||
"ABCorrFact\n",
|
||||
"Diagnostic variable ABCorrFact has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n",
|
||||
"Metadata for calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_flags/2024/data_lineage_metadata.json\n",
|
||||
"Flags saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_flags/2024/ACSM_JFJ_2024_meta_flags.csv\n",
|
||||
"Data lineage saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"path_to_data_file = 'data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5'\n",
|
||||
"dataset_name = 'ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_table'\n",
|
||||
"path_to_config_file = 'pipelines/params/validity_thresholds.yaml'\n",
|
||||
"command = ['python', 'pipelines/steps/compute_automated_flags.py', path_to_data_file, dataset_name, path_to_config_file]\n",
|
||||
"status = subprocess.run(command, capture_output=True, check=True)\n",
|
||||
"\n",
|
||||
"print(status.stdout.decode())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"[Start] Data integration :\n",
|
||||
"Source: data\\collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n",
|
||||
"Destination: data\\collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\dima\\instruments\\readers\\acsm_tofware_reader.py:98: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
|
||||
" df = pd.read_csv(tmp_filename,\n",
|
||||
"c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\dima\\instruments\\readers\\acsm_tofware_reader.py:98: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
|
||||
" df = pd.read_csv(tmp_filename,\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[=================================-------------------------------------------------------------------] 33.3% ...\n",
|
||||
"Completed data transfer for instFolder: /ACSM_TOFWARE/2024\n",
|
||||
"Starting data transfer from instFolder: /ACSM_TOFWARE_flags/2024\n",
|
||||
"[===================================================================---------------------------------] 66.7% ...\n",
|
||||
"Completed data transfer for instFolder: /ACSM_TOFWARE_flags/2024\n",
|
||||
"Starting data transfer from instFolder: /ACSM_TOFWARE_processed/2024\n",
|
||||
"[====================================================================================================] 100.0% ...\n",
|
||||
"Completed data transfer for instFolder: /ACSM_TOFWARE_processed/2024\n",
|
||||
"[End] Data integration\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import dima.src.hdf5_ops as dataOps \n",
|
||||
"\n",
|
||||
"dataManager = dataOps.HDF5DataOpsManager(path_to_data_file)\n",
|
||||
"dataManager.load_file_obj()\n",
|
||||
"dataManager.update_file('data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10')\n",
|
||||
"dataManager.unload_file_obj()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path_to_data_file = 'data/collection_JFJ_2024_LeilaS_2025-02-07_2025-02-07.h5'\n",
|
||||
"dataset_name = 'ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_table'\n",
|
||||
"path_to_calibration_file = 'pipelines/params/validity_thresholds.yaml'\n",
|
||||
"command = ['python', 'pipelines/steps/apply_diagnostic_flags.py', path_to_data_file, dataset_name, path_to_calibration_file]\n",
|
||||
"status = subprocess.run(command, capture_output=True, check=True)\n",
|
||||
"\n",
|
||||
"print(status.stdout.decode()) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'dataManager' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mdataManager\u001b[49m\u001b[38;5;241m.\u001b[39mload_file_obj()\n\u001b[0;32m 2\u001b[0m dataManager\u001b[38;5;241m.\u001b[39mextract_and_load_dataset_metadata()\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(dataManager\u001b[38;5;241m.\u001b[39mdataset_metadata_df\u001b[38;5;241m.\u001b[39mhead())\n",
|
||||
"\u001b[1;31mNameError\u001b[0m: name 'dataManager' is not defined"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataManager.load_file_obj()\n",
|
||||
"dataManager.extract_and_load_dataset_metadata()\n",
|
||||
"print(dataManager.dataset_metadata_df.head())\n",
|
||||
"dataManager.unload_file_obj()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "flaggingapp_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -19,7 +19,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -33,8 +33,10 @@
|
||||
"#sys.path.append(os.path.join(root_dir,'dima','src'))\n",
|
||||
"#sys.path.append(os.path.join(root_dir,'dima','utils'))\n",
|
||||
"\n",
|
||||
"import dima.src.hdf5_vis as hdf5_vis\n",
|
||||
"import dima.pipelines.data_integration as dilib\n"
|
||||
"import dima.visualization.hdf5_vis as hdf5_vis\n",
|
||||
"import dima.pipelines.data_integration as data_integration\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -50,11 +52,12 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"yaml_config_file_path = 'dima_config.yaml'"
|
||||
"#yaml_config_file_path = 'dima/input_files/data_integr_config_file_TBR.yaml' \n",
|
||||
"yaml_config_file_path ='campaignDescriptor.yaml'"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -73,7 +76,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"hdf5_file_path = dilib.integrate_data_sources(yaml_config_file_path)"
|
||||
"hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hdf5_file_path"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -98,6 +110,11 @@
|
||||
" hdf5_vis.display_group_hierarchy_on_a_treemap(hdf5_file_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -107,7 +124,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -145,7 +162,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "multiphase_chemistry_env",
|
||||
"display_name": "flaggingapp_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -159,7 +176,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -15,7 +15,8 @@
|
||||
"\n",
|
||||
"import data_flagging_utils as utils\n",
|
||||
"\n",
|
||||
"DataOpsObj = utils.FlaggingAppDataManager('data_products/collection_acsm_campaign_NoraN_2024-01-01_2024-02-29.h5')\n",
|
||||
"path_to_file = 'data_products/collection_JFJ_2024_acsm_campaign_NoraN_2024-01-01_2024-02-29.h5'\n",
|
||||
"DataOpsObj = utils.FlaggingAppDataManager(path_to_file)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
@ -25,14 +26,105 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path_to_append_dir = 'data_products/collection_acsm_campaign_NoraN_2024-01-01_2024-02-29'\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"# Set up project root directory\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"sys.path.append(os.path.join(root_dir,'dima'))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from pipelines.steps.apply_calibration_factors import apply_calibration_factors\n",
|
||||
"\n",
|
||||
"import dima.src.hdf5_ops as dataOps\n",
|
||||
"\n",
|
||||
"file_path = \"data/collection_JFJ_2024_LeilaS_2025-02-03_2025-02-03.h5\"\n",
|
||||
"dataOpsManager = dataOps.HDF5DataOpsManager(file_path)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"dataOpsManager.load_file_obj()\n",
|
||||
"dataOpsManager.extract_and_load_dataset_metadata()\n",
|
||||
"print(dataOpsManager.dataset_metadata_df.head())\n",
|
||||
"#dataOpsManager.unload_file_obj\n",
|
||||
"\n",
|
||||
"#dataOpsManager.unload_file_obj()\n",
|
||||
"dataset_name = \"ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt/data_table\"\n",
|
||||
"data_table = dataOpsManager.extract_dataset_as_dataframe(dataset_name)\n",
|
||||
"datetime_var, datetime_format = dataOpsManager.infer_datetime_variable(dataset_name)\n",
|
||||
"\n",
|
||||
"#data_table['t_start_Buf'] = data_table['t_start_Buf'].apply(lambda x : x.decode())\n",
|
||||
"dataOpsManager.unload_file_obj()\n",
|
||||
"\n",
|
||||
"a, b = apply_calibration_factors(data_table, datetime_var,'pipelines/params/calibration_factors.yaml')\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"apply_calib"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"# Set up project root directory\n",
|
||||
"root_dir = os.path.abspath(os.curdir)\n",
|
||||
"sys.path.append(root_dir)\n",
|
||||
"sys.path.append(os.path.join(root_dir,'dima'))\n",
|
||||
"\n",
|
||||
"import dima.src.hdf5_ops as data_ops\n",
|
||||
"\n",
|
||||
"path_to_file = 'data/collection_JFJ_2024_NoraN_2024-01-01_2024-02-29.h5'\n",
|
||||
"DataOpsObj = data_ops.HDF5DataOpsManager(path_to_file)\n",
|
||||
"\n",
|
||||
"DataOpsObj.load_file_obj()\n",
|
||||
"data_table = DataOpsObj.extract_dataset_as_dataframe('/ACSM_TOFWARE/2024/ACSM_JFJ_2024_JantoFeb_timeseries.txt/data_table')\n",
|
||||
"DataOpsObj.unload_file_obj()\n",
|
||||
"\n",
|
||||
"data_table.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path_to_append_dir = 'data_products/collection_JFJ_2024_acsm_campaign_NoraN_2024-01-01_2024-02-29'\n",
|
||||
"DataOpsObj.load_file_obj()\n",
|
||||
"DataOpsObj.transfer_flags()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -61,7 +153,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -70,7 +162,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -81,7 +173,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -93,7 +185,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "multiphase_chemistry_env",
|
||||
"display_name": "flaggingapp_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -107,7 +199,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.9"
|
||||
"version": "3.10.15"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Reference in New Issue
Block a user