diff --git a/notebooks/demo_acsm_pipeline.ipynb b/notebooks/demo_acsm_pipeline.ipynb index 5e079aa..84e2456 100644 --- a/notebooks/demo_acsm_pipeline.ipynb +++ b/notebooks/demo_acsm_pipeline.ipynb @@ -2,28 +2,9 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\dima\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\python310.zip\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\DLLs\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\n", - "\n", - "C:\\Users\\florez_j\\AppData\\Roaming\\Python\\Python310\\site-packages\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\win32\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\win32\\lib\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\Pythonwin\n", - "c:\\ProgramData\\Anaconda3\\envs\\flaggingapp_env\\lib\\site-packages\\setuptools\\_vendor\n" - ] - } - ], + "outputs": [], "source": [ "import sys\n", "import os\n", @@ -31,11 +12,11 @@ "\n", "\n", "notebook_dir = os.getcwd() # Current working directory (assumes running from notebooks/)\n", - "#project_path = os.path.normpath(os.path.join(notebook_dir, \"..\")) # Move up to project root\n", - "dima_path = os.path.normpath(os.path.join(notebook_dir, \"dima\")) # Move up to project root\n", + "project_path = os.path.normpath(os.path.join(notebook_dir, \"..\")) # Move up to project root\n", + "dima_path = os.path.normpath(os.path.join(project_path, \"dima\")) # Move up to project root\n", "\n", - "#if project_path not in sys.path: # Avoid duplicate entries\n", - "# sys.path.append(project_path)\n", + "if project_path not in sys.path: # Avoid duplicate entries\n", + " sys.path.append(project_path)\n", "if dima_path not in sys.path:\n", " sys.path.insert(0,dima_path)\n", "#sys.path.append(os.path.join(root_dir,'dima','instruments'))\n", @@ -48,7 +29,9 @@ "\n", "\n", "for item in sys.path:\n", - " print(item)\n" + " print(item)\n", + "\n", + "CAMPAIGN_DATA_FILE = \"../data/collection_JFJ_2024_2025-03-17_2025-02-17.h5\"" ] }, { @@ -58,60 +41,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File path: c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\pipelines\\steps\\apply_calibration_factors.py\n", - "data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5\n", - " dataset_name ... parent_file\n", - "0 ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_... ... ACSM_JFJ_2024_meta.txt\n", - "1 ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt... ... ACSM_JFJ_2024_timeseries.txt\n", - "2 ACSM_TOFWARE/2024/Org_data_valid.csv/data_table ... Org_data_valid.csv\n", - "3 ACSM_TOFWARE/2024/Org_err_valid.csv/data_table ... Org_err_valid.csv\n", - "4 ACSM_TOFWARE/2024/Org_mz_valid.csv/data_table ... Org_mz_valid.csv\n", - "\n", - "[5 rows x 3 columns]\n", - "ACSM_JFJ_2024_timeseries.txt\n", - "pipelines/params/calibration_factors.yaml\n", - "Path to output directory : data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n", - "Processing script : pipelines\\steps\\apply_calibration_factors.py\n", - "Output directory : data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024\n", - "NO3_11000\n", - "SO4_11000\n", - "NH4_11000\n", - "Org_11000\n", - "Chl_11000\n", - "Org_44_11000\n", - "Org_43_11000\n", - "Org_60_11000\n", - "NO3_30_11000\n", - "SO4_98_11000\n", - "SO4_81_11000\n", - "SO4_82_11000\n", - "SO4_62_11000\n", - "SO4_48_11000\n", - "Metadata for calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/data_lineage_metadata.json\n", - "Metadata for calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/data_lineage_metadata.json\n", - "Calibration factors saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/ACSM_JFJ_2024_timeseries_calibration_factors.csv\n", - "Calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_processed/2024/ACSM_JFJ_2024_timeseries_calibrated.csv\n", - "Data lineage saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n", - "\n" - ] - } - ], + "outputs": [], "source": [ + "from pipelines.steps.apply_calibration_factors import main as run_apply_calibration_factors\n", "\n", - "path_to_data_file = 'data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5'\n", + "path_to_data_file = CAMPAIGN_DATA_FILE\n", + "path_to_calibration_file = '../pipelines/params/calibration_factors.yaml'\n", "dataset_name = 'ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt/data_table'\n", - "path_to_calibration_file = 'pipelines/params/calibration_factors.yaml'\n", - "command = ['python', 'pipelines/steps/apply_calibration_factors.py', path_to_data_file, dataset_name, path_to_calibration_file]\n", - "status = subprocess.run(command, capture_output=True, check=True)\n", + "#command = ['python', 'pipelines/steps/apply_calibration_factors.py', path_to_data_file, dataset_name, path_to_calibration_file]\n", + "#status = subprocess.run(command, capture_output=True, check=True)\n", + "#print(status.stdout.decode())\n", "\n", - "print(status.stdout.decode())" + "run_apply_calibration_factors(path_to_data_file,path_to_calibration_file)\n" ] }, { @@ -121,113 +64,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\pipelines\\steps\\compute_automated_flags.py\n", - "data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5\n", - " dataset_name ... parent_file\n", - "0 ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_... ... ACSM_JFJ_2024_meta.txt\n", - "1 ACSM_TOFWARE/2024/ACSM_JFJ_2024_timeseries.txt... ... ACSM_JFJ_2024_timeseries.txt\n", - "2 ACSM_TOFWARE/2024/Org_data_valid.csv/data_table ... Org_data_valid.csv\n", - "3 ACSM_TOFWARE/2024/Org_err_valid.csv/data_table ... Org_err_valid.csv\n", - "4 ACSM_TOFWARE/2024/Org_mz_valid.csv/data_table ... Org_mz_valid.csv\n", - "\n", - "[5 rows x 3 columns]\n", - "pipelines/params/validity_thresholds.yaml\n", - "Path to output directory : data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n", - "Processing script %s: pipelines\\steps\\compute_automated_flags.py\n", - "Output directory: %s data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_flags/2024\n", - "t_base\n", - "Diagnostic variable t_base has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "VaporizerTemp_C\n", - "HeaterBias_V\n", - "Diagnostic variable HeaterBias_V has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "FlowRefWave\n", - "Diagnostic variable FlowRefWave has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "FlowRate_mb\n", - "Diagnostic variable FlowRate_mb has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "FlowRate_ccs\n", - "FilamentEmission_mA\n", - "Detector_V\n", - "Diagnostic variable Detector_V has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "AnalogInput06_V\n", - "Diagnostic variable AnalogInput06_V has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "ABRefWave\n", - "Diagnostic variable ABRefWave has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "ABsamp\n", - "ABCorrFact\n", - "Diagnostic variable ABCorrFact has not defined limits in {'calibration_params': {'path_to_file': 'pipelines/params/calibration_params.yaml'}, 'validity_thresholds': {'description': 'Defines the value range of a particular variable', 'variables': {'VaporizerTemp_C': {'lower_lim': 538, 'upper_lim': 600, 'description': 'heater temperature'}, 'ABsamp': {'lower_lim': 20000, 'upper_lim': 500000}, 'FlowRate_ccs': {'lower_lim': 2.1, 'upper_lim': 2.3}, 'FilamentEmission_mA': {'lower_lim': 0.75, 'upper_lim': 1.5}}}}.\n", - "Metadata for calibrated data saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_flags/2024/data_lineage_metadata.json\n", - "Flags saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10/ACSM_TOFWARE_flags/2024/ACSM_JFJ_2024_meta_flags.csv\n", - "Data lineage saved to data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "path_to_data_file = 'data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5'\n", + "from pipelines.steps.generate_flags import main as run_generate_flags\n", + "path_to_data_file = CAMPAIGN_DATA_FILE\n", "dataset_name = 'ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_table'\n", "path_to_config_file = 'pipelines/params/validity_thresholds.yaml'\n", - "command = ['python', 'pipelines/steps/compute_automated_flags.py', path_to_data_file, dataset_name, path_to_config_file]\n", - "status = subprocess.run(command, capture_output=True, check=True)\n", - "\n", - "print(status.stdout.decode())" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "[Start] Data integration :\n", - "Source: data\\collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10\n", - "Destination: data\\collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10.h5\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\dima\\instruments\\readers\\acsm_tofware_reader.py:98: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", - " df = pd.read_csv(tmp_filename,\n", - "c:\\Users\\florez_j\\Documents\\GitLab\\acsmnode\\dima\\instruments\\readers\\acsm_tofware_reader.py:98: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", - " df = pd.read_csv(tmp_filename,\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[=================================-------------------------------------------------------------------] 33.3% ...\n", - "Completed data transfer for instFolder: /ACSM_TOFWARE/2024\n", - "Starting data transfer from instFolder: /ACSM_TOFWARE_flags/2024\n", - "[===================================================================---------------------------------] 66.7% ...\n", - "Completed data transfer for instFolder: /ACSM_TOFWARE_flags/2024\n", - "Starting data transfer from instFolder: /ACSM_TOFWARE_processed/2024\n", - "[====================================================================================================] 100.0% ...\n", - "Completed data transfer for instFolder: /ACSM_TOFWARE_processed/2024\n", - "[End] Data integration\n" - ] - } - ], - "source": [ - "import dima.src.hdf5_ops as dataOps \n", - "\n", - "dataManager = dataOps.HDF5DataOpsManager(path_to_data_file)\n", - "dataManager.load_file_obj()\n", - "dataManager.update_file('data/collection_JFJ_2024_LeilaS_2025-02-10_2025-02-10')\n", - "dataManager.unload_file_obj()" + "#command = ['python', 'pipelines/steps/compute_automated_flags.py', path_to_data_file, dataset_name, path_to_config_file]\n", + "#status = subprocess.run(command, capture_output=True, check=True)\n", + "#print(status.stdout.decode())\n", + "run_generate_flags(path_to_data_file, 'diagnostics')\n" ] }, { @@ -236,43 +84,46 @@ "metadata": {}, "outputs": [], "source": [ - "path_to_data_file = 'data/collection_JFJ_2024_LeilaS_2025-02-07_2025-02-07.h5'\n", + "from pipelines.steps.generate_flags import main as run_generate_flags\n", + "path_to_data_file = CAMPAIGN_DATA_FILE\n", "dataset_name = 'ACSM_TOFWARE/2024/ACSM_JFJ_2024_meta.txt/data_table'\n", - "path_to_calibration_file = 'pipelines/params/validity_thresholds.yaml'\n", - "command = ['python', 'pipelines/steps/apply_diagnostic_flags.py', path_to_data_file, dataset_name, path_to_calibration_file]\n", - "status = subprocess.run(command, capture_output=True, check=True)\n", - "\n", - "print(status.stdout.decode()) " + "path_to_config_file = 'pipelines/params/validity_thresholds.yaml'\n", + "#command = ['python', 'pipelines/steps/compute_automated_flags.py', path_to_data_file, dataset_name, path_to_config_file]\n", + "#status = subprocess.run(command, capture_output=True, check=True)\n", + "#print(status.stdout.decode())\n", + "run_generate_flags(path_to_data_file, 'species')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'dataManager' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mdataManager\u001b[49m\u001b[38;5;241m.\u001b[39mload_file_obj()\n\u001b[0;32m 2\u001b[0m dataManager\u001b[38;5;241m.\u001b[39mextract_and_load_dataset_metadata()\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(dataManager\u001b[38;5;241m.\u001b[39mdataset_metadata_df\u001b[38;5;241m.\u001b[39mhead())\n", - "\u001b[1;31mNameError\u001b[0m: name 'dataManager' is not defined" - ] - } - ], + "outputs": [], "source": [ + "import dima.src.hdf5_ops as dataOps \n", + "\n", + "dataManager = dataOps.HDF5DataOpsManager(CAMPAIGN_DATA_FILE)\n", + "dataManager.update_file('../data/collection_JFJ_2024_LeilaS_2025-02-17_2025-02-17')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataManager = dataOps.HDF5DataOpsManager(path_to_data_file)\n", "dataManager.load_file_obj()\n", "dataManager.extract_and_load_dataset_metadata()\n", - "print(dataManager.dataset_metadata_df.head())\n", - "dataManager.unload_file_obj()\n" + "df = dataManager.dataset_metadata_df\n", + "print(df.head(10))\n", + "dataManager.unload_file_obj()" ] } ], "metadata": { "kernelspec": { - "display_name": "flaggingapp_env", + "display_name": "dash_multi_chem_env", "language": "python", "name": "python3" }, @@ -286,7 +137,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.11.9" } }, "nbformat": 4,