From 9e8be7745c810ca57818a2d57fb9a81a5edc8d5f Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Fri, 20 Jun 2025 12:37:58 +0200 Subject: [PATCH] Add documentation to notebook. This includes examples to test the data integration pipeline. --- notebooks/demo_data_integration.ipynb | 42 ++++++++++++++++++--------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/notebooks/demo_data_integration.ipynb b/notebooks/demo_data_integration.ipynb index 5888231..64b8d91 100644 --- a/notebooks/demo_data_integration.ipynb +++ b/notebooks/demo_data_integration.ipynb @@ -43,24 +43,21 @@ " print(dima_path)\n", "\n", "import dima.visualization.hdf5_vis as hdf5_vis\n", - "import dima.pipelines.data_integration as data_integration\n", - "\n", - "\n" + "import dima.pipelines.data_integration as data_integration\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Step 1: Specify data integration task through YAML configuration file\n", + "## Step 1: Specify Data Integration Task via YAML Configuration\n", "\n", - "* Open the `campaignDescriptor.yaml` file located in the root directory.\n", + "* Open the `campaignDescriptor.yaml` file located in the project root, and fill it out to describe your dataset.\n", "\n", - "* Refer to examples in `/dima/input_files/` for guidance.\n", + "* Refer to example descriptors in `/dima/input_files/` for guidance.\n", "\n", - "* Specify the input and output directory paths.\n", - "\n", - "* Execute the cell to initiate the configuration.\n" + "* Run the cell below to load your configuration — or skip it and go to the next cell to test the pipeline using one of the predefined campaign descriptors.\n", + "\n" ] }, { @@ -69,7 +66,24 @@ "metadata": {}, "outputs": [], "source": [ - "yaml_config_file_path ='../campaignDescriptor.yaml'" + "descriptor_path ='../campaignDescriptor.yaml'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment the lines below to test the data integration pipeline\n", + "# using predefined campaign descriptors for existing datasets in 5505.\n", + "\n", + "# Choose a predefined descriptor:\n", + "# Options: (1, 'LI'), (2, 'TBR'), (3, 'NG')\n", + "# num, initials = 1, 'LI'\n", + "\n", + "# Construct the path to the YAML descriptor\n", + "# descriptor_path = f'../dima/input_files/campaignDescriptor{num}_{initials}.yaml'\n" ] }, { @@ -88,7 +102,7 @@ "outputs": [], "source": [ "\n", - "hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)" + "hdf5_file_path = data_integration.run_pipeline(descriptor_path)" ] }, { @@ -97,7 +111,7 @@ "metadata": {}, "outputs": [], "source": [ - "hdf5_file_path" + "hdf5_file_path = ['../data/collection_experiment_type_YYYY_YYYY-MM-DD_YYYY-MM-DD.h5']" ] }, { @@ -132,7 +146,7 @@ ], "metadata": { "kernelspec": { - "display_name": "dash_multi_chem_env", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -146,7 +160,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.10" } }, "nbformat": 4,