From 9e8be7745c810ca57818a2d57fb9a81a5edc8d5f Mon Sep 17 00:00:00 2001
From: Florez Ospina Juan Felipe <juan.florez-ospina@psi.ch>
Date: Fri, 20 Jun 2025 12:37:58 +0200
Subject: [PATCH] Add documentation to notebook. This includes examples to test
 the data integration pipeline.

---
 notebooks/demo_data_integration.ipynb | 42 ++++++++++++++++++---------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/notebooks/demo_data_integration.ipynb b/notebooks/demo_data_integration.ipynb
index 5888231..64b8d91 100644
--- a/notebooks/demo_data_integration.ipynb
+++ b/notebooks/demo_data_integration.ipynb
@@ -43,24 +43,21 @@
     "    print(dima_path)\n",
     "\n",
     "import dima.visualization.hdf5_vis as hdf5_vis\n",
-    "import dima.pipelines.data_integration as data_integration\n",
-    "\n",
-    "\n"
+    "import dima.pipelines.data_integration as data_integration\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Step 1: Specify data integration task through YAML configuration file\n",
+    "## Step 1: Specify Data Integration Task via YAML Configuration\n",
     "\n",
-    "* Open the `campaignDescriptor.yaml` file located in the root directory.\n",
+    "* Open the `campaignDescriptor.yaml` file located in the project root, and fill it out to describe your dataset.\n",
     "\n",
-    "* Refer to examples in `/dima/input_files/` for guidance.\n",
+    "* Refer to example descriptors in `/dima/input_files/` for guidance.\n",
     "\n",
-    "* Specify the input and output directory paths.\n",
-    "\n",
-    "* Execute the cell to initiate the configuration.\n"
+    "* Run the cell below to set `descriptor_path` to your descriptor — or skip it and uncomment the lines in the following cell to test the pipeline with one of the predefined campaign descriptors.\n",
+    "\n"
    ]
   },
   {
@@ -69,7 +66,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "yaml_config_file_path ='../campaignDescriptor.yaml'"
+    "descriptor_path ='../campaignDescriptor.yaml'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Uncomment the lines below to test the data integration pipeline\n",
+    "# using predefined campaign descriptors for existing datasets in 5505.\n",
+    "\n",
+    "# Choose a predefined descriptor:\n",
+    "# Options: (1, 'LI'), (2, 'TBR'), (3, 'NG')\n",
+    "# num, initials = 1, 'LI'\n",
+    "\n",
+    "# Construct the path to the YAML descriptor\n",
+    "# descriptor_path = f'../dima/input_files/campaignDescriptor{num}_{initials}.yaml'\n"
    ]
   },
   {
@@ -88,7 +102,7 @@
    "outputs": [],
    "source": [
     "\n",
-    "hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
+    "hdf5_file_path = data_integration.run_pipeline(descriptor_path)"
    ]
   },
   {
@@ -97,7 +111,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "hdf5_file_path"
+    "hdf5_file_path = ['../data/collection_experiment_type_YYYY_YYYY-MM-DD_YYYY-MM-DD.h5']"
    ]
   },
   {
@@ -132,7 +146,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "dash_multi_chem_env",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -146,7 +160,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.9"
+   "version": "3.11.10"
   }
  },
  "nbformat": 4,