Add documentation to the notebook, including examples for testing the data integration pipeline.

2025-06-20 12:37:58 +02:00
parent 32a6565285
commit 9e8be7745c


@@ -43,24 +43,21 @@
" print(dima_path)\n",
"\n",
"import dima.visualization.hdf5_vis as hdf5_vis\n",
"import dima.pipelines.data_integration as data_integration\n",
"\n",
"\n"
"import dima.pipelines.data_integration as data_integration\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Specify data integration task through YAML configuration file\n",
"## Step 1: Specify Data Integration Task via YAML Configuration\n",
"\n",
"* Open the `campaignDescriptor.yaml` file located in the root directory.\n",
"* Open the `campaignDescriptor.yaml` file located in the project root, and fill it out to describe your dataset.\n",
"\n",
"* Refer to examples in `/dima/input_files/` for guidance.\n",
"* Refer to example descriptors in `/dima/input_files/` for guidance.\n",
"\n",
"* Specify the input and output directory paths.\n",
"\n",
"* Execute the cell to initiate the configuration.\n"
"* Run the cell below to load your configuration — or skip it and go to the next cell to test the pipeline using one of the predefined campaign descriptors.\n",
"\n"
]
},
{
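Step 1's bullets above lean on `campaignDescriptor.yaml` without showing its shape, and the real examples live in `/dima/input_files/`. As a rough, hypothetical illustration only (every key below is an assumption for the sketch, not DIMA's actual schema), here is how such a descriptor would round-trip through PyYAML:

```python
# Hypothetical campaign descriptor sketch; all keys are illustrative
# assumptions -- consult /dima/input_files/ for the real schema.
import yaml  # assumes PyYAML is installed

example_descriptor = """
campaign_name: collection_experiment_type
input_file_directory: ../data/raw
output_file_directory: ../data
start_date: 2023-01-01
end_date: 2023-12-31
"""

config = yaml.safe_load(example_descriptor)
print(config["campaign_name"], "->", config["output_file_directory"])
```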
@@ -69,7 +66,24 @@
"metadata": {},
"outputs": [],
"source": [
"yaml_config_file_path ='../campaignDescriptor.yaml'"
"descriptor_path ='../campaignDescriptor.yaml'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Uncomment the lines below to test the data integration pipeline\n",
"# using predefined campaign descriptors for existing datasets in 5505.\n",
"\n",
"# Choose a predefined descriptor:\n",
"# Options: (1, 'LI'), (2, 'TBR'), (3, 'NG')\n",
"# num, initials = 1, 'LI'\n",
"\n",
"# Construct the path to the YAML descriptor\n",
"# descriptor_path = f'../dima/input_files/campaignDescriptor{num}_{initials}.yaml'\n"
]
},
{
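Whichever descriptor path you settle on, checking that the file actually exists before invoking the pipeline turns a confusing downstream failure into an immediate, readable one. A minimal sketch using only the standard library, building on the `descriptor_path` set in the cells above:

```python
from pathlib import Path

descriptor = Path(descriptor_path)
if not descriptor.is_file():
    raise FileNotFoundError(
        f"No campaign descriptor found at {descriptor.resolve()}; "
        "fix the path or pick one of the predefined descriptors above."
    )
print(f"Using descriptor: {descriptor.resolve()}")
```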
@@ -88,7 +102,7 @@
"outputs": [],
"source": [
"\n",
"hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
"hdf5_file_path = data_integration.run_pipeline(descriptor_path)"
]
},
{
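`run_pipeline`'s return type is not documented in this diff; the cell that follows implies a list of output paths. Under that assumption, a defensive way to consume the result:

```python
# Sketch only: normalizes the result in case run_pipeline returns a single
# path string rather than a list (an assumption, not documented behavior).
result = data_integration.run_pipeline(descriptor_path)
hdf5_file_path = [result] if isinstance(result, str) else list(result)
for path in hdf5_file_path:
    print(f"Pipeline produced: {path}")
```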
@@ -97,7 +111,7 @@
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path"
"hdf5_file_path = ['../data/collection_experiment_type_YYYY_YYYY-MM-DD_YYYY-MM-DD.h5']"
]
},
{
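Once a file exists at one of those paths, a quick structural sanity check confirms the pipeline wrote what you expect. The sketch below uses the standard `h5py` API (an assumption; the notebook itself only imports `dima.visualization.hdf5_vis`):

```python
import h5py

# Walk the first produced file and print every group/dataset name.
with h5py.File(hdf5_file_path[0], "r") as f:
    f.visit(print)
```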
@@ -132,7 +146,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "dash_multi_chem_env",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -146,7 +160,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.10"
}
},
"nbformat": 4,