Add documentation to the notebook, including examples for testing the data integration pipeline.

2025-06-20 12:37:58 +02:00
parent 32a6565285
commit 9e8be7745c


@@ -43,24 +43,21 @@
" print(dima_path)\n",
"\n",
"import dima.visualization.hdf5_vis as hdf5_vis\n",
"import dima.pipelines.data_integration as data_integration\n",
"\n",
"\n"
"import dima.pipelines.data_integration as data_integration\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Specify data integration task through YAML configuration file\n",
"## Step 1: Specify Data Integration Task via YAML Configuration\n",
"\n",
"* Open the `campaignDescriptor.yaml` file located in the root directory.\n",
"* Open the `campaignDescriptor.yaml` file located in the project root, and fill it out to describe your dataset.\n",
"\n",
"* Refer to examples in `/dima/input_files/` for guidance.\n",
"* Refer to example descriptors in `/dima/input_files/` for guidance.\n",
"\n",
"* Specify the input and output directory paths.\n",
"\n",
"* Execute the cell to initiate the configuration.\n"
"* Run the cell below to load your configuration — or skip it and go to the next cell to test the pipeline using one of the predefined campaign descriptors.\n",
"\n"
]
},
{
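Step 1's bullets above lean on `campaignDescriptor.yaml` without showing its shape, and the real examples live in `/dima/input_files/`. As a rough, hypothetical illustration only (every key below is an assumption for the sketch, not DIMA's actual schema), here is how such a descriptor would round-trip through PyYAML:

```python
# Hypothetical campaign descriptor sketch; all keys are illustrative
# assumptions -- consult /dima/input_files/ for the real schema.
import yaml  # assumes PyYAML is installed

example_descriptor = """
campaign_name: collection_experiment_type
input_file_directory: ../data/raw
output_file_directory: ../data
start_date: 2023-01-01
end_date: 2023-12-31
"""

config = yaml.safe_load(example_descriptor)
print(config["campaign_name"], "->", config["output_file_directory"])
```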
@@ -69,7 +66,24 @@
"metadata": {},
"outputs": [],
"source": [
"yaml_config_file_path ='../campaignDescriptor.yaml'"
"descriptor_path ='../campaignDescriptor.yaml'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Uncomment the lines below to test the data integration pipeline\n",
"# using predefined campaign descriptors for existing datasets in 5505.\n",
"\n",
"# Choose a predefined descriptor:\n",
"# Options: (1, 'LI'), (2, 'TBR'), (3, 'NG')\n",
"# num, initials = 1, 'LI'\n",
"\n",
"# Construct the path to the YAML descriptor\n",
"# descriptor_path = f'../dima/input_files/campaignDescriptor{num}_{initials}.yaml'\n"
]
},
{
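Whichever descriptor path you settle on, checking that the file actually exists before invoking the pipeline turns a confusing downstream failure into an immediate, readable one. A minimal sketch using only the standard library, building on the `descriptor_path` set in the cells above:

```python
from pathlib import Path

descriptor = Path(descriptor_path)
if not descriptor.is_file():
    raise FileNotFoundError(
        f"No campaign descriptor found at {descriptor.resolve()}; "
        "fix the path or pick one of the predefined descriptors above."
    )
print(f"Using descriptor: {descriptor.resolve()}")
```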
@@ -88,7 +102,7 @@
"outputs": [],
"source": [
"\n",
"hdf5_file_path = data_integration.run_pipeline(yaml_config_file_path)"
"hdf5_file_path = data_integration.run_pipeline(descriptor_path)"
]
},
{
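`run_pipeline`'s return type is not documented in this diff; the cell that follows implies a list of output paths. Under that assumption, a defensive way to consume the result:

```python
# Sketch only: normalizes the result in case run_pipeline returns a single
# path string rather than a list (an assumption, not documented behavior).
result = data_integration.run_pipeline(descriptor_path)
hdf5_file_path = [result] if isinstance(result, str) else list(result)
for path in hdf5_file_path:
    print(f"Pipeline produced: {path}")
```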
@@ -97,7 +111,7 @@
"metadata": {},
"outputs": [],
"source": [
"hdf5_file_path"
"hdf5_file_path = ['../data/collection_experiment_type_YYYY_YYYY-MM-DD_YYYY-MM-DD.h5']"
]
},
{
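Once a file exists at one of those paths, a quick structural sanity check confirms the pipeline wrote what you expect. The sketch below uses the standard `h5py` API (an assumption; the notebook itself only imports `dima.visualization.hdf5_vis`):

```python
import h5py

# Walk the first produced file and print every group/dataset name.
with h5py.File(hdf5_file_path[0], "r") as f:
    f.visit(print)
```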
@@ -132,7 +146,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "dash_multi_chem_env",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@@ -146,7 +160,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
"version": "3.11.10"
}
},
"nbformat": 4,