Update README.md campaign descriptor template and docker environment

2025-06-18 16:33:17 +02:00
parent f9b9e1226b
commit 490a542126
4 changed files with 161 additions and 4 deletions
--- a/47
+++ b/47
@ -0,0 +1,47 @@
+# Get additional info about the Dockerfile at https://docs.docker.com/reference/dockerfile/
+# NOTE(review): ':latest' is a moving tag; pin a specific miniconda3 version for reproducible builds.
+FROM continuumio/miniconda3:latest
+
+# Name of the conda environment; override at build time with --build-arg ENV_NAME=<name>
+ARG ENV_NAME=idear_env
+ENV ENV_NAME=${ENV_NAME}
+
+# Relative paths in later instructions, and the container's start directory, resolve to /idear
+WORKDIR /idear
+
+#RUN apt-get update && apt-get install -y cifs-utils
+
+# Install mamba into base for faster installation, then create the project environment with it
+RUN conda install -y -n base -c conda-forge mamba && \
+    mamba create -y -n "$ENV_NAME" -c conda-forge python=3.11 \
+    jupyter numpy h5py pandas matplotlib plotly=5.24 scipy pip && \
+    conda clean --all -y && rm -rf /root/.cache/pip
+
+# Install pip-only packages into the environment; --no-cache-dir keeps pip's download cache out of the layer
+RUN conda run -n "$ENV_NAME" pip install --no-cache-dir pybis==1.35 igor2 ipykernel sphinx dash dash-bootstrap-components
+
+# Make the project environment the default: mark it active and put its bin/ first on PATH
+ENV CONDA_DEFAULT_ENV=${ENV_NAME} \
+    PATH=/opt/conda/envs/${ENV_NAME}/bin:${PATH}
+
+# Create necessary directories for VOLUME
+RUN mkdir -p /idear/data /idear/figures /idear/notebooks /idear/scripts
+#RUN mkdir -p /mnt/lac_ord
+
+# Install requirements before copying the sources so this layer stays cached until requirements.txt changes
+COPY requirements.txt /idear/requirements.txt
+RUN conda run -n "$ENV_NAME" pip install --no-cache-dir -r /idear/requirements.txt
+
+# Copy project files, excluding certain directories (handled via .dockerignore)
+COPY . /idear
+
+# Define volumes for excluded directories
+# VOLUME ["/idear/data", "/idear/figures", "/idear/notebooks", "/idear/scripts"]
+
+# Add graphviz and JupyterLab in a single layer; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir \
+    graphviz jupyterlab
+
+# Default to an interactive shell; to start JupyterLab by default instead, swap in the commented CMD
+#CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token='my-token'"]
+CMD ["/bin/bash"]
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-# IDEAR FAIRification Toolkit
+# IDEAR Project Name

 This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.

@ -75,13 +75,14 @@ cd <your-repo-name>

 This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.

-1. Open **PowerShell as Administrator** and navigate to the `acsmnode` repository.  
-2. Create a `.env` file in the root of `acsmnode/`.  
+1. Open **PowerShell as Administrator** and navigate to your local clone of the repository (`<your-repo-name>`).  
+2. Create a `.env` file in the root of `<your-repo-name>/`.  
 3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines: 
   ```plaintext
   CIFS_USER=<your-username>
   CIFS_PASS=<your-password>
   JUPYTER_TOKEN=my-token
+   NETWORK_MOUNT=//your-server/your-share
   ```
   **To protect your credentials:**
   - Do not share the .env file with others.
@ -103,7 +104,7 @@ This toolkit includes a containerized JupyterLab environment for executing the d
  docker compose up idear_processor_networked

 6. Access:
-   - **Jupyter Lab**: [http://localhost:8889/lab/tree/notebooks/](http://localhost:8889/lab/tree/notebooks/)
+   - **Jupyter Lab**: [http://localhost:8889/lab/tree/](http://localhost:8889/lab/tree/)

 7. Stop the app:
   In the previously open PowerShell terminal, enter:
--- a/campaignDescriptor.yaml
+++ b/campaignDescriptor.yaml
@ -0,0 +1,67 @@
+# ------------------------------------------------------------------------------
+# Configuration for FAIR Data Integration Pipeline
+# ------------------------------------------------------------------------------
+
+# INPUT DATA DIRECTORY
+# Can be a local or network path. Examples:
+#   - Local: '../data/data_folder/' # manually create data_folder/ with instrument folders
+#   - Network: /mnt/network_drive/data_folder (Docker)
+input_file_directory: '/mnt/network_drive/Data'
+
+# OUTPUT DATA DIRECTORY
+# Always resolved relative to the directory the notebook runs from. When run
+# from `notebooks/`, output is saved in `../data/`.
+# Change this only if you run the pipeline from the project root instead.
+output_file_directory: '../data/'
+
+# ------------------------------------------------------------------------------
+# Project Metadata
+# ------------------------------------------------------------------------------
+
+project: 'Insert project title here'
+contact: 'Your Name or Team'
+group_id: '0000'  # Optional internal group or project ID
+
+# Type of experiment (e.g., campaign, flow_tube_study, smog_chamber, lab_study)
+experiment: 'experiment_type'
+
+# Dataset coverage range (optional but recommended)
+dataset_startdate: 'YYYY-MM-DD'
+dataset_enddate: 'YYYY-MM-DD'
+
+# Data processing level (typically '0', '1', or '2'; follows ACTRIS or custom standards)
+actris_level: '1'
+
+# ------------------------------------------------------------------------------
+# Output File Naming Convention (Optional)
+# ------------------------------------------------------------------------------
+
+# Year of observation
+year: 'YYYY'
+
+# Format string used to define output filenames.
+# You may use any field from this config as a part, comma-separated.
+# Example: 'experiment, year' → experiment_year.h5
+filename_format: 'experiment, year'
+
+# ------------------------------------------------------------------------------
+# Instrument Data Source
+# ------------------------------------------------------------------------------
+
+# Relative subdirectories inside the input directory that contain instrument data.
+# Use one or more folder paths as needed.
+instrument_datafolder:
+  - 'instFolder1/subfolder/'
+  - 'instFolder2'
+
+# ------------------------------------------------------------------------------
+# Data Integration Options
+# ------------------------------------------------------------------------------
+
+# Integration mode: 'collection' or 'single_experiment'.
+integration_mode: 'collection'
+
+# Optional: list of timestamps marking experimental phases or steps.
+# Format each entry as: 'YYYY-MM-DD HH:MM:SS'
+datetime_steps: []
+
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -0,0 +1,42 @@
+version: '3.9'  # NOTE(review): Docker Compose v2 treats this key as obsolete and ignores it with a warning - confirm before removing
+
+services:
+  idear_processor_networked:  # JupyterLab with the CIFS-backed network_drive volume attached
+    image: idear_processor
+    restart: unless-stopped
+    environment:
+      - DOCKER_CONTAINER=1
+    ports:
+      - "${JUPYTER_PORT:-8889}:8888"  # host port (default 8889) -> JupyterLab on container port 8888
+    volumes:
+      - ./:/idear  # bind-mounts the project directory over the image's /idear
+      - network_drive:/mnt/network_drive:rw
+    command: >  # exec replaces the bash wrapper so JupyterLab itself receives stop signals
+      bash -c "
+      exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
+      "
+    profiles:
+      - networked
+
+  idear_processor:  # JupyterLab on local files only (no network drive volume)
+    image: idear_processor
+    restart: unless-stopped
+    environment:
+      - DOCKER_CONTAINER=1
+    ports:
+      - "${JUPYTER_PORT:-8889}:8888"  # host port (default 8889) -> JupyterLab on container port 8888
+    volumes:
+      - ./:/idear  # bind-mounts the project directory over the image's /idear
+    command: >  # exec replaces the bash wrapper so JupyterLab itself receives stop signals
+      bash -c "
+      exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
+      "
+    profiles:
+      - local-only
+
+volumes:
+  network_drive:  # named volume mounted at /mnt/network_drive by idear_processor_networked
+    driver_opts:
+      type: cifs
+      o: "username=${CIFS_USER},password=${CIFS_PASS},vers=3.0"  # credentials are interpolated from CIFS_USER / CIFS_PASS (set in .env)
+      device: "${NETWORK_MOUNT}"  # share path from NETWORK_MOUNT, e.g. //your-server/your-share