Update README.md campaign descriptor template and docker environment

This commit is contained in:
2025-06-18 16:33:17 +02:00
parent f9b9e1226b
commit 490a542126
4 changed files with 161 additions and 4 deletions

47
Dockerfile Normal file
View File

@ -0,0 +1,47 @@
# Get additional info about the Dockerfile at https://docs.docker.com/reference/dockerfile/
# NOTE(review): pin to a specific miniconda3 tag or digest for reproducible builds;
# :latest can silently change the base between builds.
FROM continuumio/miniconda3:latest

# Name of the conda environment; ARG for build-time override, ENV so the
# value persists into the running container. Reference the ARG instead of
# repeating the literal so the two cannot drift apart.
ARG ENV_NAME=idear_env
ENV ENV_NAME=${ENV_NAME}

# Set the working directory (created automatically if missing)
WORKDIR /idear

#RUN apt-get update && apt-get install -y cifs-utils

# Use mamba for faster dependency resolution; clean conda/pip caches in the
# same layer so the cache files never persist in the image.
RUN conda install -y -n base -c conda-forge mamba && \
    mamba create -y -n $ENV_NAME -c conda-forge python=3.11 \
    jupyter numpy h5py pandas matplotlib plotly=5.24 scipy pip && \
    conda clean --all -y && rm -rf /root/.cache/pip

# Install all pip packages in a single layer with --no-cache-dir so the pip
# download cache is not baked into the image (hadolint DL3042). graphviz and
# jupyterlab were previously separate layers; merged here — PATH already
# pointed bare `pip` at this env, so the target environment is unchanged.
RUN conda run -n $ENV_NAME pip install --no-cache-dir \
    pybis==1.35 igor2 ipykernel sphinx dash dash-bootstrap-components \
    graphviz jupyterlab

# Set the default environment when the container starts
ENV CONDA_DEFAULT_ENV=$ENV_NAME
ENV PATH=/opt/conda/envs/$ENV_NAME/bin:$PATH

# Create necessary directories for VOLUME
RUN mkdir -p /idear/data /idear/figures /idear/notebooks /idear/scripts
#RUN mkdir -p /mnt/lac_ord

# Copy and install dependencies from requirements.txt BEFORE copying the full
# source tree: the (slow) install layer then stays cached until
# requirements.txt itself changes, instead of being rebuilt on every code edit.
COPY requirements.txt /idear/requirements.txt
RUN conda run -n $ENV_NAME pip install --no-cache-dir -r /idear/requirements.txt

# Copy project files, excluding certain directories (handled via .dockerignore)
COPY . /idear

# Define volumes for excluded directories
# VOLUME ["/idear/data", "/idear/figures", "/idear/notebooks", "/idear/scripts"]

# If you want to set JupyterLab as the default command
#CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token='my-token'"]
CMD ["/bin/bash"]

View File

@ -1,4 +1,4 @@
# IDEAR FAIRification Toolkit # IDEAR Project Name
This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets. This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.
@ -75,13 +75,14 @@ cd <your-repo-name>
This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging. This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.
1. Open **PowerShell as Administrator** and navigate to the `acsmnode` repository. 1. Open **PowerShell as Administrator** and navigate to the `your-repo-name` repository.
2. Create a `.env` file in the root of `acsmnode/`. 2. Create a `.env` file in the root of `your-repo-name/`.
3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines: 3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines:
```plaintext ```plaintext
CIFS_USER=<your-username> CIFS_USER=<your-username>
CIFS_PASS=<your-password> CIFS_PASS=<your-password>
JUPYTER_TOKEN=my-token JUPYTER_TOKEN=my-token
NETWORK_MOUNT=//your-server/your-share
``` ```
**To protect your credentials:** **To protect your credentials:**
- Do not share the .env file with others. - Do not share the .env file with others.
@ -103,7 +104,7 @@ This toolkit includes a containerized JupyterLab environment for executing the d
docker compose up idear_processor_networked docker compose up idear_processor_networked
6. Access: 6. Access:
- **Jupyter Lab**: [http://localhost:8889/lab/tree/notebooks/](http://localhost:8889/lab/tree/notebooks/) - **Jupyter Lab**: [http://localhost:8889/lab/tree/](http://localhost:8889/lab/tree/)
7. Stop the app: 7. Stop the app:
In the previously open PowerShell terminal, enter: In the previously open PowerShell terminal, enter:

67
campaignDescriptor.yaml Normal file
View File

@ -0,0 +1,67 @@
# ------------------------------------------------------------------------------
# Configuration for FAIR Data Integration Pipeline
# Campaign descriptor template: copy and fill in per campaign/experiment.
# ------------------------------------------------------------------------------
# INPUT DATA DIRECTORY
# Can be a local or network path. Examples:
# - Local: '../data/data_folder/' # manually create data_folder/ with instrument folders
# - Network: /mnt/network_drive/data_folder (Docker)
# NOTE(review): the network path assumes the docker-compose CIFS volume is
# mounted at /mnt/network_drive — confirm against your compose setup.
input_file_directory: '/mnt/network_drive/Data'
# OUTPUT DATA DIRECTORY
# Always relative to notebook location. If run from `notebooks/`,
# output will be saved in `../data/`.
# Do not modify unless you're running from the project root.
output_file_directory: '../data/'
# ------------------------------------------------------------------------------
# Project Metadata
# ------------------------------------------------------------------------------
project: 'Insert project title here'
contact: 'Your Name or Team'
group_id: '0000' # Optional internal group or project ID
# Type of experiment (e.g., campaign, flow_tube_study, smog_chamber, lab_study)
experiment: 'experiment_type'
# Dataset coverage range (optional but recommended)
# Dates are ISO 8601 strings: 'YYYY-MM-DD'
dataset_startdate: 'YYYY-MM-DD'
dataset_enddate: 'YYYY-MM-DD'
# Data processing level (typically '0', '1', or '2'; follows ACTRIS or custom standards)
actris_level: '1'
# ------------------------------------------------------------------------------
# Output File Naming Convention (Optional)
# ------------------------------------------------------------------------------
# Year of observation
year: 'YYYY'
# Format string used to define output filenames.
# You may use any field from this config as a part, comma-separated.
# Example: 'experiment, year' → experiment_year.h5
filename_format: 'experiment, year'
# ------------------------------------------------------------------------------
# Instrument Data Source
# ------------------------------------------------------------------------------
# Relative subdirectories inside the input directory that contain instrument data.
# Use one or more folder paths as needed (nested subfolders are allowed).
instrument_datafolder:
  - 'instFolder1/subfolder/'
  - 'instFolder2'
# ------------------------------------------------------------------------------
# Data Integration Options
# ------------------------------------------------------------------------------
# Integration mode: 'collection' or 'single_experiment'.
integration_mode: 'collection'
# Optional: list of timestamps marking experimental phases or steps.
# Format each entry as: 'YYYY-MM-DD HH:MM:SS'
# Leave as an empty list ([]) when no step annotations are needed.
datetime_steps: []

42
docker-compose.yaml Normal file
View File

@ -0,0 +1,42 @@
# NOTE: the top-level `version:` key is obsolete — Compose v2 ignores it and
# emits a warning, so it has been removed.
services:
  # Networked variant: mounts the CIFS network share at /mnt/network_drive.
  # The shared settings are anchored (&idear_service) and reused below so the
  # two services cannot drift apart.
  idear_processor_networked: &idear_service
    image: idear_processor
    restart: unless-stopped
    environment:
      - DOCKER_CONTAINER=1
    ports:
      # Host port configurable via JUPYTER_PORT in .env (default 8889).
      - "${JUPYTER_PORT:-8889}:8888"
    volumes:
      - ./:/idear
      - network_drive:/mnt/network_drive:rw
    command: >
      bash -c "
      jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
      "
    profiles:
      - networked
  # Local-only variant: identical to the networked service except it does not
  # mount the network drive; overrides `volumes` and `profiles` from the anchor.
  idear_processor:
    <<: *idear_service
    volumes:
      - ./:/idear
    profiles:
      - local-only
volumes:
  # CIFS/SMB network share; credentials and mount target come from the .env
  # file (CIFS_USER, CIFS_PASS, NETWORK_MOUNT) — never commit .env.
  network_drive:
    driver_opts:
      type: cifs
      o: "username=${CIFS_USER},password=${CIFS_PASS},vers=3.0"
      device: "${NETWORK_MOUNT}"