Update README.md campaign descriptor template and docker environment

This commit is contained in:
2025-06-18 16:33:17 +02:00
parent f9b9e1226b
commit 490a542126
4 changed files with 161 additions and 4 deletions

47
Dockerfile Normal file
View File

@ -0,0 +1,47 @@
# Get additional info about the Dockerfile at https://docs.docker.com/reference/dockerfile/
# NOTE(review): pin to a specific miniconda3 tag or digest for reproducible builds;
# :latest can silently change the base between builds.
FROM continuumio/miniconda3:latest

# Name of the conda environment; ARG for build-time override, ENV so the
# value persists into the running container. Reference the ARG instead of
# repeating the literal so the two cannot drift apart.
ARG ENV_NAME=idear_env
ENV ENV_NAME=${ENV_NAME}

# Set the working directory (created automatically if missing)
WORKDIR /idear

#RUN apt-get update && apt-get install -y cifs-utils

# Use mamba for faster dependency resolution; clean conda/pip caches in the
# same layer so the cache files never persist in the image.
RUN conda install -y -n base -c conda-forge mamba && \
    mamba create -y -n $ENV_NAME -c conda-forge python=3.11 \
    jupyter numpy h5py pandas matplotlib plotly=5.24 scipy pip && \
    conda clean --all -y && rm -rf /root/.cache/pip

# Install all pip packages in a single layer with --no-cache-dir so the pip
# download cache is not baked into the image (hadolint DL3042). graphviz and
# jupyterlab were previously separate layers; merged here — PATH already
# pointed bare `pip` at this env, so the target environment is unchanged.
RUN conda run -n $ENV_NAME pip install --no-cache-dir \
    pybis==1.35 igor2 ipykernel sphinx dash dash-bootstrap-components \
    graphviz jupyterlab

# Set the default environment when the container starts
ENV CONDA_DEFAULT_ENV=$ENV_NAME
ENV PATH=/opt/conda/envs/$ENV_NAME/bin:$PATH

# Create necessary directories for VOLUME
RUN mkdir -p /idear/data /idear/figures /idear/notebooks /idear/scripts
#RUN mkdir -p /mnt/lac_ord

# Copy and install dependencies from requirements.txt BEFORE copying the full
# source tree: the (slow) install layer then stays cached until
# requirements.txt itself changes, instead of being rebuilt on every code edit.
COPY requirements.txt /idear/requirements.txt
RUN conda run -n $ENV_NAME pip install --no-cache-dir -r /idear/requirements.txt

# Copy project files, excluding certain directories (handled via .dockerignore)
COPY . /idear

# Define volumes for excluded directories
# VOLUME ["/idear/data", "/idear/figures", "/idear/notebooks", "/idear/scripts"]

# If you want to set JupyterLab as the default command
#CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token='my-token'"]
CMD ["/bin/bash"]

View File

@ -1,4 +1,4 @@
# IDEAR FAIRification Toolkit # IDEAR Project Name
This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets. This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.
@ -75,13 +75,14 @@ cd <your-repo-name>
This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging. This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.
1. Open **PowerShell as Administrator** and navigate to the `acsmnode` repository. 1. Open **PowerShell as Administrator** and navigate to the `your-repo-name` repository.
2. Create a `.env` file in the root of `acsmnode/`. 2. Create a `.env` file in the root of `your-repo-name/`.
3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines: 3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines:
```plaintext ```plaintext
CIFS_USER=<your-username> CIFS_USER=<your-username>
CIFS_PASS=<your-password> CIFS_PASS=<your-password>
JUPYTER_TOKEN=my-token JUPYTER_TOKEN=my-token
NETWORK_MOUNT=//your-server/your-share
``` ```
**To protect your credentials:** **To protect your credentials:**
- Do not share the .env file with others. - Do not share the .env file with others.
@ -103,7 +104,7 @@ This toolkit includes a containerized JupyterLab environment for executing the d
docker compose up idear_processor_networked docker compose up idear_processor_networked
6. Access: 6. Access:
- **Jupyter Lab**: [http://localhost:8889/lab/tree/notebooks/](http://localhost:8889/lab/tree/notebooks/) - **Jupyter Lab**: [http://localhost:8889/lab/tree/](http://localhost:8889/lab/tree/)
7. Stop the app: 7. Stop the app:
In the previously open PowerShell terminal, enter: In the previously open PowerShell terminal, enter:

67
campaignDescriptor.yaml Normal file
View File

@ -0,0 +1,67 @@
# ------------------------------------------------------------------------------
# Configuration for FAIR Data Integration Pipeline
# Campaign descriptor template: copy and fill in per campaign/experiment.
# ------------------------------------------------------------------------------
# INPUT DATA DIRECTORY
# Can be a local or network path. Examples:
# - Local: '../data/data_folder/' # manually create data_folder/ with instrument folders
# - Network: /mnt/network_drive/data_folder (Docker)
# NOTE(review): the network path assumes the docker-compose CIFS volume is
# mounted at /mnt/network_drive — confirm against your compose setup.
input_file_directory: '/mnt/network_drive/Data'
# OUTPUT DATA DIRECTORY
# Always relative to notebook location. If run from `notebooks/`,
# output will be saved in `../data/`.
# Do not modify unless you're running from the project root.
output_file_directory: '../data/'
# ------------------------------------------------------------------------------
# Project Metadata
# ------------------------------------------------------------------------------
project: 'Insert project title here'
contact: 'Your Name or Team'
group_id: '0000' # Optional internal group or project ID
# Type of experiment (e.g., campaign, flow_tube_study, smog_chamber, lab_study)
experiment: 'experiment_type'
# Dataset coverage range (optional but recommended)
# Dates are ISO 8601 strings: 'YYYY-MM-DD'
dataset_startdate: 'YYYY-MM-DD'
dataset_enddate: 'YYYY-MM-DD'
# Data processing level (typically '0', '1', or '2'; follows ACTRIS or custom standards)
actris_level: '1'
# ------------------------------------------------------------------------------
# Output File Naming Convention (Optional)
# ------------------------------------------------------------------------------
# Year of observation
year: 'YYYY'
# Format string used to define output filenames.
# You may use any field from this config as a part, comma-separated.
# Example: 'experiment, year' → experiment_year.h5
filename_format: 'experiment, year'
# ------------------------------------------------------------------------------
# Instrument Data Source
# ------------------------------------------------------------------------------
# Relative subdirectories inside the input directory that contain instrument data.
# Use one or more folder paths as needed (nested subfolders are allowed).
instrument_datafolder:
  - 'instFolder1/subfolder/'
  - 'instFolder2'
# ------------------------------------------------------------------------------
# Data Integration Options
# ------------------------------------------------------------------------------
# Integration mode: 'collection' or 'single_experiment'.
integration_mode: 'collection'
# Optional: list of timestamps marking experimental phases or steps.
# Format each entry as: 'YYYY-MM-DD HH:MM:SS'
# Leave as an empty list ([]) when no step annotations are needed.
datetime_steps: []

42
docker-compose.yaml Normal file
View File

@ -0,0 +1,42 @@
# NOTE: the top-level `version:` key is obsolete — Compose v2 ignores it and
# emits a warning, so it has been removed.
services:
  # Networked variant: mounts the CIFS network share at /mnt/network_drive.
  # The shared settings are anchored (&idear_service) and reused below so the
  # two services cannot drift apart.
  idear_processor_networked: &idear_service
    image: idear_processor
    restart: unless-stopped
    environment:
      - DOCKER_CONTAINER=1
    ports:
      # Host port configurable via JUPYTER_PORT in .env (default 8889).
      - "${JUPYTER_PORT:-8889}:8888"
    volumes:
      - ./:/idear
      - network_drive:/mnt/network_drive:rw
    command: >
      bash -c "
      jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
      "
    profiles:
      - networked
  # Local-only variant: identical to the networked service except it does not
  # mount the network drive; overrides `volumes` and `profiles` from the anchor.
  idear_processor:
    <<: *idear_service
    volumes:
      - ./:/idear
    profiles:
      - local-only
volumes:
  # CIFS/SMB network share; credentials and mount target come from the .env
  # file (CIFS_USER, CIFS_PASS, NETWORK_MOUNT) — never commit .env.
  network_drive:
    driver_opts:
      type: cifs
      o: "username=${CIFS_USER},password=${CIFS_PASS},vers=3.0"
      device: "${NETWORK_MOUNT}"