Update README.md campaign descriptor template and docker environment
This commit is contained in:
47
Dockerfile
Normal file
47
Dockerfile
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
# syntax=docker/dockerfile:1
# Get additional info about the Dockerfile at https://docs.docker.com/reference/dockerfile/

# Pin the base image to a specific release: ":latest" can silently change
# between builds and breaks reproducibility (hadolint DL3007).
# TODO confirm this is the miniconda release the team has validated against.
FROM continuumio/miniconda3:24.9.2-0

# Define the name of the environment.
# ARG is build-time only; re-export the SAME value as ENV (instead of
# re-hardcoding it) so `--build-arg ENV_NAME=...` stays consistent at runtime.
ARG ENV_NAME=idear_env
ENV ENV_NAME=${ENV_NAME}

# Set the working directory (created automatically if missing).
WORKDIR /idear

# Uncomment to support mounting CIFS/SMB network shares from inside the container.
#RUN apt-get update && apt-get install -y cifs-utils

# Use mamba for faster installation; clean conda/pip caches in the SAME
# layer so the intermediate files never persist in the image.
RUN conda install -y -n base -c conda-forge mamba && \
    mamba create -y -n $ENV_NAME -c conda-forge python=3.11 \
    jupyter numpy h5py pandas matplotlib plotly=5.24 scipy pip && \
    conda clean --all -y && rm -rf /root/.cache/pip

# Install additional pip packages into the environment.
# --no-cache-dir keeps the pip download cache out of the layer (DL3042).
RUN conda run -n $ENV_NAME pip install --no-cache-dir \
    pybis==1.35 igor2 ipykernel sphinx dash dash-bootstrap-components

# Make the environment the default when the container starts; putting its
# bin/ first on PATH means plain `pip`/`jupyter` below resolve to the env.
ENV CONDA_DEFAULT_ENV=$ENV_NAME
ENV PATH=/opt/conda/envs/$ENV_NAME/bin:$PATH

# Create necessary directories for VOLUME
RUN mkdir -p /idear/data /idear/figures /idear/notebooks /idear/scripts

#RUN mkdir -p /mnt/lac_ord

# Copy and install dependencies from requirements.txt BEFORE copying the
# full source tree: the slow install layer is then only rebuilt when
# requirements.txt itself changes, not on every source edit.
COPY requirements.txt /idear/requirements.txt
RUN conda run -n $ENV_NAME pip install --no-cache-dir -r /idear/requirements.txt

# Copy project files, excluding certain directories (handled via .dockerignore)
COPY . /idear

# Define volumes for excluded directories
# VOLUME ["/idear/data", "/idear/figures", "/idear/notebooks", "/idear/scripts"]

# Add JupyterLab and graphviz in one layer (PATH already points at the env's pip).
RUN pip install --no-cache-dir graphviz jupyterlab

# If you want to set JupyterLab as the default command
#CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token='my-token'"]

CMD ["/bin/bash"]
|
@ -1,4 +1,4 @@
|
|||||||
# IDEAR FAIRification Toolkit
|
# IDEAR Project Name
|
||||||
|
|
||||||
This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.
|
This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.
|
||||||
|
|
||||||
@ -75,13 +75,14 @@ cd <your-repo-name>
|
|||||||
|
|
||||||
This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.
|
This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.
|
||||||
|
|
||||||
1. Open **PowerShell as Administrator** and navigate to the `acsmnode` repository.
|
1. Open **PowerShell as Administrator** and navigate to the `your-repo-name` repository.
|
||||||
2. Create a `.env` file in the root of `acsmnode/`.
|
2. Create a `.env` file in the root of `your-repo-name/`.
|
||||||
3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines:
|
3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines:
|
||||||
```plaintext
|
```plaintext
|
||||||
CIFS_USER=<your-username>
|
CIFS_USER=<your-username>
|
||||||
CIFS_PASS=<your-password>
|
CIFS_PASS=<your-password>
|
||||||
JUPYTER_TOKEN=my-token
|
JUPYTER_TOKEN=my-token
|
||||||
|
NETWORK_MOUNT=//your-server/your-share
|
||||||
```
|
```
|
||||||
**To protect your credentials:**
|
**To protect your credentials:**
|
||||||
- Do not share the `.env` file with others.
|
- Do not share the `.env` file with others.
|
||||||
@ -103,7 +104,7 @@ This toolkit includes a containerized JupyterLab environment for executing the d
|
|||||||
docker compose up idear_processor_networked
|
docker compose up idear_processor_networked
|
||||||
|
|
||||||
6. Access:
|
6. Access:
|
||||||
- **Jupyter Lab**: [http://localhost:8889/lab/tree/notebooks/](http://localhost:8889/lab/tree/notebooks/)
|
- **Jupyter Lab**: [http://localhost:8889/lab/tree/](http://localhost:8889/lab/tree/)
|
||||||
|
|
||||||
7. Stop the app:
|
7. Stop the app:
|
||||||
In the previously open PowerShell terminal, enter:
|
In the previously open PowerShell terminal, enter:
|
||||||
|
67
campaignDescriptor.yaml
Normal file
67
campaignDescriptor.yaml
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
# ==============================================================================
# Configuration for FAIR Data Integration Pipeline
# ==============================================================================

# INPUT DATA DIRECTORY
# May point at a local or a network location, e.g.:
#   local:   '../data/data_folder/'           # create data_folder/ with instrument folders by hand
#   network: /mnt/network_drive/data_folder   # when running inside Docker
input_file_directory: '/mnt/network_drive/Data'

# OUTPUT DATA DIRECTORY
# Interpreted relative to the notebook's location: running from `notebooks/`
# writes results into `../data/`. Leave unchanged unless executing from the
# project root.
output_file_directory: '../data/'

# ==============================================================================
# Project Metadata
# ==============================================================================

project: 'Insert project title here'
contact: 'Your Name or Team'
group_id: '0000'  # Optional internal group or project ID

# Kind of experiment (e.g., campaign, flow_tube_study, smog_chamber, lab_study)
experiment: 'experiment_type'

# Temporal coverage of the dataset (optional but recommended)
dataset_startdate: 'YYYY-MM-DD'
dataset_enddate: 'YYYY-MM-DD'

# Data processing level -- typically '0', '1', or '2' (ACTRIS or custom standards)
actris_level: '1'

# ==============================================================================
# Output File Naming Convention (Optional)
# ==============================================================================

# Year of observation
year: 'YYYY'

# Comma-separated list of config fields used to build output filenames.
# Example: 'experiment, year' -> experiment_year.h5
filename_format: 'experiment, year'

# ==============================================================================
# Instrument Data Source
# ==============================================================================

# Subdirectories (relative to the input directory) holding instrument data;
# list one or more folder paths as needed.
instrument_datafolder:
  - 'instFolder1/subfolder/'
  - 'instFolder2'

# ==============================================================================
# Data Integration Options
# ==============================================================================

# Integration mode: 'collection' or 'single_experiment'.
integration_mode: 'collection'

# Optional list of timestamps marking experimental phases or steps,
# each formatted as 'YYYY-MM-DD HH:MM:SS'.
datetime_steps: []
|
||||||
|
|
42
docker-compose.yaml
Normal file
42
docker-compose.yaml
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
# NOTE: the top-level `version:` key is obsolete under the Compose
# Specification (Compose v2 warns and ignores it), so it is omitted.

# Settings shared by both service variants (compose-spec extension field
# plus a YAML anchor; merged into each service with `<<:`).
x-idear-common: &idear-common
  image: idear_processor
  restart: unless-stopped
  environment:
    - DOCKER_CONTAINER=1
  ports:
    - "${JUPYTER_PORT:-8889}:8888"
  command: >
    bash -c "
    jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
    "

services:
  # Variant with the CIFS network share mounted at /mnt/network_drive.
  idear_processor_networked:
    <<: *idear-common
    volumes:
      - ./:/idear
      - network_drive:/mnt/network_drive:rw
    profiles:
      - networked

  # Local-only variant: same image and command, no network share.
  idear_processor:
    <<: *idear-common
    volumes:
      - ./:/idear
    profiles:
      - local-only

volumes:
  # CIFS/SMB share; credentials are read from the .env file
  # (CIFS_USER / CIFS_PASS / NETWORK_MOUNT) -- never commit that file.
  network_drive:
    driver_opts:
      type: cifs
      o: "username=${CIFS_USER},password=${CIFS_PASS},vers=3.0"
      device: "${NETWORK_MOUNT}"
|
Reference in New Issue
Block a user