Update README.md campaign descriptor template and docker environment
This commit is contained in:
47
Dockerfile
Normal file
47
Dockerfile
Normal file
@ -0,0 +1,47 @@
|
||||
# Get additional info about the Dockerfile at https://docs.docker.com/reference/dockerfile/
|
||||
|
||||
# Pin the base image to a released tag so that rebuilds are reproducible
# (":latest" silently changes underneath the build).
FROM continuumio/miniconda3:24.7.1-0
|
||||
|
||||
# Define the name of the environment
|
||||
ARG ENV_NAME=idear_env
|
||||
# Forward the build argument into the image environment so that
# `--build-arg ENV_NAME=...` is honored; a hard-coded ENV value would
# override the ARG of the same name in every later instruction.
ENV ENV_NAME=${ENV_NAME}
|
||||
|
||||
# Set the working directory
|
||||
WORKDIR /idear
|
||||
|
||||
#RUN apt-get update && apt-get install -y cifs-utils
|
||||
|
||||
# Install mamba into the base environment and use it for faster environment creation
|
||||
RUN conda install -y -n base -c conda-forge mamba && \
|
||||
mamba create -y -n $ENV_NAME -c conda-forge python=3.11 \
|
||||
jupyter numpy h5py pandas matplotlib plotly=5.24 scipy pip && \
|
||||
conda clean --all -y && rm -rf /root/.cache/pip
|
||||
|
||||
# Activate the environment and install additional pip packages
|
||||
# --no-cache-dir keeps pip's download cache out of the image layer
RUN conda run -n $ENV_NAME pip install --no-cache-dir pybis==1.35 igor2 ipykernel sphinx dash dash-bootstrap-components
|
||||
|
||||
# Set the default environment when the container starts
|
||||
ENV CONDA_DEFAULT_ENV=$ENV_NAME
|
||||
ENV PATH=/opt/conda/envs/$ENV_NAME/bin:$PATH
|
||||
|
||||
# Create necessary directories for VOLUME
|
||||
RUN mkdir -p /idear/data /idear/figures /idear/notebooks /idear/scripts
|
||||
#RUN mkdir -p /mnt/lac_ord
|
||||
|
||||
# Copy and install dependencies from requirements.txt first, so this layer
# stays cached until requirements.txt itself changes.
# NOTE(review): assumes requirements.txt does not reference local project
# paths (e.g. "-e ."); if it does, the install must run after the full copy.
COPY requirements.txt /idear/requirements.txt
RUN conda run -n $ENV_NAME pip install --no-cache-dir -r /idear/requirements.txt

# Copy project files, excluding certain directories (handled via .dockerignore)
COPY . /idear
|
||||
|
||||
# Define volumes for excluded directories
|
||||
# VOLUME ["/idear/data", "/idear/figures", "/idear/notebooks", "/idear/scripts"]
|
||||
|
||||
# Add JupyterLab
|
||||
# `pip` resolves to the conda environment's pip through the PATH set earlier.
# Install both packages in a single layer and keep pip's cache out of the image.
RUN pip install --no-cache-dir graphviz jupyterlab
|
||||
|
||||
# If you want to set JupyterLab as the default command
|
||||
#CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token='my-token'"]
|
||||
CMD ["/bin/bash"]
|
@ -1,4 +1,4 @@
|
||||
# IDEAR FAIRification Toolkit
|
||||
# IDEAR Project Name
|
||||
|
||||
This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.
|
||||
|
||||
@ -75,13 +75,14 @@ cd <your-repo-name>
|
||||
|
||||
This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.
|
||||
|
||||
1. Open **PowerShell as Administrator** and navigate to the `acsmnode` repository.
|
||||
2. Create a `.env` file in the root of `acsmnode/`.
|
||||
1. Open **PowerShell as Administrator** and navigate to the `your-repo-name` repository.
|
||||
2. Create a `.env` file in the root of `your-repo-name/`.
|
||||
3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines:
|
||||
```plaintext
|
||||
CIFS_USER=<your-username>
|
||||
CIFS_PASS=<your-password>
|
||||
JUPYTER_TOKEN=my-token
|
||||
NETWORK_MOUNT=//your-server/your-share
|
||||
```
|
||||
**To protect your credentials:**
|
||||
- Do not share the .env file with others.
|
||||
@ -103,7 +104,7 @@ This toolkit includes a containerized JupyterLab environment for executing the d
|
||||
docker compose up idear_processor_networked
|
||||
|
||||
6. Access:
|
||||
- **Jupyter Lab**: [http://localhost:8889/lab/tree/notebooks/](http://localhost:8889/lab/tree/notebooks/)
|
||||
- **Jupyter Lab**: [http://localhost:8889/lab/tree/](http://localhost:8889/lab/tree/)
|
||||
|
||||
7. Stop the app:
|
||||
In the PowerShell terminal you opened earlier, enter:
|
||||
|
67
campaignDescriptor.yaml
Normal file
67
campaignDescriptor.yaml
Normal file
@ -0,0 +1,67 @@
|
||||
# ------------------------------------------------------------------------------
|
||||
# Configuration for FAIR Data Integration Pipeline
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# INPUT DATA DIRECTORY
|
||||
# Can be a local or network path. Examples:
|
||||
# - Local: '../data/data_folder/' # manually create data_folder/ with instrument folders
|
||||
# - Network: /mnt/network_drive/data_folder (Docker)
|
||||
input_file_directory: '/mnt/network_drive/Data'
|
||||
|
||||
# OUTPUT DATA DIRECTORY
|
||||
# Always relative to notebook location. If run from `notebooks/`,
|
||||
# output will be saved in `../data/`.
|
||||
# Do not modify unless you're running from the project root.
|
||||
output_file_directory: '../data/'
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Project Metadata
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
project: 'Insert project title here'
|
||||
contact: 'Your Name or Team'
|
||||
group_id: '0000' # Optional internal group or project ID
|
||||
|
||||
# Type of experiment (e.g., campaign, flow_tube_study, smog_chamber, lab_study)
|
||||
experiment: 'experiment_type'
|
||||
|
||||
# Dataset coverage range (optional but recommended)
|
||||
dataset_startdate: 'YYYY-MM-DD'
|
||||
dataset_enddate: 'YYYY-MM-DD'
|
||||
|
||||
# Data processing level (typically '0', '1', or '2'; follows ACTRIS or custom standards)
|
||||
actris_level: '1'
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Output File Naming Convention (Optional)
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Year of observation
|
||||
year: 'YYYY'
|
||||
|
||||
# Format string used to define output filenames.
|
||||
# List one or more field names from this config, separated by commas.
|
||||
# Example: 'experiment, year' → experiment_year.h5
|
||||
filename_format: 'experiment, year'
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Instrument Data Source
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Relative subdirectories inside the input directory that contain instrument data.
|
||||
# Use one or more folder paths as needed.
|
||||
instrument_datafolder:
|
||||
- 'instFolder1/subfolder/'
|
||||
- 'instFolder2'
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Data Integration Options
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Integration mode: 'collection' or 'single_experiment'.
|
||||
integration_mode: 'collection'
|
||||
|
||||
# Optional: list of timestamps marking experimental phases or steps.
|
||||
# Format each entry as: 'YYYY-MM-DD HH:MM:SS'
|
||||
datetime_steps: []
|
||||
|
42
docker-compose.yaml
Normal file
42
docker-compose.yaml
Normal file
@ -0,0 +1,42 @@
|
||||
version: '3.9'
|
||||
|
||||
services:
|
||||
idear_processor_networked:
|
||||
image: idear_processor
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- DOCKER_CONTAINER=1
|
||||
ports:
|
||||
- "${JUPYTER_PORT:-8889}:8888"
|
||||
volumes:
|
||||
- ./:/idear
|
||||
- network_drive:/mnt/network_drive:rw
|
||||
command: >
|
||||
bash -c "
|
||||
jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --ServerApp.token='${JUPYTER_TOKEN:-my-token}'
|
||||
"
|
||||
profiles:
|
||||
- networked
|
||||
|
||||
idear_processor:
|
||||
image: idear_processor
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- DOCKER_CONTAINER=1
|
||||
ports:
|
||||
- "${JUPYTER_PORT:-8889}:8888"
|
||||
volumes:
|
||||
- ./:/idear
|
||||
command: >
|
||||
bash -c "
|
||||
jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --ServerApp.token='${JUPYTER_TOKEN:-my-token}'
|
||||
"
|
||||
profiles:
|
||||
- local-only
|
||||
|
||||
volumes:
|
||||
network_drive:
|
||||
driver_opts:
|
||||
type: cifs
|
||||
o: "username=${CIFS_USER},password=${CIFS_PASS},vers=3.0"
|
||||
device: "${NETWORK_MOUNT}"
|
Reference in New Issue
Block a user