diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..826205b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,47 @@
+# Get additional info about the Dockerfile at https://docs.docker.com/reference/dockerfile/
+
+FROM continuumio/miniconda3:latest
+
+# Define the name of the environment (overridable at build time via --build-arg)
+ARG ENV_NAME=idear_env
+ENV ENV_NAME=$ENV_NAME
+
+# Set the working directory
+WORKDIR /idear
+
+#RUN apt-get update && apt-get install -y cifs-utils
+
+# Install mamba for faster environment creation
+RUN conda install -y -n base -c conda-forge mamba && \
+    mamba create -y -n $ENV_NAME -c conda-forge python=3.11 \
+    jupyter numpy h5py pandas matplotlib plotly=5.24 scipy pip && \
+    conda clean --all -y && rm -rf /root/.cache/pip
+
+# Install additional pip packages into the environment
+RUN conda run -n $ENV_NAME pip install pybis==1.35 igor2 ipykernel sphinx dash dash-bootstrap-components
+
+# Set the default environment when the container starts
+ENV CONDA_DEFAULT_ENV=$ENV_NAME
+ENV PATH=/opt/conda/envs/$ENV_NAME/bin:$PATH
+
+# Create necessary directories for VOLUME
+RUN mkdir -p /idear/data /idear/figures /idear/notebooks /idear/scripts
+#RUN mkdir -p /mnt/lac_ord
+
+# Copy project files, excluding certain directories (handled via .dockerignore)
+COPY . /idear
+
+# Copy and install dependencies from requirements.txt
+COPY requirements.txt /idear/requirements.txt
+RUN conda run -n $ENV_NAME pip install -r /idear/requirements.txt
+
+# Define volumes for excluded directories
+# VOLUME ["/idear/data", "/idear/figures", "/idear/notebooks", "/idear/scripts"]
+
+# Add graphviz and JupyterLab
+RUN pip install graphviz
+RUN pip install --no-cache-dir jupyterlab
+
+# If you want to set JupyterLab as the default command
+#CMD ["jupyter", "lab", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--NotebookApp.token='my-token'"]
+CMD ["/bin/bash"]
\ No newline at end of file
diff --git a/README.md b/README.md
index 2ecfbbb..da89907 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# IDEAR FAIRification Toolkit
+# IDEAR Project Name
 
 This is a **containerized, JupyterLab-based data toolkit** developed as part of the IDEAR project. It supports efficient, reproducible, and metadata-enriched data processing workflows for instrument-generated datasets.
 
@@ -75,13 +75,14 @@ cd
 This toolkit includes a containerized JupyterLab environment for executing the data processing pipeline, plus an optional dashboard for manual flagging.
 
-1. Open **PowerShell as Administrator** and navigate to the `acsmnode` repository.
-2. Create a `.env` file in the root of `acsmnode/`.
+1. Open **PowerShell as Administrator** and navigate to the `your-repo-name` repository.
+2. Create a `.env` file in the root of `your-repo-name/`.
 3. **Securely store your network drive access credentials** in the `.env` file by adding the following lines:
    ```plaintext
    CIFS_USER=
    CIFS_PASS=
    JUPYTER_TOKEN=my-token
+   NETWORK_MOUNT=//your-server/your-share
    ```
 
    **To protect your credentials:**
   - Do not share the .env file with others.
@@ -103,7 +104,7 @@ This toolkit includes a containerized JupyterLab environment for executing the d
       docker compose up idear_processor_networked
 
 6. Access:
-   - **Jupyter Lab**: [http://localhost:8889/lab/tree/notebooks/](http://localhost:8889/lab/tree/notebooks/)
+   - **Jupyter Lab**: [http://localhost:8889/lab/tree/](http://localhost:8889/lab/tree/)
 7. Stop the app: In the previously open PowerShell terminal, enter:
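Note: `docker-compose.yaml` (added further below) references `image: idear_processor` without a `build:` section, so the image defined by this Dockerfile must be available locally under that tag before `docker compose up` will work. A minimal sketch of the build-and-run sequence implied by the Dockerfile and the README steps, assuming it is run from the repository root; the `.env` values shown are the README's placeholders, not real credentials:

```bash
# Build the image under the tag the compose file expects (run from the repository root).
docker build -t idear_processor .

# Create the .env file described in the README next to docker-compose.yaml, containing:
#   CIFS_USER=          (your network-drive user; placeholder)
#   CIFS_PASS=          (your network-drive password; placeholder)
#   JUPYTER_TOKEN=my-token
#   NETWORK_MOUNT=//your-server/your-share

# Start the networked service; JupyterLab is then reachable at
# http://localhost:8889/lab/tree/ using the token from JUPYTER_TOKEN.
docker compose up idear_processor_networked
```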
diff --git a/campaignDescriptor.yaml b/campaignDescriptor.yaml
new file mode 100644
index 0000000..b327c7f
--- /dev/null
+++ b/campaignDescriptor.yaml
@@ -0,0 +1,67 @@
+# ------------------------------------------------------------------------------
+# Configuration for FAIR Data Integration Pipeline
+# ------------------------------------------------------------------------------
+
+# INPUT DATA DIRECTORY
+# Can be a local or network path. Examples:
+# - Local: '../data/data_folder/'  # manually create data_folder/ with instrument folders
+# - Network: /mnt/network_drive/data_folder (Docker)
+input_file_directory: '/mnt/network_drive/Data'
+
+# OUTPUT DATA DIRECTORY
+# Always relative to notebook location. If run from `notebooks/`,
+# output will be saved in `../data/`.
+# Do not modify unless you're running from the project root.
+output_file_directory: '../data/'
+
+# ------------------------------------------------------------------------------
+# Project Metadata
+# ------------------------------------------------------------------------------
+
+project: 'Insert project title here'
+contact: 'Your Name or Team'
+group_id: '0000'  # Optional internal group or project ID
+
+# Type of experiment (e.g., campaign, flow_tube_study, smog_chamber, lab_study)
+experiment: 'experiment_type'
+
+# Dataset coverage range (optional but recommended)
+dataset_startdate: 'YYYY-MM-DD'
+dataset_enddate: 'YYYY-MM-DD'
+
+# Data processing level (typically '0', '1', or '2'; follows ACTRIS or custom standards)
+actris_level: '1'
+
+# ------------------------------------------------------------------------------
+# Output File Naming Convention (Optional)
+# ------------------------------------------------------------------------------
+
+# Year of observation
+year: 'YYYY'
+
+# Format string used to define output filenames.
+# You may use any field from this config as a part, comma-separated.
+# Example: 'experiment, year' → experiment_year.h5
+filename_format: 'experiment, year'
+
+# ------------------------------------------------------------------------------
+# Instrument Data Source
+# ------------------------------------------------------------------------------
+
+# Relative subdirectories inside the input directory that contain instrument data.
+# Use one or more folder paths as needed.
+instrument_datafolder:
+  - 'instFolder1/subfolder/'
+  - 'instFolder2'
+
+# ------------------------------------------------------------------------------
+# Data Integration Options
+# ------------------------------------------------------------------------------
+
+# Integration mode: 'collection' or 'single_experiment'.
+integration_mode: 'collection'
+
+# Optional: list of timestamps marking experimental phases or steps.
+# Format each entry as: 'YYYY-MM-DD HH:MM:SS'
+datetime_steps: []
+
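For a quick test without the CIFS share, the descriptor's own comments suggest pointing `input_file_directory` at a local folder such as `'../data/data_folder/'`. A hedged sketch of that local-only workflow, assuming the repository root as the working directory; the instrument folder names are the placeholders from the config above, not real instrument directories:

```bash
# Create a local input folder with one subfolder per instrument (placeholder names).
mkdir -p data/data_folder/instFolder1 data/data_folder/instFolder2

# Edit campaignDescriptor.yaml so the pipeline reads from the local folder
# instead of the network drive:
#   input_file_directory: '../data/data_folder/'

# Start the container variant that does not mount the CIFS volume.
docker compose up idear_processor
```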
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..6272870
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,42 @@
+version: '3.9'
+
+services:
+  idear_processor_networked:
+    image: idear_processor
+    restart: unless-stopped
+    environment:
+      - DOCKER_CONTAINER=1
+    ports:
+      - "${JUPYTER_PORT:-8889}:8888"
+    volumes:
+      - ./:/idear
+      - network_drive:/mnt/network_drive:rw
+    command: >
+      bash -c "
+      jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
+      "
+    profiles:
+      - networked
+
+  idear_processor:
+    image: idear_processor
+    restart: unless-stopped
+    environment:
+      - DOCKER_CONTAINER=1
+    ports:
+      - "${JUPYTER_PORT:-8889}:8888"
+    volumes:
+      - ./:/idear
+    command: >
+      bash -c "
+      jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token='${JUPYTER_TOKEN:-my-token}'
+      "
+    profiles:
+      - local-only
+
+volumes:
+  network_drive:
+    driver_opts:
+      type: cifs
+      o: "username=${CIFS_USER},password=${CIFS_PASS},vers=3.0"
+      device: "${NETWORK_MOUNT}"
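Because the CIFS credentials and the share path are interpolated from `.env` into the named volume's `driver_opts`, it can be worth verifying the substitution before the first start and tearing the stack down cleanly afterwards. A minimal sketch using standard Docker Compose commands; these are not part of this changeset:

```bash
# Render the effective configuration with the .env values substituted.
# Note: the output includes the interpolated CIFS credentials, so keep it private.
docker compose --profile networked config

# Stop and remove the containers when finished; add -v to also remove the named volume.
docker compose --profile networked down
```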