From cc0cd5388dfe55865b1135b2ce754613dbda9892 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Fri, 8 Oct 2021 14:34:25 +0200 Subject: [PATCH 1/3] alphafold/2.0.0-b88f8da release --- MX/alphafold/build | 37 +++++++++++++++++++++++++++++++++++++ MX/alphafold/files/variants | 1 + MX/alphafold/modulefile | 15 +++++++++++++++ 3 files changed, 53 insertions(+) create mode 100755 MX/alphafold/build create mode 100644 MX/alphafold/files/variants create mode 100644 MX/alphafold/modulefile diff --git a/MX/alphafold/build b/MX/alphafold/build new file mode 100755 index 0000000..ffd9f4b --- /dev/null +++ b/MX/alphafold/build @@ -0,0 +1,37 @@ +#!/usr/bin/env modbuild + +pbuild::add_to_group 'MX' + + +pbuild::prep() { + : +} + +pbuild::configure() { + : +} + +pbuild::compile() { + : +} + +pbuild::install() { + ALPHAFOLD_HOME="$PREFIX/alphafold" + + local BRANCH + if [[ "${#V_RELEASE}" -eq 7 ]]; then + # Release looks like a git hash + BRANCH="${V_RELEASE}" + else + BRANCH="v${V_PKG}" + fi + + git clone --depth=1 -b "$BRANCH" https://github.com/deepmind/alphafold.git "$ALPHAFOLD_HOME" || return $? + if ! 
[ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then + wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + fi + wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \ + https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh + chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh" +} diff --git a/MX/alphafold/files/variants b/MX/alphafold/files/variants new file mode 100644 index 0000000..37ff276 --- /dev/null +++ b/MX/alphafold/files/variants @@ -0,0 +1 @@ +alphafold/2.0.0-b88f8da anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 diff --git a/MX/alphafold/modulefile b/MX/alphafold/modulefile new file mode 100644 index 0000000..e489db6 --- /dev/null +++ b/MX/alphafold/modulefile @@ -0,0 +1,15 @@ +#%Module1.0 + +module-whatis "AlphaFold" +module-url "https://github.com/deepmind/alphafold/" +module-license "Code: Apache 2.0 License. Parameters: Noncommercial CC-BY-NC 4.0" +module-maintainer "Spencer Bliven Date: Fri, 8 Oct 2021 15:19:17 +0200 Subject: [PATCH 2/3] Add submission scripts --- MX/alphafold/bin/submit.sh | 91 +++++++++++++++++++++++++++++++ MX/alphafold/bin/submit_merlin.sh | 23 ++++++++ MX/alphafold/bin/submit_ra.sh | 30 ++++++++++ 3 files changed, 144 insertions(+) create mode 100755 MX/alphafold/bin/submit.sh create mode 100755 MX/alphafold/bin/submit_merlin.sh create mode 100755 MX/alphafold/bin/submit_ra.sh diff --git a/MX/alphafold/bin/submit.sh b/MX/alphafold/bin/submit.sh new file mode 100755 index 0000000..de5e3c5 --- /dev/null +++ b/MX/alphafold/bin/submit.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +# Generic alphafold submission script. +# Set the ALPHAFOLD_DATA variable before running. +# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date] +# +# Output will be in the same directory as the fasta_file. +# Slurm logs will be in the current directory. 
+# +# 2021-08-09 Spencer Bliven, D.Ozerov +# + +# Bash strict mode +set -euo pipefail +IFS=$'\n\t' + +usage () { + echo "Usage: sbatch [slurm_opts] \$ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]" +} + +# Parse parameters; exit non-zero on bad input so the job is not reported as successful +if [ "$#" -lt 1 ] +then + echo "No fasta_file name" >&2 + usage >&2 + exit 1 +fi + +FASTA_FILE=$(readlink -f "$1" || true) # tolerate readlink failure so the check below reports it +if [ -z "$FASTA_FILE" ] || [ ! -e "$FASTA_FILE" ] +then + echo "${FASTA_FILE} is not reachable (input argument was $1)" >&2 + exit 1 +fi + +DIR_QUERY=$(dirname "${FASTA_FILE}") +LOG="${DIR_QUERY}/alphafold.out" + +if [ "$#" -ge 2 ] +then + MAX_TEMPLATE_DATE=$2 +else + MAX_TEMPLATE_DATE=$(date '+%Y-%m-%d') +fi + +date > "$LOG" +hostname >> "$LOG" + +set +u # Allow unset variables in activate commands +module purge +module use MX unstable +module load anaconda/2019.07 cuda/11.0.3 alphafold/2.0.0-b88f8da 2>> "$LOG" +conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}" +set -u + +# Check the module loaded correctly +if ! [ -d "${ALPHAFOLD_HOME}" ]; then + echo "Error: $ALPHAFOLD_HOME not available" >&2 + exit 1 +fi + +# Data dir +if ! 
[ -d "${ALPHAFOLD_DATA:?Set ALPHAFOLD_DATA before running}" ]; then + echo "Error: ALPHAFOLD_DATA directory not available ($ALPHAFOLD_DATA)" >&2 + exit 1 +fi + +echo "GPUs: ${CUDA_VISIBLE_DEVICES:-None}" >> "$LOG" +echo "Detecting GPUs with Tensorflow:" >> "$LOG" +python -c 'import tensorflow as tf; tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))' 2>&1 | + sed -rn 's/^.* (Created TensorFlow device.*)$/\1/p' >> "$LOG" +echo >> "$LOG" + +echo "Running alphafold from $PWD for fasta sequence : " >> "$LOG" +cat "${FASTA_FILE}" >> "$LOG" +echo "and max_template_date : ${MAX_TEMPLATE_DATE} " >> "$LOG" +echo >> "$LOG" + + +cd "${ALPHAFOLD_HOME}" +CMD=("./run_alphafold.sh" -p full_dbs -d "${ALPHAFOLD_DATA}" -o "${DIR_QUERY}" -m model_1,model_2,model_3,model_4,model_5 -f "${FASTA_FILE}" -t "${MAX_TEMPLATE_DATE}") +if [ -z "${CUDA_VISIBLE_DEVICES:-}" ] +then + CMD+=(-g false) +else + CMD+=(-a "$CUDA_VISIBLE_DEVICES") +fi + +echo "Run: ${CMD[@]}" >> "$LOG" +echo >> "$LOG" +( ( time "${CMD[@]}" ) 2>&1 ) >> "$LOG" diff --git a/MX/alphafold/bin/submit_merlin.sh b/MX/alphafold/bin/submit_merlin.sh new file mode 100755 index 0000000..2df1348 --- /dev/null +++ b/MX/alphafold/bin/submit_merlin.sh @@ -0,0 +1,23 @@ +#!/bin/bash +#SBATCH -p gpu +#SBATCH -J alphafold +#SBATCH -M gmerlin6 +#SBATCH --gpus=1 +#SBATCH -n 1 +#SBATCH -c 10 + +# Alphafold submission script for the merlin cluster +# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date] +# +# Output will be in the same directory as the fasta_file. +# Slurm logs will be in the current directory. 
+# +# 2021-08-09 Spencer Bliven, D.Ozerov +# + +export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold +module purge +module use MX unstable +module load anaconda/2019.07 cuda/11.0.3 alphafold/2.0.0-b88f8da + +exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@" diff --git a/MX/alphafold/bin/submit_ra.sh b/MX/alphafold/bin/submit_ra.sh new file mode 100755 index 0000000..3b8bb22 --- /dev/null +++ b/MX/alphafold/bin/submit_ra.sh @@ -0,0 +1,30 @@ +#!/bin/bash +#SBATCH -p day +#SBATCH -t 1-00:00:00 +#SBATCH -J alphafold +#SBATCH -n 1 +#SBATCH -c 10 + +# Alphafold submission script for the ra cluster +# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_ra.sh fasta_file [max_template_date] +# +# Output will be in the same directory as the fasta_file. +# Slurm logs will be in the current directory. +# +# 2021-08-09 Spencer Bliven, D.Ozerov +# + +export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data + +# Need at least rc6 to see alphafold +PMODULES_VERSION=1.0.0rc10; +source /opt/psi/config/profile.bash; + +module --version + +module purge +module use MX unstable Programming +module load anaconda/2019.07 cuda/11.0.3 alphafold/2.0.0-b88f8da +module list + +exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@" From 5233ffd6e6b47569d0bc88c2cf3ab9984469ab72 Mon Sep 17 00:00:00 2001 From: Spencer Bliven Date: Fri, 8 Oct 2021 20:57:18 +0200 Subject: [PATCH 3/3] Release Alphafold 2.0.1 - Improves the build system to install the submission scripts in bin - Add README --- MX/alphafold/README.md | 60 +++++++++++++++++++ MX/alphafold/bin/submit.sh | 2 +- MX/alphafold/bin/submit_merlin.sh | 2 +- MX/alphafold/bin/submit_ra.sh | 2 +- MX/alphafold/build | 5 ++ MX/alphafold/files/variants | 3 +- .../2019.07/conda-env-defs/alphafold/README | 2 + 7 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 MX/alphafold/README.md create mode 100644 Programming/anaconda/2019.07/conda-env-defs/alphafold/README diff --git a/MX/alphafold/README.md 
b/MX/alphafold/README.md new file mode 100644 index 0000000..5a4d8be --- /dev/null +++ b/MX/alphafold/README.md @@ -0,0 +1,60 @@ +# Alphafold + +Alphafold contains two parts: +1. A conda environment containing dependencies +2. The alphafold module itself, containing the current code and submission scripts. + +## Conda Environment + +Alphafold was installed based on Dima's instructions on ra +(`/das/work/common/opt/alphafold/2021-07/INSTALL`). + +On pmod6 as an admin user: + +``` +conda create --name alphafold python==3.8 +conda update -n base conda + +source miniconda3/etc/profile.d/conda.sh +conda activate alphafold + +conda install -y -c conda-forge openmm==7.5.1 cudnn==8.2.1.32 cudatoolkit==11.0.3 pdbfixer==1.7 +conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04 + +pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \ + dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \ + numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0 +pip install --upgrade jax jaxlib==0.1.69+cuda111 \ + -f https://storage.googleapis.com/jax-releases/jax_releases.html +``` + +If this needs to be updated in the future we may need to have versioned conda envs. + +## Alphafold module + +Add version to files/variants. The version number should match a github tag +(e.g. `v2.0.1`) or else have the commit hash as `$V_RELEASE`. 
+ +As admin user: +``` +cd MX/alphafold +./build +``` + +## Testing + +Here's an example sequence: + +``` +mkdir example +cd example +cat > query.fasta <<EOF +>dummy_sequence +GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE +EOF + +module use MX unstable +module load alphafold/2.0.1 +sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta +``` + diff --git a/MX/alphafold/bin/submit.sh b/MX/alphafold/bin/submit.sh index de5e3c5..9912bb9 100755 --- a/MX/alphafold/bin/submit.sh +++ b/MX/alphafold/bin/submit.sh @@ -49,7 +49,7 @@ hostname >> "$LOG" set +u # Allow unset variables in activate commands module purge module use MX unstable -module load anaconda/2019.07 cuda/11.0.3 alphafold/2.0.0-b88f8da 2>> "$LOG" +module load alphafold/ALPHAFOLD_VERSION 2>> "$LOG" conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}" set -u diff --git a/MX/alphafold/bin/submit_merlin.sh b/MX/alphafold/bin/submit_merlin.sh index 2df1348..15cf40c 100755 --- a/MX/alphafold/bin/submit_merlin.sh +++ b/MX/alphafold/bin/submit_merlin.sh @@ -18,6 +18,6 @@ export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold module purge module use MX unstable -module load anaconda/2019.07 cuda/11.0.3 alphafold/2.0.0-b88f8da +module load alphafold/ALPHAFOLD_VERSION exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@" diff --git a/MX/alphafold/bin/submit_ra.sh b/MX/alphafold/bin/submit_ra.sh index 3b8bb22..1ef0c2f 100755 --- a/MX/alphafold/bin/submit_ra.sh +++ b/MX/alphafold/bin/submit_ra.sh @@ -24,7 +24,7 @@ module --version module purge module use MX unstable Programming -module load anaconda/2019.07 cuda/11.0.3 alphafold/2.0.0-b88f8da +module load alphafold/ALPHAFOLD_VERSION module list exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@" diff --git a/MX/alphafold/build b/MX/alphafold/build index ffd9f4b..80565fe 100755 --- a/MX/alphafold/build +++ b/MX/alphafold/build @@ -29,9 +29,14 @@ pbuild::install() { 
git clone --depth=1 -b "$BRANCH" https://github.com/deepmind/alphafold.git "$ALPHAFOLD_HOME" || return $? if ! [ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \ + --no-check-certificate `# wget root certs are old` \ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt fi wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \ https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh" + + cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/" + sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"* } + diff --git a/MX/alphafold/files/variants b/MX/alphafold/files/variants index 37ff276..ddc8119 100644 --- a/MX/alphafold/files/variants +++ b/MX/alphafold/files/variants @@ -1 +1,2 @@ -alphafold/2.0.0-b88f8da anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 +alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 +alphafold/2.0.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 diff --git a/Programming/anaconda/2019.07/conda-env-defs/alphafold/README b/Programming/anaconda/2019.07/conda-env-defs/alphafold/README new file mode 100644 index 0000000..3067d29 --- /dev/null +++ b/Programming/anaconda/2019.07/conda-env-defs/alphafold/README @@ -0,0 +1,2 @@ +The alphafold environment is a complex mixture of conda and pip. See +MX/alphafold/README.md.