diff --git a/MX/alphafold/README.md b/MX/alphafold/README.md
new file mode 100644
index 0000000..5a4d8be
--- /dev/null
+++ b/MX/alphafold/README.md
@@ -0,0 +1,60 @@
+# Alphafold
+
+Alphafold contains two parts:
+1. A conda environment containing dependencies
+2. The alphafold module itself, containing the current code and submission scripts.
+
+## Conda Environment
+
+Alphafold was installed based on Dima's instructions on ra
+(`/das/work/common/opt/alphafold/2021-07/INSTALL`).
+
+On pmod6 as an admin user:
+
+```
+conda create --name alphafold python==3.8
+conda update -n base conda
+
+source miniconda3/etc/profile.d/conda.sh
+conda activate alphafold
+
+conda install -y -c conda-forge openmm==7.5.1 cudnn==8.2.1.32 cudatoolkit==11.0.3 pdbfixer==1.7
+conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04
+
+pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \
+    dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \
+    numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0
+pip install --upgrade jax jaxlib==0.1.69+cuda111 \
+    -f https://storage.googleapis.com/jax-releases/jax_releases.html
+```
+
+If this needs to be updated in the future we may need to have versioned conda envs.
+
+## Alphafold module
+
+Add the new version to `files/variants`. The version number should match a GitHub tag
+(e.g. `v2.0.1`) or else have the commit hash as `$V_RELEASE`.
+
+As admin user:
+```
+cd MX/alphafold
+./build
+```
+
+## Testing
+
+Here's an example sequence:
+
+```
+mkdir example
+cd example
+cat > query.fasta <<EOF
+>dummy_sequence
+GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE
+EOF
+
+module use MX unstable
+module load alphafold/2.0.1
+sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta
+```
+
diff --git a/MX/alphafold/bin/submit.sh b/MX/alphafold/bin/submit.sh
new file mode 100755
index 0000000..9912bb9
--- /dev/null
+++ b/MX/alphafold/bin/submit.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+# Generic alphafold submission script.
+# Set the ALPHAFOLD_DATA variable before running.
+# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]
+#
+# Output will be in the same directory as the fasta_file.
+# Slurm logs will be in the current directory.
+#
+# 2021-08-09 Spencer Bliven, D.Ozerov
+#
+
+# Bash strict mode
+set -euo pipefail
+IFS=$'\n\t'
+
+usage () {
+    echo "Usage: sbatch [slurm_opts] \$ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]"
+}
+
+# Parse parameters. Errors exit non-zero so Slurm records the job as failed.
+if [ "$#" -lt 1 ]
+then
+    echo "No fasta_file name" >&2
+    usage >&2
+    exit 1
+fi
+# readlink -f fails on unresolvable paths; map that to "" for the check below.
+FASTA_FILE=$(readlink -f -- "$1") || FASTA_FILE=""
+if [ -z "$FASTA_FILE" ] || [ ! -e "$FASTA_FILE" ]
+then
+    echo "${FASTA_FILE} is not reachable (input argument was $1)" >&2
+    exit 1
+fi
+
+DIR_QUERY=$(dirname -- "$FASTA_FILE")
+LOG="${DIR_QUERY}/alphafold.out"
+
+if [ "$#" -ge 2 ]
+then
+    MAX_TEMPLATE_DATE=$2
+else
+    MAX_TEMPLATE_DATE=$(date '+%Y-%m-%d')
+fi
+
+date > "$LOG"
+hostname >> "$LOG"
+
+set +u # Allow unset variables in activate commands
+module purge
+module use MX unstable
+module load alphafold/ALPHAFOLD_VERSION 2>> "$LOG"  # version token is substituted by the build script
+conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"
+set -u
+
+# Check the module loaded correctly
+if ! [ -d "${ALPHAFOLD_HOME:-}" ]; then
+    echo "Error: ${ALPHAFOLD_HOME:-<unset>} not available" >&2
+    exit 1
+fi
+
+# Data dir
+if ! [ -d "${ALPHAFOLD_DATA:?Set ALPHAFOLD_DATA before running}" ]; then
+    echo "Error: ALPHAFOLD_DATA directory not available ($ALPHAFOLD_DATA)" >&2
+    exit 1
+fi
+
+echo "GPUs: ${CUDA_VISIBLE_DEVICES:-None}" >> "$LOG"
+echo "Detecting GPUs with Tensorflow:" >> "$LOG"
+python -c 'import tensorflow as tf; tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))' 2>&1 |
+    sed -rn 's/^.* (Created TensorFlow device.*)$/\1/p' >> "$LOG"
+echo >> "$LOG"
+
+echo "Running alphafold from $PWD for fasta sequence : " >> "$LOG"
+cat "$FASTA_FILE" >> "$LOG"
+echo "and max_template_date : ${MAX_TEMPLATE_DATE} " >> "$LOG"
+echo >> "$LOG"
+
+# Build the command as an array so each argument stays a single word.
+cd "${ALPHAFOLD_HOME}"
+CMD=("./run_alphafold.sh" -p full_dbs -d "${ALPHAFOLD_DATA}" -o "${DIR_QUERY}" -m model_1,model_2,model_3,model_4,model_5 -f "${FASTA_FILE}" -t "${MAX_TEMPLATE_DATE}")
+if [ -z "${CUDA_VISIBLE_DEVICES:-}" ]
+then
+    CMD+=(-g false)  # no GPU visible to this job
+else
+    CMD+=(-a "$CUDA_VISIBLE_DEVICES")
+fi
+
+echo "Run:" "${CMD[@]}" >> "$LOG"
+echo >> "$LOG"
+( ( time "${CMD[@]}" ) 2>&1 ) >> "$LOG"
diff --git a/MX/alphafold/bin/submit_merlin.sh b/MX/alphafold/bin/submit_merlin.sh
new file mode 100755
index 0000000..15cf40c
--- /dev/null
+++ b/MX/alphafold/bin/submit_merlin.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#SBATCH -p gpu
+#SBATCH -J alphafold
+#SBATCH -M gmerlin6
+#SBATCH --gpus=1
+#SBATCH -n 1
+#SBATCH -c 10
+
+# Alphafold submission script for the merlin cluster
+# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
+#
+# Output will be in the same directory as the fasta_file.
+# Slurm logs will be in the current directory.
+#
+# 2021-08-09 Spencer Bliven, D.Ozerov
+#
+
+export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold
+module purge
+module use MX unstable
+module load alphafold/ALPHAFOLD_VERSION
+
+exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
diff --git a/MX/alphafold/bin/submit_ra.sh b/MX/alphafold/bin/submit_ra.sh
new file mode 100755
index 0000000..1ef0c2f
--- /dev/null
+++ b/MX/alphafold/bin/submit_ra.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#SBATCH -p day
+#SBATCH -t 1-00:00:00
+#SBATCH -J alphafold
+#SBATCH -n 1
+#SBATCH -c 10
+
+# Alphafold submission script for the ra cluster
+# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_ra.sh fasta_file [max_template_date]
+#
+# Output will be in the same directory as the fasta_file.
+# Slurm logs will be in the current directory.
+#
+# 2021-08-09 Spencer Bliven, D.Ozerov
+#
+
+export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data
+
+# Need at least rc6 to see alphafold
+PMODULES_VERSION=1.0.0rc10
+source /opt/psi/config/profile.bash
+
+module --version
+
+module purge
+module use MX unstable Programming
+module load alphafold/ALPHAFOLD_VERSION
+module list
+
+exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
diff --git a/MX/alphafold/build b/MX/alphafold/build
new file mode 100755
index 0000000..80565fe
--- /dev/null
+++ b/MX/alphafold/build
@@ -0,0 +1,54 @@
+#!/usr/bin/env modbuild
+
+pbuild::add_to_group 'MX'
+
+
+pbuild::prep() {
+    :
+}
+
+pbuild::configure() {
+    :
+}
+
+pbuild::compile() {
+    :
+}
+
+# Install the AlphaFold sources, the stereo-chemistry table, the non-docker
+# launcher and the submission scripts under $PREFIX.
+# Reads PREFIX, V, V_PKG, V_RELEASE and BUILDBLOCK_DIR, none of which are set in
+# this file (presumably provided by modbuild - confirm).
+pbuild::install() {
+    ALPHAFOLD_HOME="$PREFIX/alphafold"
+    local -r REPO='https://github.com/deepmind/alphafold.git'
+
+    if [[ "${V_RELEASE:-}" =~ ^[0-9a-f]{7,40}$ ]]; then
+        # Release looks like a git hash. 'git clone -b' only accepts branch and
+        # tag names, so clone the full history and check the commit out.
+        git clone "$REPO" "$ALPHAFOLD_HOME" || return $?
+        git -C "$ALPHAFOLD_HOME" checkout --detach "$V_RELEASE" || return $?
+    else
+        git clone --depth=1 -b "v${V_PKG}" "$REPO" "$ALPHAFOLD_HOME" || return $?
+    fi
+
+    if ! [ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then
+        # wget root certs are old, hence --no-check-certificate.
+        # NOTE(review): this disables TLS verification for the download.
+        wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \
+            --no-check-certificate \
+            https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt \
+            || return $?
+    fi
+    # NOTE(review): downloaded from the moving 'main' branch, so rebuilds are
+    # not reproducible; consider pinning a commit.
+    wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \
+        https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh \
+        || return $?
+    chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh"
+
+    cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/" || return $?
+    # Fill in the ALPHAFOLD_VERSION placeholder used by the submission scripts.
+    sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"*
+}
+
diff --git a/MX/alphafold/files/variants b/MX/alphafold/files/variants
new file mode 100644
index 0000000..ddc8119
--- /dev/null
+++ b/MX/alphafold/files/variants
@@ -0,0 +1,2 @@
+alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
+alphafold/2.0.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
diff --git a/MX/alphafold/modulefile b/MX/alphafold/modulefile
new file mode 100644
index 0000000..e489db6
--- /dev/null
+++ b/MX/alphafold/modulefile
@@ -0,0 +1,15 @@
+#%Module1.0
+
+module-whatis "AlphaFold"
+module-url "https://github.com/deepmind/alphafold/"
+module-license "Code: Apache 2.0 License. Parameters: Noncommercial CC-BY-NC 4.0"
+module-maintainer "Spencer Bliven