Merge branch 'alphafold' into 'master'

Alphafold buildblock

See merge request Pmodules/buildblocks!228
This commit is contained in:
2021-10-27 07:26:16 +00:00
8 changed files with 265 additions and 0 deletions

60
MX/alphafold/README.md Normal file
View File

@@ -0,0 +1,60 @@
# Alphafold
Alphafold contains two parts:
1. A conda environment containing dependencies
2. The alphafold module itself, containing the current code and submission scripts.
## Conda Environment
Alphafold was installed based on Dima's instructions on ra
(`/das/work/common/opt/alphafold/2021-07/INSTALL`).
On pmod6 as an admin user:
```
conda create --name alphafold python==3.8
conda update -n base conda
source miniconda3/etc/profile.d/conda.sh
conda activate alphafold
conda install -y -c conda-forge openmm==7.5.1 cudnn==8.2.1.32 cudatoolkit==11.0.3 pdbfixer==1.7
conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04
pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \
dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \
numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0
pip install --upgrade jax jaxlib==0.1.69+cuda111 \
-f https://storage.googleapis.com/jax-releases/jax_releases.html
```
If the environment needs to be updated in the future, we may need versioned conda environments.
## Alphafold module
Add the new version to `files/variants`. The version number should match a GitHub tag
(e.g. `2.0.1` for the tag `v2.0.1`); otherwise, give the 7-character abbreviated commit
hash as `$V_RELEASE` (e.g. `2.0.0-b88f8da`).
As admin user:
```
cd MX/alphafold
./build <version>
```
## Testing
Here's an example sequence:
```
mkdir example
cd example
cat > query.fasta <<EOF
>dummy_sequence
GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE
EOF
module use MX unstable
module load alphafold/2.0.1
sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta
```

91
MX/alphafold/bin/submit.sh Executable file
View File

@@ -0,0 +1,91 @@
#!/bin/bash
#
# Cluster-independent AlphaFold job script.
#
# Usage:
#   sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]
#
# Environment:
#   ALPHAFOLD_DATA  must be set before running.
#
# Results are written to the directory that holds fasta_file.
# Slurm logs are written to the current directory.
#
# 2021-08-09 Spencer Bliven, D.Ozerov
#
# Bash strict mode: stop on errors, on unset variables, and on a failure in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Print the one-line command synopsis on stdout.
# Callers redirect it to stderr when reporting an error.
usage() {
  printf '%s\n' 'Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]'
}
# ---------------------------------------------------------------------------
# Main job body.
#   $1  fasta_file         query sequence (required)
#   $2  max_template_date  optional, defaults to today's date (YYYY-MM-DD)
# Everything of interest is appended to alphafold.out next to the fasta file.
# Exits non-zero on any argument or environment problem so that Slurm marks
# the job as failed.
# ---------------------------------------------------------------------------

# Parse parameters
if [[ "$#" -lt 1 ]]; then
  echo "No fasta_file name" >&2
  usage >&2
  exit 1
fi

# Resolve to an absolute path because the script changes directory before
# starting the prediction. The `|| true` stops `set -e` from aborting silently
# when readlink cannot resolve the path; the check below reports it instead.
FASTA_FILE=$(readlink -f -- "$1" || true)
if [[ -z "$FASTA_FILE" || ! -e "$FASTA_FILE" ]]; then
  echo "${FASTA_FILE:-$1} is not reachable (input argument was $1)" >&2
  exit 1
fi

DIR_QUERY=$(dirname -- "$FASTA_FILE")
LOG="${DIR_QUERY}/alphafold.out"

# Use the second argument when one was passed (even an empty one), else today.
MAX_TEMPLATE_DATE=${2-$(date '+%Y-%m-%d')}

# Start a fresh log with a timestamp and the execution host.
date > "$LOG"
hostname >> "$LOG"

set +u # Allow unset variables in activate commands
module purge
module use MX unstable
# The version placeholder on the next line is substituted by the build script.
module load alphafold/ALPHAFOLD_VERSION 2>> "$LOG"
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"
set -u

# Check the module loaded correctly
if [[ ! -d "${ALPHAFOLD_HOME:-}" ]]; then
  echo "Error: ALPHAFOLD_HOME not available (${ALPHAFOLD_HOME:-unset})" >&2
  exit 1
fi

# Data dir
if [[ ! -d "${ALPHAFOLD_DATA:?Set ALPHAFOLD_DATA before running}" ]]; then
  echo "Error: ALPHAFOLD_DATA directory not available ($ALPHAFOLD_DATA)" >&2
  exit 1
fi

# Record which GPUs the job can see, both from the environment and as
# reported by TensorFlow (only the "Created TensorFlow device" lines are kept).
echo "GPUs: ${CUDA_VISIBLE_DEVICES:-None}" >> "$LOG"
echo "Detecting GPUs with Tensorflow:" >> "$LOG"
python -c 'import tensorflow as tf; tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))' 2>&1 \
  | sed -rn 's/^.* (Created TensorFlow device.*)$/\1/p' >> "$LOG"
echo >> "$LOG"

echo "Running alphafold from $PWD for fasta sequence : " >> "$LOG"
cat -- "$FASTA_FILE" >> "$LOG"
echo "and max_template_date : ${MAX_TEMPLATE_DATE} " >> "$LOG"
echo >> "$LOG"

# run_alphafold.sh is invoked relative to the AlphaFold checkout.
cd "${ALPHAFOLD_HOME}"

CMD=(
  "./run_alphafold.sh"
  -p full_dbs
  -d "${ALPHAFOLD_DATA}"
  -o "${DIR_QUERY}"
  -m model_1,model_2,model_3,model_4,model_5
  -f "${FASTA_FILE}"
  -t "${MAX_TEMPLATE_DATE}"
)
if [[ -z "${CUDA_VISIBLE_DEVICES:-}" ]]; then
  # No GPU assigned to the job: disable GPU use.
  CMD+=(-g false)
else
  CMD+=(-a "$CUDA_VISIBLE_DEVICES")
fi

# Log the command line, space separated. IFS has no space in this script, so
# the elements are printed one by one instead of being joined with ${CMD[*]}.
{
  printf 'Run:'
  printf ' %s' "${CMD[@]}"
  printf '\n\n'
} >> "$LOG"

# `time` reports on the shell's stderr, so the subshell is needed to capture
# the timing together with the program output.
( time "${CMD[@]}" ) >> "$LOG" 2>&1

View File

@@ -0,0 +1,23 @@
#!/bin/bash
#SBATCH --partition=gpu
#SBATCH --job-name=alphafold
#SBATCH --clusters=gmerlin6
#SBATCH --gpus=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=10
#
# Merlin cluster front end for the AlphaFold job script.
#
# Usage:
#   sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
#
# Results are written to the directory that holds fasta_file.
# Slurm logs are written to the current directory.
#
# 2021-08-09 Spencer Bliven, D.Ozerov
#

# Database location on merlin; read by submit.sh.
export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold

module purge
module use MX unstable
module load alphafold/ALPHAFOLD_VERSION

# Stop with a message if loading the module did not define ALPHAFOLD_DIR,
# then hand all arguments over to the generic script.
: "${ALPHAFOLD_DIR:?Error loading module}"
exec "${ALPHAFOLD_DIR}/bin/submit.sh" "$@"

30
MX/alphafold/bin/submit_ra.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
#SBATCH --partition=day
#SBATCH --time=1-00:00:00
#SBATCH --job-name=alphafold
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=10
#
# ra cluster front end for the AlphaFold job script.
#
# Usage:
#   sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_ra.sh fasta_file [max_template_date]
#
# Results are written to the directory that holds fasta_file.
# Slurm logs are written to the current directory.
#
# 2021-08-09 Spencer Bliven, D.Ozerov
#

# Database location on ra; read by submit.sh.
export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data

# Need at least rc6 to see alphafold
PMODULES_VERSION=1.0.0rc10
source /opt/psi/config/profile.bash

# Print the Pmodules version into the Slurm log.
module --version

module purge
module use MX unstable Programming
module load alphafold/ALPHAFOLD_VERSION

# Print the loaded modules into the Slurm log.
module list

# Stop with a message if loading the module did not define ALPHAFOLD_DIR,
# then hand all arguments over to the generic script.
: "${ALPHAFOLD_DIR:?Error loading module}"
exec "${ALPHAFOLD_DIR}/bin/submit.sh" "$@"

42
MX/alphafold/build Executable file
View File

@@ -0,0 +1,42 @@
#!/usr/bin/env modbuild
# Buildblock for the AlphaFold module, executed by the `modbuild` interpreter
# named in the shebang line.
# Put the module into the 'MX' group (presumably the group that users enable
# with `module use MX` -- confirm against the Pmodules documentation).
pbuild::add_to_group 'MX'
# Preparation stage: intentionally does nothing and reports success.
# The sources are fetched in pbuild::install.
pbuild::prep() {
  return 0
}
# Configure stage: intentionally does nothing and reports success.
pbuild::configure() {
  return 0
}
# Compile stage: intentionally does nothing and reports success.
pbuild::compile() {
  return 0
}
#######################################
# Install stage: fetch the AlphaFold sources and helper files into $PREFIX
# and install the submission scripts.
#
# Steps:
#   1. Check out AlphaFold into $PREFIX/alphafold, either at tag v$V_PKG or,
#      when $V_RELEASE looks like a commit hash, at that commit.
#   2. Download stereo_chemical_props.txt if the checkout does not have it.
#   3. Download the non-docker run_alphafold.sh launcher and make it executable.
#   4. Copy the buildblock's bin/ directory to $PREFIX and replace the version
#      placeholder in the copied scripts with $V.
#
# Globals:
#   PREFIX, BUILDBLOCK_DIR, V, V_PKG, V_RELEASE (read)
#   ALPHAFOLD_HOME (written; kept global as in the original -- whether the
#   build framework reads it is not visible here)
# Returns:
#   0 on success, otherwise the status of the first step that failed.
#######################################
pbuild::install() {
  local -r repo_url='https://github.com/deepmind/alphafold.git'
  local -r props_url='https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt'
  local -r launcher_url='https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh'
  local release="${V_RELEASE:-}"

  ALPHAFOLD_HOME="$PREFIX/alphafold"

  if [[ "$release" =~ ^[0-9a-fA-F]{7,40}$ ]]; then
    # Release looks like a git hash. `git clone -b` only accepts branch and
    # tag names, so clone the full history and check the commit out afterwards.
    git clone "$repo_url" "$ALPHAFOLD_HOME" || return $?
    ( cd "$ALPHAFOLD_HOME" && git checkout "$release" ) || return $?
  else
    # Tagged release: a shallow clone of the tag is sufficient.
    git clone --depth=1 -b "v${V_PKG}" "$repo_url" "$ALPHAFOLD_HOME" || return $?
  fi

  if [[ ! -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]]; then
    # wget root certs are old, hence --no-check-certificate.
    # NOTE(review): this download is unauthenticated; consider verifying a
    # checksum of the file.
    wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \
      --no-check-certificate \
      "$props_url" || return $?
  fi

  # NOTE(review): the launcher is taken from the moving `main` branch, so the
  # installed script can differ between builds; consider pinning a commit.
  wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" "$launcher_url" || return $?
  chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh" || return $?

  # Install the submission scripts and stamp them with the module version.
  cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/" || return $?
  sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"* || return $?
}

View File

@@ -0,0 +1,2 @@
alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
alphafold/2.0.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3

15
MX/alphafold/modulefile Normal file
View File

@@ -0,0 +1,15 @@
#%Module1.0
# Modulefile for AlphaFold: describes the module and exports the two
# variables that the submission scripts read.
module-whatis "AlphaFold"
module-url "https://github.com/deepmind/alphafold/"
module-license "Code: Apache 2.0 License. Parameters: Noncommercial CC-BY-NC 4.0"
module-maintainer "Spencer Bliven <spencer.bliven@psi.ch>"
module-help "The AlphaFold 2 protein structure prediction method by Google DeepMind.
Jumper, J., Evans, R., Pritzel, A. et al. Highly accurate protein structure prediction with AlphaFold. Nature (2021). https://doi.org/10.1038/s41586-021-03819-2
"
# Directory of the AlphaFold source checkout created by the build script.
# PREFIX is presumably provided by the module framework -- confirm.
setenv ALPHAFOLD_HOME "$PREFIX/alphafold"
# Name of the conda environment that bin/submit.sh activates.
setenv ALPHAFOLD_ENV "alphafold"

View File

@@ -0,0 +1,2 @@
The alphafold environment is a complex mixture of conda and pip. See
MX/alphafold/README.md.