Merge branch 'alphafold' into 'master'
Alphafold buildblock See merge request Pmodules/buildblocks!228
This commit is contained in:
60
MX/alphafold/README.md
Normal file
60
MX/alphafold/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# Alphafold
|
||||
|
||||
Alphafold contains two parts:
|
||||
1. A conda environment containing dependencies
|
||||
2. The alphafold module itself, containing the current code and submission scripts.
|
||||
|
||||
## Conda Environment
|
||||
|
||||
Alphafold was installed based on Dima's instructions on ra
|
||||
(`/das/work/common/opt/alphafold/2021-07/INSTALL`).
|
||||
|
||||
On pmod6 as an admin user:
|
||||
|
||||
```
|
||||
conda create --name alphafold python==3.8
|
||||
conda update -n base conda
|
||||
|
||||
source miniconda3/etc/profile.d/conda.sh
|
||||
conda activate alphafold
|
||||
|
||||
conda install -y -c conda-forge openmm==7.5.1 cudnn==8.2.1.32 cudatoolkit==11.0.3 pdbfixer==1.7
|
||||
conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04
|
||||
|
||||
pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \
|
||||
dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \
|
||||
numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0
|
||||
pip install --upgrade jax jaxlib==0.1.69+cuda111 \
|
||||
-f https://storage.googleapis.com/jax-releases/jax_releases.html
|
||||
```
|
||||
|
||||
If this needs to be updated in the future we may need to have versioned conda envs.
|
||||
|
||||
## Alphafold module
|
||||
|
||||
Add the new version to `files/variants`. The version number should match a GitHub tag
|
||||
(e.g. `2.0.1` for the tag `v2.0.1`; the build script adds the `v` prefix) or else have the 7-character commit hash as `$V_RELEASE` (e.g. `2.0.0-b88f8da`).
|
||||
|
||||
As admin user:
|
||||
```
|
||||
cd MX/alphafold
|
||||
./build <version>
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Here's an example sequence:
|
||||
|
||||
```
|
||||
mkdir example
|
||||
cd example
|
||||
cat > query.fasta <<EOF
|
||||
>dummy_sequence
|
||||
GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE
|
||||
EOF
|
||||
|
||||
module use MX unstable
|
||||
module load alphafold/2.0.1
|
||||
sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta
|
||||
```
|
||||
|
||||
91
MX/alphafold/bin/submit.sh
Executable file
91
MX/alphafold/bin/submit.sh
Executable file
@@ -0,0 +1,91 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Generic alphafold submission script.
|
||||
# Set the ALPHAFOLD_DATA variable before running.
|
||||
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]
|
||||
#
|
||||
# Output will be in the same directory as the fasta_file.
|
||||
# Slurm logs will be in the current directory.
|
||||
#
|
||||
# 2021-08-09 Spencer Bliven, D.Ozerov
|
||||
#
|
||||
|
||||
# Bash strict mode
|
||||
set -euo pipefail
|
||||
IFS=$'\n\t'
|
||||
|
||||
# Print a one-line usage summary for this script on stdout.
# The name shown matches the script documented in the header (submit.sh),
# not one of the cluster-specific wrappers.
usage () {
    echo "Usage: sbatch [slurm_opts] \$ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]"
}
|
||||
|
||||
# Parse parameters
|
||||
# Positional parameters:
#   $1  fasta_file         (required) path to the query sequence
#   $2  max_template_date  (optional) defaults to today's date, YYYY-MM-DD
# Sets FASTA_FILE (absolute path), DIR_QUERY (its directory), LOG and
# MAX_TEMPLATE_DATE for the rest of the script.
if [ "$#" -lt 1 ]; then
    echo "No fasta_file name" >&2
    usage >&2
    # Explicit non-zero status: a bare `exit` would return the status of
    # the preceding command (0) and the job would look successful.
    exit 1
fi

# readlink -f fails without output when the path cannot be resolved. Catch
# that here so `set -e` does not abort before the message below is printed.
FASTA_FILE=$(readlink -f -- "$1") || FASTA_FILE=""
if [ -z "$FASTA_FILE" ] || [ ! -e "$FASTA_FILE" ]; then
    echo "${FASTA_FILE} is not reachable (input argument was $1)" >&2
    exit 1
fi

DIR_QUERY=$(dirname -- "${FASTA_FILE}")
LOG="${DIR_QUERY}/alphafold.out"

if [ "$#" -ge 2 ]; then
    MAX_TEMPLATE_DATE=$2
else
    MAX_TEMPLATE_DATE=$(date '+%Y-%m-%d')
fi
|
||||
|
||||
date > "$LOG"
|
||||
hostname >> "$LOG"
|
||||
|
||||
set +u # Allow unset variables in activate commands
|
||||
module purge
|
||||
module use MX unstable
|
||||
module load alphafold/ALPHAFOLD_VERSION 2>> "$LOG"
|
||||
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"
|
||||
set -u
|
||||
|
||||
# Check the module loaded correctly
|
||||
if ! [ -d "${ALPHAFOLD_HOME}" ]; then
|
||||
echo "Error: $ALPHAFOLD_HOME not available" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Data dir
|
||||
if ! [ -d "${ALPHAFOLD_DATA:?Set ALPHAFOLD_DATA before running}" ]; then
|
||||
echo "Error: ALPHAFOLD_DATA directory not available ($ALPHAFOLD_DATA)" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "GPUs: ${CUDA_VISIBLE_DEVICES:-None}" >> "$LOG"
|
||||
echo "Detecting GPUs with Tensorflow:" >> "$LOG"
|
||||
python -c 'import tensorflow as tf; tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))' 2>&1 |
|
||||
sed -rn 's/^.* (Created TensorFlow device.*)$/\1/p' >> "$LOG"
|
||||
echo >> "$LOG"
|
||||
|
||||
echo "Running alphafold from $PWD for fasta sequence : " >> "$LOG"
|
||||
cat ${FASTA_FILE} >> "$LOG"
|
||||
echo "and max_template_date : ${MAX_TEMPLATE_DATE} " >> "$LOG"
|
||||
echo >> "$LOG"
|
||||
|
||||
|
||||
cd "${ALPHAFOLD_HOME}"
|
||||
CMD=("./run_alphafold.sh" -p full_dbs -d "${ALPHAFOLD_DATA}" -o "${DIR_QUERY}" -m model_1,model_2,model_3,model_4,model_5 -f "${FASTA_FILE}" -t "${MAX_TEMPLATE_DATE}")
|
||||
if [ -z "${CUDA_VISIBLE_DEVICES:-}" ]
|
||||
then
|
||||
CMD+=(-g false)
|
||||
else
|
||||
CMD+=(-a "$CUDA_VISIBLE_DEVICES")
|
||||
fi
|
||||
|
||||
echo "Run: ${CMD[@]}" >> "$LOG"
|
||||
echo >> "$LOG"
|
||||
( ( time "${CMD[@]}" ) 2>&1 ) >> "$LOG"
|
||||
23
MX/alphafold/bin/submit_merlin.sh
Executable file
23
MX/alphafold/bin/submit_merlin.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -p gpu
|
||||
#SBATCH -J alphafold
|
||||
#SBATCH -M gmerlin6
|
||||
#SBATCH --gpus=1
|
||||
#SBATCH -n 1
|
||||
#SBATCH -c 10
|
||||
|
||||
# Alphafold submission script for the merlin cluster
|
||||
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
|
||||
#
|
||||
# Output will be in the same directory as the fasta_file.
|
||||
# Slurm logs will be in the current directory.
|
||||
#
|
||||
# 2021-08-09 Spencer Bliven, D.Ozerov
|
||||
#
|
||||
|
||||
export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold
|
||||
module purge
|
||||
module use MX unstable
|
||||
module load alphafold/ALPHAFOLD_VERSION
|
||||
|
||||
exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
|
||||
30
MX/alphafold/bin/submit_ra.sh
Executable file
30
MX/alphafold/bin/submit_ra.sh
Executable file
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -p day
|
||||
#SBATCH -t 1-00:00:00
|
||||
#SBATCH -J alphafold
|
||||
#SBATCH -n 1
|
||||
#SBATCH -c 10
|
||||
|
||||
# Alphafold submission script for the ra cluster
|
||||
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_ra.sh fasta_file [max_template_date]
|
||||
#
|
||||
# Output will be in the same directory as the fasta_file.
|
||||
# Slurm logs will be in the current directory.
|
||||
#
|
||||
# 2021-08-09 Spencer Bliven, D.Ozerov
|
||||
#
|
||||
|
||||
export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data
|
||||
|
||||
# Need at least rc6 to see alphafold
|
||||
PMODULES_VERSION=1.0.0rc10;
|
||||
source /opt/psi/config/profile.bash;
|
||||
|
||||
module --version
|
||||
|
||||
module purge
|
||||
module use MX unstable Programming
|
||||
module load alphafold/ALPHAFOLD_VERSION
|
||||
module list
|
||||
|
||||
exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
|
||||
42
MX/alphafold/build
Executable file
42
MX/alphafold/build
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env modbuild
|
||||
|
||||
pbuild::add_to_group 'MX'
|
||||
|
||||
|
||||
pbuild::prep() {
|
||||
:
|
||||
}
|
||||
|
||||
pbuild::configure() {
|
||||
:
|
||||
}
|
||||
|
||||
pbuild::compile() {
|
||||
:
|
||||
}
|
||||
|
||||
# Install AlphaFold into $PREFIX:
#   1. clone the upstream sources into $PREFIX/alphafold
#   2. download stereo_chemical_props.txt if the checkout does not contain it
#   3. download the run_alphafold.sh wrapper from alphafold_non_docker
#   4. copy the buildblock's bin/ scripts and substitute the module version
#      for the ALPHAFOLD_VERSION placeholder
# Globals read:    PREFIX, BUILDBLOCK_DIR, V, V_PKG, V_RELEASE
# Globals written: ALPHAFOLD_HOME
# Returns: non-zero as soon as any step fails.
pbuild::install() {
    ALPHAFOLD_HOME="$PREFIX/alphafold"

    local repo_url='https://github.com/deepmind/alphafold.git'

    if [[ "${#V_RELEASE}" -eq 7 ]]; then
        # Release looks like a git hash. `git clone -b` only accepts branch
        # or tag names, so do a full clone and check the commit out after.
        git clone "$repo_url" "$ALPHAFOLD_HOME" || return $?
        git -C "$ALPHAFOLD_HOME" checkout --detach "${V_RELEASE}" || return $?
    else
        git clone --depth=1 -b "v${V_PKG}" "$repo_url" "$ALPHAFOLD_HOME" || return $?
    fi

    if ! [ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then
        # --no-check-certificate: wget root certs are old
        wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \
            --no-check-certificate \
            https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt \
            || return $?
    fi

    # NOTE(review): this is fetched from the moving `main` branch, so two
    # builds of the same version may install different wrappers. Consider
    # pinning a commit.
    wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \
        https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh \
        || return $?
    chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh" || return $?

    cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/" || return $?
    # The submission scripts carry a literal ALPHAFOLD_VERSION placeholder in
    # their `module load` lines; replace it with the version being built.
    sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"* || return $?
}
|
||||
|
||||
2
MX/alphafold/files/variants
Normal file
2
MX/alphafold/files/variants
Normal file
@@ -0,0 +1,2 @@
|
||||
alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
|
||||
alphafold/2.0.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
|
||||
15
MX/alphafold/modulefile
Normal file
15
MX/alphafold/modulefile
Normal file
@@ -0,0 +1,15 @@
|
||||
#%Module1.0
|
||||
|
||||
module-whatis "AlphaFold"
|
||||
module-url "https://github.com/deepmind/alphafold/"
|
||||
module-license "Code: Apache 2.0 License. Parameters: Noncommercial CC-BY-NC 4.0"
|
||||
module-maintainer "Spencer Bliven <spencer.bliven@psi.ch>"
|
||||
|
||||
module-help "The AlphaFold 2 protein structure prediction method by Google DeepMind.
|
||||
|
||||
Jumper, J., Evans, R., Pritzel, A. et al. Highly accurate protein structure prediction with AlphaFold. Nature (2021). https://doi.org/10.1038/s41586-021-03819-2
|
||||
"
|
||||
|
||||
setenv ALPHAFOLD_HOME "$PREFIX/alphafold"
|
||||
setenv ALPHAFOLD_ENV "alphafold"
|
||||
|
||||
@@ -0,0 +1,2 @@
|
||||
The alphafold environment is a complex mixture of conda and pip. See
|
||||
MX/alphafold/README.md.
|
||||
Reference in New Issue
Block a user