Merge branch 'alphafold' into 'master'
Alphafold/2.1.1 with multimer support! See merge request Pmodules/buildblocks!291
This commit is contained in:
@@ -23,7 +23,7 @@ conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04
|
||||
|
||||
pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \
|
||||
dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \
|
||||
numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0
|
||||
numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0 pandas==1.3.4
|
||||
pip install --upgrade jax jaxlib==0.1.69+cuda111 \
|
||||
-f https://storage.googleapis.com/jax-releases/jax_releases.html
|
||||
```
|
||||
@@ -54,7 +54,7 @@ GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE
|
||||
EOF
|
||||
|
||||
module use MX unstable
|
||||
module load alphafold/2.0.1
|
||||
sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta
|
||||
module load alphafold/2.1.1
|
||||
sbatch alphafold_merlin.sh query.fasta
|
||||
```
|
||||
|
||||
|
||||
34
MX/alphafold/bin/alphafold_merlin.sh
Executable file
34
MX/alphafold/bin/alphafold_merlin.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
#SBATCH -p gpu
#SBATCH -J alphafold
#SBATCH -M gmerlin6
#SBATCH --gpus=1
#SBATCH -n 1
#SBATCH -c 10

# Alphafold submission script for the merlin cluster
# Usage: sbatch [slurm_opts] alphafold_merlin.sh [options] fasta_file
#
# OPTIONS
#   All alphafold options are set automatically, but can be overridden.
#   Some common options:
#
#   --max_template_date=YYYY-MM-DD  (default: today)
#   --output_dir                    (default: current directory)
#   --helpfull                      List all options
#
# 2021-12-22 Spencer Bliven, D.Ozerov
#

# Database location; read by alphafold_runner.py when --data_dir is not given.
export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold/versions/latest

# Start from a clean module environment so the job does not depend on
# whatever the submitting shell had loaded.
module purge
module use MX unstable
module load alphafold/2.1.1

# The ${VAR:?message} expansions abort the job with a message if the
# variable is unset or empty.
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"

# Record the execution environment in the Slurm log.
echo "hostname=$(hostname)"
echo "python=$(command -v python)"
echo "ALPHAFOLD_DATA=$(realpath "$ALPHAFOLD_DATA")"

# All script arguments are forwarded unchanged to the Python wrapper.
python "${ALPHAFOLD_DIR:?Error loading module}/bin/alphafold_runner.py" -v 0 "$@"
|
||||
|
||||
42
MX/alphafold/bin/alphafold_ra.sh
Executable file
42
MX/alphafold/bin/alphafold_ra.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/bin/bash
#SBATCH -p gpu-week
#SBATCH -t 2-00:00:00
#SBATCH -J alphafold
#SBATCH --gres=gpu:1
#SBATCH -n 1
#SBATCH -c 10

# Alphafold submission script for the ra cluster
# Usage: sbatch [slurm_opts] alphafold_ra.sh [options] fasta_file
#
# OPTIONS
#   All alphafold options are set automatically, but can be overridden.
#   Some common options:
#
#   --max_template_date=YYYY-MM-DD  (default: today)
#   --output_dir                    (default: current directory)
#   --helpfull                      List all options
#
# 2021-12-22 Spencer Bliven, D.Ozerov
#

# Database location; read by alphafold_runner.py when --data_dir is not given.
export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data_2.1.1/versions/latest

# Need at least rc6 to see alphafold
PMODULES_VERSION=1.0.0rc10
source /opt/psi/config/profile.bash

# Record the Pmodules version actually in use in the Slurm log.
module --version

# Start from a clean module environment so the job does not depend on
# whatever the submitting shell had loaded.
module purge
module use MX unstable Programming
module load alphafold/2.1.1

# The ${VAR:?message} expansions abort the job with a message if the
# variable is unset or empty.
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"

# Record the execution environment in the Slurm log.
echo "hostname=$(hostname)"
echo "python=$(command -v python)"
echo "ALPHAFOLD_DATA=$(realpath "$ALPHAFOLD_DATA")"

# All script arguments are forwarded unchanged to the Python wrapper.
python "${ALPHAFOLD_DIR:?Error loading module}/bin/alphafold_runner.py" -v 0 "$@"
|
||||
|
||||
129
MX/alphafold/bin/alphafold_runner.py
Executable file
129
MX/alphafold/bin/alphafold_runner.py
Executable file
@@ -0,0 +1,129 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Wrapper script for Alphafold 2, with automatic setting of common options
|
||||
|
||||
usage: python alphafold_runner.py [alphafold options] input.fa
|
||||
"""
|
||||
import sys
|
||||
import os
|
||||
import importlib
|
||||
import subprocess
|
||||
import logging
|
||||
import argparse
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
from absl import app
|
||||
from absl.flags import FLAGS
|
||||
from absl import logging
|
||||
|
||||
def import_alphafold():
    """Import and return AlphaFold's ``run_alphafold`` module.

    The directory expected to contain ``run_alphafold.py`` is read from the
    ``ALPHAFOLD_HOME`` environment variable. When that variable is unset it
    defaults to ``../alphafold`` relative to the directory holding this
    script. The directory is appended to ``sys.path`` before importing.

    Returns:
        The imported ``run_alphafold`` module.

    Exits:
        With status 1 if the import fails, after writing the reason and the
        module search path to stderr.
    """
    # Path.resolve() takes a `strict` flag, not a path to join, so the
    # relative component has to be joined before resolving.
    default_home = (Path(__file__).parent / ".." / "alphafold").resolve()
    home = os.environ.get('ALPHAFOLD_HOME', str(default_home))
    sys.path.append(home)
    try:
        return importlib.import_module("run_alphafold")
    except ImportError as err:
        sys.stderr.write("Unable to find run_alphafold.py\n")
        # The failure may also come from a dependency of run_alphafold, so
        # show the underlying error as well as the search path.
        sys.stderr.write(f"error: {err}\n")
        sys.stderr.write(f"path:{', '.join(sys.path)}\n")
        sys.exit(1)
|
||||
# Imported at module load time, i.e. before app.run() is reached at the bottom
# of the file. NOTE(review): presumably this is what registers the alphafold
# flags (fasta_paths, data_dir, ...) that main() reads from FLAGS -- confirm.
af = import_alphafold()
|
||||
|
||||
def multi_fasta(fasta_path):
    """Tell whether a FASTA file holds more than one record.

    A record is counted for every line whose first character is ``>``.
    Reading stops as soon as a second header line is seen.

    Args:
        fasta_path: path of the FASTA file to inspect.

    Returns:
        True if at least two header lines are present, otherwise False.
    """
    seen_header = False
    with open(fasta_path, 'r') as handle:
        for record_line in handle:
            if not record_line.startswith('>'):
                continue
            if seen_header:
                # This is the second header line.
                return True
            seen_header = True
    return False
|
||||
|
||||
|
||||
def guess_model_preset(fasta_paths):
    """Choose a value for ``--model_preset`` from the input files.

    Args:
        fasta_paths: iterable of FASTA file paths.

    Returns:
        "multimer" if any of the files contains more than one sequence
        (as judged by multi_fasta), otherwise "monomer". The decision is
        also logged at info level.
    """
    for candidate in fasta_paths:
        if multi_fasta(candidate):
            logging.info("Input appears to be multimer")
            return "multimer"
    logging.info("Input appears to be monomer")
    return "monomer"
|
||||
|
||||
|
||||
def main(argv):
    """Fill in defaults for unset options, then run alphafold's own main().

    Defaults applied before delegating:

    - The fasta file may be given as a single positional argument instead of
      `--fasta_paths` (giving both is an error).
    - Database paths are derived from `--data_dir`, or from the
      ALPHAFOLD_DATA environment variable when the flag is absent
      (assuming the versioned layout, which differs slightly from the default).
    - `--model_preset` is guessed as monomer or multimer from the number of
      sequences in the fasta file.
    - `--output_dir` defaults to the current working directory.
    - `--max_template_date` defaults to the current date.

    Args:
        argv: remaining command line after flag parsing; argv[0] is the
            program name and argv[1], if present, the fasta file.

    Raises:
        app.UsageError: on too many positional arguments, a fasta file given
            both ways, no fasta file at all, or no data directory.
    """
    n_args = len(argv)
    if n_args > 2:
        raise app.UsageError('Too many command-line arguments.')

    # Accept positional fasta_paths
    fasta_flag = FLAGS["fasta_paths"]
    if n_args > 1:
        if fasta_flag.present:
            raise app.UsageError("Both the --fasta_paths option and a fasta file argument were given")
        fasta_flag.parse(argv[1])
    elif not FLAGS.fasta_paths:
        raise app.UsageError("No fasta file specified")

    # Root directory of the databases
    data_dir_flag = FLAGS["data_dir"]
    if data_dir_flag.present:
        data_dir = FLAGS.data_dir
    else:
        data_dir = os.environ.get("ALPHAFOLD_DATA")
        if data_dir is None:
            raise app.UsageError("Specify --data_dir or set ALPHAFOLD_DATA")
        logging.info(f"Using ALPHAFOLD_DATA={data_dir}")
        data_dir_flag.value = data_dir

    if not FLAGS["model_preset"].present:
        FLAGS.model_preset = guess_model_preset(FLAGS.fasta_paths)

    def default_path(flag_name, *parts):
        # Leave flags that already have a value alone; otherwise point the
        # flag at the given location below data_dir.
        if not getattr(FLAGS, flag_name):
            setattr(FLAGS, flag_name, os.path.join(data_dir, *parts))

    # BFD flavour depends on the database preset
    if FLAGS.db_preset == 'reduced_dbs':
        default_path("small_bfd_database_path", "small_bfd", "bfd-first_non_concensus_sequences.fasta")
    else:
        default_path("bfd_database_path", "bfd", "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt")
        default_path("uniclust30_database_path", "uniclust30", "uniclust30_2018_08")

    # Template/pairing databases depend on the model preset
    if 'multimer' in FLAGS.model_preset:
        default_path("pdb_seqres_database_path", "pdb", "pdb_seqres.txt")
        default_path("uniprot_database_path", "uniprot", "uniprot.fasta")
    else:
        default_path("pdb70_database_path", "pdb70", "pdb70")

    # Databases used regardless of preset
    default_path("mgnify_database_path", "mgnify", "mgy_clusters_2018_12.fa")
    default_path("obsolete_pdbs_path", "pdb", "obsolete.dat")
    default_path("template_mmcif_dir", "pdb", "mmcif_files")
    default_path("uniref90_database_path", "uniprot", "uniref90.fasta")

    if not FLAGS.output_dir:
        FLAGS.output_dir = os.getcwd()

    if not FLAGS.max_template_date:
        FLAGS["max_template_date"].parse(date.today().isoformat())

    # Only the program name is passed on; the fasta file now lives in FLAGS.
    af.main(argv[:1])


if __name__ == "__main__":
    app.run(main)
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -p gpu
|
||||
#SBATCH -J alphafold
|
||||
#SBATCH -M gmerlin6
|
||||
#SBATCH --gpus=1
|
||||
#SBATCH -n 1
|
||||
#SBATCH -c 10
|
||||
|
||||
# Alphafold submission script for the merlin cluster
|
||||
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
|
||||
#
|
||||
# Output will be in the same directory as the fasta_file.
|
||||
# Slurm logs will be in the current directory.
|
||||
#
|
||||
# 2021-08-09 Spencer Bliven, D.Ozerov
|
||||
#
|
||||
|
||||
export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold
|
||||
module purge
|
||||
module use MX unstable
|
||||
module load alphafold/ALPHAFOLD_VERSION
|
||||
|
||||
exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/bin/bash
|
||||
#SBATCH -p day
|
||||
#SBATCH -t 1-00:00:00
|
||||
#SBATCH -J alphafold
|
||||
#SBATCH -n 1
|
||||
#SBATCH -c 10
|
||||
|
||||
# Alphafold submission script for the ra cluster
|
||||
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
|
||||
#
|
||||
# Output will be in the same directory as the fasta_file.
|
||||
# Slurm logs will be in the current directory.
|
||||
#
|
||||
# 2021-08-09 Spencer Bliven, D.Ozerov
|
||||
#
|
||||
|
||||
export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data
|
||||
|
||||
# Need at least rc6 to see alphafold
|
||||
PMODULES_VERSION=1.0.0rc10;
|
||||
source /opt/psi/config/profile.bash;
|
||||
|
||||
module --version
|
||||
|
||||
module purge
|
||||
module use MX unstable Programming
|
||||
module load alphafold/ALPHAFOLD_VERSION
|
||||
module list
|
||||
|
||||
exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
|
||||
@@ -27,14 +27,12 @@ pbuild::install() {
|
||||
fi
|
||||
|
||||
git clone --depth=1 -b "$BRANCH" https://github.com/deepmind/alphafold.git "$ALPHAFOLD_HOME" || return $?
|
||||
|
||||
if ! [ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then
|
||||
wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \
|
||||
--no-check-certificate `# wget root certs are old` \
|
||||
|
||||
curl -fLsS -o "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" \
|
||||
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
|
||||
fi
|
||||
wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \
|
||||
https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh
|
||||
chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh"
|
||||
|
||||
cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/"
|
||||
sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"*
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
|
||||
alphafold/2.0.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
|
||||
alphafold/2.0.1 stable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
|
||||
alphafold/2.1.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
|
||||
|
||||
Reference in New Issue
Block a user