diff --git a/MX/alphafold/README.md b/MX/alphafold/README.md index 5a4d8be..6a7ba74 100644 --- a/MX/alphafold/README.md +++ b/MX/alphafold/README.md @@ -23,7 +23,7 @@ conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04 pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \ dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \ - numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0 + numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0 pandas==1.3.4 pip install --upgrade jax jaxlib==0.1.69+cuda111 \ -f https://storage.googleapis.com/jax-releases/jax_releases.html ``` @@ -54,7 +54,7 @@ GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE EOF module use MX unstable -module load alphafold/2.0.1 -sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta +module load alphafold/2.1.1 +sbatch alphafold_merlin.sh query.fasta ``` diff --git a/MX/alphafold/bin/alphafold_merlin.sh b/MX/alphafold/bin/alphafold_merlin.sh new file mode 100755 index 0000000..4c5b17b --- /dev/null +++ b/MX/alphafold/bin/alphafold_merlin.sh @@ -0,0 +1,34 @@ +#!/bin/bash +#SBATCH -p gpu +#SBATCH -J alphafold +#SBATCH -M gmerlin6 +#SBATCH --gpus=1 +#SBATCH -n 1 +#SBATCH -c 10 + +# Alphafold submission script for the merlin cluster +# Usage: sbatch [slurm_opts] alphafold_merlin.sh [options] fasta_file +# +# OPTIONS +# All alphafold options are set automatically, but can be overridden. +# Some common options: +# +# --max_template_date=YYYY-MM-DD (default: today) +# --output_dir (default: current directory) +# --helpfull List all options +# +# 2021-12-22 Spencer Bliven, D.Ozerov +# + +export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold/versions/latest +module purge +module use MX unstable +module load alphafold/2.1.1 +conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. 
Try 'module use MX unstable; module load alphafold'"}" + +echo "hostname=$(hostname)" +echo "python=$(which python)" +echo "ALPHAFOLD_DATA=$(realpath "$ALPHAFOLD_DATA")" + +python "${ALPHAFOLD_DIR:?Error loading module}/bin/alphafold_runner.py" -v 0 "$@" + diff --git a/MX/alphafold/bin/alphafold_ra.sh b/MX/alphafold/bin/alphafold_ra.sh new file mode 100755 index 0000000..05820fb --- /dev/null +++ b/MX/alphafold/bin/alphafold_ra.sh @@ -0,0 +1,42 @@ +#!/bin/bash +#SBATCH -p gpu-week +#SBATCH -t 2-00:00:00 +#SBATCH -J alphafold +#SBATCH --gres=gpu:1 +#SBATCH -J alphafold +#SBATCH -n 1 +#SBATCH -c 10 + +# Alphafold submission script for the ra cluster +# Usage: sbatch [slurm_opts] alphafold_ra.sh [options] fasta_file +# +# OPTIONS +# All alphafold options are set automatically, but can be overridden. +# Some common options: +# +# --max_template_date=YYYY-MM-DD (default: today) +# --output_dir (default: current directory) +# --helpfull List all options +# +# 2021-12-22 Spencer Bliven, D.Ozerov +# + +export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data_2.1.1/versions/latest + +# Need at least rc6 to see alphafold +PMODULES_VERSION=1.0.0rc10; +source /opt/psi/config/profile.bash; + +module --version + +module purge +module use MX unstable Programming +module load alphafold/2.1.1 +conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. 
Try 'module use MX unstable; module load alphafold'"}"
+
+echo "hostname=$(hostname)"
+echo "python=$(which python)"
+echo "ALPHAFOLD_DATA=$(realpath "$ALPHAFOLD_DATA")"
+
+python "${ALPHAFOLD_DIR:?Error loading module}/bin/alphafold_runner.py" -v 0 "$@"
+
diff --git a/MX/alphafold/bin/alphafold_runner.py b/MX/alphafold/bin/alphafold_runner.py
new file mode 100755
index 0000000..0a608ba
--- /dev/null
+++ b/MX/alphafold/bin/alphafold_runner.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+"""
+Wrapper script for Alphafold 2, with automatic setting of common options
+
+usage: python alphafold_runner.py [alphafold options] input.fa
+"""
+import sys
+import os
+import importlib
+import subprocess
+import logging
+import argparse
+from datetime import date
+from pathlib import Path
+from typing import Union
+from absl import app
+from absl.flags import FLAGS
+from absl import logging
+
+def import_alphafold():
+    """Import run_alphafold.py from ALPHAFOLD_HOME
+
+    ALPHAFOLD_HOME defaults to the `alphafold` directory that sits next to the
+    directory containing this script. Exits with status 1 if the import fails.
+    """
+    # Path.resolve() takes a `strict` flag, not a path: build the path explicitly
+    home = os.environ.get('ALPHAFOLD_HOME', str(Path(__file__).resolve().parent.parent / "alphafold"))
+    sys.path.append(home)
+    try:
+        return importlib.import_module("run_alphafold")
+    except ImportError as err:
+        # Also raised when an import inside run_alphafold.py fails, so show the cause
+        sys.stderr.write(f"Unable to find run_alphafold.py: {err}\n")
+        sys.stderr.write(f"path:{', '.join(sys.path)}\n")
+        sys.exit(1)
+af = import_alphafold()
+
+def multi_fasta(fasta_path):
+    """Return True if the file at fasta_path has more than one '>' header line"""
+    entries = 0
+    with open(fasta_path, 'r') as fasta:
+        for line in fasta:
+            if line.startswith('>'):
+                entries += 1
+                if entries > 1:
+                    return True
+    return False
+
+
+def guess_model_preset(fasta_paths):
+    """Return "multimer" if any file in fasta_paths has several entries, otherwise "monomer"."""
+    if any(multi_fasta(f) for f in fasta_paths):
+        logging.info("Input appears to be multimer")
+        return "multimer"
+    logging.info("Input appears to be monomer")
+    return "monomer"
+
+
+def main(argv):
+    """Set some option defaults and then call alphafold's main method
+
+    Most alphafold options have defaults set automatically:
+
+    - database files are set from the ALPHAFOLD_DATA variable or the --data_dir option
+      (assuming the versioned layout, which differs slightly from the default)
+    - `--model_preset` is set to either monomer or multimer depending on the number of sequences in the fasta file
+    - `--max_template_date` defaults to the current date
+    - `--output_dir` defaults to the current directory
+
+    Raises app.UsageError for a missing fasta file, a missing data directory,
+    or more than one positional argument.
+    """
+
+
+    if len(argv) > 2:
+        raise app.UsageError('Too many command-line arguments.')
+
+    # Accept positional fasta_paths
+    if len(argv) > 1:
+        if FLAGS["fasta_paths"].present:
+            raise app.UsageError("Both the --fasta_paths option and a fasta file argument were given")
+        FLAGS["fasta_paths"].parse(argv[1])
+    elif not FLAGS.fasta_paths:
+        raise app.UsageError("No fasta file specified")
+
+    # Database flags
+    if FLAGS["data_dir"].present:
+        data_dir = FLAGS.data_dir
+    elif "ALPHAFOLD_DATA" in os.environ:
+        data_dir = os.environ["ALPHAFOLD_DATA"]
+        logging.info(f"Using ALPHAFOLD_DATA={data_dir}")
+        FLAGS['data_dir'].value = data_dir
+    else:
+        raise app.UsageError("Specify --data_dir or set ALPHAFOLD_DATA")
+
+    if not FLAGS["model_preset"].present:
+        FLAGS.model_preset = guess_model_preset(FLAGS.fasta_paths)
+
+    use_small_bfd = FLAGS.db_preset == 'reduced_dbs'
+
+    if use_small_bfd:
+        if not FLAGS.small_bfd_database_path:
+            # Spelled "consensus", as in alphafold's download_small_bfd.sh
+            FLAGS.small_bfd_database_path = os.path.join(data_dir, "small_bfd", "bfd-first_non_consensus_sequences.fasta")
+    else:
+        if not FLAGS.bfd_database_path:
+            FLAGS.bfd_database_path = os.path.join(data_dir, "bfd", "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt")
+        if not FLAGS.uniclust30_database_path:
+            FLAGS.uniclust30_database_path = os.path.join(data_dir, "uniclust30", "uniclust30_2018_08")
+
+    run_multimer_system = 'multimer' in FLAGS.model_preset
+
+    if run_multimer_system:
+        if not FLAGS.pdb_seqres_database_path:
+            FLAGS.pdb_seqres_database_path = os.path.join(data_dir, "pdb", "pdb_seqres.txt")
+        if not FLAGS.uniprot_database_path:
+            FLAGS.uniprot_database_path = os.path.join(data_dir, "uniprot", "uniprot.fasta")
+    else:
+        if not FLAGS.pdb70_database_path:
+            FLAGS.pdb70_database_path = os.path.join(data_dir, "pdb70", "pdb70")
+
+    if not FLAGS.mgnify_database_path:
+        FLAGS.mgnify_database_path = os.path.join(data_dir, "mgnify", "mgy_clusters_2018_12.fa")
+    if not FLAGS.obsolete_pdbs_path:
+        FLAGS.obsolete_pdbs_path = os.path.join(data_dir, "pdb", "obsolete.dat")
+    if not FLAGS.template_mmcif_dir:
+        FLAGS.template_mmcif_dir = os.path.join(data_dir, "pdb", "mmcif_files")
+    if not FLAGS.uniref90_database_path:
+        FLAGS.uniref90_database_path = os.path.join(data_dir, "uniprot", "uniref90.fasta")
+
+    if not FLAGS.output_dir:
+        FLAGS.output_dir = os.getcwd()
+
+    if not FLAGS.max_template_date:
+        FLAGS["max_template_date"].parse(date.today().isoformat())
+
+    # The fasta argument was consumed above; pass only the program name on
+    af.main(argv[0:1])
+
+
+if __name__ == "__main__":
+    app.run(main)
+
diff --git a/MX/alphafold/bin/submit_merlin.sh b/MX/alphafold/bin/submit_merlin.sh
deleted file mode 100755
index 15cf40c..0000000
--- a/MX/alphafold/bin/submit_merlin.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-#SBATCH -p gpu
-#SBATCH -J alphafold
-#SBATCH -M gmerlin6
-#SBATCH --gpus=1
-#SBATCH -n 1
-#SBATCH -c 10
-
-# Alphafold submission script for the merlin cluster
-# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
-#
-# Output will be in the same directory as the fasta_file.
-# Slurm logs will be in the current directory.
-# -# 2021-08-09 Spencer Bliven, D.Ozerov -# - -export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold -module purge -module use MX unstable -module load alphafold/ALPHAFOLD_VERSION - -exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@" diff --git a/MX/alphafold/bin/submit_ra.sh b/MX/alphafold/bin/submit_ra.sh deleted file mode 100755 index 1ef0c2f..0000000 --- a/MX/alphafold/bin/submit_ra.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -#SBATCH -p day -#SBATCH -t 1-00:00:00 -#SBATCH -J alphafold -#SBATCH -n 1 -#SBATCH -c 10 - -# Alphafold submission script for the ra cluster -# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date] -# -# Output will be in the same directory as the fasta_file. -# Slurm logs will be in the current directory. -# -# 2021-08-09 Spencer Bliven, D.Ozerov -# - -export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data - -# Need at least rc6 to see alphafold -PMODULES_VERSION=1.0.0rc10; -source /opt/psi/config/profile.bash; - -module --version - -module purge -module use MX unstable Programming -module load alphafold/ALPHAFOLD_VERSION -module list - -exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@" diff --git a/MX/alphafold/build b/MX/alphafold/build index 80565fe..341396d 100755 --- a/MX/alphafold/build +++ b/MX/alphafold/build @@ -27,14 +27,12 @@ pbuild::install() { fi git clone --depth=1 -b "$BRANCH" https://github.com/deepmind/alphafold.git "$ALPHAFOLD_HOME" || return $? + if ! 
[ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then - wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \ - --no-check-certificate `# wget root certs are old` \ + + curl -fLsS -o "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" \ https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt fi - wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \ - https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh - chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh" cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/" sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"* diff --git a/MX/alphafold/files/variants b/MX/alphafold/files/variants index ddc8119..590cc1b 100644 --- a/MX/alphafold/files/variants +++ b/MX/alphafold/files/variants @@ -1,2 +1,3 @@ alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 -alphafold/2.0.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 +alphafold/2.0.1 stable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3 +alphafold/2.1.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3