Files
MX_Pmodule/alphafold/bin/submit.sh
2024-07-18 10:49:35 +02:00

92 lines
2.4 KiB
Bash
Executable File

#!/bin/bash
# Generic alphafold submission script.
# Set the ALPHAFOLD_DATA variable before running.
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]
#
# Output will be in the same directory as the fasta_file.
# Slurm logs will be in the current directory.
#
# 2021-08-09 Spencer Bliven, D.Ozerov
#
# Bash strict mode
# -e: exit on an unhandled command failure; -u: expanding an unset variable
# is an error; -o pipefail: a pipeline fails if any of its stages fails.
set -euo pipefail
# Word-split unquoted expansions only on newline and tab (not on spaces).
IFS=$'\n\t'
# Print the one-line invocation synopsis to stdout.
# Takes no arguments; callers redirect to stderr when reporting an error.
usage() {
  # \$ keeps ALPHAFOLD_DIR literal so the user sees the variable name,
  # not whatever value it has in the job environment.
  printf '%s\n' "Usage: sbatch [slurm_opts] \$ALPHAFOLD_DIR/bin/submit.sh fasta_file [max_template_date]"
}
# Parse parameters
# $1 (required) is the FASTA file. It is resolved to an absolute path so it
# remains valid after the script changes directory.
# Both failure paths exit non-zero so Slurm records the job as failed.
if [ "$#" -lt 1 ]; then
  echo "No fasta_file name" >&2
  usage >&2
  exit 1
fi
# `|| true` keeps `set -e` from aborting silently when readlink cannot resolve
# the path; the resulting empty value is reported by the check below instead.
FASTA_FILE=$(readlink -f -- "$1") || true
if [ -z "${FASTA_FILE}" ] || [ ! -e "${FASTA_FILE}" ]; then
  echo "${FASTA_FILE:-$1} is not reachable (input argument was $1)" >&2
  exit 1
fi
# The run log (alphafold.out) lives in the same directory as the FASTA file.
# The expansion is quoted so glob characters, tabs or newlines in the path
# cannot alter the argument handed to dirname.
DIR_QUERY=$(dirname -- "${FASTA_FILE}")
LOG="${DIR_QUERY}/alphafold.out"
# Optional $2 is the max_template_date; when absent, use today's date
# formatted as YYYY-MM-DD.
if [ "$#" -ge 2 ]; then
  MAX_TEMPLATE_DATE=$2
else
  MAX_TEMPLATE_DATE=$(date '+%Y-%m-%d')
fi
# Start the log: `>` truncates any existing file at $LOG, then the start
# time and the host name are recorded.
date > "$LOG"
hostname >> "$LOG"
set +u # Allow unset variables in activate commands
# `module` and `conda` are not defined in this file; they are expected to be
# provided by the job's shell environment.
module purge
module use MX unstable
# stderr of the load is appended to the log.
# NOTE(review): ALPHAFOLD_VERSION looks like a placeholder substituted when
# the module is installed — confirm.
module load alphafold/ALPHAFOLD_VERSION 2>> "$LOG"
# ${VAR:?msg} aborts the script with msg when ALPHAFOLD_ENV is unset or empty.
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"
# Re-enable the unset-variable check for the rest of the script.
set -u
# Check the module loaded correctly
# ${ALPHAFOLD_HOME:-} prevents `set -u` from aborting with a bare
# "unbound variable" error, so the explicit message below is what gets shown.
if ! [ -d "${ALPHAFOLD_HOME:-}" ]; then
  echo "Error: ALPHAFOLD_HOME directory not available (${ALPHAFOLD_HOME:-unset})" >&2
  exit 1
fi
# Data dir
# :? aborts with the given hint when ALPHAFOLD_DATA is unset or empty; the
# -d test then rejects a value that is not an existing directory.
if ! [ -d "${ALPHAFOLD_DATA:?Set ALPHAFOLD_DATA before running}" ]; then
  echo "Error: ALPHAFOLD_DATA directory not available ($ALPHAFOLD_DATA)" >&2
  exit 1
fi
# Append the run context to the log: visible GPUs, the devices TensorFlow
# reports, the input sequence and the max_template_date.
# One redirect on the group replaces a redirect per command.
{
  echo "GPUs: ${CUDA_VISIBLE_DEVICES:-None}"
  echo "Detecting GPUs with Tensorflow:"
  # TensorFlow's messages are merged into stdout; sed prints only the
  # "Created TensorFlow device ..." part of matching lines.
  # NOTE(review): with `set -e -o pipefail` active, a failing python call here
  # aborts the whole script — confirm that is intended for a diagnostic step.
  python -c 'import tensorflow as tf; tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))' 2>&1 |
    sed -rn 's/^.* (Created TensorFlow device.*)$/\1/p'
  echo
  echo "Running alphafold from $PWD for fasta sequence : "
  # Quoted so glob characters in the path cannot select a different file.
  cat -- "${FASTA_FILE}"
  echo "and max_template_date : ${MAX_TEMPLATE_DATE} "
  echo
} >> "$LOG"
# The launcher is invoked by relative path, so switch to its directory first.
cd "${ALPHAFOLD_HOME}"
# Build the invocation as an array so every argument stays a single word.
CMD=(
  "./run_alphafold.sh"
  -p full_dbs
  -d "${ALPHAFOLD_DATA}"
  -o "${DIR_QUERY}"
  -m model_1,model_2,model_3,model_4,model_5
  -f "${FASTA_FILE}"
  -t "${MAX_TEMPLATE_DATE}"
)
# With a non-empty CUDA_VISIBLE_DEVICES pass its value via -a;
# otherwise append "-g false".
if [ -n "${CUDA_VISIBLE_DEVICES:-}" ]; then
  CMD+=(-a "$CUDA_VISIBLE_DEVICES")
else
  CMD+=(-g false)
fi
# Log the command line, space-separated, followed by a blank line.
echo "Run:" "${CMD[@]}" >> "$LOG"
echo >> "$LOG"
# `time` reports on the shell's stderr; running it in a subshell whose stderr
# is folded into stdout sends both the program output and the timing to the log.
( time "${CMD[@]}" ) >> "$LOG" 2>&1