diff --git a/lyso_test/anomalous_scoreboard.sh b/lyso_test/anomalous_scoreboard.sh new file mode 100755 index 00000000..1a7dfc04 --- /dev/null +++ b/lyso_test/anomalous_scoreboard.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# Anomalous-accuracy scoreboard: score a jfjoch anomalous merge against XDS on the HEWL S/Cl +# anomalous signal. Accuracy (not just precision): ANODE puts the anomalous density peaks at the +# model's real anomalous atoms, and the peak height (sigma) is how strongly the data sees them. +# +# ./anomalous_scoreboard.sh [label] +# +# is a jfjoch `--scaling-output txt` file produced with -A (anomalous): +# whitespace-separated "h k l I sigma [rfree]". Prints per shell (SHELXC) and the averaged +# anomalous peak height at SD_MET / SG_CYS / CL_CL for the input vs XDS, with the XDS/input ratio +# (1.0 = matched XDS). The XDS reference is computed once and cached (delete .anom_xds_cache to redo). +# +# The SHELX binaries are run under qemu-user because this WSL2 kernel sets vsyscall=none, which the +# old static SHELX builds need (otherwise they exit 174 silently). Paths are overridable via env. +# (No set -e/-u: sourcing CCP4's setup trips -u, and many greps below legitimately find nothing.) +set -o pipefail + +INPUT="${1:?usage: anomalous_scoreboard.sh [label]}" +LABEL="${2:-jfjoch}" +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +SHELXC="${SHELXC_BIN:-/home/leonarski_f/shelxc}" +ANODE="${ANODE_BIN:-/home/leonarski_f/anode}" +QEMU="${QEMU_BIN:-/usr/bin/qemu-x86_64-static}" +CCP4_SETUP="${CCP4_SETUP:-/opt/xtal/ccp4-9/bin/ccp4.setup-sh}" +MODEL_CIF="${MODEL_CIF:-$HERE/6G8A_refine_001.cif}" +XDS_REF="${XDS_REF:-$HERE/XDS_ASCII.HKL}" +CELL="${CELL:-78.22 78.22 37.76 90 90 90}" +SG="${SG:-P43212}" +WAVELENGTH="${WAVELENGTH:-0.95329}" + +# shellcheck disable=SC1090 +source "$CCP4_SETUP" 2>/dev/null || true +WORK="$(mktemp -d)"; trap 'rm -rf "$WORK"' EXIT + +# Model PDB for ANODE (cached): add the wavelength REMARK, drop ANISOU. +MODEL_PDB="$HERE/6G8A_anode.pdb" +if [ ! -s "$MODEL_PDB" ]; then + gemmi convert "$MODEL_CIF" "$WORK/m.pdb" + { echo "REMARK 200 WAVELENGTH OR RANGE (A) : $WAVELENGTH" + grep -E '^CRYST1|^ATOM|^HETATM|^TER|^END' "$WORK/m.pdb"; } > "$MODEL_PDB" +fi + +# run_one : SHELXC + ANODE; echoes "SD_MET SG_CYS CL_CL" densities, +# leaves .shelxc / .lsa in $WORK. +run_one() { + local name="$1" data="$2" is_xds="$3" sadfile="$2" + cp "$MODEL_PDB" "$WORK/$name.pdb" + if [ "$is_xds" = 0 ]; then # jfjoch txt -> SHELX HKLF4 (3I4,2F8.2) + terminator + awk '{printf "%4d%4d%4d%8.2f%8.2f\n",$1,$2,$3,$4,$5}' "$data" > "$WORK/${name}_in.hkl" + printf "%4d%4d%4d%8.2f%8.2f\n" 0 0 0 0 0 >> "$WORK/${name}_in.hkl" + sadfile="$WORK/${name}_in.hkl" + fi + ( cd "$WORK" + "$QEMU" "$SHELXC" "$name" > "$name.shelxc" 2>&1 < "$name.anode" 2>&1 ) + local sd sg cl + sd=$(grep -E ' SD_MET *$' "$WORK/$name.lsa" | awk '{print $1}') + sg=$(grep -E ' SG_CYS *$' "$WORK/$name.lsa" | awk '{print $1}') + cl=$(grep -E ' CL_CL *$' "$WORK/$name.lsa" | awk '{print $1}') + echo "${sd:-NA} ${sg:-NA} ${cl:-NA}" +} + +echo "Scoring '$LABEL' ($INPUT) vs XDS ..." +read -r O_SD O_SG O_CL < <(run_one ours "$INPUT" 0) +O_DSIG=$(grep -E '' "$WORK/ours.shelxc" | head -1 | sed 's/.*//') +O_RESL=$(grep -E '^ Resl' "$WORK/ours.shelxc" | head -1) + +XDS_CACHE="$HERE/.anom_xds_cache" +if [ -s "$XDS_CACHE" ]; then + # shellcheck disable=SC1090 + source "$XDS_CACHE" +else + read -r X_SD X_SG X_CL < <(run_one xds "$XDS_REF" 1) + X_DSIG=$(grep -E '' "$WORK/xds.shelxc" | head -1 | sed 's/.*//') + { echo "X_SD='$X_SD'"; echo "X_SG='$X_SG'"; echo "X_CL='$X_CL'"; echo "X_DSIG='$X_DSIG'"; } > "$XDS_CACHE" +fi + +ratio() { awk -v a="$1" -v b="$2" 'BEGIN{ if(a+0>0) printf "%.2fx", b/a; else print "NA" }'; } +echo +echo "=== per resolution shell (0.80 = no anomalous signal) ===" +echo " $O_RESL" | sed 's/Resl\./Resl./' +printf "%-8s %s\n" "$LABEL" "$O_DSIG" +printf "%-8s %s\n" "XDS" "$X_DSIG" +echo +echo "=== averaged anomalous peak height at the model atoms (sigma) ===" +printf "%-8s %9s %9s %8s\n" "atom" "$LABEL" "XDS" "XDS/in" +printf "%-8s %9s %9s %8s\n" "SD_MET" "$O_SD" "$X_SD" "$(ratio "$O_SD" "$X_SD")" +printf "%-8s %9s %9s %8s\n" "SG_CYS" "$O_SG" "$X_SG" "$(ratio "$O_SG" "$X_SG")" +printf "%-8s %9s %9s %8s\n" "CL_CL" "$O_CL" "$X_CL" "$(ratio "$O_CL" "$X_CL")" +echo +awk -v a="$O_SD" -v b="$O_SG" -v xa="$X_SD" -v xb="$X_SG" 'BEGIN{ + if(a+0>0 && b+0>0) printf "Summary: S peak height = %.2fx XDS (SD_MET %.2f, SG_CYS %.2f vs %.2f, %.2f). 1.0x = matched XDS.\n", \ + (a+b)/(xa+xb), a, b, xa, xb }'