Add anomalous-accuracy scoreboard (HEWL ANODE vs XDS)

One command to score a jfjoch anomalous merge against XDS on physical
accuracy: feed the -A merge through SHELXC + ANODE (run under qemu-user,
since this WSL2 kernel's vsyscall=none breaks the static SHELX binaries)
and report <d"/sig> per shell and the averaged anomalous peak height at
the model S/Cl sites vs XDS. Unlike R-meas/CC1/2 (precision) it catches
accuracy regressions - e.g. outlier rejection raises CC1/2 but lowers
the anomalous peaks.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-25 20:43:04 +02:00
co-authored by Claude Opus 4.8
parent 9ba2401b53
commit 7cc418ed2f
+100
View File
@@ -0,0 +1,100 @@
#!/usr/bin/env bash
# Anomalous-accuracy scoreboard: score a jfjoch anomalous merge against XDS on the HEWL S/Cl
# anomalous signal. Accuracy (not just precision): ANODE puts the anomalous density peaks at the
# model's real anomalous atoms, and the peak height (sigma) is how strongly the data sees them.
#
# ./anomalous_scoreboard.sh <jfjoch_anomalous.hkl> [label]
#
# <jfjoch_anomalous.hkl> is a jfjoch `--scaling-output txt` file produced with -A (anomalous):
# whitespace-separated "h k l I sigma [rfree]". Prints <d"/sig> per shell (SHELXC) and the averaged
# anomalous peak height at SD_MET / SG_CYS / CL_CL for the input vs XDS, with the XDS/input ratio
# (1.0 = matched XDS). The XDS reference is computed once and cached (delete .anom_xds_cache to redo).
#
# The SHELX binaries are run under qemu-user because this WSL2 kernel sets vsyscall=none, which the
# old static SHELX builds need (otherwise they exit 174 silently). Paths are overridable via env.
# (No set -e/-u: sourcing CCP4's setup trips -u, and many greps below legitimately find nothing.)
set -o pipefail
INPUT="${1:?usage: anomalous_scoreboard.sh <jfjoch_anomalous.hkl> [label]}"
LABEL="${2:-jfjoch}"
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SHELXC="${SHELXC_BIN:-/home/leonarski_f/shelxc}"
ANODE="${ANODE_BIN:-/home/leonarski_f/anode}"
QEMU="${QEMU_BIN:-/usr/bin/qemu-x86_64-static}"
CCP4_SETUP="${CCP4_SETUP:-/opt/xtal/ccp4-9/bin/ccp4.setup-sh}"
MODEL_CIF="${MODEL_CIF:-$HERE/6G8A_refine_001.cif}"
XDS_REF="${XDS_REF:-$HERE/XDS_ASCII.HKL}"
CELL="${CELL:-78.22 78.22 37.76 90 90 90}"
SG="${SG:-P43212}"
WAVELENGTH="${WAVELENGTH:-0.95329}"
# shellcheck disable=SC1090
source "$CCP4_SETUP" 2>/dev/null || true
WORK="$(mktemp -d)"; trap 'rm -rf "$WORK"' EXIT
# Model PDB for ANODE (cached): add the wavelength REMARK, drop ANISOU.
MODEL_PDB="$HERE/6G8A_anode.pdb"
if [ ! -s "$MODEL_PDB" ]; then
gemmi convert "$MODEL_CIF" "$WORK/m.pdb"
{ echo "REMARK 200 WAVELENGTH OR RANGE (A) : $WAVELENGTH"
grep -E '^CRYST1|^ATOM|^HETATM|^TER|^END' "$WORK/m.pdb"; } > "$MODEL_PDB"
fi
# run_one <name> <datafile> <is_xds 0|1> : SHELXC + ANODE; echoes "SD_MET SG_CYS CL_CL" densities,
# leaves <name>.shelxc / <name>.lsa in $WORK.
run_one() {
local name="$1" data="$2" is_xds="$3" sadfile="$2"
cp "$MODEL_PDB" "$WORK/$name.pdb"
if [ "$is_xds" = 0 ]; then # jfjoch txt -> SHELX HKLF4 (3I4,2F8.2) + terminator
awk '{printf "%4d%4d%4d%8.2f%8.2f\n",$1,$2,$3,$4,$5}' "$data" > "$WORK/${name}_in.hkl"
printf "%4d%4d%4d%8.2f%8.2f\n" 0 0 0 0 0 >> "$WORK/${name}_in.hkl"
sadfile="$WORK/${name}_in.hkl"
fi
( cd "$WORK"
"$QEMU" "$SHELXC" "$name" > "$name.shelxc" 2>&1 <<EOF
CELL $CELL
SPAG $SG
SAD $sadfile
FIND 10
SFAC S
EOF
"$QEMU" "$ANODE" "$name" > "$name.anode" 2>&1 )
local sd sg cl
sd=$(grep -E ' SD_MET *$' "$WORK/$name.lsa" | awk '{print $1}')
sg=$(grep -E ' SG_CYS *$' "$WORK/$name.lsa" | awk '{print $1}')
cl=$(grep -E ' CL_CL *$' "$WORK/$name.lsa" | awk '{print $1}')
echo "${sd:-NA} ${sg:-NA} ${cl:-NA}"
}
echo "Scoring '$LABEL' ($INPUT) vs XDS ..."
read -r O_SD O_SG O_CL < <(run_one ours "$INPUT" 0)
O_DSIG=$(grep -E '<d"/sig>' "$WORK/ours.shelxc" | head -1 | sed 's/.*<d"\/sig>//')
O_RESL=$(grep -E '^ Resl' "$WORK/ours.shelxc" | head -1)
XDS_CACHE="$HERE/.anom_xds_cache"
if [ -s "$XDS_CACHE" ]; then
# shellcheck disable=SC1090
source "$XDS_CACHE"
else
read -r X_SD X_SG X_CL < <(run_one xds "$XDS_REF" 1)
X_DSIG=$(grep -E '<d"/sig>' "$WORK/xds.shelxc" | head -1 | sed 's/.*<d"\/sig>//')
{ echo "X_SD='$X_SD'"; echo "X_SG='$X_SG'"; echo "X_CL='$X_CL'"; echo "X_DSIG='$X_DSIG'"; } > "$XDS_CACHE"
fi
ratio() { awk -v a="$1" -v b="$2" 'BEGIN{ if(a+0>0) printf "%.2fx", b/a; else print "NA" }'; }
echo
echo "=== <d\"/sig> per resolution shell (0.80 = no anomalous signal) ==="
echo " $O_RESL" | sed 's/Resl\./Resl./'
printf "%-8s %s\n" "$LABEL" "$O_DSIG"
printf "%-8s %s\n" "XDS" "$X_DSIG"
echo
echo "=== averaged anomalous peak height at the model atoms (sigma) ==="
printf "%-8s %9s %9s %8s\n" "atom" "$LABEL" "XDS" "XDS/in"
printf "%-8s %9s %9s %8s\n" "SD_MET" "$O_SD" "$X_SD" "$(ratio "$O_SD" "$X_SD")"
printf "%-8s %9s %9s %8s\n" "SG_CYS" "$O_SG" "$X_SG" "$(ratio "$O_SG" "$X_SG")"
printf "%-8s %9s %9s %8s\n" "CL_CL" "$O_CL" "$X_CL" "$(ratio "$O_CL" "$X_CL")"
echo
awk -v a="$O_SD" -v b="$O_SG" -v xa="$X_SD" -v xb="$X_SG" 'BEGIN{
if(a+0>0 && b+0>0) printf "Summary: S peak height = %.2fx XDS (SD_MET %.2f, SG_CYS %.2f vs %.2f, %.2f). 1.0x = matched XDS.\n", \
(a+b)/(xa+xb), a, b, xa, xb }'