Add anomalous-accuracy scoreboard (HEWL ANODE vs XDS)
One command to score a jfjoch anomalous merge against XDS on physical accuracy: feed the -A merge through SHELXC + ANODE (run under qemu-user, since this WSL2 kernel's vsyscall=none breaks the static SHELX binaries) and report <d"/sig> per shell and the averaged anomalous peak height at the model S/Cl sites vs XDS. Unlike R-meas/CC1/2 (precision) it catches accuracy regressions - e.g. outlier rejection raises CC1/2 but lowers the anomalous peaks. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Executable
+100
@@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env bash
|
||||
# Anomalous-accuracy scoreboard: score a jfjoch anomalous merge against XDS on the HEWL S/Cl
|
||||
# anomalous signal. Accuracy (not just precision): ANODE puts the anomalous density peaks at the
|
||||
# model's real anomalous atoms, and the peak height (sigma) is how strongly the data sees them.
|
||||
#
|
||||
# ./anomalous_scoreboard.sh <jfjoch_anomalous.hkl> [label]
|
||||
#
|
||||
# <jfjoch_anomalous.hkl> is a jfjoch `--scaling-output txt` file produced with -A (anomalous):
|
||||
# whitespace-separated "h k l I sigma [rfree]". Prints <d"/sig> per shell (SHELXC) and the averaged
|
||||
# anomalous peak height at SD_MET / SG_CYS / CL_CL for the input vs XDS, with the XDS/input ratio
|
||||
# (1.0 = matched XDS). The XDS reference is computed once and cached (delete .anom_xds_cache to redo).
|
||||
#
|
||||
# The SHELX binaries are run under qemu-user because this WSL2 kernel sets vsyscall=none, which the
|
||||
# old static SHELX builds need (otherwise they exit 174 silently). Paths are overridable via env.
|
||||
# (No set -e/-u: sourcing CCP4's setup trips -u, and many greps below legitimately find nothing.)
|
||||
set -o pipefail
|
||||
|
||||
INPUT="${1:?usage: anomalous_scoreboard.sh <jfjoch_anomalous.hkl> [label]}"
|
||||
LABEL="${2:-jfjoch}"
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
SHELXC="${SHELXC_BIN:-/home/leonarski_f/shelxc}"
|
||||
ANODE="${ANODE_BIN:-/home/leonarski_f/anode}"
|
||||
QEMU="${QEMU_BIN:-/usr/bin/qemu-x86_64-static}"
|
||||
CCP4_SETUP="${CCP4_SETUP:-/opt/xtal/ccp4-9/bin/ccp4.setup-sh}"
|
||||
MODEL_CIF="${MODEL_CIF:-$HERE/6G8A_refine_001.cif}"
|
||||
XDS_REF="${XDS_REF:-$HERE/XDS_ASCII.HKL}"
|
||||
CELL="${CELL:-78.22 78.22 37.76 90 90 90}"
|
||||
SG="${SG:-P43212}"
|
||||
WAVELENGTH="${WAVELENGTH:-0.95329}"
|
||||
|
||||
# shellcheck disable=SC1090
|
||||
source "$CCP4_SETUP" 2>/dev/null || true
|
||||
WORK="$(mktemp -d)"; trap 'rm -rf "$WORK"' EXIT
|
||||
|
||||
# Model PDB for ANODE (cached): add the wavelength REMARK, drop ANISOU.
|
||||
MODEL_PDB="$HERE/6G8A_anode.pdb"
|
||||
if [ ! -s "$MODEL_PDB" ]; then
|
||||
gemmi convert "$MODEL_CIF" "$WORK/m.pdb"
|
||||
{ echo "REMARK 200 WAVELENGTH OR RANGE (A) : $WAVELENGTH"
|
||||
grep -E '^CRYST1|^ATOM|^HETATM|^TER|^END' "$WORK/m.pdb"; } > "$MODEL_PDB"
|
||||
fi
|
||||
|
||||
# run_one <name> <datafile> <is_xds 0|1> : SHELXC + ANODE; echoes "SD_MET SG_CYS CL_CL" densities,
|
||||
# leaves <name>.shelxc / <name>.lsa in $WORK.
|
||||
run_one() {
|
||||
local name="$1" data="$2" is_xds="$3" sadfile="$2"
|
||||
cp "$MODEL_PDB" "$WORK/$name.pdb"
|
||||
if [ "$is_xds" = 0 ]; then # jfjoch txt -> SHELX HKLF4 (3I4,2F8.2) + terminator
|
||||
awk '{printf "%4d%4d%4d%8.2f%8.2f\n",$1,$2,$3,$4,$5}' "$data" > "$WORK/${name}_in.hkl"
|
||||
printf "%4d%4d%4d%8.2f%8.2f\n" 0 0 0 0 0 >> "$WORK/${name}_in.hkl"
|
||||
sadfile="$WORK/${name}_in.hkl"
|
||||
fi
|
||||
( cd "$WORK"
|
||||
"$QEMU" "$SHELXC" "$name" > "$name.shelxc" 2>&1 <<EOF
|
||||
CELL $CELL
|
||||
SPAG $SG
|
||||
SAD $sadfile
|
||||
FIND 10
|
||||
SFAC S
|
||||
EOF
|
||||
"$QEMU" "$ANODE" "$name" > "$name.anode" 2>&1 )
|
||||
local sd sg cl
|
||||
sd=$(grep -E ' SD_MET *$' "$WORK/$name.lsa" | awk '{print $1}')
|
||||
sg=$(grep -E ' SG_CYS *$' "$WORK/$name.lsa" | awk '{print $1}')
|
||||
cl=$(grep -E ' CL_CL *$' "$WORK/$name.lsa" | awk '{print $1}')
|
||||
echo "${sd:-NA} ${sg:-NA} ${cl:-NA}"
|
||||
}
|
||||
|
||||
echo "Scoring '$LABEL' ($INPUT) vs XDS ..."
|
||||
read -r O_SD O_SG O_CL < <(run_one ours "$INPUT" 0)
|
||||
O_DSIG=$(grep -E '<d"/sig>' "$WORK/ours.shelxc" | head -1 | sed 's/.*<d"\/sig>//')
|
||||
O_RESL=$(grep -E '^ Resl' "$WORK/ours.shelxc" | head -1)
|
||||
|
||||
XDS_CACHE="$HERE/.anom_xds_cache"
|
||||
if [ -s "$XDS_CACHE" ]; then
|
||||
# shellcheck disable=SC1090
|
||||
source "$XDS_CACHE"
|
||||
else
|
||||
read -r X_SD X_SG X_CL < <(run_one xds "$XDS_REF" 1)
|
||||
X_DSIG=$(grep -E '<d"/sig>' "$WORK/xds.shelxc" | head -1 | sed 's/.*<d"\/sig>//')
|
||||
{ echo "X_SD='$X_SD'"; echo "X_SG='$X_SG'"; echo "X_CL='$X_CL'"; echo "X_DSIG='$X_DSIG'"; } > "$XDS_CACHE"
|
||||
fi
|
||||
|
||||
ratio() { awk -v a="$1" -v b="$2" 'BEGIN{ if(a+0>0) printf "%.2fx", b/a; else print "NA" }'; }
|
||||
echo
|
||||
echo "=== <d\"/sig> per resolution shell (0.80 = no anomalous signal) ==="
|
||||
echo " $O_RESL" | sed 's/Resl\./Resl./'
|
||||
printf "%-8s %s\n" "$LABEL" "$O_DSIG"
|
||||
printf "%-8s %s\n" "XDS" "$X_DSIG"
|
||||
echo
|
||||
echo "=== averaged anomalous peak height at the model atoms (sigma) ==="
|
||||
printf "%-8s %9s %9s %8s\n" "atom" "$LABEL" "XDS" "XDS/in"
|
||||
printf "%-8s %9s %9s %8s\n" "SD_MET" "$O_SD" "$X_SD" "$(ratio "$O_SD" "$X_SD")"
|
||||
printf "%-8s %9s %9s %8s\n" "SG_CYS" "$O_SG" "$X_SG" "$(ratio "$O_SG" "$X_SG")"
|
||||
printf "%-8s %9s %9s %8s\n" "CL_CL" "$O_CL" "$X_CL" "$(ratio "$O_CL" "$X_CL")"
|
||||
echo
|
||||
awk -v a="$O_SD" -v b="$O_SG" -v xa="$X_SD" -v xb="$X_SG" 'BEGIN{
|
||||
if(a+0>0 && b+0>0) printf "Summary: S peak height = %.2fx XDS (SD_MET %.2f, SG_CYS %.2f vs %.2f, %.2f). 1.0x = matched XDS.\n", \
|
||||
(a+b)/(xa+xb), a, b, xa, xb }'
|
||||
Reference in New Issue
Block a user