Files
extrapolation/coherent_difference.py
John Beale 15ea8f8cd5 script dump
2026-02-17 08:52:57 +01:00

248 lines
5.9 KiB
Python

import gemmi
import numpy as np
def coherent_difference_fraction(F1, F2):
"""
Coherent Difference Fraction (CDF)
Parameters
----------
F1, F2 : array-like of complex
Complex structure factors on a common HKL set
Returns
-------
cdf : float
Fraction of coherent difference (0..1)
"""
dF = F2 - F1
num = np.abs(np.sum(dF))
den = np.sum(np.abs(dF))
if den <= 0:
return np.nan
return num / den
def resultant_angle(F1, F2, degrees=True):
"""
Angle between resultant structure-factor vectors.
Parameters
----------
F1, F2 : array-like of complex
Complex structure factors
degrees : bool
Return angle in degrees (default) or radians
Returns
-------
angle : float
Angle between resultants
"""
R1 = np.sum(F1)
R2 = np.sum(F2)
if np.abs(R1) == 0 or np.abs(R2) == 0:
return np.nan
cosang = np.real(R1 * np.conj(R2)) / (np.abs(R1) * np.abs(R2))
cosang = np.clip(cosang, -1.0, 1.0)
angle = np.arccos(cosang)
if degrees:
angle = np.degrees(angle)
return angle
def riso_xtrapol8(F1, F2):
A1 = np.abs(F1)
A2 = np.abs(F2)
denom = 0.5 * (A1 + A2)
mask = denom > 0
if np.sum(mask) == 0:
return np.nan
num = np.sum(np.abs(A1[mask] - A2[mask]))
den = np.sum(denom[mask])
return num / den
def extrapolation_score(F1, F2):
"""
Hybrid extrapolation success metric.
Returns
-------
score : float
0..1 (higher = better)
details : dict
Breakdown of components
"""
cdf = coherent_difference_fraction(F1, F2)
theta = resultant_angle(F1, F2, degrees=False)
riso = riso_xtrapol8(F1, F2)
if np.isnan(cdf) or np.isnan(theta):
return np.nan, {}
score = cdf * np.cos(theta)
details = {
"CDF": cdf,
"theta_rad": theta,
"theta_deg": np.degrees(theta),
"cos_theta": np.cos(theta),
"riso" : riso
}
return score, details
def read_sf_from_mtz(mtz_file,
F_col="F-obs-filtered",
PHI_col="PHIF-model",
require_positive_F=True):
"""
Read complex structure factors from an MTZ.
Returns
-------
F : np.ndarray (complex)
hkl : np.ndarray (N, 3) int
cell : gemmi.UnitCell
"""
mtz = gemmi.read_mtz_file(mtz_file)
cell = mtz.cell
# --- Extract columns ---
F = mtz.column_with_label(F_col).array
PHI = mtz.column_with_label(PHI_col).array
hkl = mtz.make_miller_array().astype(int)
if len(F) != len(hkl):
raise RuntimeError("HKL and F column length mismatch")
# --- Optional filtering ---
if require_positive_F:
mask = F > 0.0
F = F[mask]
PHI = PHI[mask]
hkl = hkl[mask]
# --- Build complex SFs ---
F_complex = F * np.exp(1j * np.deg2rad(PHI))
return F_complex, hkl, cell
def read_sf_from_pdb(pdb_file, dmin):
"""
Calculate structure factors from a PDB using Gemmi.
Returns complex SFs and matching HKLs.
"""
# -----------------------------
# Read structure
# -----------------------------
st = gemmi.read_structure(pdb_file)
st.setup_entities()
model = st[0]
cell = st.cell
sg = gemmi.find_spacegroup_by_name(st.spacegroup_hm)
# -----------------------------
# Generate HKLs
# -----------------------------
hkl = np.array(
gemmi.make_miller_array(cell, sg, dmin),
dtype=int
)
# -----------------------------
# SF calculator
# -----------------------------
calc = gemmi.StructureFactorCalculatorX(cell)
# -----------------------------
# Compute SFs one-by-one
# -----------------------------
F = np.empty(len(hkl), dtype=complex)
for i, (h, k, l) in enumerate(hkl):
F[i] = calc.calculate_sf_from_model(
model,
[int(h), int(k), int(l)]
)
return F, hkl, cell
def match_hkls(F1, hkl1, F2, hkl2):
"""
Match two SF arrays on common HKLs.
Returns
-------
F1c, F2c : np.ndarray (complex)
hkl_common : np.ndarray (N, 3)
"""
dict1 = {tuple(h): F1[i] for i, h in enumerate(hkl1)}
dict2 = {tuple(h): F2[i] for i, h in enumerate(hkl2)}
common = sorted(dict1.keys() & dict2.keys())
if len(common) == 0:
raise RuntimeError("No common HKLs between datasets")
F1c = np.array([dict1[h] for h in common])
F2c = np.array([dict2[h] for h in common])
hklc = np.array(common, dtype=int)
return F1c, F2c, hklc
def extrapolation_metrics(F1, F2):
"""
Compute all coherence-based extrapolation metrics.
"""
return {
"CDF": coherent_difference_fraction(F1, F2),
"theta_deg": resultant_angle(F1, F2, degrees=True),
"theta_rad": resultant_angle(F1, F2, degrees=False),
"score": extrapolation_score(F1, F2)[0],
"riso" : riso_xtrapol8(F1, F2)
}
#===============================================================================
# mtz_ref = "../data/apo_100k_refine_5.mtz"
# SF_ref, hkl_ref, cell_ref = read_sf_from_mtz( mtz_ref )
#
# mtz_trig = "../data/on_100k_refine_141.mtz"
# SF_trig, hkl_trig, cell_trig = read_sf_from_mtz( mtz_trig )
#
# SF_ref, SF_trig, hkl_comb = match_hkls(SF_ref, hkl_ref, SF_trig, hkl_trig)
#===============================================================================
dmin = 8.0
pdb_ref = "../data/hewl_apo.pdb"
#pdb_ref = "../data/ser-arg-loop-apo.pdb"
print("generating ref SFs")
SF_ref, hkl_ref, cell_ref = read_sf_from_pdb(pdb_ref, dmin)
print("DONE")
pdb_trig = "../data/hewl_1.0-I.pdb"
#pdb_trig = "../data/ser-arg-loop-1.0-switch.pdb"
print("generating trig. SFs")
SF_trig, hkl_trig, cell_trig = read_sf_from_pdb(pdb_trig, dmin)
print("DONE")
SF_ref, SF_trig, hkl_comb = match_hkls(SF_ref, hkl_ref, SF_trig, hkl_trig)
print( extrapolation_metrics(SF_ref, SF_trig) )