extrapolation/coherent_difference.py

import gemmi
import numpy as np

def coherent_difference_fraction(F1, F2):
    """
    Coherent Difference Fraction (CDF)

    Parameters
    ----------
    F1, F2 : array-like of complex
        Complex structure factors on a common HKL set

    Returns
    -------
    cdf : float
        Fraction of coherent difference (0..1)
    """
    dF = F2 - F1

    num = np.abs(np.sum(dF))
    den = np.sum(np.abs(dF))

    if den <= 0:
        return np.nan

    return num / den

def resultant_angle(F1, F2, degrees=True):
    """
    Angle between resultant structure-factor vectors.

    Parameters
    ----------
    F1, F2 : array-like of complex
        Complex structure factors
    degrees : bool
        Return angle in degrees (default) or radians

    Returns
    -------
    angle : float
        Angle between resultants
    """
    R1 = np.sum(F1)
    R2 = np.sum(F2)

    if np.abs(R1) == 0 or np.abs(R2) == 0:
        return np.nan

    cosang = np.real(R1 * np.conj(R2)) / (np.abs(R1) * np.abs(R2))
    cosang = np.clip(cosang, -1.0, 1.0)

    angle = np.arccos(cosang)

    if degrees:
        angle = np.degrees(angle)

    return angle

def riso_xtrapol8(F1, F2):
    A1 = np.abs(F1)
    A2 = np.abs(F2)

    denom = 0.5 * (A1 + A2)
    mask = denom > 0

    if np.sum(mask) == 0:
        return np.nan

    num = np.sum(np.abs(A1[mask] - A2[mask]))
    den = np.sum(denom[mask])

    return num / den

def extrapolation_score(F1, F2):
    """
    Hybrid extrapolation success metric.

    Returns
    -------
    score : float
        0..1 (higher = better)
    details : dict
        Breakdown of components
    """
    cdf = coherent_difference_fraction(F1, F2)
    theta = resultant_angle(F1, F2, degrees=False)
    riso = riso_xtrapol8(F1, F2)

    if np.isnan(cdf) or np.isnan(theta):
        return np.nan, {}

    score = cdf * np.cos(theta)

    details = {
        "CDF": cdf,
        "theta_rad": theta,
        "theta_deg": np.degrees(theta),
        "cos_theta": np.cos(theta),
        "riso" : riso
    }

    return score, details

def read_sf_from_mtz(mtz_file,
                     F_col="F-obs-filtered",
                     PHI_col="PHIF-model",
                     require_positive_F=True):
    """
    Read complex structure factors from an MTZ.

    Returns
    -------
    F : np.ndarray (complex)
    hkl : np.ndarray (N, 3) int
    cell : gemmi.UnitCell
    """

    mtz = gemmi.read_mtz_file(mtz_file)
    cell = mtz.cell

    # --- Extract columns ---
    F = mtz.column_with_label(F_col).array
    PHI = mtz.column_with_label(PHI_col).array
    hkl = mtz.make_miller_array().astype(int)

    if len(F) != len(hkl):
        raise RuntimeError("HKL and F column length mismatch")

    # --- Optional filtering ---
    if require_positive_F:
        mask = F > 0.0
        F = F[mask]
        PHI = PHI[mask]
        hkl = hkl[mask]

    # --- Build complex SFs ---
    F_complex = F * np.exp(1j * np.deg2rad(PHI))

    return F_complex, hkl, cell

def read_sf_from_pdb(pdb_file, dmin):
    """
    Calculate structure factors from a PDB using Gemmi.
    Returns complex SFs and matching HKLs.
    """

    # -----------------------------
    # Read structure
    # -----------------------------
    st = gemmi.read_structure(pdb_file)
    st.setup_entities()
    model = st[0]

    cell = st.cell
    sg = gemmi.find_spacegroup_by_name(st.spacegroup_hm)

    # -----------------------------
    # Generate HKLs
    # -----------------------------
    hkl = np.array(
        gemmi.make_miller_array(cell, sg, dmin),
        dtype=int
    )

    # -----------------------------
    # SF calculator
    # -----------------------------
    calc = gemmi.StructureFactorCalculatorX(cell)

    # -----------------------------
    # Compute SFs one-by-one
    # -----------------------------
    F = np.empty(len(hkl), dtype=complex)

    for i, (h, k, l) in enumerate(hkl):
        F[i] = calc.calculate_sf_from_model(
            model,
            [int(h), int(k), int(l)]
        )

    return F, hkl, cell


def match_hkls(F1, hkl1, F2, hkl2):
    """
    Match two SF arrays on common HKLs.

    Returns
    -------
    F1c, F2c : np.ndarray (complex)
    hkl_common : np.ndarray (N, 3)
    """

    dict1 = {tuple(h): F1[i] for i, h in enumerate(hkl1)}
    dict2 = {tuple(h): F2[i] for i, h in enumerate(hkl2)}

    common = sorted(dict1.keys() & dict2.keys())

    if len(common) == 0:
        raise RuntimeError("No common HKLs between datasets")

    F1c = np.array([dict1[h] for h in common])
    F2c = np.array([dict2[h] for h in common])
    hklc = np.array(common, dtype=int)

    return F1c, F2c, hklc

def extrapolation_metrics(F1, F2):
    """
    Compute all coherence-based extrapolation metrics.
    """
    return {
        "CDF": coherent_difference_fraction(F1, F2),
        "theta_deg": resultant_angle(F1, F2, degrees=True),
        "theta_rad": resultant_angle(F1, F2, degrees=False),
        "score": extrapolation_score(F1, F2)[0],
        "riso" : riso_xtrapol8(F1, F2)
    }

#===============================================================================
# mtz_ref = "../data/apo_100k_refine_5.mtz"
# SF_ref, hkl_ref, cell_ref = read_sf_from_mtz( mtz_ref )
#
# mtz_trig = "../data/on_100k_refine_141.mtz"
# SF_trig, hkl_trig, cell_trig = read_sf_from_mtz( mtz_trig )
#
# SF_ref, SF_trig, hkl_comb = match_hkls(SF_ref, hkl_ref, SF_trig, hkl_trig)
#===============================================================================

dmin = 8.0
pdb_ref    = "../data/hewl_apo.pdb"
#pdb_ref = "../data/ser-arg-loop-apo.pdb"
print("generating ref SFs")
SF_ref, hkl_ref, cell_ref = read_sf_from_pdb(pdb_ref, dmin)
print("DONE")


pdb_trig = "../data/hewl_1.0-I.pdb"
#pdb_trig = "../data/ser-arg-loop-1.0-switch.pdb"
print("generating trig. SFs")
SF_trig, hkl_trig, cell_trig = read_sf_from_pdb(pdb_trig, dmin)
print("DONE")

SF_ref, SF_trig, hkl_comb = match_hkls(SF_ref, hkl_ref, SF_trig, hkl_trig)

print( extrapolation_metrics(SF_ref, SF_trig) )