import gemmi import numpy as np def coherent_difference_fraction(F1, F2): """ Coherent Difference Fraction (CDF) Parameters ---------- F1, F2 : array-like of complex Complex structure factors on a common HKL set Returns ------- cdf : float Fraction of coherent difference (0..1) """ dF = F2 - F1 num = np.abs(np.sum(dF)) den = np.sum(np.abs(dF)) if den <= 0: return np.nan return num / den def resultant_angle(F1, F2, degrees=True): """ Angle between resultant structure-factor vectors. Parameters ---------- F1, F2 : array-like of complex Complex structure factors degrees : bool Return angle in degrees (default) or radians Returns ------- angle : float Angle between resultants """ R1 = np.sum(F1) R2 = np.sum(F2) if np.abs(R1) == 0 or np.abs(R2) == 0: return np.nan cosang = np.real(R1 * np.conj(R2)) / (np.abs(R1) * np.abs(R2)) cosang = np.clip(cosang, -1.0, 1.0) angle = np.arccos(cosang) if degrees: angle = np.degrees(angle) return angle def riso_xtrapol8(F1, F2): A1 = np.abs(F1) A2 = np.abs(F2) denom = 0.5 * (A1 + A2) mask = denom > 0 if np.sum(mask) == 0: return np.nan num = np.sum(np.abs(A1[mask] - A2[mask])) den = np.sum(denom[mask]) return num / den def extrapolation_score(F1, F2): """ Hybrid extrapolation success metric. Returns ------- score : float 0..1 (higher = better) details : dict Breakdown of components """ cdf = coherent_difference_fraction(F1, F2) theta = resultant_angle(F1, F2, degrees=False) riso = riso_xtrapol8(F1, F2) if np.isnan(cdf) or np.isnan(theta): return np.nan, {} score = cdf * np.cos(theta) details = { "CDF": cdf, "theta_rad": theta, "theta_deg": np.degrees(theta), "cos_theta": np.cos(theta), "riso" : riso } return score, details def read_sf_from_mtz(mtz_file, F_col="F-obs-filtered", PHI_col="PHIF-model", require_positive_F=True): """ Read complex structure factors from an MTZ. Returns ------- F : np.ndarray (complex) hkl : np.ndarray (N, 3) int cell : gemmi.UnitCell """ mtz = gemmi.read_mtz_file(mtz_file) cell = mtz.cell # --- Extract columns --- F = mtz.column_with_label(F_col).array PHI = mtz.column_with_label(PHI_col).array hkl = mtz.make_miller_array().astype(int) if len(F) != len(hkl): raise RuntimeError("HKL and F column length mismatch") # --- Optional filtering --- if require_positive_F: mask = F > 0.0 F = F[mask] PHI = PHI[mask] hkl = hkl[mask] # --- Build complex SFs --- F_complex = F * np.exp(1j * np.deg2rad(PHI)) return F_complex, hkl, cell def read_sf_from_pdb(pdb_file, dmin): """ Calculate structure factors from a PDB using Gemmi. Returns complex SFs and matching HKLs. """ # ----------------------------- # Read structure # ----------------------------- st = gemmi.read_structure(pdb_file) st.setup_entities() model = st[0] cell = st.cell sg = gemmi.find_spacegroup_by_name(st.spacegroup_hm) # ----------------------------- # Generate HKLs # ----------------------------- hkl = np.array( gemmi.make_miller_array(cell, sg, dmin), dtype=int ) # ----------------------------- # SF calculator # ----------------------------- calc = gemmi.StructureFactorCalculatorX(cell) # ----------------------------- # Compute SFs one-by-one # ----------------------------- F = np.empty(len(hkl), dtype=complex) for i, (h, k, l) in enumerate(hkl): F[i] = calc.calculate_sf_from_model( model, [int(h), int(k), int(l)] ) return F, hkl, cell def match_hkls(F1, hkl1, F2, hkl2): """ Match two SF arrays on common HKLs. Returns ------- F1c, F2c : np.ndarray (complex) hkl_common : np.ndarray (N, 3) """ dict1 = {tuple(h): F1[i] for i, h in enumerate(hkl1)} dict2 = {tuple(h): F2[i] for i, h in enumerate(hkl2)} common = sorted(dict1.keys() & dict2.keys()) if len(common) == 0: raise RuntimeError("No common HKLs between datasets") F1c = np.array([dict1[h] for h in common]) F2c = np.array([dict2[h] for h in common]) hklc = np.array(common, dtype=int) return F1c, F2c, hklc def extrapolation_metrics(F1, F2): """ Compute all coherence-based extrapolation metrics. """ return { "CDF": coherent_difference_fraction(F1, F2), "theta_deg": resultant_angle(F1, F2, degrees=True), "theta_rad": resultant_angle(F1, F2, degrees=False), "score": extrapolation_score(F1, F2)[0], "riso" : riso_xtrapol8(F1, F2) } #=============================================================================== # mtz_ref = "../data/apo_100k_refine_5.mtz" # SF_ref, hkl_ref, cell_ref = read_sf_from_mtz( mtz_ref ) # # mtz_trig = "../data/on_100k_refine_141.mtz" # SF_trig, hkl_trig, cell_trig = read_sf_from_mtz( mtz_trig ) # # SF_ref, SF_trig, hkl_comb = match_hkls(SF_ref, hkl_ref, SF_trig, hkl_trig) #=============================================================================== dmin = 8.0 pdb_ref = "../data/hewl_apo.pdb" #pdb_ref = "../data/ser-arg-loop-apo.pdb" print("generating ref SFs") SF_ref, hkl_ref, cell_ref = read_sf_from_pdb(pdb_ref, dmin) print("DONE") pdb_trig = "../data/hewl_1.0-I.pdb" #pdb_trig = "../data/ser-arg-loop-1.0-switch.pdb" print("generating trig. SFs") SF_trig, hkl_trig, cell_trig = read_sf_from_pdb(pdb_trig, dmin) print("DONE") SF_ref, SF_trig, hkl_comb = match_hkls(SF_ref, hkl_ref, SF_trig, hkl_trig) print( extrapolation_metrics(SF_ref, SF_trig) )