import gemmi import numpy as np import matplotlib.pyplot as plt from Bio.SeqUtils.ProtParamData import fs # ----------------------------- # User inputs # ----------------------------- #apo_pdb = "../data/hewl_apo.pdb" apo_pdb = "../data/ser-arg-loop-apo.pdb" #occ100_pdb = "../data/hewl_1.0-I.pdb" occ100_pdb = "../data/ser-arg-loop-1.0-switch.pdb" #occb_pdb = "../data/hewl_0.1-I.pdb" occb_pdb = "../data/ser-arg-loop-0.1-switch.pdb" b_val = 0.1 a_val = 1.0 - b_val dmin = 2.5 alpha = 0.0001 ref_mtz = "../data/apo_100k_refine_5.mtz" # ----------------------------- # Helper: compute SFs # ----------------------------- def compute_sf(pdb_file, dmin): st = gemmi.read_structure(pdb_file) st.setup_entities() st.assign_label_seq_id() st.remove_alternative_conformations() model = st[0] cell = st.cell sg = st.find_spacegroup() if sg is None: if st.spacegroup_hm: sg = gemmi.SpaceGroup(st.spacegroup_hm) else: raise RuntimeError("No space group found") hkl = gemmi.make_miller_array( cell=cell, spacegroup=sg, dmin=dmin, unique=True ) calc = gemmi.StructureFactorCalculatorX(cell) F = np.array([ calc.calculate_sf_from_model(model, [int(h), int(k), int(l)]) for h, k, l in hkl ], dtype=np.complex128) return F, hkl, cell def add_resolution_dependent_noise(SF, hkl, cell, alpha=0.01, B=20.0, rng=None): """ Add resolution-dependent fractional noise to complex structure factors. Noise is applied to amplitudes only, phases preserved. """ F = np.abs(SF) phase = np.angle(SF) d = np.array([cell.calculate_d(list(h)) for h in hkl]) if rng is None: rng = np.random.default_rng() scale = alpha * np.exp(B / (4.0 * d**2)) noise = rng.normal(0.0, scale) # <-- FIX HERE F_noise = F * (1.0 + noise) return F_noise * np.exp(1j * phase), d def scale_sf_against_ref_mtz(ref_mtz_file, F_gen, hkl_gen,n_bins=20 ): """ Scale generated complex structure factors against a reference MTZ using global RMS scaling followed by resolution-binned RMS refinement. Parameters ---------- ref_mtz_file : str Reference MTZ filename F_gen : ndarray (complex) Generated structure factors hkl_gen : ndarray (N,3) HKLs for generated SFs n_bins : int Number of equal-population resolution bins Returns ------- Fgen_scaled : ndarray (complex) Scaled generated SFs (matched reflections only) hkls_common : list of tuple HKLs used d : ndarray Resolution values (Å) """ # ----------------------------- # Read reference MTZ # ----------------------------- mtz = gemmi.read_mtz_file(ref_mtz_file) F_col = "F-obs-filtered" PHI_col = "PHIF-model" F_ref = mtz.column_with_label(F_col).array PHI_ref = mtz.column_with_label(PHI_col).array hkl_ref = mtz.make_miller_array() cell = mtz.cell # Build complex reference SFs F_ref_cplx = F_ref * np.exp(1j * np.deg2rad(PHI_ref)) # ----------------------------- # HKL → SF dictionaries # ----------------------------- ref_dict = { tuple(map(int, h)): F_ref_cplx[i] for i, h in enumerate(hkl_ref) if F_ref[i] > 0.0 } gen_dict = { tuple(map(int, h)): F_gen[i] for i, h in enumerate(hkl_gen) } # ----------------------------- # Find common reflections # ----------------------------- hkls_common = sorted(ref_dict.keys() & gen_dict.keys()) if len(hkls_common) == 0: raise RuntimeError("No common HKLs between reference MTZ and generated SFs") Fref = np.array([ref_dict[h] for h in hkls_common]) Fgen = np.array([gen_dict[h] for h in hkls_common]) # ----------------------------- # Resolution # ----------------------------- d = np.array([cell.calculate_d(list(h)) for h in hkls_common]) # ----------------------------- # Sort by resolution (low → high) # ----------------------------- order = np.argsort(d)[::-1] Fref = Fref[order] Fgen = Fgen[order] d = d[order] hkls_common = [hkls_common[i] for i in order] # ----------------------------- # GLOBAL RMS SCALING # ----------------------------- k0 = np.sqrt( np.mean(np.abs(Fref)**2) / np.mean(np.abs(Fgen)**2) ) Fgen *= k0 # ----------------------------- # BIN-WISE RMS REFINEMENT # ----------------------------- bins = np.array_split(np.arange(len(d)), n_bins) Fgen_scaled = Fgen.copy() for b in bins: if len(b) == 0: continue amp_ref = np.abs(Fref[b]) amp_gen = np.abs(Fgen_scaled[b]) den = np.sum(amp_gen**2) if den <= 0: continue k = np.sqrt(np.sum(amp_ref**2) / den) Fgen_scaled[b] *= k return Fgen_scaled, hkls_common def resultant(SF): R = np.sum(SF) # vector sum magnitude = np.abs(R) # |sum| phase_deg = np.angle(R, deg=True) # phase in degrees return magnitude, phase_deg def riso(F_ref, F_test): """ Xtrapol8 definition of Riso: sum(|F_ref - F_test|) / sum((F_ref + F_test)/2) """ # amplitudes (ensure real and non-negative) A_ref = np.abs(F_ref) A_test = np.abs(F_test) denom = 0.5*(A_ref + A_test) mask = denom > 0 # avoid zero denominator num = np.sum(np.abs(A_ref - A_test)) den = np.sum(denom) return num/den if den != 0 else np.nan def cciso(F_ref, F_test): """ Xtrapol8 definition of CCiso: Pearson CC of amplitudes F_ref and F_test """ A_ref = np.abs(F_ref) A_test = np.abs(F_test) # mask out invalids if any mask = np.isfinite(A_ref) & np.isfinite(A_test) A_ref = A_ref[mask] A_test = A_test[mask] if len(A_ref) < 2: return np.nan A_ref_mean = np.mean(A_ref) A_test_mean = np.mean(A_test) num = np.sum((A_ref - A_ref_mean)*(A_test - A_test_mean)) den = np.sqrt(np.sum((A_ref - A_ref_mean)**2)*np.sum((A_test - A_test_mean)**2)) return num/den if den != 0 else np.nan def make_equal_population_resolution_bins( d, nbins=20): d = np.asarray(d) # Sort reflections by resolution (low → high res) order = np.argsort(d)[::-1] # large d first d_sorted = d[order] n = len(d) bin_size = n // nbins bins = [] for i in range(nbins): start = i * bin_size end = (i + 1) * bin_size if i < nbins - 1 else n idx = order[start:end] bins.append({ "dmax": d_sorted[start], # low resolution edge "dmin": d_sorted[end - 1], # high resolution edge "indices": idx }) return bins def riso_cciso_vs_resolution(Fref, Ftest, d, nbins, min_per_bin=20): bins = make_equal_population_resolution_bins(d, nbins) dmid = [] riso_vals = [] cciso_vals = [] for b in bins: idx = b["indices"] if len(idx) < min_per_bin: continue F1 = Fref[idx] F2 = Ftest[idx] dmid.append(0.5 * (b["dmin"] + b["dmax"])) riso_vals.append(riso(F1, F2)) cciso_vals.append(cciso(F1, F2)) return np.array(dmid), np.array(riso_vals), np.array(cciso_vals) def plot_riso_cciso_vs_resolution(d_centers, riso_vals, cciso_vals): fig, ax1 = plt.subplots() ax1.plot(1/d_centers, riso_vals, color="red", marker='o') ax1.set_xlabel("Resolution (Å)") ax1.set_ylabel("Riso", color="red") ax2 = ax1.twinx() ax2.plot(1/d_centers, cciso_vals, color="blue", marker='s') ax2.set_ylabel("CCiso", color="blue") plt.title("Riso and CCiso vs Resolution") plt.tight_layout() plt.show() # ----------------------------- # Compute structure factors # ----------------------------- print( "making apo SFs" ) SF_apo, hkl_apo, cell_apo = compute_sf(apo_pdb, dmin) SF_apo, hkl_apo = scale_sf_against_ref_mtz( ref_mtz, SF_apo, hkl_apo ) #SF_apo, d = add_resolution_dependent_noise(SF_apo, hkl_apo, cell_apo, alpha, B=20.0, rng=None) print( "done" ) print( "making 100 % SFs" ) SF_100, hkl_100, cell_100 = compute_sf(occ100_pdb, dmin) # Fb_full SF_100, hkl_100 = scale_sf_against_ref_mtz( ref_mtz, SF_100, hkl_100 ) #SF_100, d = add_resolution_dependent_noise( SF_100, hkl_100, cell_100, alpha, B=20.0, rng=None ) print( "done" ) print( "making b % SFs" ) SF_b, hkl_b, cell_b = compute_sf(occb_pdb, dmin) # Fb_partial SF_b, hkl_b = scale_sf_against_ref_mtz( ref_mtz, SF_b, hkl_b ) SF_b, d = add_resolution_dependent_noise( SF_b, hkl_b, cell_b, alpha, B=20.0, rng=None ) print( "done" ) d_centers, riso_vals, cciso_vals = riso_cciso_vs_resolution(SF_apo, SF_b, d, 20) plot_riso_cciso_vs_resolution(d_centers, riso_vals, cciso_vals) F_apo = np.abs(SF_apo) phi_apo = np.angle(SF_apo) F_100 = np.abs(SF_100) phi_100 = np.angle(SF_100) F_b = np.abs(SF_b) phi_b = np.angle(SF_b) SF_100_phi_apo = F_100 * np.exp(1j * np.deg2rad(phi_apo)) SF_b_phi_apo = F_b * np.exp(1j * np.deg2rad(phi_b)) # Use only amplitudes of partial/full occupancy, phases from Fa #Fb_partial_extrap = np.abs(Fb_partial) * np.exp(1j*phi_apo) #Fb_full_extrap = np.abs(Fb_full) * np.exp(1j*phi_apo) # ----------------------------- # correct method # ----------------------------- # Forward extrapolation: Fab = aFa + bFb_partial SF_ab = a_val*SF_apo + b_val*SF_100 delta_forward = SF_ab - SF_b print("Correct method TEST: SFab = aSFa + bSF100") print("Mean |ΔF|:", np.mean(np.abs(delta_forward))) print("Mean phase Δ (deg):", np.mean(np.abs(np.angle(SF_ab / SF_b, deg=True)))) F_ab_calc, phi_ab_calc = resultant(SF_ab) F_ab_model, phi_ab_model = resultant(SF_b) print("Sum SF_ab calculated: |F| = {0}, phi = {1}".format( F_ab_calc, phi_ab_calc ) ) print("Sum SF_b model : |F| = {0}, phi = {1}".format( F_ab_model, phi_ab_model ) ) # ----------------------------- # Fext test # ----------------------------- SF_extr = (SF_b - SF_apo)/b_val + SF_apo delta_Fextr = SF_extr - SF_100 print("Fextr SF TEST: SF_extr = (SF_b - SF_apo)/b_val + SF_apo") print("Mean |ΔF|:", np.mean(np.abs(delta_Fextr))) print("Mean phase Δ (deg):", np.mean(np.abs(np.angle(SF_extr / SF_100, deg=True)))) F_100_calc, phi_100_calc = resultant(SF_extr) F_100_model, phi_100_model = resultant(SF_100) print("Sum SF_extr calculated: |F| = {0}, phi = {1}".format( F_100_calc, phi_100_calc ) ) print("Sum SF_100 model : |F| = {0}, phi = {1}".format( F_100_model, phi_100_model ) ) # ----------------------------- # correct method test with only Fs and # ----------------------------- # Forward extrapolation: Fab = aFa + bFb_partial F_ab = a_val*F_apo + b_val*F_100 SF_ab = F_ab * np.exp(1j * phi_apo) delta_forward = SF_ab - SF_b print("Correct method TEST: Fab = aFa + bF100") print("Mean |ΔF|:", np.mean(np.abs(delta_forward))) print("Mean phase Δ (deg):", np.mean(np.abs(np.angle(SF_ab / SF_b, deg=True)))) F_ab_calc, phi_ab_calc = resultant(SF_ab) F_ab_model, phi_ab_model = resultant(SF_b) print("Sum SF_ab calculated: |F| = {0}, phi = {1}".format( F_ab_calc, phi_ab_calc ) ) print("Sum SF_b model : |F| = {0}, phi = {1}".format( F_ab_model, phi_ab_model ) ) # ----------------------------- # Fext test just Fs # ----------------------------- F_extr = (F_b - F_apo)/b_val + F_apo SF_extr = F_extr * np.exp(1j * phi_apo) delta_Fextr = SF_extr - SF_100 print("Fextr TEST: SF_extr = (F_b - F_apo)/b_val + F_apo") print("Mean |ΔF|:", np.mean(np.abs(delta_Fextr))) print("Mean phase Δ (deg):", np.mean(np.abs(np.angle(SF_extr / SF_100, deg=True)))) F_100_calc, phi_100_calc = resultant(SF_extr) F_100_model, phi_100_model = resultant(SF_100) print("Sum SF_extr calculated: |F| = {0}, phi = {1}".format( F_100_calc, phi_100_calc ) ) print("Sum SF_100 model : |F| = {0}, phi = {1}".format( F_100_model, phi_100_model ) ) F_apo_model, phi_apo_model = resultant(SF_apo) print("Sum SF_apo : |F| = {0}, phi = {1}".format( F_apo_model, phi_apo_model ) ) # ----------------------------- # Resultant vectors for plotting # ----------------------------- sum_SF_apo = np.sum(SF_apo) sum_SF_100 = np.sum(SF_100) sum_SF_b = np.sum(SF_b) sum_SF_ab_calc = np.sum(SF_ab) sum_SF_100_calc = np.sum(SF_extr) # ----------------------------- # Plot resultant vectors # ----------------------------- plt.figure(figsize=(8,8)) ax = plt.gca() ax.set_aspect('equal') origin = 0 + 0j ax.arrow(origin.real, origin.imag, sum_SF_apo.real, sum_SF_apo.imag, head_width=500, head_length=500, fc='blue', ec='blue', label='Fapo model') ax.arrow(origin.real, origin.imag, sum_SF_100.real, sum_SF_100.imag, head_width=500, head_length=500, fc='red', ec='red', label='F100 model') ax.arrow(origin.real, origin.imag, sum_SF_b.real, sum_SF_b.imag, head_width=500, head_length=500, fc='green', ec='green', label='Fab model') # Vector showing addition to Fa #ax.arrow(origin.real, origin.imag, sum_SF_ab_calc.real, sum_SF_ab_calc.imag, # head_width=500, head_length=500, fc='yellow', ec='yellow', label='Fab calc') ax.arrow(origin.real, origin.imag, sum_SF_100_calc.real, sum_SF_100_calc.imag, head_width=500, head_length=500, fc='black', ec='black', label='Fextr calc') ax.set_xlabel("Re(F)") ax.set_ylabel("Im(F)") ax.set_title("SF Resultant Vectors") ax.grid(True) ax.legend() plt.show()