import gemmi
import numpy as np
import matplotlib.pyplot as plt
from Bio.SeqUtils.ProtParamData import fs

# -----------------------------
# User inputs
# -----------------------------
#apo_pdb    = "../data/hewl_apo.pdb"
apo_pdb    = "../data/ser-arg-loop-apo.pdb"
#occ100_pdb = "../data/hewl_1.0-I.pdb"
occ100_pdb = "../data/ser-arg-loop-1.0-switch.pdb"
#occb_pdb   = "../data/hewl_0.1-I.pdb"
occb_pdb = "../data/ser-arg-loop-0.1-switch.pdb"
b_val      = 0.1
a_val      = 1.0 - b_val
dmin       = 2.5
alpha      = 0.0001
ref_mtz    = "../data/apo_100k_refine_5.mtz"


# -----------------------------
# Helper: compute SFs
# -----------------------------

def compute_sf(pdb_file, dmin):
    
    st = gemmi.read_structure(pdb_file)
    st.setup_entities()
    st.assign_label_seq_id()
    st.remove_alternative_conformations()
    model = st[0]
    cell = st.cell

    sg = st.find_spacegroup()
    if sg is None:
        if st.spacegroup_hm:
            sg = gemmi.SpaceGroup(st.spacegroup_hm)
        else:
            raise RuntimeError("No space group found")

    hkl = gemmi.make_miller_array(
        cell=cell,
        spacegroup=sg,
        dmin=dmin,
        unique=True
    )

    calc = gemmi.StructureFactorCalculatorX(cell)
    F = np.array([
        calc.calculate_sf_from_model(model, [int(h), int(k), int(l)])
        for h, k, l in hkl
    ], dtype=np.complex128)

    return F, hkl, cell


def add_resolution_dependent_noise(SF, hkl, cell, alpha=0.01, B=20.0, rng=None):
    """
    Add resolution-dependent fractional noise to complex structure factors.

    Noise is applied to amplitudes only, phases preserved.
    """
    F = np.abs(SF)
    phase = np.angle(SF)

    d = np.array([cell.calculate_d(list(h)) for h in hkl])

    if rng is None:
        rng = np.random.default_rng()

    scale = alpha * np.exp(B / (4.0 * d**2))
    noise = rng.normal(0.0, scale)   # <-- FIX HERE

    F_noise = F * (1.0 + noise)

    return F_noise * np.exp(1j * phase), d

def scale_sf_against_ref_mtz(ref_mtz_file, F_gen, hkl_gen,n_bins=20 ):
    """
    Scale generated complex structure factors against a reference MTZ
    using global RMS scaling followed by resolution-binned RMS refinement.

    Parameters
    ----------
    ref_mtz_file : str
        Reference MTZ filename
    F_gen : ndarray (complex)
        Generated structure factors
    hkl_gen : ndarray (N,3)
        HKLs for generated SFs
    n_bins : int
        Number of equal-population resolution bins

    Returns
    -------
    Fgen_scaled : ndarray (complex)
        Scaled generated SFs (matched reflections only)
    hkls_common : list of tuple
        HKLs used
    d : ndarray
        Resolution values (Å)
    """

    # -----------------------------
    # Read reference MTZ
    # -----------------------------
    mtz = gemmi.read_mtz_file(ref_mtz_file)

    F_col = "F-obs-filtered"
    PHI_col = "PHIF-model"

    F_ref = mtz.column_with_label(F_col).array
    PHI_ref = mtz.column_with_label(PHI_col).array
    hkl_ref = mtz.make_miller_array()

    cell = mtz.cell

    # Build complex reference SFs
    F_ref_cplx = F_ref * np.exp(1j * np.deg2rad(PHI_ref))

    # -----------------------------
    # HKL → SF dictionaries
    # -----------------------------
    ref_dict = {
        tuple(map(int, h)): F_ref_cplx[i]
        for i, h in enumerate(hkl_ref)
        if F_ref[i] > 0.0
    }

    gen_dict = {
        tuple(map(int, h)): F_gen[i]
        for i, h in enumerate(hkl_gen)
    }

    # -----------------------------
    # Find common reflections
    # -----------------------------
    hkls_common = sorted(ref_dict.keys() & gen_dict.keys())
    if len(hkls_common) == 0:
        raise RuntimeError("No common HKLs between reference MTZ and generated SFs")

    Fref = np.array([ref_dict[h] for h in hkls_common])
    Fgen = np.array([gen_dict[h] for h in hkls_common])

    # -----------------------------
    # Resolution
    # -----------------------------
    d = np.array([cell.calculate_d(list(h)) for h in hkls_common])

    # -----------------------------
    # Sort by resolution (low → high)
    # -----------------------------
    order = np.argsort(d)[::-1]
    Fref = Fref[order]
    Fgen = Fgen[order]
    d = d[order]
    hkls_common = [hkls_common[i] for i in order]

    # -----------------------------
    # GLOBAL RMS SCALING
    # -----------------------------
    k0 = np.sqrt(
        np.mean(np.abs(Fref)**2) /
        np.mean(np.abs(Fgen)**2)
    )
    Fgen *= k0

    # -----------------------------
    # BIN-WISE RMS REFINEMENT
    # -----------------------------
    bins = np.array_split(np.arange(len(d)), n_bins)
    Fgen_scaled = Fgen.copy()

    for b in bins:
        if len(b) == 0:
            continue

        amp_ref = np.abs(Fref[b])
        amp_gen = np.abs(Fgen_scaled[b])

        den = np.sum(amp_gen**2)
        if den <= 0:
            continue

        k = np.sqrt(np.sum(amp_ref**2) / den)
        Fgen_scaled[b] *= k

    return Fgen_scaled, hkls_common


def resultant(SF):

    R = np.sum(SF)              # vector sum
    magnitude = np.abs(R)      # |sum|
    phase_deg = np.angle(R, deg=True)  # phase in degrees
    return magnitude, phase_deg

def riso(F_ref, F_test):
    """
    Xtrapol8 definition of Riso:
        sum(|F_ref - F_test|) / sum((F_ref + F_test)/2)
    """
    # amplitudes (ensure real and non-negative)
    A_ref = np.abs(F_ref)
    A_test = np.abs(F_test)

    denom = 0.5*(A_ref + A_test)
    mask = denom > 0  # avoid zero denominator

    num = np.sum(np.abs(A_ref - A_test))
    den = np.sum(denom)

    return num/den if den != 0 else np.nan

def cciso(F_ref, F_test):
    """
    Xtrapol8 definition of CCiso:
    Pearson CC of amplitudes F_ref and F_test
    """
    A_ref = np.abs(F_ref)
    A_test = np.abs(F_test)

    # mask out invalids if any
    mask = np.isfinite(A_ref) & np.isfinite(A_test)

    A_ref = A_ref[mask]
    A_test = A_test[mask]

    if len(A_ref) < 2:
        return np.nan

    A_ref_mean = np.mean(A_ref)
    A_test_mean = np.mean(A_test)

    num = np.sum((A_ref - A_ref_mean)*(A_test - A_test_mean))
    den = np.sqrt(np.sum((A_ref - A_ref_mean)**2)*np.sum((A_test - A_test_mean)**2))

    return num/den if den != 0 else np.nan

def make_equal_population_resolution_bins( d, nbins=20):

    d = np.asarray(d)

    # Sort reflections by resolution (low → high res)
    order = np.argsort(d)[::-1]   # large d first
    d_sorted = d[order]

    n = len(d)
    bin_size = n // nbins

    bins = []

    for i in range(nbins):
        start = i * bin_size
        end = (i + 1) * bin_size if i < nbins - 1 else n

        idx = order[start:end]

        bins.append({
            "dmax": d_sorted[start],      # low resolution edge
            "dmin": d_sorted[end - 1],    # high resolution edge
            "indices": idx
        })
        
    return bins


def riso_cciso_vs_resolution(Fref, Ftest, d, nbins, min_per_bin=20):
    
    bins = make_equal_population_resolution_bins(d, nbins)

    dmid = []
    riso_vals = []
    cciso_vals = []

    for b in bins:
        idx = b["indices"]

        if len(idx) < min_per_bin:
            continue

        F1 = Fref[idx]
        F2 = Ftest[idx]

        dmid.append(0.5 * (b["dmin"] + b["dmax"]))
        riso_vals.append(riso(F1, F2))
        cciso_vals.append(cciso(F1, F2))

    return np.array(dmid), np.array(riso_vals), np.array(cciso_vals)


def plot_riso_cciso_vs_resolution(d_centers, riso_vals, cciso_vals):
    
    fig, ax1 = plt.subplots()

    ax1.plot(1/d_centers, riso_vals, color="red", marker='o')
    ax1.set_xlabel("Resolution (Å)")
    ax1.set_ylabel("Riso", color="red")

    ax2 = ax1.twinx()
    ax2.plot(1/d_centers, cciso_vals, color="blue", marker='s')
    ax2.set_ylabel("CCiso", color="blue")

    plt.title("Riso and CCiso vs Resolution")
    plt.tight_layout()
    plt.show()

# -----------------------------
# Compute structure factors
# -----------------------------
print( "making apo SFs" )
SF_apo, hkl_apo, cell_apo = compute_sf(apo_pdb, dmin)
SF_apo, hkl_apo = scale_sf_against_ref_mtz( ref_mtz, SF_apo, hkl_apo  )
#SF_apo, d = add_resolution_dependent_noise(SF_apo, hkl_apo, cell_apo, alpha, B=20.0, rng=None)
print( "done" )

print( "making 100 % SFs" )
SF_100, hkl_100, cell_100 = compute_sf(occ100_pdb, dmin) # Fb_full
SF_100, hkl_100 = scale_sf_against_ref_mtz( ref_mtz, SF_100, hkl_100  )
#SF_100, d = add_resolution_dependent_noise( SF_100, hkl_100, cell_100, alpha, B=20.0, rng=None )
print( "done" )

print( "making b % SFs" )
SF_b, hkl_b, cell_b = compute_sf(occb_pdb, dmin) # Fb_partial
SF_b, hkl_b = scale_sf_against_ref_mtz( ref_mtz, SF_b, hkl_b  )
SF_b, d = add_resolution_dependent_noise( SF_b, hkl_b, cell_b, alpha, B=20.0, rng=None )
print( "done" )

d_centers, riso_vals, cciso_vals = riso_cciso_vs_resolution(SF_apo, SF_b, d, 20)
plot_riso_cciso_vs_resolution(d_centers, riso_vals, cciso_vals)


F_apo = np.abs(SF_apo)
phi_apo = np.angle(SF_apo)

F_100 = np.abs(SF_100)
phi_100 = np.angle(SF_100)

F_b = np.abs(SF_b)
phi_b = np.angle(SF_b)

SF_100_phi_apo = F_100 * np.exp(1j * np.deg2rad(phi_apo))

SF_b_phi_apo = F_b * np.exp(1j * np.deg2rad(phi_b))

# Use only amplitudes of partial/full occupancy, phases from Fa

#Fb_partial_extrap = np.abs(Fb_partial) * np.exp(1j*phi_apo)
#Fb_full_extrap    = np.abs(Fb_full)    * np.exp(1j*phi_apo)

# -----------------------------
# correct method
# -----------------------------
# Forward extrapolation: Fab = aFa + bFb_partial
SF_ab = a_val*SF_apo + b_val*SF_100

delta_forward = SF_ab - SF_b

print("Correct method TEST: SFab = aSFa + bSF100")
print("Mean |ΔF|:", np.mean(np.abs(delta_forward)))
print("Mean phase Δ (deg):",
      np.mean(np.abs(np.angle(SF_ab / SF_b, deg=True))))

F_ab_calc, phi_ab_calc = resultant(SF_ab)
F_ab_model, phi_ab_model = resultant(SF_b)

print("Sum SF_ab calculated: |F| = {0}, phi = {1}".format( F_ab_calc, phi_ab_calc ) )
print("Sum SF_b model      : |F| = {0}, phi = {1}".format( F_ab_model, phi_ab_model ) )

# -----------------------------
# Fext test
# -----------------------------
SF_extr = (SF_b - SF_apo)/b_val + SF_apo

delta_Fextr = SF_extr - SF_100

print("Fextr SF TEST: SF_extr = (SF_b - SF_apo)/b_val + SF_apo")
print("Mean |ΔF|:", np.mean(np.abs(delta_Fextr)))
print("Mean phase Δ (deg):",
      np.mean(np.abs(np.angle(SF_extr / SF_100, deg=True))))

F_100_calc, phi_100_calc = resultant(SF_extr)
F_100_model, phi_100_model = resultant(SF_100)

print("Sum SF_extr calculated: |F| = {0}, phi = {1}".format( F_100_calc, phi_100_calc ) )
print("Sum SF_100 model      : |F| = {0}, phi = {1}".format( F_100_model, phi_100_model ) )

# -----------------------------
# correct method test with only Fs and 
# -----------------------------
# Forward extrapolation: Fab = aFa + bFb_partial
F_ab = a_val*F_apo + b_val*F_100
SF_ab = F_ab * np.exp(1j * phi_apo)

delta_forward = SF_ab - SF_b

print("Correct method TEST: Fab = aFa + bF100")
print("Mean |ΔF|:", np.mean(np.abs(delta_forward)))
print("Mean phase Δ (deg):",
      np.mean(np.abs(np.angle(SF_ab / SF_b, deg=True))))

F_ab_calc, phi_ab_calc = resultant(SF_ab)
F_ab_model, phi_ab_model = resultant(SF_b)

print("Sum SF_ab calculated: |F| = {0}, phi = {1}".format( F_ab_calc, phi_ab_calc ) )
print("Sum SF_b model     : |F| = {0}, phi = {1}".format( F_ab_model, phi_ab_model ) )

# -----------------------------
# Fext test just Fs
# -----------------------------
F_extr = (F_b - F_apo)/b_val + F_apo
SF_extr = F_extr * np.exp(1j * phi_apo)

delta_Fextr = SF_extr - SF_100

print("Fextr TEST: SF_extr = (F_b - F_apo)/b_val + F_apo")
print("Mean |ΔF|:", np.mean(np.abs(delta_Fextr)))
print("Mean phase Δ (deg):",
      np.mean(np.abs(np.angle(SF_extr / SF_100, deg=True))))

F_100_calc, phi_100_calc = resultant(SF_extr)
F_100_model, phi_100_model = resultant(SF_100)

print("Sum SF_extr calculated: |F| = {0}, phi = {1}".format( F_100_calc, phi_100_calc ) )
print("Sum SF_100 model     : |F| = {0}, phi = {1}".format( F_100_model, phi_100_model ) )


F_apo_model, phi_apo_model = resultant(SF_apo)

print("Sum SF_apo    : |F| = {0}, phi = {1}".format( F_apo_model, phi_apo_model ) )


# -----------------------------
# Resultant vectors for plotting
# -----------------------------
sum_SF_apo = np.sum(SF_apo)
sum_SF_100 = np.sum(SF_100)
sum_SF_b = np.sum(SF_b)
sum_SF_ab_calc = np.sum(SF_ab)
sum_SF_100_calc = np.sum(SF_extr)

# -----------------------------
# Plot resultant vectors
# -----------------------------
plt.figure(figsize=(8,8))
ax = plt.gca()
ax.set_aspect('equal')
origin = 0 + 0j

ax.arrow(origin.real, origin.imag, sum_SF_apo.real, sum_SF_apo.imag,
         head_width=500, head_length=500, fc='blue', ec='blue', label='Fapo model')

ax.arrow(origin.real, origin.imag, sum_SF_100.real, sum_SF_100.imag,
         head_width=500, head_length=500, fc='red', ec='red', label='F100 model')

ax.arrow(origin.real, origin.imag, sum_SF_b.real, sum_SF_b.imag,
         head_width=500, head_length=500, fc='green', ec='green', label='Fab model')

# Vector showing addition to Fa
#ax.arrow(origin.real, origin.imag, sum_SF_ab_calc.real, sum_SF_ab_calc.imag,
#        head_width=500, head_length=500, fc='yellow', ec='yellow', label='Fab calc')

ax.arrow(origin.real, origin.imag, sum_SF_100_calc.real, sum_SF_100_calc.imag,
         head_width=500, head_length=500, fc='black', ec='black', label='Fextr calc')

ax.set_xlabel("Re(F)")
ax.set_ylabel("Im(F)")
ax.set_title("SF Resultant Vectors")
ax.grid(True)
ax.legend()
plt.show()