combine two pdbs as altconf with a defined occupancy
This commit is contained in:
@@ -0,0 +1,412 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import gemmi
|
||||
import argparse
|
||||
import tempfile
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Altloc validation
|
||||
# -----------------------------
|
||||
def validate_altlocs(struct, label):
|
||||
|
||||
allowed = {'A', 'B', '', ' ', '\x00'}
|
||||
found = set()
|
||||
|
||||
for chain in struct[0]:
|
||||
for res in chain:
|
||||
for atom in res:
|
||||
|
||||
alt = atom.altloc
|
||||
|
||||
if alt not in ('', ' ', '\x00'):
|
||||
found.add(alt)
|
||||
|
||||
if alt not in allowed:
|
||||
|
||||
print()
|
||||
print("ERROR: Unsupported altloc detected")
|
||||
print("----------------------------------")
|
||||
print(f"Structure : {label}")
|
||||
print(f"Chain : {chain.name}")
|
||||
print(f"Residue : {res.name} {res.seqid.num}")
|
||||
print(f"Atom : {atom.name}")
|
||||
print(f"Altloc : '{alt}'")
|
||||
print()
|
||||
print("Only altlocs A/B are allowed in input structures.")
|
||||
print()
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
if len(found) > 2:
|
||||
|
||||
print()
|
||||
print("ERROR: More than two conformations detected")
|
||||
print("-------------------------------------------")
|
||||
print(f"Structure : {label}")
|
||||
print(f"Altlocs : {sorted(found)}")
|
||||
print()
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
print(f"{label} altlocs detected:", sorted(found))
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Remove ANISOU records
|
||||
# -----------------------------
|
||||
def strip_anisou(pdb_in, pdb_out):
|
||||
|
||||
with open(pdb_in) as fin, open(pdb_out, "w") as fout:
|
||||
for line in fin:
|
||||
if not line.startswith("ANISOU"):
|
||||
fout.write(line)
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Altloc conversion helper
|
||||
# -----------------------------
|
||||
def convert_light_altloc(alt):
|
||||
|
||||
if alt in ('', ' ', '\x00', 'A'):
|
||||
return 'C'
|
||||
|
||||
if alt == 'B':
|
||||
return 'D'
|
||||
|
||||
raise RuntimeError(f"Unsupported altloc '{alt}' in light structure")
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# report differences between states
|
||||
# -----------------------------
|
||||
def report_state_differences(dark, light):
|
||||
|
||||
dark_atoms = set()
|
||||
light_atoms = set()
|
||||
|
||||
for chain in dark[0]:
|
||||
for res in chain:
|
||||
for atom in res:
|
||||
key = (
|
||||
chain.name,
|
||||
res.seqid.num,
|
||||
res.name,
|
||||
atom.name
|
||||
)
|
||||
dark_atoms.add(key)
|
||||
|
||||
for chain in light[0]:
|
||||
for res in chain:
|
||||
for atom in res:
|
||||
key = (
|
||||
chain.name,
|
||||
res.seqid.num,
|
||||
res.name,
|
||||
atom.name
|
||||
)
|
||||
light_atoms.add(key)
|
||||
|
||||
dark_only = dark_atoms - light_atoms
|
||||
light_only = light_atoms - dark_atoms
|
||||
|
||||
print()
|
||||
print("State difference summary")
|
||||
print("------------------------")
|
||||
print("Atoms only in dark :", len(dark_only))
|
||||
print("Atoms only in light:", len(light_only))
|
||||
print()
|
||||
|
||||
# Print a few examples
|
||||
if dark_only:
|
||||
print("Example dark-only atoms:")
|
||||
for x in list(dark_only)[:5]:
|
||||
print(" ", x)
|
||||
|
||||
if light_only:
|
||||
print("Example light-only atoms:")
|
||||
for x in list(light_only)[:5]:
|
||||
print(" ", x)
|
||||
|
||||
print()
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# occupancy sanity check
|
||||
# -----------------------------
|
||||
def check_occupancies(struct):
|
||||
|
||||
occ_map = {}
|
||||
|
||||
for chain in struct[0]:
|
||||
for res in chain:
|
||||
for atom in res:
|
||||
|
||||
key = (
|
||||
chain.name,
|
||||
res.seqid.num,
|
||||
res.name,
|
||||
atom.name
|
||||
)
|
||||
|
||||
occ_map.setdefault(key, 0.0)
|
||||
occ_map[key] += atom.occ
|
||||
|
||||
problems = []
|
||||
|
||||
for key, occ in occ_map.items():
|
||||
if occ > 1.01: # small tolerance
|
||||
problems.append((key, occ))
|
||||
|
||||
if problems:
|
||||
|
||||
print()
|
||||
print("WARNING: Occupancy > 1 detected")
|
||||
print("-------------------------------")
|
||||
|
||||
for p in problems[:10]:
|
||||
print(p)
|
||||
|
||||
print()
|
||||
|
||||
else:
|
||||
print("Occupancy check passed (no atoms >1).")
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# chain/residue check
|
||||
# -----------------------------
|
||||
def check_missing_light_residues(dark, light, ensemble):
|
||||
|
||||
dark_res = {(c.name, r.seqid.num, r.name) for c in dark[0] for r in c}
|
||||
light_res = {(c.name, r.seqid.num, r.name) for c in light[0] for r in c}
|
||||
ens_res = {(c.name, r.seqid.num, r.name) for c in ensemble[0] for r in c}
|
||||
|
||||
missing = light_res - ens_res
|
||||
|
||||
if missing:
|
||||
|
||||
print()
|
||||
print("ERROR: residues lost from light structure during merge")
|
||||
print("------------------------------------------------------")
|
||||
|
||||
for r in sorted(missing):
|
||||
print(" ", r)
|
||||
|
||||
print()
|
||||
print("Ensemble construction aborted.")
|
||||
print("Check residue creation or atom filtering.")
|
||||
print()
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
# -----------------------------
|
||||
# Build ensemble model
|
||||
# -----------------------------
|
||||
def build_ensemble(dark, light, light_occ, occ_cutoff=0.005, verbose=False):
|
||||
|
||||
dark_occ = 1.0 - light_occ
|
||||
|
||||
ensemble = dark.clone()
|
||||
model = ensemble[0]
|
||||
|
||||
dark_atoms = 0
|
||||
|
||||
# -----------------
|
||||
# Scale dark atoms
|
||||
# -----------------
|
||||
for chain in model:
|
||||
for res in chain:
|
||||
for atom in res:
|
||||
|
||||
atom.occ *= dark_occ
|
||||
|
||||
if atom.occ < occ_cutoff:
|
||||
continue
|
||||
|
||||
if atom.altloc in ('', ' ', '\x00'):
|
||||
atom.altloc = 'A'
|
||||
|
||||
dark_atoms += 1
|
||||
|
||||
light_atoms = 0
|
||||
|
||||
# -----------------
|
||||
# Add light atoms
|
||||
# -----------------
|
||||
for chain in light[0]:
|
||||
|
||||
target_chain = model.find_chain(chain.name)
|
||||
|
||||
if target_chain is None:
|
||||
target_chain = gemmi.Chain(chain.name)
|
||||
model.add_chain(target_chain)
|
||||
|
||||
# residue lookup dictionary (O(1) lookup)
|
||||
res_lookup = {
|
||||
(r.seqid.num, r.seqid.icode, r.name): r
|
||||
for r in target_chain
|
||||
}
|
||||
|
||||
for res in chain:
|
||||
|
||||
key = (res.seqid.num, res.seqid.icode, res.name)
|
||||
|
||||
target_res = res_lookup.get(key)
|
||||
|
||||
if target_res is None:
|
||||
|
||||
target_res = gemmi.Residue()
|
||||
target_res.name = res.name
|
||||
target_res.seqid = gemmi.SeqId(res.seqid.num, res.seqid.icode)
|
||||
|
||||
target_chain.add_residue(target_res)
|
||||
res_lookup[key] = target_res
|
||||
|
||||
for atom in res:
|
||||
|
||||
new_occ = atom.occ * light_occ
|
||||
|
||||
if new_occ < occ_cutoff:
|
||||
|
||||
if verbose:
|
||||
print(
|
||||
"Skipping low-occupancy atom:",
|
||||
f"{chain.name} {res.name} {res.seqid.num}",
|
||||
f"atom {atom.name}",
|
||||
f"orig_occ={atom.occ:.3f}",
|
||||
f"scaled_occ={new_occ:.4f}"
|
||||
)
|
||||
|
||||
continue
|
||||
|
||||
new_atom = gemmi.Atom()
|
||||
|
||||
new_atom.name = atom.name
|
||||
new_atom.pos = gemmi.Position(atom.pos.x, atom.pos.y, atom.pos.z)
|
||||
new_atom.element = atom.element
|
||||
new_atom.b_iso = atom.b_iso
|
||||
new_atom.occ = new_occ
|
||||
new_atom.altloc = convert_light_altloc(atom.altloc)
|
||||
|
||||
target_res.add_atom(new_atom)
|
||||
|
||||
light_atoms += 1
|
||||
|
||||
print()
|
||||
print("Ensemble build summary")
|
||||
print("----------------------")
|
||||
print("Dark atoms written :", dark_atoms)
|
||||
print("Light atoms written:", light_atoms)
|
||||
print()
|
||||
|
||||
return ensemble
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Mainfsortdcdds
|
||||
# -----------------------------
|
||||
def main():
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate multicopy ensemble PDB (Barends-style refinement model).",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-r",
|
||||
"--reference",
|
||||
type=os.path.abspath,
|
||||
help="reference-state PDB file",
|
||||
required=True
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--triggered",
|
||||
type=os.path.abspath,
|
||||
help="Triggered-state PDB file",
|
||||
required=True
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-f",
|
||||
"--fraction",
|
||||
type=float,
|
||||
help="Triggered-state occupancy number < 1",
|
||||
required=True
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
default="ensemble.pdb",
|
||||
help="Output ensemble PDB",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-c",
|
||||
"--occ-cutoff",
|
||||
type=float,
|
||||
default=0.005,
|
||||
help="Atoms with scaled occupancy below this value are removed"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
action="store_true",
|
||||
help="Print detailed filtering information"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate occupancy
|
||||
if not (0.0 <= args.fraction <= 1.0):
|
||||
print("ERROR: light_occ must be between 0 and 1")
|
||||
sys.exit(1)
|
||||
|
||||
# Temporary files to remove any ANISOU records
|
||||
with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as tmp_dark, \
|
||||
tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as tmp_light:
|
||||
|
||||
strip_anisou(args.reference, tmp_dark.name)
|
||||
strip_anisou(args.triggered, tmp_light.name)
|
||||
|
||||
dark = gemmi.read_structure(tmp_dark.name)
|
||||
light = gemmi.read_structure(tmp_light.name)
|
||||
|
||||
os.remove(tmp_dark.name)
|
||||
os.remove(tmp_light.name)
|
||||
|
||||
# Validate the altocs and check only two conformations present in structure
|
||||
validate_altlocs(dark, "dark")
|
||||
validate_altlocs(light, "light")
|
||||
|
||||
# report any residue differences
|
||||
report_state_differences(dark, light)
|
||||
|
||||
ensemble = build_ensemble(
|
||||
dark,
|
||||
light,
|
||||
args.fraction,
|
||||
occ_cutoff=args.occ_cutoff,
|
||||
verbose=args.verbose
|
||||
)
|
||||
|
||||
# check all the residues are there
|
||||
check_missing_light_residues(dark, light, ensemble)
|
||||
|
||||
# check occupancies in the ensemble = 1
|
||||
check_occupancies(ensemble)
|
||||
|
||||
# Write to file
|
||||
ensemble.write_pdb(args.output)
|
||||
|
||||
print("Ensemble written:", args.output)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user