Files
crystfel_tools/reduction_tools/make_mtz.py
2025-01-22 15:20:01 +01:00

270 lines
9.9 KiB
Python

#!/usr/bin/python
# author J.Beale
"""
# aim
to make an mtz from an hkl file output from partialator or process_hkl
runs f2mtz and then truncate for create an mtz with other intensities and structure factors
# usage to make mtz from manually entered lengths and angles
python make_mtz.py -i <path to .hkl file from partialator>
-o name of .mtz file
-p project name in mtz
-x xtal name in mtz
-d dataset name in mtz
-g spacegroup
-c list of cell lengths and angles to use - 59.3,59.3,153.1,90.0,90.0,90.0
-r number of residues
-A resolution range - e.g. 40.0,2.0
# usage to make mtz from the mean angles and lengths in stream file
python make_mtz.py -i <path to .hkl file from partialator>
-o name of .mtz file
-p project name in mtz
-x xtal name in mtz
-d dataset name in mtz
-g spacegroup
-r number of residues
-A resolution range - e.g. 40.0,2.0
-s <path to stream file>
-u True
# usage to get list of mean lengths and angles from stream files
python make_mtz.py -s <path to stream file>
# output
- .mtz file - just intensities
- f2mtz.log file
- _F.mtz file - intensities and structure factors
- cuts data to desired resolution
- truncate.log file
"""
# modules
import subprocess
import pandas as pd
import numpy as np
import argparse
import os
import regex as re
def get_mean_cell( stream_file ):
# open stream file
stream = open( stream_file, "r" ).read()
# get uc values from stream file
# example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
# scrub clen and return - else nan
try:
pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
cell_lst = re.findall( pattern, stream )
except AttributeError:
return np.nan
# make cellss into df and convert to float
cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
cell_df = pd.DataFrame( cell_lst, columns=cols )
cell_df = cell_df.astype(float)
# calculate mean cells values
mean_a = round( cell_df.a.mean()*10, 3 )
mean_b = round( cell_df.b.mean()*10, 3 )
mean_c = round( cell_df.c.mean()*10, 3 )
mean_alpha = round( cell_df.alpha.mean(), 3 )
mean_beta = round( cell_df.beta.mean(), 3 )
mean_gamma = round( cell_df.gamma.mean(), 3 )
mean_cell = [ mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma ]
return mean_cell, len(cell_lst)
def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup, residues, res_range ):
# make_mtz file name
mtz_run_file = "make_mtz.sh"
# make F file name
Fout_file = os.path.splitext( mtzout_file )[0] + "_F.mtz"
# write file
mtz_sh = open( mtz_run_file, "w" )
mtz_sh.write( "#!/bin/sh\n\n" )
mtz_sh.write( "module purge\n" )
mtz_sh.write( "module load ccp4/8.0\n\n" )
mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} << EOF_hkl > f2mtz.log\n".format( hklout_file, mtzout_file ) )
mtz_sh.write( "TITLE Reflections from CrystFEL\n" )
mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) )
mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
mtz_sh.write( "SYMM {0}\n".format( spacegroup ) )
mtz_sh.write( "SKIP 3\n" )
mtz_sh.write( "LABOUT H K L I_stream SIGI_stream\n" )
mtz_sh.write( "CTYPE H H H J Q\n" )
mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" )
mtz_sh.write( "SKIP 3\n" )
mtz_sh.write( "EOF_hkl\n\n\n" )
mtz_sh.write( "echo 'done'\n" )
mtz_sh.write( "echo 'I and SIGI from CrystFEL stream saved as I_stream and SIGI_stream'\n" )
mtz_sh.write( "echo 'I filename = {0}'\n\n\n".format( mtzout_file ) )
mtz_sh.write( "echo 'running truncate'\n" )
mtz_sh.write( "echo 'setting resolution range to {0}-{1}'\n".format( res_range[0], res_range[1] ) )
mtz_sh.write( "echo 'assuming that there are {0} residues in assymetric unit'\n\n\n".format( residues ) )
mtz_sh.write( "truncate HKLIN {0} HKLOUT {1} << EOF_F > truncate.log\n".format( mtzout_file, Fout_file ) )
mtz_sh.write( "truncate YES\n" )
mtz_sh.write( "anomalous NO\n" )
mtz_sh.write( "nresidue {0}\n".format( residues ) )
mtz_sh.write( "resolution {0} {1}\n".format( res_range[0], res_range[1] ) )
mtz_sh.write( "plot OFF\n" )
mtz_sh.write( "labin IMEAN=I_stream SIGIMEAN=SIGI_stream\n" )
mtz_sh.write( "labout F=F_stream SIGF=SIGF_stream\n" )
mtz_sh.write( "end\n" )
mtz_sh.write( "EOF_F\n\n\n" )
mtz_sh.write( "echo 'done'\n" )
mtz_sh.write( "echo 'I_stream and SIGI_stream from f2mtz converted to F_stream and F_stream'\n" )
mtz_sh.write( "echo 'F filename = {0} (contains both Is and Fs)'".format( Fout_file ) )
mtz_sh.close()
# make file executable
subprocess.call( [ "chmod", "+x", "{0}".format( mtz_run_file ) ] )
# run
subprocess.call( [ "./{0}".format( mtz_run_file ) ] )
def cut_hkl_file( hklin_file, hklout_file ):
# setup
hklout = open( hklout_file, 'w')
collect_lines = True
# Open the input file for reading
with open( hklin_file, 'r') as f:
for line in f:
if line.strip() == 'End of reflections':
collect_lines = False # Stop collecting lines
if collect_lines:
hklout.write( line )
hklout.close()
def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup, residues, res_range ):
# remove final lines from crystfel hkl out
print( "removing final lines from crystfel hklin" )
cut_hkl_file( hklin_file, hklout_file )
print( "done" )
# running make mtz
print( "making mtz" )
print( "using cell constants\n{0} {1} {2} A {3} {4} {5} deg".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
print( "Titles in mtz will be:\nPROJECT {0} CRYSTAL {1} DATASET {2}".format( project, crystal, dataset ) )
make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup, residues, res_range )
print( "done" )
# remove *cut.hkl out
cwd = os.getcwd()
files = os.listdir( cwd )
for file in files:
if file.endswith( "cut.hkl" ):
os.remove( file )
def list_of_floats(arg):
return list(map(float, arg.split(',')))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-i",
"--hklin",
help=".hkl output from partialator or process_hkl",
type=os.path.abspath
)
parser.add_argument(
"-o",
"--mtzout",
help="name of mtz out file. The default will by the hklin with .mtz appended",
type=str
)
parser.add_argument(
"-p",
"--project",
help="name of PROJECT flag in mtz file. The default is - SERIAL-XTAL",
type=str,
default="SERIAL-XTAL"
)
parser.add_argument(
"-x",
"--crystal",
help="name of CRYSTAL flag in mtz file. The default is - XTALS",
type=str,
default="XTALS"
)
parser.add_argument(
"-d",
"--dataset",
help="name of DATASET flag in mtz file. The default is - DARK",
type=str,
default="DARK"
)
parser.add_argument(
"-g",
"--spacegroup",
help="spacegroup for making mtz, e.g P41212",
type=str
)
parser.add_argument(
"-c",
"--cell",
help="list of complete cell length and angles, e.g. 59.3,59.3,153.1,90.0,90.0,90.0. They all should be floats",
type=list_of_floats
)
parser.add_argument(
"-r",
"--residues",
help="number of residues for truncate, e.g., hewl = 129",
type=int
)
parser.add_argument(
"-A",
"--resolution_range",
help="list of 2 floats - low res then high res cut off, e.g., 50.0,1.3",
type=list_of_floats
)
parser.add_argument(
"-s",
"--stream_file",
help="this will simply output the mean unit cell for the subsequent make_mtz run",
type=os.path.abspath,
)
parser.add_argument(
"-u",
"--use_stream",
help="set -u True if you want to use the average lengths and angles from the stream file. Lengths and angles will be automatically averaged when appropriate",
type=bool,
default=False
)
args = parser.parse_args()
# logic on which task to perform
if args.stream_file:
print( "reading stream file" )
cell, xtals = get_mean_cell( args.stream_file )
print( "found {0} xtals".format( xtals ) )
print( "mean lengths = {0}, {1}, {2} A".format( cell[0], cell[1], cell[2] ) )
print( "mean angles = {0}, {1}, {2} deg".format( cell[3], cell[4], cell[5] ) )
print( "# for input to make_mtz = {0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
if args.use_stream:
hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl"
if args.mtzout:
mtzout = args.mtzout
else:
mtzout = os.path.splitext( args.hklin )[0] + ".mtz"
main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup, args.residues, args.resolution_range )
if args.stream_file == None and args.use_stream == False:
hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl"
if args.mtzout:
mtzout = args.mtzout
else:
mtzout = os.path.splitext( args.hklin )[0] + ".mtz"
main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup, args.residues, args.resolution_range )