diff --git a/reduction_tools/make_mtz.py b/reduction_tools/make_mtz.py new file mode 100644 index 0000000..8d74892 --- /dev/null +++ b/reduction_tools/make_mtz.py @@ -0,0 +1,201 @@ +#!/usr/bin/python + +# author J.Beale + +""" +# aim +to make an mtz from an hkl file output from partialator or process_hkl + +# usage +python make_mtz.py + +# output +- .mtz file +- .html log file +""" + +# modules +import subprocess +import pandas as pd +import numpy as np +import argparse +import os +import regex as re + +def get_mean_cell( stream_file ): + + # open stream file + stream = open( stream_file, "r" ).read() + + # get uc values from stream file + # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg + # scrub clen and return - else nan + try: + pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg" + cell_lst = re.findall( pattern, stream ) + except AttributeError: + return np.nan + + # make cellss into df and convert to float + cols = [ "a", "b", "c", "alpha", "beta", "gamma" ] + cell_df = pd.DataFrame( cell_lst, columns=cols ) + cell_df = cell_df.astype(float) + + # calculate mean cells values + mean_a = round( cell_df.a.mean()*10, 3 ) + mean_b = round( cell_df.b.mean()*10, 3 ) + mean_c = round( cell_df.c.mean()*10, 3 ) + mean_alpha = round( cell_df.alpha.mean(), 3 ) + mean_beta = round( cell_df.beta.mean(), 3 ) + mean_gamma = round( cell_df.gamma.mean(), 3 ) + + mean_cell = [ mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma ] + + return mean_cell, len(cell_lst) + +def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup ): + + # make_mtz file name + mtz_run_file = "make_mtz.sh" + + # write file + mtz_sh = open( mtz_run_file, "w" ) + mtz_sh.write( "#!/bin/sh\n\n" ) + mtz_sh.write( "module purge\n" ) + mtz_sh.write( "module load ccp4/8.0\n\n" ) + mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} > out.html << EOF\n".format( hklout_file, mtzout_file ) ) + mtz_sh.write( "TITLE Reflections from CrystFEL\n" ) + mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) ) + mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) + mtz_sh.write( "SYMM {0}\n".format( spacegroup ) ) + mtz_sh.write( "SKIP 3\n" ) + mtz_sh.write( "LABOUT H K L IMEAN SIGIMEAN\n" ) + mtz_sh.write( "CTYPE H H H J Q\n" ) + mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" ) + mtz_sh.write( "SKIP 3\n" ) + mtz_sh.write( "EOF" ) + mtz_sh.close() + + # make file executable + subprocess.call( [ "chmod", "+x", "{0}".format( mtz_run_file ) ] ) + + # run + subprocess.call( [ "./{0}".format( mtz_run_file ) ] ) + +def cut_hkl_file( hklin_file, hklout_file ): + + # setup + hklout = open( hklout_file, 'w') + collect_lines = True + + # Open the input file for reading + with open( hklin_file, 'r') as f: + + for line in f: + if line.strip() == 'End of reflections': + collect_lines = False # Stop collecting lines + if collect_lines: + hklout.write( line ) + + hklout.close() + +def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup ): + + # remove final lines from crystfel hkl out + print( "removing final lines from crystfel hklin" ) + cut_hkl_file( hklin_file, hklout_file ) + print( "done" ) + + # running make mtz + print( "making mtz" ) + print( "using cell constants\n{0} {1} {2} A {3} {4} {5} deg".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) + print( "Titles in mtz will be:\nPROJECT {0} CRYSTAL {1} DATASET {2}".format( project, crystal, dataset ) ) + make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup ) + print( "done" ) + +def list_of_floats(arg): + return list(map(float, arg.split(','))) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-i", + "--hklin", + help=".hkl output from partialator or process_hkl", + type=os.path.abspath + ) + parser.add_argument( + "-o", + "--mtzout", + help="name of mtz out file. The default will by the hklin with .mtz appended", + type=str, + ) + parser.add_argument( + "-c", + "--cell", + help="list of complete cell length and angles, e.g. 59.3,59.3,153.1,90.0,90.0,90.0. They all should be floats", + type=list_of_floats + ) + parser.add_argument( + "-s", + "--stream_file", + help="this will simply output the mean unit cell for the subsequent make_mtz run", + type=os.path.abspath, + ) + parser.add_argument( + "-p", + "--project", + help="name of PROJECT flag in mtz file. The default is - SERIAL-XTAL", + type=str, + default="SERIAL-XTAL" + ) + parser.add_argument( + "-x", + "--crystal", + help="name of CRYSTAL flag in mtz file. The default is - XTALS", + type=str, + default="XTALS" + ) + parser.add_argument( + "-d", + "--dataset", + help="name of DATASET flag in mtz file. The default is - DARK", + type=str, + default="DARK" + ) + parser.add_argument( + "-g", + "--spacegroup", + help="spacegroup for making mtz, e.g P41212", + type=str + ) + parser.add_argument( + "-u", + "--use_stream", + help="set -u True if you want to use the average lengths and angles from the stream file. Lengths and angles will be automatically averaged when appropriate", + type=bool, + default=False + ) + args = parser.parse_args() + # logic on which task to perform + if args.stream_file: + print( "reading stream file" ) + cell, xtals = get_mean_cell( args.stream_file ) + print( "found {0} xtats".format( xtals ) ) + print( "mean lengths = {0}, {1}, {2} A".format( cell[0], cell[1], cell[2] ) ) + print( "mean angles = {0}, {1}, {2} deg".format( cell[3], cell[4], cell[5] ) ) + print( "# for input to make_mtz = {0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) + if args.use_stream: + hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl" + if args.mtzout: + mtzout = args.mtzout + else: + mtzout = os.path.splitext( args.hklin )[0] + ".mtz" + main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup ) + if args.stream_file == None and args.use_stream == False: + hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl" + if args.mtzout: + mtzout = args.mtzout + else: + mtzout = os.path.splitext( args.hklin )[0] + ".mtz" + main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup )