#!/usr/bin/python # author J.Beale """ # aim to make an mtz from an hkl file output from partialator or process_hkl runs f2mtz and then truncate for create an mtz with other intensities and structure factors # usage to make mtz from manually entered lengths and angles python make_mtz.py -i -o name of .mtz file -p project name in mtz -x xtal name in mtz -d dataset name in mtz -g spacegroup -c list of cell lengths and angles to use - 59.3,59.3,153.1,90.0,90.0,90.0 -r number of residues -A resolution range - e.g. 40.0,2.0 # usage to make mtz from the mean angles and lengths in stream file python make_mtz.py -i -o name of .mtz file -p project name in mtz -x xtal name in mtz -d dataset name in mtz -g spacegroup -r number of residues -A resolution range - e.g. 40.0,2.0 -s -u True # usage to get list of mean lengths and angles from stream files python make_mtz.py -s # output - .mtz file - just intensities - f2mtz.log file - _F.mtz file - intensities and structure factors - cuts data to desired resolution - truncate.log file """ # modules import subprocess import pandas as pd import numpy as np import argparse import os import regex as re def get_mean_cell( stream_file ): # open stream file stream = open( stream_file, "r" ).read() # get uc values from stream file # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg # scrub clen and return - else nan try: pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg" cell_lst = re.findall( pattern, stream ) except AttributeError: return np.nan # make cellss into df and convert to float cols = [ "a", "b", "c", "alpha", "beta", "gamma" ] cell_df = pd.DataFrame( cell_lst, columns=cols ) cell_df = cell_df.astype(float) # calculate mean cells values mean_a = round( cell_df.a.mean()*10, 3 ) mean_b = round( cell_df.b.mean()*10, 3 ) mean_c = round( cell_df.c.mean()*10, 3 ) mean_alpha = round( cell_df.alpha.mean(), 3 ) mean_beta = round( cell_df.beta.mean(), 3 ) mean_gamma = round( cell_df.gamma.mean(), 3 ) mean_cell = [ mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma ] return mean_cell, len(cell_lst) def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup, residues, res_range ): # make_mtz file name mtz_run_file = "make_mtz.sh" # make F file name Fout_file = os.path.splitext( mtzout_file )[0] + "_F.mtz" # write file mtz_sh = open( mtz_run_file, "w" ) mtz_sh.write( "#!/bin/sh\n\n" ) mtz_sh.write( "module purge\n" ) mtz_sh.write( "module load ccp4/8.0\n\n" ) mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} << EOF_hkl > f2mtz.log\n".format( hklout_file, mtzout_file ) ) mtz_sh.write( "TITLE Reflections from CrystFEL\n" ) mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) ) mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) mtz_sh.write( "SYMM {0}\n".format( spacegroup ) ) mtz_sh.write( "SKIP 3\n" ) mtz_sh.write( "LABOUT H K L I_stream SIGI_stream\n" ) mtz_sh.write( "CTYPE H H H J Q\n" ) mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" ) mtz_sh.write( "SKIP 3\n" ) mtz_sh.write( "EOF_hkl\n\n\n" ) mtz_sh.write( "echo 'done'\n" ) mtz_sh.write( "echo 'I and SIGI from CrystFEL stream saved as I_stream and SIGI_stream'\n" ) mtz_sh.write( "echo 'I filename = {0}'\n\n\n".format( mtzout_file ) ) mtz_sh.write( "echo 'running truncate'\n" ) mtz_sh.write( "echo 'setting resolution range to {0}-{1}'\n".format( res_range[0], res_range[1] ) ) mtz_sh.write( "echo 'assuming that there are {0} residues in assymetric unit'\n\n\n".format( residues ) ) mtz_sh.write( "truncate HKLIN {0} HKLOUT {1} << EOF_F > truncate.log\n".format( mtzout_file, Fout_file ) ) mtz_sh.write( "truncate YES\n" ) mtz_sh.write( "anomalous NO\n" ) mtz_sh.write( "nresidue {0}\n".format( residues ) ) mtz_sh.write( "resolution {0} {1}\n".format( res_range[0], res_range[1] ) ) mtz_sh.write( "plot OFF\n" ) mtz_sh.write( "labin IMEAN=I_stream SIGIMEAN=SIGI_stream\n" ) mtz_sh.write( "labout F=F_stream SIGF=SIGF_stream\n" ) mtz_sh.write( "end\n" ) mtz_sh.write( "EOF_F\n\n\n" ) mtz_sh.write( "echo 'done'\n" ) mtz_sh.write( "echo 'I_stream and SIGI_stream from f2mtz converted to F_stream and F_stream'\n" ) mtz_sh.write( "echo 'F filename = {0} (contains both Is and Fs)'".format( Fout_file ) ) mtz_sh.close() # make file executable subprocess.call( [ "chmod", "+x", "{0}".format( mtz_run_file ) ] ) # run subprocess.call( [ "./{0}".format( mtz_run_file ) ] ) def cut_hkl_file( hklin_file, hklout_file ): # setup hklout = open( hklout_file, 'w') collect_lines = True # Open the input file for reading with open( hklin_file, 'r') as f: for line in f: if line.strip() == 'End of reflections': collect_lines = False # Stop collecting lines if collect_lines: hklout.write( line ) hklout.close() def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup, residues, res_range ): # remove final lines from crystfel hkl out print( "removing final lines from crystfel hklin" ) cut_hkl_file( hklin_file, hklout_file ) print( "done" ) # running make mtz print( "making mtz" ) print( "using cell constants\n{0} {1} {2} A {3} {4} {5} deg".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) print( "Titles in mtz will be:\nPROJECT {0} CRYSTAL {1} DATASET {2}".format( project, crystal, dataset ) ) make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup, residues, res_range ) print( "done" ) # remove *cut.hkl out cwd = os.getcwd() files = os.listdir( cwd ) for file in files: if file.endswith( "cut.hkl" ): os.remove( file ) def list_of_floats(arg): return list(map(float, arg.split(','))) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "-i", "--hklin", help=".hkl output from partialator or process_hkl", type=os.path.abspath ) parser.add_argument( "-o", "--mtzout", help="name of mtz out file. The default will by the hklin with .mtz appended", type=str ) parser.add_argument( "-p", "--project", help="name of PROJECT flag in mtz file. The default is - SERIAL-XTAL", type=str, default="SERIAL-XTAL" ) parser.add_argument( "-x", "--crystal", help="name of CRYSTAL flag in mtz file. The default is - XTALS", type=str, default="XTALS" ) parser.add_argument( "-d", "--dataset", help="name of DATASET flag in mtz file. The default is - DARK", type=str, default="DARK" ) parser.add_argument( "-g", "--spacegroup", help="spacegroup for making mtz, e.g P41212", type=str ) parser.add_argument( "-c", "--cell", help="list of complete cell length and angles, e.g. 59.3,59.3,153.1,90.0,90.0,90.0. They all should be floats", type=list_of_floats ) parser.add_argument( "-r", "--residues", help="number of residues for truncate, e.g., hewl = 129", type=int ) parser.add_argument( "-A", "--resolution_range", help="list of 2 floats - low res then high res cut off, e.g., 50.0,1.3", type=list_of_floats ) parser.add_argument( "-s", "--stream_file", help="this will simply output the mean unit cell for the subsequent make_mtz run", type=os.path.abspath, ) parser.add_argument( "-u", "--use_stream", help="set -u True if you want to use the average lengths and angles from the stream file. Lengths and angles will be automatically averaged when appropriate", type=bool, default=False ) args = parser.parse_args() # logic on which task to perform if args.stream_file: print( "reading stream file" ) cell, xtals = get_mean_cell( args.stream_file ) print( "found {0} xtals".format( xtals ) ) print( "mean lengths = {0}, {1}, {2} A".format( cell[0], cell[1], cell[2] ) ) print( "mean angles = {0}, {1}, {2} deg".format( cell[3], cell[4], cell[5] ) ) print( "# for input to make_mtz = {0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) if args.use_stream: hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl" if args.mtzout: mtzout = args.mtzout else: mtzout = os.path.splitext( args.hklin )[0] + ".mtz" main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup, args.residues, args.resolution_range ) if args.stream_file == None and args.use_stream == False: hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl" if args.mtzout: mtzout = args.mtzout else: mtzout = os.path.splitext( args.hklin )[0] + ".mtz" main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup, args.residues, args.resolution_range )