#!/usr/bin/python # author J.Beale """ # aim to make an mtz from an hkl file output from partialator or process_hkl # usage python make_mtz.py # output - .mtz file - .html log file """ # modules import subprocess import pandas as pd import numpy as np import argparse import os import regex as re def get_mean_cell( stream_file ): # open stream file stream = open( stream_file, "r" ).read() # get uc values from stream file # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg # scrub clen and return - else nan try: pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg" cell_lst = re.findall( pattern, stream ) except AttributeError: return np.nan # make cellss into df and convert to float cols = [ "a", "b", "c", "alpha", "beta", "gamma" ] cell_df = pd.DataFrame( cell_lst, columns=cols ) cell_df = cell_df.astype(float) # calculate mean cells values mean_a = round( cell_df.a.mean()*10, 3 ) mean_b = round( cell_df.b.mean()*10, 3 ) mean_c = round( cell_df.c.mean()*10, 3 ) mean_alpha = round( cell_df.alpha.mean(), 3 ) mean_beta = round( cell_df.beta.mean(), 3 ) mean_gamma = round( cell_df.gamma.mean(), 3 ) mean_cell = [ mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma ] return mean_cell, len(cell_lst) def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup ): # make_mtz file name mtz_run_file = "make_mtz.sh" # write file mtz_sh = open( mtz_run_file, "w" ) mtz_sh.write( "#!/bin/sh\n\n" ) mtz_sh.write( "module purge\n" ) mtz_sh.write( "module load ccp4/8.0\n\n" ) mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} > out.html << EOF\n".format( hklout_file, mtzout_file ) ) mtz_sh.write( "TITLE Reflections from CrystFEL\n" ) mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) ) mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) mtz_sh.write( "SYMM {0}\n".format( spacegroup ) ) mtz_sh.write( "SKIP 3\n" ) mtz_sh.write( "LABOUT H K L IMEAN SIGIMEAN\n" ) mtz_sh.write( "CTYPE H H H J Q\n" ) mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" ) mtz_sh.write( "SKIP 3\n" ) mtz_sh.write( "EOF" ) mtz_sh.close() # make file executable subprocess.call( [ "chmod", "+x", "{0}".format( mtz_run_file ) ] ) # run subprocess.call( [ "./{0}".format( mtz_run_file ) ] ) def cut_hkl_file( hklin_file, hklout_file ): # setup hklout = open( hklout_file, 'w') collect_lines = True # Open the input file for reading with open( hklin_file, 'r') as f: for line in f: if line.strip() == 'End of reflections': collect_lines = False # Stop collecting lines if collect_lines: hklout.write( line ) hklout.close() def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup ): # remove final lines from crystfel hkl out print( "removing final lines from crystfel hklin" ) cut_hkl_file( hklin_file, hklout_file ) print( "done" ) # running make mtz print( "making mtz" ) print( "using cell constants\n{0} {1} {2} A {3} {4} {5} deg".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) print( "Titles in mtz will be:\nPROJECT {0} CRYSTAL {1} DATASET {2}".format( project, crystal, dataset ) ) make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup ) print( "done" ) def list_of_floats(arg): return list(map(float, arg.split(','))) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "-i", "--hklin", help=".hkl output from partialator or process_hkl", type=os.path.abspath ) parser.add_argument( "-o", "--mtzout", help="name of mtz out file. The default will by the hklin with .mtz appended", type=str, ) parser.add_argument( "-c", "--cell", help="list of complete cell length and angles, e.g. 59.3,59.3,153.1,90.0,90.0,90.0. They all should be floats", type=list_of_floats ) parser.add_argument( "-s", "--stream_file", help="this will simply output the mean unit cell for the subsequent make_mtz run", type=os.path.abspath, ) parser.add_argument( "-p", "--project", help="name of PROJECT flag in mtz file. The default is - SERIAL-XTAL", type=str, default="SERIAL-XTAL" ) parser.add_argument( "-x", "--crystal", help="name of CRYSTAL flag in mtz file. The default is - XTALS", type=str, default="XTALS" ) parser.add_argument( "-d", "--dataset", help="name of DATASET flag in mtz file. The default is - DARK", type=str, default="DARK" ) parser.add_argument( "-g", "--spacegroup", help="spacegroup for making mtz, e.g P41212", type=str ) parser.add_argument( "-u", "--use_stream", help="set -u True if you want to use the average lengths and angles from the stream file. Lengths and angles will be automatically averaged when appropriate", type=bool, default=False ) args = parser.parse_args() # logic on which task to perform if args.stream_file: print( "reading stream file" ) cell, xtals = get_mean_cell( args.stream_file ) print( "found {0} xtats".format( xtals ) ) print( "mean lengths = {0}, {1}, {2} A".format( cell[0], cell[1], cell[2] ) ) print( "mean angles = {0}, {1}, {2} deg".format( cell[3], cell[4], cell[5] ) ) print( "# for input to make_mtz = {0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) ) if args.use_stream: hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl" if args.mtzout: mtzout = args.mtzout else: mtzout = os.path.splitext( args.hklin )[0] + ".mtz" main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup ) if args.stream_file == None and args.use_stream == False: hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl" if args.mtzout: mtzout = args.mtzout else: mtzout = os.path.splitext( args.hklin )[0] + ".mtz" main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup )