script to make mtz files from .hkl outputs

This commit is contained in:
Beale John Henry
2023-11-03 10:14:31 +01:00
parent cc873061ba
commit e37f082597

201
reduction_tools/make_mtz.py Normal file
View File

@@ -0,0 +1,201 @@
#!/usr/bin/python
# author J.Beale
"""
# aim
to make an mtz from an hkl file output from partialator or process_hkl
# usage
python make_mtz.py
# output
- .mtz file
- .html log file
"""
# modules
import subprocess
import pandas as pd
import numpy as np
import argparse
import os
import regex as re
def get_mean_cell( stream_file ):
# open stream file
stream = open( stream_file, "r" ).read()
# get uc values from stream file
# example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
# scrub clen and return - else nan
try:
pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
cell_lst = re.findall( pattern, stream )
except AttributeError:
return np.nan
# make cellss into df and convert to float
cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
cell_df = pd.DataFrame( cell_lst, columns=cols )
cell_df = cell_df.astype(float)
# calculate mean cells values
mean_a = round( cell_df.a.mean()*10, 3 )
mean_b = round( cell_df.b.mean()*10, 3 )
mean_c = round( cell_df.c.mean()*10, 3 )
mean_alpha = round( cell_df.alpha.mean(), 3 )
mean_beta = round( cell_df.beta.mean(), 3 )
mean_gamma = round( cell_df.gamma.mean(), 3 )
mean_cell = [ mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma ]
return mean_cell, len(cell_lst)
def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup ):
# make_mtz file name
mtz_run_file = "make_mtz.sh"
# write file
mtz_sh = open( mtz_run_file, "w" )
mtz_sh.write( "#!/bin/sh\n\n" )
mtz_sh.write( "module purge\n" )
mtz_sh.write( "module load ccp4/8.0\n\n" )
mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} > out.html << EOF\n".format( hklout_file, mtzout_file ) )
mtz_sh.write( "TITLE Reflections from CrystFEL\n" )
mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) )
mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
mtz_sh.write( "SYMM {0}\n".format( spacegroup ) )
mtz_sh.write( "SKIP 3\n" )
mtz_sh.write( "LABOUT H K L IMEAN SIGIMEAN\n" )
mtz_sh.write( "CTYPE H H H J Q\n" )
mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" )
mtz_sh.write( "SKIP 3\n" )
mtz_sh.write( "EOF" )
mtz_sh.close()
# make file executable
subprocess.call( [ "chmod", "+x", "{0}".format( mtz_run_file ) ] )
# run
subprocess.call( [ "./{0}".format( mtz_run_file ) ] )
def cut_hkl_file( hklin_file, hklout_file ):
# setup
hklout = open( hklout_file, 'w')
collect_lines = True
# Open the input file for reading
with open( hklin_file, 'r') as f:
for line in f:
if line.strip() == 'End of reflections':
collect_lines = False # Stop collecting lines
if collect_lines:
hklout.write( line )
hklout.close()
def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup ):
# remove final lines from crystfel hkl out
print( "removing final lines from crystfel hklin" )
cut_hkl_file( hklin_file, hklout_file )
print( "done" )
# running make mtz
print( "making mtz" )
print( "using cell constants\n{0} {1} {2} A {3} {4} {5} deg".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
print( "Titles in mtz will be:\nPROJECT {0} CRYSTAL {1} DATASET {2}".format( project, crystal, dataset ) )
make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup )
print( "done" )
def list_of_floats(arg):
return list(map(float, arg.split(',')))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-i",
"--hklin",
help=".hkl output from partialator or process_hkl",
type=os.path.abspath
)
parser.add_argument(
"-o",
"--mtzout",
help="name of mtz out file. The default will by the hklin with .mtz appended",
type=str,
)
parser.add_argument(
"-c",
"--cell",
help="list of complete cell length and angles, e.g. 59.3,59.3,153.1,90.0,90.0,90.0. They all should be floats",
type=list_of_floats
)
parser.add_argument(
"-s",
"--stream_file",
help="this will simply output the mean unit cell for the subsequent make_mtz run",
type=os.path.abspath,
)
parser.add_argument(
"-p",
"--project",
help="name of PROJECT flag in mtz file. The default is - SERIAL-XTAL",
type=str,
default="SERIAL-XTAL"
)
parser.add_argument(
"-x",
"--crystal",
help="name of CRYSTAL flag in mtz file. The default is - XTALS",
type=str,
default="XTALS"
)
parser.add_argument(
"-d",
"--dataset",
help="name of DATASET flag in mtz file. The default is - DARK",
type=str,
default="DARK"
)
parser.add_argument(
"-g",
"--spacegroup",
help="spacegroup for making mtz, e.g P41212",
type=str
)
parser.add_argument(
"-u",
"--use_stream",
help="set -u True if you want to use the average lengths and angles from the stream file. Lengths and angles will be automatically averaged when appropriate",
type=bool,
default=False
)
args = parser.parse_args()
# logic on which task to perform
if args.stream_file:
print( "reading stream file" )
cell, xtals = get_mean_cell( args.stream_file )
print( "found {0} xtats".format( xtals ) )
print( "mean lengths = {0}, {1}, {2} A".format( cell[0], cell[1], cell[2] ) )
print( "mean angles = {0}, {1}, {2} deg".format( cell[3], cell[4], cell[5] ) )
print( "# for input to make_mtz = {0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
if args.use_stream:
hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl"
if args.mtzout:
mtzout = args.mtzout
else:
mtzout = os.path.splitext( args.hklin )[0] + ".mtz"
main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup )
if args.stream_file == None and args.use_stream == False:
hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl"
if args.mtzout:
mtzout = args.mtzout
else:
mtzout = os.path.splitext( args.hklin )[0] + ".mtz"
main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup )