auto processing script combining split, partialator and make_mtz. in progress
reduction_tools/auto_process.py (new file, 440 lines added)
@@ -0,0 +1,440 @@
#!/usr/bin/python

# author J.Beale

"""
# aim
automatically index, integrate, scale/merge and output mtzs for individual SwissFEL runs
combines, hopefully intelligently, indexamajig, partialator, f2mtz and truncate to produce complete mtz files

# usage
python auto_process.py

# output
in the run directory:
- integrated .stream files + logs
- scaled/merged files
- an mtz file
- useful plots
- log file of output
"""

# modules
import sys
sys.path.insert( 1, "/sf/cristallina/applications/mx/crystfel_tools/reduction_tools/" )
import partialator as part
import subprocess
import os, errno
import shutil
import pandas as pd
import numpy as np
import glob
import argparse
import regex as re
from loguru import logger

def read_cell( cell_file ):

    # function to get cell parameter
    def get_parameter( line, parameter ):

        # general parameter search
        pattern = r"{0}\s=\s(\d+\.\d+)".format( parameter )
        value = re.search( pattern, line ).group(1)
        return value

    # loop through file and get parameters
    cell = open( cell_file, "r" )
    for line in cell:
        if line.startswith( "a " ):
            a = float( get_parameter( line, "a" ) )
        if line.startswith( "b " ):
            b = float( get_parameter( line, "b" ) )
        if line.startswith( "c " ):
            c = float( get_parameter( line, "c" ) )
        if line.startswith( "al " ):
            alpha = float( get_parameter( line, "al" ) )
        if line.startswith( "be " ):
            beta = float( get_parameter( line, "be" ) )
        if line.startswith( "ga " ):
            gamma = float( get_parameter( line, "ga" ) )

    cell = [ a, b, c, alpha, beta, gamma ]

    return cell
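
# illustration only (file contents assumed, not part of this repo): a CrystFEL unit
# cell file contains lines such as
#   a = 79.10 A
#   al = 90.00 deg
# which the regex above picks up, so read_cell( cell_file ) returns the six floats
# [ a, b, c, alpha, beta, gamma ].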

def cat_lst_files( script_dir, run_no, endstation, pgroup, jfj, label, output ):

    cat_lst = "{0}/cat_lst.py".format( script_dir )

    submit_cmd = [ "python", cat_lst,
                   "{0}".format( run_no ),
                   "--endstation={0}".format( endstation ),
                   "--pgroup={0}".format( pgroup ),
                   "--jfj={0}".format( jfj ),
                   "--label={0}".format( label ),
                   "--output={0}".format( output ),
                   ]

    subprocess.run( submit_cmd )
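
# for illustration, with the defaults at the bottom of this file the call in main()
# builds a command roughly equivalent to (exact behaviour depends on cat_lst.py):
#   python <script_dir>/cat_lst.py 11 --endstation=cristallina --pgroup=p22216 \
#          --jfj=True --label=off --output=run0011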

def run_index( script_dir, name, lst_file, chunk_size, geom_file, cell_file,
               indexer, peakfinder, integrator, tolerance, threshold,
               min_snr, int_radius, multi, retry, min_pix,
               bg_rad, min_res, reservation
               ):

    indexamajig = "{0}/crystfel_split_var.py".format( script_dir )

    submit_cmd = [ "python", indexamajig,
                   "--job_name={0}".format( name ),
                   "--lst_file={0}".format( lst_file ),
                   "--chunk_size={0}".format( chunk_size ),
                   "--geom_file={0}".format( geom_file ),
                   "--cell_file={0}".format( cell_file ),
                   "--indexer={0}".format( indexer ),
                   "--peakfinder={0}".format( peakfinder ),
                   "--integrator={0}".format( integrator ),
                   "--tolerance={0}".format( tolerance ),
                   "--threshold={0}".format( threshold ),
                   "--min_snr={0}".format( min_snr ),
                   "--int_radius={0}".format( int_radius ),
                   "--multi={0}".format( multi ),
                   "--retry={0}".format( retry ),
                   "--min_pix={0}".format( min_pix ),
                   "--bg_rad={0}".format( bg_rad ),
                   "--min_res={0}".format( min_res ),
                   ]

    # only pass a reservation flag if one was requested
    if reservation:
        submit_cmd.append( "--reservation={0}".format( reservation ) )

    subprocess.run( submit_cmd )

    # make stream name
    stream_file = "{0}.stream".format( name )

    return stream_file
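
# for illustration, with the configuration at the bottom of this file the wrapper is
# invoked roughly as follows (crystfel_split_var.py is expected to forward these flags
# to indexamajig):
#   python <script_dir>/crystfel_split_var.py --job_name=run0011_index \
#          --lst_file=run0011_off.lst --chunk_size=250 \
#          --geom_file=<cwd>/JF17T16V01_cf10_shift.geom --cell_file=<...>/p1.cell \
#          --indexer=xgandalf --peakfinder=peakfinder8 ...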

def run_partialator( script_dir, name, stream_file, pointgroup, model, iterations,
                     cell_file, bins, resolution, max_adu, reservation
                     ):

    partialator = "{0}/partialator.py".format( script_dir )

    submit_cmd = [ "python", partialator,
                   "--name={0}".format( name ),
                   "--stream_file={0}".format( stream_file ),
                   "--pointgroup={0}".format( pointgroup ),
                   "--model={0}".format( model ),
                   "--iterations={0}".format( iterations ),
                   "--cell_file={0}".format( cell_file ),
                   "--bins={0}".format( bins ),
                   "--resolution={0}".format( resolution ),
                   "--max_adu={0}".format( max_adu ),
                   ]

    # only pass a reservation flag if one was requested
    if reservation:
        submit_cmd.append( "--reservation={0}".format( reservation ) )

    subprocess.run( submit_cmd )

def scrub_cc( part_dir_name ):

    # open 1st partialator log file
    part_log = open( "{0}.log".format( part_dir_name ) )

    # regex example = resolution at CC0.5 at 0.3 = 1.599
    pattern = r"resolution\sat\sCC0\.5\sat\s0\.3\s=\s(\d+\.\d+)"
    cc = re.search( pattern, part_log.read() ).group(1)

    return float( cc )
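
# example (line format taken from the comment above): if the partialator log contains
#   resolution at CC0.5 at 0.3 = 1.599
# then scrub_cc( "run0011_merge" ) returns 1.599; if the line is absent, re.search
# returns None and the caller handles the resulting AttributeError.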

def scrub_index( index_dir_name ):

    # open the indexing log file
    index_log_file = open( "{0}.log".format( index_dir_name ) )
    index_log = index_log_file.read()

    # regex example = images = 0
    images_pattern = r"images\s=\s(\d+)"
    images = re.search( images_pattern, index_log ).group(1)

    # regex example = crystals = 0
    xtals_pattern = r"crystals\s=\s(\d+)"
    indexed = re.search( xtals_pattern, index_log ).group(1)

    def get_index_cell( index_log, parameter ):

        # general parameter search
        pattern = r"mean\s{0}\s=\s(\d+\.\d+)\s\+/-\s(\d+\.\d+)".format( parameter )
        parameters = re.search( pattern, index_log )
        value, error = parameters.group(1), parameters.group(2)
        return value, error

    ind_res, ind_res_err = get_index_cell( index_log, "resolution" )
    obs, obs_err = get_index_cell( index_log, "observations" )
    a, a_err = get_index_cell( index_log, "a" )
    b, b_err = get_index_cell( index_log, "b" )
    c, c_err = get_index_cell( index_log, "c" )
    alpha, alpha_err = get_index_cell( index_log, "alpha" )
    beta, beta_err = get_index_cell( index_log, "beta" )
    gamma, gamma_err = get_index_cell( index_log, "gamma" )

    # put results into df
    data = [ { "images" : int( images ), "indexed" : int( indexed ),
               "index_res" : float( ind_res ), "index_res_err" : float( ind_res_err ),
               "obs" : float( obs ), "obs_err" : float( obs_err ),
               "a" : float( a ), "a_err" : float( a_err ),
               "b" : float( b ), "b_err" : float( b_err ),
               "c" : float( c ), "c_err" : float( c_err ),
               "alpha" : float( alpha ), "alpha_err" : float( alpha_err ),
               "beta" : float( beta ), "beta_err" : float( beta_err ),
               "gamma" : float( gamma ), "gamma_err" : float( gamma_err )
               } ]

    stream_df = pd.DataFrame( data )

    return stream_df
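
# example (log format assumed from the regexes above): for an indexing log containing
#   images = 5000
#   crystals = 1200
#   mean resolution = 1.85 +/- 0.20
#   mean a = 79.10 +/- 0.05   (and likewise for b, c, alpha, beta, gamma, observations)
# scrub_index( "run0011_index" ) returns a one-row DataFrame with the columns
# images, indexed, index_res, index_res_err, obs, obs_err, a, a_err, ..., gamma_err.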

def run_make_mtz( script_dir, hklin_file, project, crystal, dataset, cell, spacegroup, residues, res_range ):

    make_mtz = "{0}/make_mtz.py".format( script_dir )

    submit_cmd = [ "python", make_mtz,
                   "--hklin={0}".format( hklin_file ),
                   "--project={0}".format( project ),
                   "--crystal={0}".format( crystal ),
                   "--dataset={0}".format( dataset ),
                   "--cell={0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ),
                   "--spacegroup={0}".format( spacegroup ),
                   "--residues={0}".format( residues ),
                   "--resolution_range={0},{1}".format( res_range[0], res_range[1] )
                   ]
    subprocess.run( submit_cmd )
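
# for illustration, with the defaults at the bottom of this file this builds a command
# roughly equivalent to:
#   python <script_dir>/make_mtz.py --hklin=run0011_merge.hkl --project=Cr_fragment \
#          --crystal=bach --dataset=run0011 --cell=a,b,c,al,be,ga --spacegroup=P1 \
#          --residues=124 --resolution_range=50.0,<cc>
# where the cell values come from read_cell() and <cc> is the CC-based resolution cut.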

def scrub_prefix( endstation, pgroup, run_name ):

    # regex example = /sf/cristallina/data/p22216/raw/run0001-hewl_det-calib_1
    file_path = "/sf/{0}/data/{1}/raw/{2}-*/".format( endstation, pgroup, run_name )
    try:
        file_path = glob.glob( file_path )[0]
        pattern = r"/sf/{0}/data/{1}/raw/{2}-(.*)/".format( endstation, pgroup, run_name )
        prefix = re.search( pattern, file_path ).group(1)
    except ( AttributeError, IndexError ) as e:
        # no matching raw directory, or the pattern did not match
        prefix = np.nan

    return prefix
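
# example (path taken from the comment above): for a raw directory named
#   /sf/cristallina/data/p22216/raw/run0001-hewl_det-calib_1/
# scrub_prefix( "cristallina", "p22216", "run0001" ) returns "hewl_det-calib_1";
# if nothing matches, prefix falls back to np.nan.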

def make_process_dir( proc_dir ):

    # make process directory
    try:
        os.makedirs( proc_dir )
    except OSError as e:
        if e.errno != errno.EEXIST:
            logger.debug( "making directory error" )
            raise

def main( script_dir, cwd, runs,
          endstation, pgroup, jfj, label,
          chunk_size, geom_file, cell_file, indexer, peakfinder, integrator, tolerance, threshold,
          min_snr, int_radius, multi, retry, min_pix, bg_rad, min_res, reservation,
          pointgroup, model, iterations, bins, resolution, max_adu,
          project, crystal, spacegroup, residues
          ):

    # collate results for summary csv
    df = pd.DataFrame()

    print( "beginning autoprocessing for runs {0}".format( runs ) )

    for run in runs:

        # make process dir
        run_name = "run{0}".format( str( run ).zfill(4) )
        print( "making run directory {0}".format( run_name ) )
        proc_dir = "{0}/{1}".format( cwd, run_name )
        make_process_dir( proc_dir )

        # move to proc dir
        os.chdir( proc_dir )
        print( "done" )

        # create lst file
        # make lst output name
        lst_name = "{0}_{1}.lst".format( run_name, label )
        if not os.path.isfile( "{0}/{1}".format( proc_dir, lst_name ) ):
            print( "making lst file" )
            cat_lst_files( script_dir, run, endstation, pgroup, jfj, label, run_name )
            print( "done" )
        else:
            print( "lst file already created" )

        # scrub run prefix
        prefix = scrub_prefix( endstation, pgroup, run_name )

        # run crystfel split with variable
        index_run_name = "{0}_index".format( run_name )
        if not os.path.isfile( "{0}/{1}.stream".format( proc_dir, index_run_name ) ):
            print( "running indexamajig" )
            stream_file = run_index( script_dir, index_run_name, lst_name, chunk_size, geom_file, cell_file,
                                     indexer, peakfinder, integrator, tolerance, threshold,
                                     min_snr, int_radius, multi, retry, min_pix,
                                     bg_rad, min_res, reservation
                                     )
            print( "done" )
        else:
            print( "indexamajig already run" )
            stream_file = "{0}.stream".format( index_run_name )

        # scrub stream log
        stream_df = scrub_index( index_run_name )
        indexed = stream_df.indexed[0]

        # run partialator
        if indexed > 100:
            print( "indexed {0} - now run partialator and make mtz".format( indexed ) )

            part_run_name = "{0}_merge".format( run_name )
            hkl_file = "{0}.hkl".format( part_run_name )
            if not os.path.isfile( "{0}/{1}/{2}".format( proc_dir, part_run_name, hkl_file ) ):
                run_partialator( script_dir, part_run_name, stream_file, pointgroup, model, iterations,
                                 cell_file, bins, resolution, max_adu, reservation
                                 )
            else:
                print( "partialator already run" )

            # get resolution at cc 0.5
            # scrub log file
            try:
                cc = scrub_cc( part_run_name )
                print( "scrubbed resolution = {0}".format( cc ) )
                if cc < 1.2:
                    print( "predicted resolution too high. reset to 1.2 A" )
                    cc = 1.2
            except AttributeError as e:
                cc = 3.0

            # re-run check hkl
            print( "running cc check/compare" )
            os.chdir( part_run_name )
            check_run_file = part.run_compare_check( "{0}/{1}".format( proc_dir, part_run_name ), part_run_name, cell_file, bins, cc )
            submit_cmd = [ "{0}".format( check_run_file ) ]
            subprocess.call( submit_cmd )
            print( "done" )

            # make mtz
            print( "making final mtz" )
            cell = read_cell( cell_file )
            mtz_file = "{0}/{1}/{1}.mtz".format( proc_dir, part_run_name )
            F_file = "{0}/{1}/{1}_F.mtz".format( proc_dir, part_run_name )
            res_range = [ 50.0, cc ]
            run_make_mtz( script_dir, hkl_file, project, crystal, run_name, cell, spacegroup, residues, res_range )
            try:
                shutil.move( mtz_file, proc_dir )
            except shutil.Error as e:
                pass
            try:
                shutil.move( F_file, proc_dir )
            except shutil.Error as e:
                pass
            print( "done {0}".format( run_name ) )

        else:
            print( "partialator not run. <= 100 indexed" )

        data = [ { "run" : run,
                   "prefix" : prefix,
                   } ]

        # move back to cwd
        os.chdir( cwd )

    print( "finished all!" )


#script_dir = "/sf/cristallina/applications/mx/crystfel_tools/reduction_tools/"
script_dir = "/sf/cristallina/data/p22216/work/processing/bach"
cwd = os.getcwd()
#runs = [ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
#         113, 114, 115, 116, 117, 118, 119, 122, 123, 124, 125, 126, 127,
#         128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138 ]
runs = [ 11 ]
endstation = "cristallina"
pgroup = "p22216"
jfj = True
label = "off"

geom_file = "{0}/{1}".format( cwd, "JF17T16V01_cf10_shift.geom" )
chunk_size = 250
cell_file = "/sf/cristallina/data/p22216/work/processing/bach/p1.cell"
indexer = "xgandalf"
peakfinder = "peakfinder8"
integrator = "rings-nocen-nograd"
tolerance = "10.0,10.0,10.0,2.0,3.0,2.0"
threshold = 20
min_snr = 5
int_radius = "2,4,5"
multi = True
retry = True
min_pix = 2
bg_rad = 4
min_res = 85
reservation = None

pointgroup = 1
model = "unity"
iterations = 1
bins = 20
resolution = 2.0
max_adu = 12000
project = "Cr_fragment"
crystal = "bach"
spacegroup = "P1"
residues = 124


logfile = "{0}.log".format( "auto_process" )
logger.remove()
logger.add( logfile, format="{message}", level="INFO" )
main( script_dir, cwd, runs,
      endstation, pgroup, jfj, label,
      chunk_size, geom_file, cell_file, indexer, peakfinder, integrator, tolerance, threshold,
      min_snr, int_radius, multi, retry, min_pix, bg_rad, min_res, reservation,
      pointgroup, model, iterations, bins, resolution, max_adu,
      project, crystal, spacegroup, residues
      )