Merge branch 'master' of https://gitlab.psi.ch/sf-mx/crystfel_tools

cool_tools/chip_uc_gather.py (new file, 214 lines)
@@ -0,0 +1,214 @@
#!/usr/bin/env python3

# author J.Beale

"""
# aim
order CrystFEL hits from a .stream output and calculate the unit-cell volume

# usage
python chip_uc_gather.py -s <path to stream file>
                         -a <size of SwissFEL acquisition>
                         -c <chip size>
                         -o <output>

# output
csv file of aperture, mean, no-of-hits
input for uc_analysis.py
"""

# modules
import re
import argparse
import pandas as pd
import numpy as np
import os

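# For reference, extract_chunks() below keys on the chunk markers shown here.
# A minimal, illustrative chunk from a CrystFEL .stream file (the serial
# number is made up; real chunks carry many more fields):
#
#   ----- Begin chunk -----
#   Image filename: /sf/cristallina/data/p21981/raw/run0141-hewl_cover_filter_30um/data/acq0001.JF17T16V01j.h5
#   Image serial number: 42
#   Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
#   ----- End chunk -----
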
def extract_chunks( input_file ):

    # setup
    chunk_df = pd.DataFrame()
    event_no = []
    chunks = []
    hits = []
    collect_lines = False

    # Open the input file for reading
    with open(input_file, 'r') as f:
        for line in f:

            # Check for the start condition
            if line.startswith('----- Begin chunk -----'):
                hit = False
                collect_lines = True
                chunk_lines = []
            if collect_lines:
                chunk_lines.append(line)

            # find image_no
            if line.startswith( "Image serial number:" ):
                event_search = re.findall( r"Image serial number:\s(\d+)", line )
                event = int(event_search[0])
                event_no.append( event )

            # is there a hit in the chunk
            if line.startswith( "Cell parameters" ):
                hit = True

            if line.startswith('----- End chunk -----'):
                collect_lines = False  # Stop collecting lines
                chunks.append( chunk_lines )
                hits.append( hit )

    chunk_df[ "chunks" ] = chunks
    chunk_df[ "event" ] = event_no
    chunk_df[ "hit" ] = hits

    return chunk_df

def scrub_acq( chunk ):

    for line in chunk:
        # example
        # "Image filename: /sf/cristallina/data/p21981/raw/run0141-hewl_cover_filter_30um/data/acq0001.JF17T16V01j.h5"
        if line.startswith( "Image filename" ):
            try:
                pattern = r"data/acq(\d+)\.JF"
                acq = re.findall( pattern, line )
                if acq:
                    return int(acq[0])
            except (AttributeError, IndexError):
                print( "scrub acquisition error" )
                return np.nan

def scrub_cell( chunk ):

    for line in chunk:
        if line.startswith( "Cell parameters" ):
            try:
                pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
                cell_lst = re.findall( pattern, line )
                if cell_lst:
                    return cell_lst
            except (AttributeError, IndexError):
                print( "scrub_cell error" )
                return np.nan

def V_unit( a, b, c, alpha, beta, gamma ):

    cos_alpha = np.cos(alpha * (np.pi / 180))
    cos_beta = np.cos(beta * (np.pi / 180))
    cos_gamma = np.cos(gamma * (np.pi / 180))
    m_matrix = np.array([[a * a            , a * b * cos_gamma, a * c * cos_beta ],
                         [a * b * cos_gamma, b * b            , b * c * cos_alpha],
                         [a * c * cos_beta , b * c * cos_alpha, c * c            ]])
    m_matrix_det = np.linalg.det(m_matrix)
    V_unit = np.sqrt(m_matrix_det)
    return V_unit

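# The matrix above is the metric (Gram) matrix G of the cell vectors, and
# sqrt(det(G)) equals the closed-form triclinic volume
#     V = a*b*c*sqrt(1 - cos(alpha)**2 - cos(beta)**2 - cos(gamma)**2
#                    + 2*cos(alpha)*cos(beta)*cos(gamma))
# Quick sanity check (illustrative, not part of the original script):
# V_unit(1, 1, 1, 90, 90, 90) ~= 1.0 and V_unit(2, 2, 2, 90, 90, 90) ~= 8.0,
# with volumes in nm^3 since the stream reports cell lengths in nm.
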
def main( input_file, acq_size, chip_size, output ):

    # extract chunks
    print( "finding chunks" )
    chunk_df = extract_chunks( input_file )
    # display no. of chunks
    print( "found {0} chunks".format( len(chunk_df) ) )
    print( "found {0} crystals".format( chunk_df.hit.sum() ) )
    print( "done" )

    # find unit cells
    print( "getting unit cell data from chunks" )
    xtal_df = pd.DataFrame()
    counter = 0
    for index, row in chunk_df.iterrows():

        chunk, hit, event = row[ "chunks" ], row[ "hit" ], row[ "event" ]

        if hit:

            # calc image number
            acq = scrub_acq( chunk )
            image_no = ( acq - 1 )*acq_size + event
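            # e.g. with the default acq_size of 1000, event 42 of acq0003
            # gives image_no = (3 - 1)*1000 + 42 = 2042 (illustrative numbers)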
            # get unit cells
            cell_lst = scrub_cell( chunk )

            # concat results
            cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
            xtal_df_1 = pd.DataFrame( cell_lst, columns=cols )

            # set datatype
            xtal_df_1 = xtal_df_1.astype( float )

            # calculate
            xtal_df_1[ "image_no" ] = image_no
            xtal_df_1[ "uc_vol" ] = V_unit( xtal_df_1.a.values[0],
                                            xtal_df_1.b.values[0],
                                            xtal_df_1.c.values[0],
                                            xtal_df_1.alpha.values[0],
                                            xtal_df_1.beta.values[0],
                                            xtal_df_1.gamma.values[0] )
            xtal_df = pd.concat( ( xtal_df, xtal_df_1 ) )

        # add to count and print every 1000 chunks
        counter = counter + 1
        if counter % 1000 == 0:
            print( counter, end='\r' )
    print( "done" )

    print( "merging multiple crystals" )
    # average across multiple hits
    uc_df = xtal_df.groupby( "image_no" ).mean()
    uc_df[ "hits" ] = xtal_df.groupby( "image_no" ).count().a
    print( xtal_df.groupby( "image_no" ).count().a )

    # keep only volume and hit count; drop a, b, c and the angles
    uc_df = uc_df[ [ "uc_vol", "hits" ] ]
    print( uc_df )

    # stretch index to include blanks
    chip_index = np.arange( 1, chip_size+1 )
    uc_df = uc_df.reindex( chip_index )
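    # reindexing pads apertures with no indexed hits with NaN rows, so e.g. a
    # 4-well chip with hits only on wells 1 and 3 still lists wells 1-4 in the
    # csv (illustrative; the default chip has 26244 wells)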

    print( "outputting to csv" )
    # output to file
    file_name = "{0}_uc.csv".format( output )
    uc_df.to_csv( file_name, sep="," )
    print( "done" )

def list_of_floats(arg):
    return list(map(float, arg.split(',')))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--stream",
        help="input stream file",
        required=True,
        type=os.path.abspath
    )
    parser.add_argument(
        "-a",
        "--acq_size",
        help="size of acquisitions used when collecting data. default is 1000 images per acquisition",
        default=1000,
        type=int
    )
    parser.add_argument(
        "-c",
        "--chip_size",
        help="total number of wells in the chip. default = 26244",
        default=26244,
        type=int
    )
    parser.add_argument(
        "-o",
        "--output",
        help="output file name.",
        required=True,
        type=str
    )
    args = parser.parse_args()
    # run main
    main( args.stream, args.acq_size, args.chip_size, args.output )

cool_tools/uc_analysis.py (new file, 64 lines)
@@ -0,0 +1,64 @@
#!/usr/bin/env python3

# author J.Beale

"""
# aim
compile results from 3 chips to give mean and std.

# usage
python uc_analysis.py <path to csv 1> <path to csv 2> <path to csv 3> output

# output
compiles the csvs of 3 chips and gives a per-aperture output of the hits, mean uc and std. uc
"""

# modules
import pandas as pd
import numpy as np
import sys

def compile_inputs( csv_lst, output ):

    print( "compiling results" )
    # overall inputs
    compiled_df = pd.DataFrame()
    merged_df = pd.DataFrame()

    count = 1
    for csv in csv_lst:

        uc_vol = "vol_{0}".format( count )
        hits = "hits_{0}".format( count )
        cols = [ uc_vol, hits ]
        csv_df = pd.read_csv( csv,
                              skiprows=1,
                              names=cols,
                              index_col=0,
                              sep=","
                              )

        compiled_df = pd.concat( ( compiled_df, csv_df ), axis=1 )

        count = count + 1

    # merge hits
    merged_df[ "hits" ] = compiled_df[ [ "hits_1", "hits_2", "hits_3" ] ].sum(axis=1)
    merged_df[ "vol_mean" ] = compiled_df[ [ "vol_1", "vol_2", "vol_3" ] ].mean(axis=1)
    merged_df[ "vol_std" ] = compiled_df[ [ "vol_1", "vol_2", "vol_3" ] ].std(axis=1)
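    # e.g. an aperture with vol_1 = 230.0, vol_2 = 234.0, vol_3 = 232.0 nm^3
    # and one hit per chip gives hits = 3, vol_mean = 232.0, vol_std = 2.0
    # (illustrative values only)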

    # output results
    file_name = "{0}_uc.csv".format( output )
    merged_df.to_csv( file_name, sep="," )
    print( "done" )

if __name__ == "__main__":

    csv_lst = [ sys.argv[1],
                sys.argv[2],
                sys.argv[3]
                ]

    print( sys.argv[1] )

    compile_inputs( csv_lst, sys.argv[4] )

@@ -25,33 +25,46 @@ import pandas as pd
 import glob
 import os
 import numpy as np
+from sys import exit
 
-def concatenate_files( input_file_lst, output ):
+def concatenate_files( input_file_lst, output, label ):
 
-    output_file = "{0}.lst".format( output )
+    output_file = "{0}_{1}.lst".format( output, label )
+
+    lines = 0
 
     # create output file
     with open( output_file, "w" ) as output:
 
         # loop through input list - read and write to output file
-        for lst_file_pwd in input_file_lst:
+        for lst_file_pwd in input_file_lst.lst_pwd:
 
             # open and write to output file
             with open( lst_file_pwd, "r" ) as lst_file:
 
+                lines = lines + len( lst_file.readlines() )
+                lst_file.seek( 0 )  # rewind: readlines() above consumed the file
                 output.write( lst_file.read() )
 
+                lst_file.close()
 
+    output.close()
+
+    print( "written {0} images to {1}".format( lines, output_file ) )
+
-def make_pwd( run_no, endstation, pgroup ):
+def make_pwd( run_no, endstation, pgroup, jfj ):
 
-    # construct lst folder path
-    lst_pwd = "/sf/{0}/data/{1}/raw/".format( endstation, pgroup ) + "run" + run_no + "*/data"
+    # determine folder for jfj/clara or old daq
+    if jfj == True:
+        lst_pwd = "/sf/{0}/data/{1}/res/run{2}*".format( endstation, pgroup, run_no )
+    else:
+        # construct lst folder path
+        lst_pwd = "/sf/{0}/data/{1}/raw/run{2}*/data".format( endstation, pgroup, run_no )
 
     return lst_pwd
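
(Illustrative only: make_pwd( "0141", "cristallina", "p21981", jfj=True ) would
return /sf/cristallina/data/p21981/res/run0141*, while jfj=False gives
/sf/cristallina/data/p21981/raw/run0141*/data; the endstation, pgroup and run
number here are borrowed from the example path in chip_uc_gather.py.)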
 
 def find_lst( lst_dir, label ):
 
-    # if label = both, i.e. both lights and darks, set label to lst - so it's always found
-    if label == "both":
-        label = "lst"
+    if label == "on" or label == "off":
+        tail = "{0}.list".format( label )
+    if label == "light" or label == "dark":
+        tail = "{0}.lst".format( label )
 
     # create df for all lst
     lst_dir_df = pd.DataFrame()
@@ -59,7 +72,8 @@ def find_lst( lst_dir, label ):
     # search for lst with appropriate labels
     for path, dirs, files in os.walk( lst_dir ):
         for name in files:
-            if name.endswith( ".lst" ):
+            if name.endswith( tail ):
 
                 # get lst pwd
                 lst_pwd = os.path.join( path, name )
@@ -76,7 +90,7 @@ def find_lst( lst_dir, label ):
     # return df lst from this directory
     return lst_dir_df
 
-def generate_lst_df( run_lst, endstation, label, pgroup ):
+def generate_lst_df( run_lst, endstation, label, pgroup, jfj ):
 
     # make run number df
     cols = [ "run_no" ]
@@ -85,12 +99,11 @@ def generate_lst_df( run_lst, endstation, label, pgroup ):
     range_df[ "run_no" ] = range_df.run_no.str.zfill(4)
 
     # make new column of list paths
-    range_df[ "lst_app_dir" ] = range_df[ "run_no" ].apply( lambda x: make_pwd( x, endstation, pgroup ) )
+    range_df[ "lst_app_dir" ] = range_df[ "run_no" ].apply( lambda x: make_pwd( x, endstation, pgroup, jfj ) )
 
     # make df of lsts to be concatenated
     lst_df = pd.DataFrame()
-
 
     for index, row in range_df.iterrows():
 
         # get approximate dir pwd
@@ -114,13 +127,18 @@
 
     return lst_df
 
-def main( run_lst, endstation, label, pgroup, output_file ):
+def main( run_lst, endstation, label, pgroup, output_file, jfj ):
 
     # make df of lst files
-    lst_df = generate_lst_df( run_lst, endstation, label, pgroup )
+    lst_df = generate_lst_df( run_lst, endstation, label, pgroup, jfj )
+
+    # check to see if any files have been found
+    if lst_df.empty:
+        print( "no {0} lists were found in runs {1}".format( label, run_lst ) )
+        exit()
 
     # concatenate all lst files in lst_df
-    concatenate_files( lst_df.lst_pwd, output_file )
+    concatenate_files( lst_df, output_file, label )
 
 def range_of_runs(arg):
     return list(map(int, arg.split(',')))
@@ -153,20 +171,38 @@ if __name__ == "__main__":
         help="pgroup the data are collected in",
         type=str
     )
+    parser.add_argument(
+        "-j",
+        "--jfj",
+        help="was the Jungfraujoch/Clara data processing pipeline used to process your data. Default = False",
+        type=bool,
+        default=False
+    )
     parser.add_argument(
         "-l",
         "--label",
-        help="the label of the lst file, i.e. 'light', 'dark' or 'both'",
-        type=str,
-        required=True
+        help="the activation label for the data. Without JFJ the labels should be 'light' or 'dark'. With JFJ they should be 'on' or 'off'.",
+        type=str
     )
     parser.add_argument(
        "-o",
         "--output",
         help="name of output file",
         type=str,
+        default=None
     )
     args = parser.parse_args()
+    # JFJ on/off, non-JFJ light/dark logic
+    if args.label != "off" and args.label != "on" and args.label != "light" and args.label != "dark":
+        print( "label flag (-l) must = either 'on' or 'off' with JFJ = True, or 'light' or 'dark' and JFJ = False." )
+        exit()
+    print( args.jfj )
+    if ( args.label == "off" or args.label == "on" ) and args.jfj == False:
+        print( "JFJ uses 'on' and 'off' flags. Please check inputs and whether the new JFJ/Clara processing pipeline was used." )
+        exit()
+    if ( args.label == "light" or args.label == "dark" ) and args.jfj == True:
+        print( "The old daq uses 'light' and 'dark' flags. Please check inputs and whether the new JFJ/Clara processing pipeline was used." )
+        exit()
     # make continuous list from input range limits
     range = []
     if args.range is not None:
@@ -180,7 +216,12 @@
         runs = args.runs
     run_lst = range + runs
     print( "appending {0} lst files from runs {1}".format( args.label, run_lst ) )
+    # make default name
+    if not args.output:
+        output_name = "-".join( str(e) for e in run_lst )
+        output_name = "run{0}".format( output_name )
+    else:
+        output_name = args.output
     # run main
-    main( run_lst, args.endstation, args.label, args.pgroup, args.output )
+    main( run_lst, args.endstation, args.label, args.pgroup, output_name, args.jfj )
     print( "done" )

@@ -13,7 +13,7 @@ python crystfel_split.py -l <path-to-list-file>
                          -g <path-to-geom-file>
                          -c <path-to-cell-file>
                          -n <name-of-job>
-                         -e name of endstation
+                         -p photons or energy
 
 # crystfel parameters may need some editing in the function - write_crystfel_run
@@ -24,11 +24,11 @@ a log file with .geom and evaluation of indexing, cell etc
 """
 
 # modules
+import argparse
 import pandas as pd
 import subprocess
 import os, errno
 import time
-import argparse
 from tqdm import tqdm
 import regex as re
 import numpy as np
@@ -69,7 +69,7 @@ def scrub_res( stream ):
     # example - diffraction_resolution_limit = 4.07 nm^-1 or 2.46 A
     # scrub res_lst or return np.nan
     try:
-        pattern = r"diffraction_resolution_limit\s=\s\d\.\d+\snm\^-1\sor\s(\d\.\d+)\sA"
+        pattern = r"diffraction_resolution_limit\s=\s\d+\.\d+\snm\^-1\sor\s(\d+\.\d+)\sA"
         res_lst = re.findall( pattern, stream )
         if res_lst:
             return res_lst
@@ -191,11 +191,23 @@ def make_process_dir( proc_dir ):
         logger.debug( "making directory error" )
         raise
 
-def submit_job( job_file ):
+def submit_job( job_file, reservation ):
 
     # submit the job
-    submit_cmd = ["sbatch", "--cpus-per-task=32", "--", job_file]
+    if reservation:
+        print( "using a ra beamtime reservation = {0}".format( reservation ) )
+        logger.info( "using ra reservation to process data = {0}".format( reservation ) )
+        submit_cmd = [ "sbatch", "-p", "hour", "--reservation={0}".format( reservation ), "--cpus-per-task=32", "--", job_file ]
+    else:
+        submit_cmd = [ "sbatch", "-p", "hour", "--cpus-per-task=32", "--", job_file ]
+    logger.info( "using slurm command = {0}".format( submit_cmd ) )
 
-    job_output = subprocess.check_output( submit_cmd )
+    try:
+        job_output = subprocess.check_output( submit_cmd )
+        logger.info( "submitted job = {0}".format( job_output ) )
+    except subprocess.CalledProcessError as e:
+        print( "please give the correct ra reservation or remove the -v from the arguments" )
+        exit()
 
     # scrub job id from - example Submitted batch job 742403
     pattern = r"Submitted batch job (\d+)"
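
(Illustrative: with a reservation such as "P11111_2024-12-10", the placeholder
format from the --reservation help text, the submitted command becomes
    sbatch -p hour --reservation=P11111_2024-12-10 --cpus-per-task=32 -- <job_file>
and without one the --reservation flag is simply dropped; <job_file> stands in
for the generated CrystFEL run script.)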
@@ -217,7 +229,7 @@ def wait_for_jobs( job_ids, total_jobs ):
         job_ids.difference_update(completed_jobs)
         time.sleep(2)
 
-def run_splits( list_df, cwd, name, lst, chunk_size, geom_file, cell_file, threshold ):
+def run_splits( list_df, cwd, name, geom_file, cell_file, threshold, reservation ):
 
     # set chunk counter
     chunk = 0
@@ -249,8 +261,7 @@
         stream_lst.append( "{0}/{1}".format( proc_dir, stream_file ) )
 
         # submit jobs
-        job_id = submit_job( cryst_run_file )
-        logger.info( f"Job submitted: { job_id }" )
+        job_id = submit_job( cryst_run_file, reservation )
         submitted_job_ids.add( job_id )
 
         # increase chunk counter
@@ -261,7 +272,7 @@
 
     return submitted_job_ids, chunk, stream_lst
 
-def main( cwd, name, lst, chunk_size, geom_file, cell_file, threshold ):
+def main( cwd, name, lst, chunk_size, geom_file, cell_file, threshold, reservation ):
 
     print( "reading SwissFEL lst file" )
     print( "creating {0} image chunks of lst".format( chunk_size ) )
@@ -270,7 +281,7 @@
 
     # run crystfel runs on individual splits
     print( "submitting jobs to cluster" )
-    submitted_job_ids, chunk, stream_lst = run_splits( list_df, cwd, name, lst, chunk_size, geom_file, cell_file, threshold )
+    submitted_job_ids, chunk, stream_lst = run_splits( list_df, cwd, name, geom_file, cell_file, threshold, reservation )
 
     # monitor progress of jobs
     time.sleep( 30 )
@@ -323,6 +334,20 @@
     logger.info( "mean beta = {0} +/- {1} deg".format( mean_beta, std_beta ) )
     logger.info( "mean gamma = {0} +/- {1} deg".format( mean_gamma, std_gamma ) )
 
+    print( "printing stats" )
+    print( "images = {0}".format( chunks ) )
+    print( "crystals = {0}".format( xtals ) )
+    print( "indexing rate = {0} %".format( index_rate ) )
+    print( "mean resolution = {0} +/- {1} A".format( mean_res, std_res ) )
+    print( "median resolution = {0} A".format( median_res ) )
+    print( "mean observations = {0} +/- {1}".format( mean_obs, std_obs ) )
+    print( "mean a = {0} +/- {1} A".format( mean_a, std_a ) )
+    print( "mean b = {0} +/- {1} A".format( mean_b, std_b ) )
+    print( "mean c = {0} +/- {1} A".format( mean_c, std_c ) )
+    print( "mean alpha = {0} +/- {1} deg".format( mean_alpha, std_alpha ) )
+    print( "mean beta = {0} +/- {1} deg".format( mean_beta, std_beta ) )
+    print( "mean gamma = {0} +/- {1} deg".format( mean_gamma, std_gamma ) )
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -360,6 +385,20 @@
         type=str,
         default="split"
     )
+    parser.add_argument(
+        "-v",
+        "--reservation",
+        help="reservation name for ra cluster. Usually along the lines of P11111_2024-12-10",
+        type=str,
+        default=None
+    )
+    parser.add_argument(
+        "-p",
+        "--photons_or_energy",
+        help="determines the threshold to use for CrystFEL. Photon counts have always been used at Cristallina and are used at Alvra from 01.11.2024. Please use 'energy' for Alvra data collected before this.",
+        type=str,
+        default="photons"
+    )
     parser.add_argument(
         "-d",
         "--debug",
@@ -367,21 +406,9 @@
         type=bool,
         default=False
     )
-    parser.add_argument(
-        "-e",
-        "--endstation",
-        help="which endstation did you collect these data from, e.g., alvra or cristallina. Please over-write name depending on endstation.",
-        type=str,
-        default="cristallina"
-    )
     args = parser.parse_args()
     # set current working directory
     cwd = os.getcwd()
-    # set threshold based on endstation
-    if args.endstation == "alvra":
-        threshold = 3000
-    elif args.endstation == "cristallina":
-        threshold = 10
     # set loguru
     if not args.debug:
         logger.remove()
@@ -390,5 +417,10 @@
     # log geometry file
     geom = open( args.geom_file, "r" ).read()
     logger.info( geom )
-    main( cwd, args.job_name, args.lst_file, args.chunk_size, args.geom_file, args.cell_file, threshold )
+    # set threshold based on detector
+    if args.photons_or_energy == "energy":
+        threshold = 3000
+    elif args.photons_or_energy == "photons":
+        threshold = 15
+    main( cwd, args.job_name, args.lst_file, args.chunk_size, args.geom_file, args.cell_file, threshold, args.reservation )

@@ -27,12 +27,99 @@ a series of stream files from crystfel in the current working directory
 
 # modules
 import pandas as pd
+import numpy as np
 import subprocess
 import os, errno
 import time
 import argparse
 from tqdm import tqdm
 import regex as re
+from loguru import logger
+
+def count_chunks( stream ):
+
+    # get number of chunks
+    # example - ----- Begin chunk -----
+    # count them
+    try:
+        pattern = r"-----\sBegin\schunk\s-----"
+        chunks = re.findall( pattern, stream )
+        if chunks:
+            return len( chunks )
+    except AttributeError:
+        logger.debug( "count_chunks error" )
+        return np.nan
+
+def scrub_cells( stream ):
+
+    # get uc values from stream file
+    # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
+    # scrub cells and return - else nan
+    try:
+        pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
+        cell_lst = re.findall( pattern, stream )
+        xtals = len( cell_lst )
+        if cell_lst:
+            return cell_lst, xtals
+    except AttributeError:
+        logger.debug( "scrub_cells error" )
+        return np.nan
+
+def scrub_res( stream ):
+
+    # get diffraction limit
+    # example - diffraction_resolution_limit = 4.07 nm^-1 or 2.46 A
+    # scrub res_lst or return np.nan
+    try:
+        pattern = r"diffraction_resolution_limit\s=\s\d+\.\d+\snm\^-1\sor\s(\d+\.\d+)\sA"
+        res_lst = re.findall( pattern, stream )
+        if res_lst:
+            return res_lst
+    except AttributeError:
+        logger.debug( "scrub_res error" )
+        return np.nan
+
+def scrub_obs( stream ):
+
+    # get number of reflections
+    # example - num_reflections = 308
+    # scrub reflections or return np.nan
+    try:
+        pattern = r"num_reflections\s=\s(\d+)"
+        obs_lst = re.findall( pattern, stream )
+        if obs_lst:
+            return obs_lst
+    except AttributeError:
+        logger.debug( "scrub_obs error" )
+        return np.nan
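
(Each scrubber runs re.findall over the whole stream text, so on the example
line quoted above, re.findall( r"num_reflections\s=\s(\d+)", "num_reflections = 308" )
returns ['308'], one entry per indexed crystal in the stream.)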
+
+def calculate_stats( stream_pwd ):
+
+    # open stream file
+    stream = open( stream_pwd, "r" ).read()
+
+    # get total number of chunks
+    chunks = count_chunks( stream )
+
+    # get list of cells
+    cell_lst, xtals = scrub_cells( stream )
+
+    # get list of resolutions
+    res_lst = scrub_res( stream )
+
+    # get list of observations
+    obs_lst = scrub_obs( stream )
+
+    # res_df
+    cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
+    df = pd.DataFrame( cell_lst, columns=cols )
+    df[ "resolution" ] = res_lst
+    df[ "obs" ] = obs_lst
+
+    # convert all to floats
+    df = df.astype(float)
+
+    return df, xtals, chunks
+
 def h5_split( lst, chunk_size ):
@@ -53,7 +140,8 @@ def h5_split( lst, chunk_size ):
     return list_df
 
 def write_crystfel_run( proc_dir, name, chunk, chunk_lst_file,
-                        geom_file, cell_file, threshold, min_snr,
+                        geom_file, cell_file, indexer, peakfinder,
+                        integrator, tolerance, threshold, min_snr,
                         int_rad, multi, retry, min_pix, bg_rad, min_res ):
 
     # stream file name
@@ -71,10 +159,11 @@ def write_crystfel_run( proc_dir, name, chunk, chunk_lst_file,
     run_sh.write( " --output={0} \\\n".format( stream_file ) )
     run_sh.write( " --geometry={0} \\\n".format( geom_file ) )
     run_sh.write( " --pdb={0} \\\n".format( cell_file ) )
-    run_sh.write( " --indexing=xgandalf-latt-cell \\\n" )
-    run_sh.write( " --peaks=peakfinder8 \\\n" )
-    run_sh.write( " --integration=rings-grad \\\n" )
-    run_sh.write( " --tolerance=10.0,10.0,10.0,2,3,2 \\\n" )
+    run_sh.write( " --indexing={0} \\\n".format( indexer ) )
+    run_sh.write( " --peaks={0} \\\n".format( peakfinder ) )
+    run_sh.write( " --integration={0} \\\n".format( integrator ) )
+    run_sh.write( " --tolerance={0},{1},{2},{3},{4},{5} \\\n".format( tolerance[0], tolerance[1], tolerance[2],
+                                                                      tolerance[3], tolerance[4], tolerance[5] ) )
     run_sh.write( " --threshold={0} \\\n".format( threshold ) )
     run_sh.write( " --min-snr={0} \\\n".format( min_snr ) )
     run_sh.write( " --int-radius={0},{1},{2} \\\n".format( int_rad[0], int_rad[1], int_rad[2] ) )
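
(With the argparse defaults added further down, the generated job script would
contain option lines along the lines of
    --indexing=xgandalf-latt-cell \
    --peaks=peakfinder8 \
    --integration=rings-nocen-nograd \
    --tolerance=10.0,10.0,10.0,2.0,3.0,2.0 \
    --threshold=20 \
a sketch assuming the defaults are left untouched; the surrounding indexamajig
invocation sits outside this hunk.)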
@@ -100,13 +189,26 @@ def make_process_dir( proc_dir ):
         os.makedirs( proc_dir )
     except OSError as e:
         if e.errno != errno.EEXIST:
+            logger.debug( "making directory error" )
             raise
 
-def submit_job( job_file ):
+def submit_job( job_file, reservation ):
 
     # submit the job
-    submit_cmd = ["sbatch", "--cpus-per-task=32", "--", job_file]
+    if reservation:
+        print( "using a ra beamtime reservation = {0}".format( reservation ) )
+        logger.info( "using ra reservation to process data = {0}".format( reservation ) )
+        submit_cmd = [ "sbatch", "-p", "hour", "--reservation={0}".format( reservation ), "--cpus-per-task=32", "--", job_file ]
+    else:
+        submit_cmd = [ "sbatch", "-p", "hour", "--cpus-per-task=32", "--", job_file ]
+    logger.info( "using slurm command = {0}".format( submit_cmd ) )
 
-    job_output = subprocess.check_output( submit_cmd )
+    try:
+        job_output = subprocess.check_output( submit_cmd )
+        logger.info( "submitted job = {0}".format( job_output ) )
+    except subprocess.CalledProcessError as e:
+        print( "please give the correct ra reservation or remove the -v from the arguments" )
+        exit()
 
     # scrub job id from - example Submitted batch job 742403
     pattern = r"Submitted batch job (\d+)"
@@ -126,16 +228,12 @@ def wait_for_jobs( job_ids, total_jobs ):
             completed_jobs.add(job_id)
             pbar.update(1)
         job_ids.difference_update(completed_jobs)
-        time.sleep(10)
+        time.sleep(2)
 
-def run_splits( cwd, name, lst, chunk_size, geom_file,
-                cell_file, progress, threshold, min_snr,
-                int_rad, multi, retry, min_pix ):
-
-    print( "reading SwissFEL lst file" )
-    print( "creating {0} image chunks of lst".format( chunk_size ) )
-    list_df = h5_split( lst, chunk_size )
-    print( "DONE" )
+def run_splits( list_df, cwd, name, geom_file, cell_file,
+                indexer, peakfinder, integrator, tolerance, threshold,
+                min_snr, int_rad, multi, retry, min_pix, bg_rad,
+                min_res, reservation ):
 
     # set chunk counter
     chunk = 0
@@ -146,10 +244,9 @@ def run_splits( cwd, name, lst, chunk_size, geom_file,
     # stream file list
     stream_lst = []
 
-    print( "creating crystfel jobs for individual chunks" )
     for chunk_lst in list_df:
 
-        print( "chunk {0} = {1} images".format( chunk, len( chunk_lst ) ) )
+        logger.info( "chunk {0} = {1} images".format( chunk, len( chunk_lst ) ) )
         # define process directory
         proc_dir = "{0}/{1}/{1}_{2}".format( cwd, name, chunk )
 
@@ -165,13 +262,13 @@
 
         # write crystfel file and append path to list
         cryst_run_file, stream_file = write_crystfel_run( proc_dir, name, chunk, chunk_lst_file,
-                                                          geom_file, cell_file, threshold, min_snr,
-                                                          int_rad, multi, retry, min_pix )
+                                                          geom_file, cell_file, indexer, peakfinder,
+                                                          integrator, tolerance, threshold, min_snr,
+                                                          int_rad, multi, retry, min_pix, bg_rad, min_res )
         stream_lst.append( "{0}/{1}".format( proc_dir, stream_file ) )
 
         # submit jobs
-        job_id = submit_job( cryst_run_file )
-        print(f"Job submitted: { job_id }")
+        job_id = submit_job( cryst_run_file, reservation )
         submitted_job_ids.add( job_id )
 
         # increase chunk counter
@@ -180,15 +277,34 @@
         # move back to top dir
         os.chdir( cwd )
 
+    return submitted_job_ids, chunk, stream_lst
+
+def main( cwd, name, lst, chunk_size, geom_file, cell_file,
+          indexer, peakfinder, integrator, tolerance, threshold,
+          min_snr, int_rad, multi, retry, min_pix, bg_rad,
+          min_res, reservation ):
+
+    print( "reading SwissFEL lst file" )
+    print( "creating {0} image chunks of lst".format( chunk_size ) )
+    list_df = h5_split( lst, chunk_size )
     print( "DONE" )
 
+    # run crystfel runs on individual splits
+    print( "submitting jobs to cluster" )
+    submitted_job_ids, chunk, stream_lst = run_splits( list_df, cwd, name, geom_file, cell_file,
+                                                       indexer, peakfinder, integrator, tolerance, threshold,
+                                                       min_snr, int_rad, multi, retry, min_pix, bg_rad,
+                                                       min_res, reservation )
+
+    # monitor progress of jobs
+    time.sleep( 30 )
     wait_for_jobs( submitted_job_ids, chunk )
-    print("slurm processing done")
+    print( "done" )
 
     # make composite .stream file
     output_file = "{0}.stream".format( name )
 
-    print( "comp" )
+    print( "concatenating .streams from separate runs." )
     try:
         # Open the output file in 'append' mode
         with open(output_file, "a") as output:
@@ -197,110 +313,206 @@
                 with open(file_name, "r") as input_file:
                     # Read the contents of the input file and append to the output file
                     output.write(input_file.read())
-                    print(f"Appended contents from {file_name} to {output_file}")
     except FileNotFoundError:
-        print(f"File {file_name} not found. Skipping.")
+        logger.debug(f"File {file_name} not found. Skipping.")
     except IOError as e:
-        print(f"An error occurred while appending files: {e}")
+        logger.debug(f"An error occurred while appending files: {e}")
 
-    print( "DONE" )
+    print( "done" )
+
+    df, xtals, chunks = calculate_stats( output_file )
+
+    # stats
+    index_rate = round( xtals/chunks*100, 2 )
+    mean_res, std_res = round( df.resolution.mean(), 2 ), round( df.resolution.std(), 2 )
+    median_res = df.resolution.median()
+    mean_obs, std_obs = round( df.obs.mean(), 2 ), round( df.obs.std(), 2 )
+    mean_a, std_a = round( df.a.mean()*10, 2 ), round( df.a.std()*10, 2 )
+    mean_b, std_b = round( df.b.mean()*10, 2 ), round( df.b.std()*10, 2 )
+    mean_c, std_c = round( df.c.mean()*10, 2 ), round( df.c.std()*10, 2 )
+    mean_alpha, std_alpha = round( df.alpha.mean(), 2 ), round( df.alpha.std(), 2 )
+    mean_beta, std_beta = round( df.beta.mean(), 2 ), round( df.beta.std(), 2 )
+    mean_gamma, std_gamma = round( df.gamma.mean(), 2 ), round( df.gamma.std(), 2 )
+
+    logger.info( "images = {0}".format( chunks ) )
+    logger.info( "crystals = {0}".format( xtals ) )
+    logger.info( "indexing rate = {0} %".format( index_rate ) )
+    logger.info( "mean resolution = {0} +/- {1} A".format( mean_res, std_res ) )
+    logger.info( "median resolution = {0} A".format( median_res ) )
+    logger.info( "mean observations = {0} +/- {1}".format( mean_obs, std_obs ) )
+    logger.info( "mean a = {0} +/- {1} A".format( mean_a, std_a ) )
+    logger.info( "mean b = {0} +/- {1} A".format( mean_b, std_b ) )
+    logger.info( "mean c = {0} +/- {1} A".format( mean_c, std_c ) )
+    logger.info( "mean alpha = {0} +/- {1} deg".format( mean_alpha, std_alpha ) )
+    logger.info( "mean beta = {0} +/- {1} deg".format( mean_beta, std_beta ) )
+    logger.info( "mean gamma = {0} +/- {1} deg".format( mean_gamma, std_gamma ) )
+
+    print( "printing stats" )
+    print( "images = {0}".format( chunks ) )
+    print( "crystals = {0}".format( xtals ) )
+    print( "indexing rate = {0} %".format( index_rate ) )
+    print( "mean resolution = {0} +/- {1} A".format( mean_res, std_res ) )
+    print( "median resolution = {0} A".format( median_res ) )
+    print( "mean observations = {0} +/- {1}".format( mean_obs, std_obs ) )
+    print( "mean a = {0} +/- {1} A".format( mean_a, std_a ) )
+    print( "mean b = {0} +/- {1} A".format( mean_b, std_b ) )
+    print( "mean c = {0} +/- {1} A".format( mean_c, std_c ) )
+    print( "mean alpha = {0} +/- {1} deg".format( mean_alpha, std_alpha ) )
+    print( "mean beta = {0} +/- {1} deg".format( mean_beta, std_beta ) )
+    print( "mean gamma = {0} +/- {1} deg".format( mean_gamma, std_gamma ) )
 
 def list_of_ints(arg):
     return list(map(int, arg.split(',')))
 
+def list_of_floats(arg):
+    return list(map(float, arg.split(',')))
+
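(These helpers just split a comma-separated argparse string; for example,
list_of_floats("10.0,10.0,10.0,2.0,3.0,2.0") returns [10.0, 10.0, 10.0, 2.0, 3.0, 2.0],
matching the --tolerance default below.)
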
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-l",
-        "--lst_file",
-        help="file from SwissFEL output to be processed quickly",
-        type=os.path.abspath
-    )
-    parser.add_argument(
-        "-k",
-        "--chunk_size",
-        help="how big should each chunk be? - the bigger the chunk, the fewer jobs, the slower it will be",
-        type=int,
-        default=1000
-    )
-    parser.add_argument(
-        "-g",
-        "--geom_file",
-        help="path to geom file to be used in the refinement",
-        type=os.path.abspath
-    )
-    parser.add_argument(
-        "-c",
-        "--cell_file",
-        help="path to cell file of the crystals used in the refinement",
-        type=os.path.abspath
-    )
     parser.add_argument(
         "-n",
         "--job_name",
-        help="the name of the job to be done",
+        help="the name of the job to be done. Default = split",
         type=str,
         default="split"
     )
+    parser.add_argument(
+        "-l",
+        "--lst_file",
+        help="file from SwissFEL output to be processed quickly. Required.",
+        type=os.path.abspath,
+        required=True
+    )
+    parser.add_argument(
+        "-k",
+        "--chunk_size",
+        help="how big should each image split be? Default = 500. Fewer will be faster.",
+        type=int,
+        default=500
+    )
+    parser.add_argument(
+        "-g",
+        "--geom_file",
+        help="path to geom file to be used in the refinement. Required.",
+        type=os.path.abspath,
+        required=True
+    )
+    parser.add_argument(
+        "-c",
+        "--cell_file",
+        help="path to cell file of the crystals used in the refinement. Required.",
+        type=os.path.abspath,
+        required=True
+    )
+    parser.add_argument(
+        "-x",
+        "--indexer",
+        help="indexer to use. Default = xgandalf-latt-cell",
+        type=str,
+        default="xgandalf-latt-cell"
+    )
+    parser.add_argument(
+        "-f",
+        "--peakfinder",
+        help="peakfinder to use. Default = peakfinder8",
+        type=str,
+        default="peakfinder8"
+    )
+    parser.add_argument(
+        "-a",
+        "--integrator",
+        help="integrator to use. Default = rings-nocen-nograd",
+        type=str,
+        default="rings-nocen-nograd"
+    )
+    parser.add_argument(
+        "-y",
+        "--tolerance",
+        help="tolerance to use. Default = 10.0,10.0,10.0,2.0,3.0,2.0",
+        type=list_of_floats,
+        default=[10.0,10.0,10.0,2.0,3.0,2.0]
+    )
     parser.add_argument(
         "-t",
         "--threshold",
-        help="threshold for crystfel run - peaks must be above this to be found",
+        help="peaks must be above this to be found during spot-finding. Default = 20",
         type=int,
-        default=10
+        default=20
     )
     parser.add_argument(
         "-s",
         "--min_snr",
-        help="min-snr for crystfel run - peaks must be above this to be counted",
+        help="peaks must be above this to be counted. Default = 5.",
         type=int,
         default=5
     )
     parser.add_argument(
         "-i",
         "--int_radius",
-        help="int_rad for crystfel run - peaks must be above this to be counted",
+        help="integration ring radii. Default = 2,3,5 = 2 for the spot and then 3 and 5 to calculate the background.",
         type=list_of_ints,
-        default=[3,5,9]
+        default=[2,3,5]
     )
     parser.add_argument(
         "-m",
         "--multi",
-        help="multi crystfel flag, do you want to look for multiple lattices",
+        help="do you want to look for multiple lattices. Default = True",
        type=bool,
-        default=False
+        default=True
     )
     parser.add_argument(
         "-r",
         "--retry",
-        help="retry crystfel flag, do you want to retry failed indexing patterns",
+        help="do you want to retry failed indexing patterns. Default = False",
         type=bool,
         default=False
     )
     parser.add_argument(
-        "-x",
+        "-p",
         "--min_pix",
-        help="min-pix-count for crystfel runs, minimum number of pixels a spot should contain in peak finding",
+        help="minimum number of pixels a spot should contain in peak finding. Default = 2",
         type=int,
         default=2
     )
     parser.add_argument(
         "-b",
         "--bg_rad",
-        help="crystfel background radius flag, radius (in pixels) used for the estimation of the local background",
+        help="radius (in pixels) used for the estimation of the local background. Default = 4",
         type=int,
-        default=2
+        default=4
     )
     parser.add_argument(
         "-q",
         "--min-res",
-        help="m",
+        help="min-res for spot-finding in pixels. Default = 85.",
         type=int,
-        default=2
+        default=85
+    )
+    parser.add_argument(
+        "-v",
+        "--reservation",
+        help="reservation name for ra cluster. Usually along the lines of P11111_2024-12-10",
+        type=str,
+        default=None
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        help="output debug to terminal.",
+        type=bool,
+        default=False
     )
     args = parser.parse_args()
     # run geom converter
     cwd = os.getcwd()
+    # set loguru
+    if not args.debug:
+        logger.remove()
+    logfile = "{0}.log".format( args.job_name )
+    logger.add( logfile, format="{message}", level="INFO")
+    # log geometry file
+    geom = open( args.geom_file, "r" ).read()
+    logger.info( geom )
     if args.multi == True:
         multi = "multi"
     else:
@@ -309,7 +521,8 @@
         retry = "retry"
     else:
         retry = "no-retry"
-    run_splits( cwd, args.job_name, args.lst_file, args.chunk_size,
-                args.geom_file, args.cell_file,
-                args.threshold, args.min_snr, args.int_radius,
-                multi, retry, args.min_pix )
+    main( cwd, args.job_name, args.lst_file, args.chunk_size,
+          args.geom_file, args.cell_file, args.indexer, args.peakfinder,
+          args.integrator, args.tolerance, args.threshold,
+          args.min_snr, args.int_radius, multi, retry, args.min_pix, args.bg_rad,
+          args.min_res, args.reservation )

@@ -5,6 +5,7 @@
 """
 # aim
 to make an mtz from an hkl file output from partialator or process_hkl
+runs f2mtz and then truncate to create an mtz with both intensities and structure factors
 
 # usage to make mtz from manually entered lengths and angles
 python make_mtz.py -i <path to .hkl file from partialator>
@@ -14,6 +15,8 @@ python make_mtz.py -i <path to .hkl file from partialator>
                    -d dataset name in mtz
                    -g spacegroup
                    -c list of cell lengths and angles to use - 59.3,59.3,153.1,90.0,90.0,90.0
+                   -r number of residues
+                   -A resolution range - e.g. 40.0,2.0
 
 # usage to make mtz from the mean angles and lengths in stream file
 python make_mtz.py -i <path to .hkl file from partialator>
@@ -22,6 +25,8 @@ python make_mtz.py -i <path to .hkl file from partialator>
                    -x xtal name in mtz
                    -d dataset name in mtz
                    -g spacegroup
+                   -r number of residues
+                   -A resolution range - e.g. 40.0,2.0
                    -s <path to stream file>
                    -u True
 
@@ -29,8 +34,11 @@ python make_mtz.py -s <path to stream file>
 
 # output
-- .mtz file
-- .html log file
+- .mtz file - just intensities
+- f2mtz.log file
+- _F.mtz file - intensities and structure factors
+- cuts data to desired resolution
+- truncate.log file
 """
 
 # modules
@@ -72,27 +80,49 @@ def get_mean_cell( stream_file ):
|
|||||||
|
|
||||||
return mean_cell, len(cell_lst)
|
return mean_cell, len(cell_lst)
|
||||||
|
|
||||||
def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup ):
|
def make_mtz( hklout_file, mtzout_file, project, crystal, dataset, cell, spacegroup, residues, res_range ):
|
||||||
|
|
||||||
# make_mtz file name
|
# make_mtz file name
|
||||||
mtz_run_file = "make_mtz.sh"
|
mtz_run_file = "make_mtz.sh"
|
||||||
|
|
||||||
|
# make F file name
|
||||||
|
Fout_file = os.path.splitext( mtzout_file )[0] + "_F.mtz"
|
||||||
|
|
||||||
# write file
|
# write file
|
||||||
mtz_sh = open( mtz_run_file, "w" )
|
mtz_sh = open( mtz_run_file, "w" )
|
||||||
mtz_sh.write( "#!/bin/sh\n\n" )
|
mtz_sh.write( "#!/bin/sh\n\n" )
|
||||||
mtz_sh.write( "module purge\n" )
|
mtz_sh.write( "module purge\n" )
|
||||||
mtz_sh.write( "module load ccp4/8.0\n\n" )
|
mtz_sh.write( "module load ccp4/8.0\n\n" )
|
||||||
mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} > out.html << EOF\n".format( hklout_file, mtzout_file ) )
|
mtz_sh.write( "f2mtz HKLIN {0} HKLOUT {1} << EOF_hkl > f2mtz.log\n".format( hklout_file, mtzout_file ) )
|
||||||
mtz_sh.write( "TITLE Reflections from CrystFEL\n" )
|
mtz_sh.write( "TITLE Reflections from CrystFEL\n" )
|
||||||
mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) )
|
mtz_sh.write( "NAME PROJECT {0} CRYSTAL {1} DATASET {2}\n".format( project, crystal, dataset ) )
|
||||||
mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
|
mtz_sh.write( "CELL {0} {1} {2} {3} {4} {5}\n".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
|
||||||
mtz_sh.write( "SYMM {0}\n".format( spacegroup ) )
|
mtz_sh.write( "SYMM {0}\n".format( spacegroup ) )
|
||||||
mtz_sh.write( "SKIP 3\n" )
|
mtz_sh.write( "SKIP 3\n" )
|
||||||
mtz_sh.write( "LABOUT H K L IMEAN SIGIMEAN\n" )
|
mtz_sh.write( "LABOUT H K L I_stream SIGI_stream\n" )
|
||||||
mtz_sh.write( "CTYPE H H H J Q\n" )
|
mtz_sh.write( "CTYPE H H H J Q\n" )
|
||||||
mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" )
|
mtz_sh.write( "FORMAT '(3(F4.0,1X),F10.2,10X,F10.2)'\n" )
|
||||||
mtz_sh.write( "SKIP 3\n" )
|
mtz_sh.write( "SKIP 3\n" )
|
||||||
mtz_sh.write( "EOF" )
|
mtz_sh.write( "EOF_hkl\n\n\n" )
|
||||||
|
mtz_sh.write( "echo 'done'\n" )
|
||||||
|
mtz_sh.write( "echo 'I and SIGI from CrystFEL stream saved as I_stream and SIGI_stream'\n" )
|
||||||
|
mtz_sh.write( "echo 'I filename = {0}'\n\n\n".format( mtzout_file ) )
|
||||||
|
mtz_sh.write( "echo 'running truncate'\n" )
|
||||||
|
mtz_sh.write( "echo 'setting resolution range to {0}-{1}'\n".format( res_range[0], res_range[1] ) )
|
||||||
|
mtz_sh.write( "echo 'assuming that there are {0}' in assymetric unit\n\n\n".format( residues ) )
|
||||||
|
mtz_sh.write( "truncate HKLIN {0} HKLOUT {1} << EOF_F > truncate.log\n".format( mtzout_file, Fout_file ) )
|
||||||
|
mtz_sh.write( "truncate YES\n" )
|
||||||
|
mtz_sh.write( "anomalous NO\n" )
|
||||||
|
mtz_sh.write( "nresidue {0}\n".format( residues ) )
|
||||||
|
mtz_sh.write( "resolution {0} {1}\n".format( res_range[0], res_range[1] ) )
|
||||||
|
mtz_sh.write( "plot OFF\n" )
|
||||||
|
mtz_sh.write( "labin IMEAN=I_stream SIGIMEAN=SIGI_stream\n" )
|
||||||
|
mtz_sh.write( "labout F=F_stream SIGF=SIGF_stream\n" )
|
||||||
|
mtz_sh.write( "end\n" )
|
||||||
|
mtz_sh.write( "EOF_F\n\n\n" )
|
||||||
|
mtz_sh.write( "echo 'done'\n" )
|
||||||
|
mtz_sh.write( "echo 'I_stream and SIGI_stream from f2mtz converted to F_stream and F_stream'\n" )
|
||||||
+   mtz_sh.write( "echo 'F filename = {0} (contains both Is and Fs)'".format( Fout_file ) )
    mtz_sh.close()

    # make file executable
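Note: the FORMAT card above fixes the fixed-width layout f2mtz reads. A minimal Python sketch of the same slicing, using a hypothetical record built to match '(3(F4.0,1X),F10.2,10X,F10.2)' (the "-" stands in for the skipped phase column; field widths are read off the FORMAT string, not a CrystFEL spec):

# three 4-char indices each followed by a space, a 10-char intensity,
# 10 skipped chars, then a 10-char sigma(I)
line = "   3" + " " + "   7" + " " + "  12" + " " + "    123.45" + "         -" + "      8.21"

h, k, l = int( line[0:4] ), int( line[5:9] ), int( line[10:14] )
i_obs = float( line[15:25] )    # F10.2
sig_i = float( line[35:45] )    # second F10.2, after the 10X skip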
@@ -118,7 +148,7 @@ def cut_hkl_file( hklin_file, hklout_file ):

    hklout.close()

-def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup ):
+def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spacegroup, residues, res_range ):

    # remove final lines from crystfel hkl out
    print( "removing final lines from crystfel hklin" )

@@ -129,7 +159,7 @@ def main( hklin_file, hklout_file, mtzout, project, crystal, dataset, cell, spac
    print( "making mtz" )
    print( "using cell constants\n{0} {1} {2} A {3} {4} {5} deg".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )
    print( "Titles in mtz will be:\nPROJECT {0} CRYSTAL {1} DATASET {2}".format( project, crystal, dataset ) )
-   make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup )
+   make_mtz( hklout_file, mtzout, project, crystal, dataset, cell, spacegroup, residues, res_range )
    print( "done" )

def list_of_floats(arg):
@@ -182,6 +212,18 @@ if __name__ == "__main__":
        help="list of complete cell lengths and angles, e.g. 59.3,59.3,153.1,90.0,90.0,90.0. They should all be floats",
        type=list_of_floats
    )
+   parser.add_argument(
+       "-r",
+       "--residues",
+       help="number of residues for truncate, e.g., hewl = 129",
+       type=int
+   )
+   parser.add_argument(
+       "-A",
+       "--resolution_range",
+       help="list of 2 floats - low-res then high-res cut-off, e.g., 50.0,1.3",
+       type=list_of_floats
+   )
    parser.add_argument(
        "-s",
        "--stream_file",
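Note: list_of_floats, used as the argparse type for the cell and resolution-range options, is defined further up the file and not shown in these hunks; presumably a comma-split converter along these lines (a sketch, not the file's actual body):

def list_of_floats( arg ):
    # "50.0,1.3" -> [50.0, 1.3]
    return [ float( x ) for x in arg.split( "," ) ]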
@@ -200,7 +242,7 @@ if __name__ == "__main__":
    if args.stream_file:
        print( "reading stream file" )
        cell, xtals = get_mean_cell( args.stream_file )
-       print( "found {0} xtats".format( xtals ) )
+       print( "found {0} xtals".format( xtals ) )
        print( "mean lengths = {0}, {1}, {2} A".format( cell[0], cell[1], cell[2] ) )
        print( "mean angles = {0}, {1}, {2} deg".format( cell[3], cell[4], cell[5] ) )
        print( "# for input to make_mtz = {0},{1},{2},{3},{4},{5}".format( cell[0], cell[1], cell[2], cell[3], cell[4], cell[5] ) )

@@ -210,11 +252,11 @@ if __name__ == "__main__":
            mtzout = args.mtzout
        else:
            mtzout = os.path.splitext( args.hklin )[0] + ".mtz"
-       main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup )
+       main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, cell, args.spacegroup, args.residues, args.resolution_range )
    if args.stream_file == None and args.use_stream == False:
        hklout_file = os.path.splitext( args.hklin )[0] + "_cut.hkl"
        if args.mtzout:
            mtzout = args.mtzout
        else:
            mtzout = os.path.splitext( args.hklin )[0] + ".mtz"
-       main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup )
+       main( args.hklin, hklout_file, mtzout, args.project, args.crystal, args.dataset, args.cell, args.spacegroup, args.residues, args.resolution_range )

@@ -16,15 +16,18 @@ python partialator.py -s <path-to-stream-file>
-b number of resolution bins - must be > 20
-r high-res limit. Default set to 1.3
-a max-adu. Default = 12000
+-R ra reservation name, if available

# output
- scaled/merged files
- an mtz file
- useful plots
- useful summarized .dat files
+- log file of output
"""
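Note: a hypothetical full invocation combining the options above (file name and reservation are made up; other flags of the script are not shown in this hunk):

python partialator.py -s run0141.stream -b 25 -r 1.3 -a 12000 -R P11111_2024-12-10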

# modules
+from sys import exit
import pandas as pd
import numpy as np
import subprocess

@@ -35,12 +38,27 @@ from tqdm import tqdm
import regex as re
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
+import warnings
+warnings.filterwarnings( "ignore", category=RuntimeWarning )
+from loguru import logger

-def submit_job( job_file ):
+def submit_job( job_file, reservation ):

    # submit the job
-   submit_cmd = ["sbatch", "--cpus-per-task=32", "--" ,job_file]
+   if reservation:
+       print( "using a ra beamtime reservation = {0}".format( reservation ) )
+       logger.info( "using ra reservation to process data = {0}".format( reservation ) )
+       submit_cmd = [ "sbatch", "-p", "day", "--reservation={0}".format( reservation ), "--cpus-per-task=32", "--", job_file ]
+   else:
+       submit_cmd = [ "sbatch", "-p", "day", "--cpus-per-task=32", "--", job_file ]
+   logger.info( "using slurm command = {0}".format( submit_cmd ) )

+   try:
        job_output = subprocess.check_output( submit_cmd )
+       logger.info( "submitted job = {0}".format( job_output ) )
+   except subprocess.CalledProcessError as e:
+       print( "please give the correct ra reservation or remove -R from the arguments" )
+       exit()

    # scrub job id from - example Submitted batch job 742403
    pattern = r"Submitted batch job (\d+)"
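Note: a minimal sketch of the scrub this pattern performs on sbatch's stdout (sample output as in the comment above):

import re

job_output = b"Submitted batch job 742403\n"            # sbatch stdout (bytes)
match = re.search( r"Submitted batch job (\d+)", job_output.decode() )
job_id = match.group( 1 ) if match else None            # -> "742403"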
@@ -78,6 +96,7 @@ def run_partialator( proc_dir, name, stream, pointgroup, model, iterations, cell
    part_sh.write( " -y {0} \\\n".format( pointgroup ) )
    part_sh.write( " --model={0} \\\n".format( model ) )
    part_sh.write( " --max-adu={0} \\\n".format( adu ) )
+   part_sh.write( " -j 32 \\\n" )
    part_sh.write( " --iterations={0}\n\n".format( iterations ) )
    part_sh.write( "check_hkl --shell-file=mult.dat *.hkl -p {0} --nshells={1} --highres={2} &> check_hkl.log\n".format( cell, shells, part_h_res ) )
    part_sh.write( "check_hkl --ltest --ignore-negs --shell-file=ltest.dat *.hkl -p {0} --nshells={1} --highres={2} &> ltest.log\n".format( cell, shells, part_h_res ) )
@@ -90,6 +109,11 @@ def run_partialator( proc_dir, name, stream, pointgroup, model, iterations, cell
    # make file executable
    subprocess.call( [ "chmod", "+x", "{0}".format( part_run_file ) ] )

+   # add partialator script to log
+   part_input = open( part_run_file, "r" )
+   logger.info( "partialator input file =\n{0}".format( part_input.read() ) )
+   part_input.close()

    # return partialator file name
    return part_run_file

@@ -101,7 +125,7 @@ def make_process_dir( dir ):
        if e.errno != errno.EEXIST:
            raise

-def summary_stats( cc_dat, ccstar_dat, mult_dat, rsplit_dat ):
+def summary_stats( cc_dat, ccstar_dat, mult_dat, rsplit_dat, wilson_dat ):

    # read all files into pd
    # function to sort out different column names
@@ -117,37 +141,33 @@ def summary_stats( cc_dat, ccstar_dat, mult_dat, rsplit_dat ):
                     "mult", "snr", "I", "d", "min", "max" ]
        elif var == "rsplit":
            cols = [ "d(nm)", "rsplit", "nref", "d", "min", "max" ]
+       elif var == "wilson":
+           cols = [ "bin", "s2", "d", "lnI", "nref" ]

        df = pd.read_csv( dat, names=cols, skiprows=1, sep="\s+" )

-       print(df)
        return df

    # make df
    cc_df = read_dat( cc_dat, "cc" )
    ccstar_df = read_dat( ccstar_dat, "ccstar" )
    mult_df = read_dat( mult_dat, "mult" )
    rsplit_df = read_dat( rsplit_dat, "rsplit" )
+   wilson_df = read_dat( wilson_dat, "wilson" )

    # remove unwanted cols
    cc_df = cc_df[ [ "cc" ] ]
    ccstar_df = ccstar_df[ [ "ccstar" ] ]
    rsplit_df = rsplit_df[ [ "rsplit" ] ]
+   wilson_df = wilson_df[ [ "lnI" ] ]

    # merge dfs
-   stats_df = pd.concat( [ mult_df, cc_df, ccstar_df, rsplit_df ], axis=1, join="inner" )
+   stats_df = pd.concat( [ mult_df, cc_df, ccstar_df, rsplit_df, wilson_df ], axis=1, join="inner" )

    # make 1/d, 1/d^2 column
    stats_df[ "1_d" ] = 1 / stats_df.d
    stats_df[ "1_d2" ] = 1 / stats_df.d**2

-   # reorder cols
-   stats_df = stats_df[ [ "1_d", "1_d2", "d", "min",
-                          "max", "nref", "poss",
-                          "comp", "obs", "mult",
-                          "snr", "I", "cc", "ccstar", "rsplit" ] ]

    # change nan to 0
    stats_df = stats_df.fillna(0)

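Note: a toy illustration (made-up shell values) of the axis=1, join="inner" concat above - one row per resolution shell, one column per statistic:

import pandas as pd

mult_df = pd.DataFrame( { "d": [ 3.2, 2.5, 2.1 ], "mult": [ 40.1, 22.3, 9.8 ] } )
cc_df = pd.DataFrame( { "cc": [ 0.99, 0.95, 0.62 ] } )

stats_df = pd.concat( [ mult_df, cc_df ], axis=1, join="inner" )
stats_df[ "1_d" ] = 1 / stats_df.d        # as in summary_stats
stats_df[ "1_d2" ] = 1 / stats_df.d**2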
@@ -171,13 +191,21 @@ def get_metric( d2_series, cc_series, cut_off ):

    # calculate cut-off point
    cc_stat = arctanh( cut_off, r, s0 )

    # convert back from 1/d2 to d
    cc_stat = np.sqrt( ( 1 / cc_stat ) )

-   return cc_stat
+   # get curve for plotting
+   cc_tanh = tanh( d2_series, r, s0 )
+
+   return round( cc_stat, 3 ), cc_tanh

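Note: the tanh / arctanh helpers called in get_metric are defined elsewhere in the script; a plausible sketch, assuming a smooth fall-off of CC with 1/d**2 and fit parameters r and s0 from curve_fit (the exact parameterisation is an assumption):

import numpy as np

def tanh( s2, r, s0 ):
    # fitted fall-off of the correlation against s2 = 1/d**2
    return 0.5 * ( 1 - np.tanh( ( s2 - s0 ) / r ) )

def arctanh( cc, r, s0 ):
    # inverse of the model: the 1/d**2 at which the fitted curve crosses cc
    return s0 + r * np.arctanh( 1 - 2 * cc )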
-def summary_fig( stats_df ):
+def summary_fig( stats_df, cc_tanh, ccstar_tanh, cc_cut, ccstar_cut ):

+   def dto1_d( x ):
+       return 1/x
+
+   def dto1_d2( x ):
+       return 1/x**2

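+   # note: secondary_xaxis expects a (forward, inverse) transform pair;
+   # 1/x is its own inverse, so dto1_d serves as both for the 1/d axes below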
    # plot results
    cc_fig, axs = plt.subplots(2, 2)
@@ -186,11 +214,17 @@ def summary_fig( stats_df ):

    # cc plot
    color = "tab:red"
    axs[0,0].set_xlabel( "1/d (1/A)" )
-   axs[0,0].set_ylabel( "CC" )
+   axs[0,0].set_ylabel( "CC", color=color )
    axs[0,0].set_ylim( 0, 1 )
    axs[0,0].axhline( y = 0.3, color="black", linestyle = "dashed" )
+   # plot cc
    axs[0,0].plot( stats_df[ "1_d" ], stats_df.cc, color=color )
+   # plot fit
+   axs[0,0].plot( stats_df[ "1_d" ], cc_tanh, color="tab:grey", linestyle = "dashed" )
+   sax1 = axs[0,0].secondary_xaxis( 'top', functions=( dto1_d, dto1_d ) )
+   sax1.set_xlabel( 'd (A)' )
    axs[0,0].tick_params( axis="y", labelcolor=color )
+   axs[0,0].text( 0.1, 0.1, "CC @ 0.3 = {0}".format( cc_cut ), fontsize = 8 )

    # cc* plot
    color = "tab:blue"
@@ -199,54 +233,38 @@ def summary_fig( stats_df ):
    axs[0,1].set_ylim( 0, 1 )
    axs[0,1].axhline( y = 0.7, color="black", linestyle = "dashed" )
    axs[0,1].plot( stats_df[ "1_d" ], stats_df.ccstar, color=color )
+   # plot fit
+   axs[0,1].plot( stats_df[ "1_d" ], ccstar_tanh, color="tab:grey", linestyle = "dashed" )
+   sax2 = axs[0,1].secondary_xaxis( 'top', functions=( dto1_d, dto1_d ) )
+   sax2.set_xlabel( 'd (A)' )
    axs[0,1].tick_params( axis='y', labelcolor=color )
+   axs[0,1].text( 0.1, 0.1, "CC* @ 0.7 = {0}".format( ccstar_cut ), fontsize = 8 )

    # rsplit plot
    color = "tab:green"
    axs[1,0].set_xlabel( "1/d (1/A)" )
    axs[1,0].set_ylabel( "Rsplit", color=color )
    axs[1,0].plot( stats_df[ "1_d" ], stats_df.rsplit, color=color )
+   sax3 = axs[1,0].secondary_xaxis( 'top', functions=( dto1_d, dto1_d ) )
+   sax3.set_xlabel( 'd (A)' )
    axs[1,0].tick_params( axis='y', labelcolor=color )

-   # rsplit plot
+   # wilson plot
    color = "tab:purple"
-   axs[1,1].set_xlabel( "1/d (1/A)" )
+   axs[1,1].set_xlabel( "1/d**2 (1/A**2)" )
-   axs[1,1].set_ylabel("Multiplicity", color=color)
+   axs[1,1].set_ylabel( "lnI", color=color )
-   axs[1,1].plot(stats_df[ "1_d" ], stats_df.mult, color=color)
+   axs[1,1].plot( stats_df[ "1_d2" ], stats_df.lnI, color=color )
+   # the forward map for a 1/d**2 axis labelled in d is 1/sqrt(x), not 1/x**2
+   sax4 = axs[1,1].secondary_xaxis( 'top', functions=( lambda x: 1 / np.sqrt( x ), dto1_d2 ) )
+   sax4.set_xlabel( "d (A)" )
    axs[1,1].tick_params( axis='y', labelcolor=color )

    # save figure
    plt.tight_layout()
    plt.savefig( "plots.png" )

-def get_mean_cell( stream ):
-
-   # get uc values from stream file
-   # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
-   # scrub clen and return - else nan
-   try:
-       pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
-       cell_lst = re.findall( pattern, stream )
-       xtals = len( cell_lst )
-   except AttributeError:
-       return np.nan
-
-   cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
-   cell_df = pd.DataFrame( cell_lst, columns=cols )
-
-   mean_a = round( cell_df.a.mean()*10, 3 )
-   mean_b = round( cell_df.b.mean()*10, 3 )
-   mean_c = round( cell_df.c.mean()*10, 3 )
-   mean_alpha = round( cell_df.alpha.mean(), 3 )
-   mean_beta = round( cell_df.beta.mean(), 3 )
-   mean_gamma = round( cell_df.gamma.mean(), 3 )
-
-   return mean_a, mean_b, mean_c, mean_alpha, mean_beta, mean_gamma
-
-def main( cwd, name, stream, pointgroup, model, iterations, cell, shells, part_h_res, adu ):
+def main( cwd, name, stream, pointgroup, model, iterations, cell, shells, part_h_res, adu, reservation ):

-   print( "begin job" )
    # submitted job set
    submitted_job_ids = set()

@@ -257,18 +275,17 @@ def main( cwd, name, stream, pointgroup, model, iterations, cell, shells, part_h

    # move to part directory
    os.chdir( part_dir )

-   print( "making partialator files" )
+   print( "making partialator file" )
    # make partialator run file
    part_run_file = run_partialator( part_dir, name, stream, pointgroup, model, iterations, cell, shells, part_h_res, adu )

    # submit job
-   job_id = submit_job( part_run_file )
+   job_id = submit_job( part_run_file, reservation )
    print( "job submitted: {0}".format( job_id ) )
    submitted_job_ids.add( job_id )
-   print( "DONE" )

    # use progress bar to track job completion
-   time.sleep(30)
+   time.sleep(10)
    wait_for_jobs(submitted_job_ids, 1 )
    print("slurm processing done")

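Note: wait_for_jobs is defined elsewhere in the script; a hypothetical sketch of the polling it performs (the real helper also drives a tqdm progress bar):

import subprocess
import time

def wait_for_jobs( job_ids, poll_mins ):
    # poll squeue until none of the submitted ids remain queued or running
    pending = set( job_ids )
    while pending:
        out = subprocess.check_output( [ "squeue", "--noheader", "--format=%i" ] ).decode()
        pending &= set( out.split() )
        if pending:
            time.sleep( poll_mins * 60 )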
@@ -277,10 +294,11 @@ def main( cwd, name, stream, pointgroup, model, iterations, cell, shells, part_h
    ccstar_dat = "ccstar.dat"
    mult_dat = "mult.dat"
    rsplit_dat = "Rsplit.dat"
+   wilson_dat = "wilson.dat"

    # make summary data table
-   stats_df = summary_stats( cc_dat, ccstar_dat, mult_dat, rsplit_dat )
+   stats_df = summary_stats( cc_dat, ccstar_dat, mult_dat, rsplit_dat, wilson_dat )
-   print( stats_df.to_string() )
+   logger.info( "stats table from .dat file =\n{0}".format( stats_df.to_string() ) )
    print_df = stats_df[ [ "1_d", "d", "min",
                           "max", "nref", "poss",
                           "comp", "obs", "mult",
@@ -288,13 +306,15 @@ def main( cwd, name, stream, pointgroup, model, iterations, cell, shells, part_h
    print_df.to_csv( "summary_table.csv", sep="\t", index=False )

    # calculate cc metrics
-   cc_cut = get_metric( stats_df[ "1_d2" ], stats_df.cc, 0.3 )
+   cc_cut, cc_tanh = get_metric( stats_df[ "1_d2" ], stats_df.cc, 0.3 )
-   ccstar_cut = get_metric( stats_df[ "1_d2" ], stats_df.ccstar, 0.7 )
+   ccstar_cut, ccstar_tanh = get_metric( stats_df[ "1_d2" ], stats_df.ccstar, 0.7 )
    print( "resolution at CC0.5 at 0.3 = {0}".format( cc_cut ) )
    print( "resolution at CC* at 0.7 = {0}".format( ccstar_cut ) )
+   logger.info( "resolution at CC0.5 at 0.3 = {0}".format( cc_cut ) )
+   logger.info( "resolution at CC* at 0.7 = {0}".format( ccstar_cut ) )

    # show plots
-   summary_fig( stats_df )
+   summary_fig( stats_df, cc_tanh, ccstar_tanh, cc_cut, ccstar_cut )

    # move back to top dir
    os.chdir( cwd )
@@ -361,13 +381,33 @@ if __name__ == "__main__":
        "-a",
        "--max_adu",
        help="maximum detector counts to allow. Default is 12000.",
-       type=int
+       type=int,
+       default=12000
+   )
+   parser.add_argument(
+       "-R",
+       "--reservation",
+       help="reservation name for ra cluster. Usually along the lines of P11111_2024-12-10",
+       type=str,
+       default=None
+   )
+   parser.add_argument(
+       "-d",
+       "--debug",
+       help="output debug to terminal.",
+       action="store_true",   # type=bool would treat any non-empty string, even "False", as True
+       default=False
    )
    args = parser.parse_args()

+   # set loguru
+   if not args.debug:
+       logger.remove()
+   logfile = "{0}.log".format( args.name )
+   logger.add( logfile, format="{message}", level="INFO" )
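Note: a minimal standalone sketch of the loguru pattern set up above - logger.remove() drops the default stderr sink, then logger.add() routes INFO-and-above messages to the run log only:

from loguru import logger

logger.remove()                                           # silence the default stderr sink
logger.add( "run.log", format="{message}", level="INFO" )
logger.info( "written to run.log, not the terminal" )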
    # run main
    cwd = os.getcwd()
    print( "top working directory = {0}".format( cwd ) )
-   main( cwd, args.name, args.stream_file, args.pointgroup, args.model, args.iterations, args.cell_file, args.bins, args.resolution, args.max_adu )
+   main( cwd, args.name, args.stream_file, args.pointgroup, args.model, args.iterations, args.cell_file, args.bins, args.resolution, args.max_adu, args.reservation )

@@ -52,7 +52,7 @@ def scrub_res( stream ):
    # example - diffraction_resolution_limit = 4.07 nm^-1 or 2.46 A
    # scrub res_lst or return np.nan
    try:
-       pattern = r"diffraction_resolution_limit\s=\s\d\.\d+\snm\^-1\sor\s(\d\.\d+)\sA"
+       pattern = r"diffraction_resolution_limit\s=\s\d\.\d+\snm\^-1\sor\s(\d+\.\d+)\sA"
        res_lst = re.findall( pattern, stream )
        if AttributeError:
            return res_lst
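Note: the widened capture group matters for weakly diffracting patterns; a quick check with a synthetic line shows why (\d\.\d+) missed resolutions of 10 A or worse:

import re

line = "diffraction_resolution_limit = 0.80 nm^-1 or 12.50 A"
old = r"diffraction_resolution_limit\s=\s\d\.\d+\snm\^-1\sor\s(\d\.\d+)\sA"
new = r"diffraction_resolution_limit\s=\s\d\.\d+\snm\^-1\sor\s(\d+\.\d+)\sA"
print( re.findall( old, line ) )   # [] - a single leading digit cannot match "12.50"
print( re.findall( new, line ) )   # ['12.50']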