now uses argparse and some bug fixes

This commit is contained in:
Beale John Henry
2023-06-25 23:23:41 +02:00
parent f4907ad02b
commit 625ae53cd5

View File

@@ -1,7 +1,22 @@
#!/usr/bin/env python3
# authors T. Mason and J. Beale
# authors T.Mason and J.Beale
"""
# aim
to refine the detector distance using crystfel
- naming convention = #.###/#.###.stream
# usage
python detector-distance-refinement.py -l <path to lst file generated by daq>
-g <path to geom file>
-d central clen to refine around
-c cell_file
-s sample size
# output
plot files of the analysis and a suggested value for the clen
"""
# modules
import pandas as pd
@@ -11,6 +26,7 @@ import regex as re
import numpy as np
import matplotlib.pyplot as plt
import time
import argparse
def h5_sample( lst, sample ):
@@ -71,8 +87,8 @@ def write_crystfel_run( clen, sample_h5_file, clen_geom_file, cell_file ):
run_sh.write( " --geometry={0}\\\n".format( clen_geom_file ) )
run_sh.write( " --pdb={0} \\\n".format( cell_file ) )
run_sh.write( " --indexing=xgandalf-latt-cell --peaks=peakfinder8 \\\n" )
run_sh.write( " --integration=rings-grad --tolerance=10.0,10.0,10.0,2,3,2 --threshold=10 --min-snr=5 --int-radius=2,3,6 \\\n" )
run_sh.write( " -j 36 --no-multi --no-retry --check-peaks --max-res=3000 --min-pix-count=1 --local-bg-radius=4 --min-res=85\n\n" )
run_sh.write( " --threshold=15 --min-snr=10 --int-radius=3,5,9 \\\n" )
run_sh.write( " -j 36 --no-multi --no-retry --max-res=3000 --min-pix-count=2 --min-res=85\n\n" )
run_sh.close()
# make file executable
@@ -81,18 +97,15 @@ def write_crystfel_run( clen, sample_h5_file, clen_geom_file, cell_file ):
# return crystfel file name
return cryst_run_file
def make_sample(lst, sample):
# set current working directory
os.chdir("/sf/cristallina/data/p20590/work/process/jhb/detector_refinement")
cwd = os.getcwd()
def make_sample( lst, sample ):
# make sample list
print("making {0} sample of images".format(sample))
sample_h5 = h5_sample(lst, sample)
sample_h5_file = "{0}/{1}".format(cwd, sample_h5)
print("done")
return cwd, sample_h5_file
return sample_h5_file
def make_process_dir(proc_dir):
# make process directory
@@ -144,7 +157,7 @@ def scrub_clen( stream_pwd ):
if AttributeError:
return float( clen )
except AttributeError:
return np.nan
return 1
def find_streams( top_dir ):
@@ -193,7 +206,7 @@ def scrub_us( stream ):
except AttributeError:
return np.nan
def scrub_helper(top_dir):
def scrub_helper( top_dir ):
# find stream files from process directory
print( "finding stream files" )
stream_df = find_streams( top_dir )
@@ -262,15 +275,16 @@ def find_clen_values(stats_df):
min_gamma_val, min_gamma_clen = find_min_clen('std_gamma')
min_c_val, min_c_clen = find_min_clen('std_c')
print(f"The value of clen for the minimum alpha value of {min_alpha_val} is {min_alpha_clen}")
print(f"The value of clen for the minimum beta value of {min_beta_val} is {min_beta_clen}")
print(f"The value of clen for the minimum gamma value of {min_gamma_val} is {min_gamma_clen}")
print(f"The value of clen for the minimum c value of {min_c_val} is {min_c_clen}")
print("The value of clen for the minimum alpha value of {} is {}".format(min_alpha_val, min_alpha_clen))
print("The value of clen for the minimum beta value of {} is {}".format(min_beta_val, min_beta_clen))
print("The value of clen for the minimum gamma value of {} is {}".format(min_gamma_val, min_gamma_clen))
print("The value of clen for the minimum c value of {} is {}".format(min_c_val, min_c_clen))
return min_alpha_clen, min_beta_clen, min_gamma_clen, min_c_clen, min_alpha_val, min_beta_val, min_gamma_val, min_c_val
def plot_indexed_std(stats_df, ax1, ax2):
def plot_indexed_std( stats_df, ax1, ax2 ):
# indexed images plot
color = "tab:red"
ax1.set_xlabel("clen")
@@ -296,7 +310,8 @@ def plot_indexed_std(stats_df, ax1, ax2):
ax2.plot(stats_df.clen, stats_df.std_c, color=color)
def plot_indexed_std_alpha_beta_gamma(stats_df, ax1, ax2):
def plot_indexed_std_alpha_beta_gamma( stats_df, ax1, ax2 ):
# indexed images plot
color = "tab:red"
ax1.set_xlabel("clen")
@@ -321,13 +336,23 @@ def plot_indexed_std_alpha_beta_gamma(stats_df, ax1, ax2):
color = "green"
ax2.plot(stats_df.clen, stats_df.std_gamma, color=color)
def main_coarse( lst, sample, lab6_geom_file, centre_clen, cell_file, steps_coarse, scan_name_coarse, step_size_coarse ):
def scan( cwd, lst, sample, lab6_geom_file, centre_clen, cell_file, step_size ):
# define coarse or fine scan
if step_size == "coarse":
steps = 20
step_size = 0.0005 # m
scan_name = "coarse"
if step_size == "fine":
steps = 50
step_size = 0.00005 # m
scan_name = "fine"
#make sample list
cwd, sample_h5_file = make_sample(lst, sample)
sample_h5_file = make_sample(lst, sample)
# make list of clen steps above and below the central clen
step_range = make_step_range(centre_clen, step_size_coarse, steps_coarse)
step_range = make_step_range(centre_clen, step_size, steps)
# make directorys for results
print( "begin CrystFEL anaylsis of different clens" )
@@ -335,12 +360,9 @@ def main_coarse( lst, sample, lab6_geom_file, centre_clen, cell_file, steps_coar
# loop to cycle through clen steps
for clen in step_range:
# move back to cwd
os.chdir( cwd )
print( "processing clen = {0}".format( clen ) )
# define process directory
proc_dir = "{0}/{1}/{2}".format( cwd, scan_name_coarse, clen )
proc_dir = "{0}/{1}/{2}".format( cwd, scan_name, clen )
# make process directory
make_process_dir(proc_dir)
@@ -358,80 +380,21 @@ def main_coarse( lst, sample, lab6_geom_file, centre_clen, cell_file, steps_coar
subprocess.call( [ "sbatch", "-p", "day", "--cpus-per-task=32", "--", "./{0}".format( cryst_run_file ) ] )
print( "done" )
#wait for jobs to complete
check_job_status(username)
def main_fine( lst, lab6_geom_file, centre_clen, cell_file, steps_fine, scan_name_fine, step_size_fine ):
# set current working directory
os.chdir("/sf/cristallina/data/p20590/work/process/jhb/detector_refinement")
cwd = os.getcwd()
#define the sample_h5_file location for this function
sample_h5 = "h5_{0}_sample.lst".format(sample)
sample_h5_file = "{0}/{1}".format(cwd, sample_h5)
# make list of clen steps above and below the central clen
step_range = make_step_range(centre_clen, step_size_fine, steps_fine)
# make directorys for results
print( "begin CrystFEL anaylsis of different clens" )
# loop to cycle through clen steps
for clen in step_range:
# move back to cwd
os.chdir( cwd )
print( "processing clen = {0}".format( clen ) )
# define process directory
proc_dir = "{0}/{1}/{2}".format( cwd, scan_name_fine, clen )
# make process directory
make_process_dir(proc_dir)
# move to process directory
os.chdir( proc_dir )
# make geom file
clen_geom_file = geom_amend( lab6_geom_file, clen )
# make crystfel run file
cryst_run_file = write_crystfel_run( clen, sample_h5_file, clen_geom_file, cell_file )
# run crystfel file
subprocess.call( [ "sbatch", "-p", "day", "--cpus-per-task=32", "--", "./{0}".format( cryst_run_file ) ] )
print( "done" )
#wait for jobs to complete
check_job_status(username)
def scrub_main_coarse( top_dir_coarse ):
def scrub_scan( scan_top_dir, scan ):
stats_df = scrub_helper(top_dir_coarse)
stats_df = scrub_helper(scan_top_dir)
#print clen for minimum alpha, beta, and gamma values
min_alpha_clen, min_beta_clen, min_gamma_clen, min_c_clen, min_alpha_val, min_beta_val, min_gamma_val, min_c_val = find_clen_values(stats_df)
# plot results
fig, (ax1, ax3) = plt.subplots(1, 2)
ax2 = ax1.twinx()
ax4 = ax3.twinx()
plot_indexed_std(stats_df, ax1, ax2)
plot_indexed_std_alpha_beta_gamma(stats_df, ax3, ax4)
fig.tight_layout()
plt.show()
def scrub_main_fine( top_dir_fine ):
stats_df = scrub_helper(top_dir_fine)
#print clen for minimum alpha, beta, and gamma values
min_alpha_clen, min_beta_clen, min_gamma_clen, min_c_clen, min_alpha_val, min_beta_val, min_gamma_val, min_c_val = find_clen_values(stats_df)
#print suggested clen
# print suggested clen
if scan == "fine":
suggested_clen = (min_alpha_clen + min_beta_clen + min_gamma_clen )/3
suggested_clen = round(suggested_clen, 4)
print ("The suggested clen = {0}".format(suggested_clen))
@@ -445,38 +408,60 @@ def scrub_main_fine( top_dir_fine ):
plot_indexed_std_alpha_beta_gamma(stats_df, ax3, ax4)
fig.tight_layout()
plt.savefig("{0}.png".format(scan))
plt.show()
def main( cwd, lst, sample, geom, centre_clen, cell_file ):
#location to which the data from coarse and fine scans will be saved
top_dir = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement"
scan_name_coarse = "coarse"
scan_name_fine = "fine"
top_dir_coarse = "{0}/{1}".format( top_dir, scan_name_coarse )
top_dir_fine = "{0}/{1}".format( top_dir, scan_name_fine )
top_dir_coarse = "{0}/coarse".format( cwd )
#General parameters for the scans
lst = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/acq0001.JF17T16V01.dark.lst"
lab6_geom_file = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/8M_p-op_c-op_p20590.geom"
centre_clen = 0.122 # in m
cell_file = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/hewl.cell"
username = "beale_j" #note that the timer only checks if the user has ANY jobs running,
#so the user should ONLY be running the jobs related to this script on the cluster
#to avoid a very long wait
scan( cwd, lst, sample, geom, centre_clen, cell_file, step_size="coarse" )
#stepping parameters for coarse and fine scan (generally not to be changed)
sample = 500
steps_coarse = 20
step_size_coarse = 0.0005 # m
steps_fine = 50
step_size_fine = 0.00005 # m
scrub_scan( top_dir_coarse, scan="coarse" )
#Calling the functions
main_coarse( lst, sample, lab6_geom_file, centre_clen, cell_file, steps_coarse, scan_name_coarse, step_size_coarse )
top_dir_fine = "{0}/fine".format( cwd )
scrub_main_coarse( top_dir_coarse )
scan( cwd, lst, sample, geom, centre_clen, cell_file, step_size="fine" )
main_fine( lst, lab6_geom_file, centre_clen, cell_file, steps_fine, scan_name_fine, step_size_fine )
scrub_main_fine( top_dir_fine )
scrub_scan( top_dir_fine, scan="fine" )
if __name__ == "__main__":
    # Script entry point: parse the command line, report the run context and
    # start the refinement via main().
    # getpass is imported locally because only the entry point needs it.
    import getpass

    parser = argparse.ArgumentParser()
    # Path arguments are converted to absolute paths at parse time.
    # The list, geometry, distance and cell arguments have no usable default,
    # so they are required: a missing one is reported by argparse instead of
    # being passed on as None.
    parser.add_argument(
        "-l",
        "--lst",
        help="path to crystfel list file containing enough patterns for detector distance refinement",
        type=os.path.abspath,
        required=True
    )
    parser.add_argument(
        "-g",
        "--geom",
        help="path to geom file to be used in the refinement",
        type=os.path.abspath,
        required=True
    )
    parser.add_argument(
        "-d",
        "--central_distance",
        help="intial clen to use for refinement - usually from detector shift refinement",
        type=float,
        required=True
    )
    parser.add_argument(
        "-c",
        "--cell_file",
        help="path to cell file of the crystals used in the refinement",
        type=os.path.abspath,
        required=True
    )
    parser.add_argument(
        "-s",
        "--sample",
        help="sample size to use in the refinement",
        type=int,
        default=500
    )
    args = parser.parse_args()

    # run main
    # os.getlogin() raises OSError when the process has no controlling
    # terminal; getpass.getuser() checks the environment first and then falls
    # back to the password database.
    # username stays a module-level name because check_job_status( username )
    # is called with it elsewhere in this file.
    username = getpass.getuser()
    cwd = os.getcwd()
    print( "current username = {0}".format( username ) )
    print( "top working directory = {0}".format( cwd ) )
    main( cwd, args.lst, args.sample, args.geom, args.central_distance, args.cell_file )