From 3c92e697362ea64b9ae14d51bae875455ad67ddf Mon Sep 17 00:00:00 2001
From: Beale John Henry
Date: Tue, 30 Jan 2024 16:34:24 +0100
Subject: [PATCH] update - gaussian fit + dual scan

---
 clen_tools/detector-distance-refinement.py | 111 ++++++++++++---------
 1 file changed, 66 insertions(+), 45 deletions(-)

diff --git a/clen_tools/detector-distance-refinement.py b/clen_tools/detector-distance-refinement.py
index a76a0c4..becf872 100644
--- a/clen_tools/detector-distance-refinement.py
+++ b/clen_tools/detector-distance-refinement.py
@@ -15,9 +15,6 @@ python detector-distance-refinement.py
 -l
 -s sample size
 -e endstation - alvra or cristallina
 
-# other variables
--f = fine only = only perform fine scan
-
 # output plot files of the analysis and a suggest for the clen
 """
@@ -32,6 +29,8 @@ import matplotlib.pyplot as plt
 import time
 from tqdm import tqdm
 import argparse
+from scipy.optimize import curve_fit
+from scipy.signal import peak_widths, find_peaks
 
 
 def h5_sample( lst, sample ):
@@ -40,11 +39,13 @@ def h5_sample( lst, sample ):
     cols = [ "h5", "image" ]
     sample_df = pd.read_csv( lst, sep="\s//", engine="python", names=cols )
 
-    # take defined sample
-    sample_df = sample_df.sample( sample )
+    # take a sample only if one is required
+    if len( sample_df ) > sample:
 
-    # sort list
-    sample_df = sample_df.sort_index()
+        # take defined sample
+        sample_df = sample_df.sample( sample )
+        # sort list
+        sample_df = sample_df.sort_index()
 
     # re-add // to image columm
     sample_df[ "image" ] = "//" + sample_df.image.astype(str)
@@ -86,7 +87,8 @@ def write_crystfel_run( clen, sample_h5_file, clen_geom_file, cell_file, thresho
     run_sh = open( cryst_run_file, "w" )
     run_sh.write( "#!/bin/sh\n\n" )
     run_sh.write( "module purge\n" )
-    run_sh.write( "module load crystfel/0.10.2\n" )
+    run_sh.write( "module use MX unstable\n" )
+    run_sh.write( "module load crystfel/0.10.2-rhel8\n" )
     run_sh.write( "indexamajig -i {0} \\\n".format( sample_h5_file ) )
     run_sh.write( " --output={0}.stream \\\n".format( clen ) )
     run_sh.write( " --geometry={0}\\\n".format( clen_geom_file ) )
@@ -156,7 +158,7 @@ def wait_for_jobs( job_ids, total_jobs ):
                     completed_jobs.add(job_id)
                     pbar.update(1)
             job_ids.difference_update(completed_jobs)
-        time.sleep(30)
+        time.sleep(2)
 
 
 def scrub_clen( stream_pwd ):
@@ -273,7 +275,7 @@ def scrub_helper( top_dir ):
     return stats_df
 
 
-def find_clen_values(stats_df):
+def find_clen_values( stats_df, scan ):
 
     def find_min_clen(col_name):
         min_val = stats_df[col_name].min()
@@ -281,17 +283,47 @@
         min_clen = min_row['clen'].values[0]
         return min_val, min_clen
 
+    def gauss(x, *p):
+
+        A, mu, sigma = p
+        return A * np.exp(-(x-mu)**2/(2.*sigma**2))
+
+    p0 = [ 30, 0.111, 0.01 ]
+    parameters, covariance = curve_fit( gauss, stats_df.clen, stats_df.indexed, p0=p0 )
+
+    # Get the fitted curve
+    stats_df[ "gaus" ] = gauss( stats_df.clen, *parameters)
+    # find peak centre
+    peaks = find_peaks( stats_df.gaus.values )
+
+    # find full peak width
+    fwhm = peak_widths( stats_df.gaus.values, peaks[0], rel_height=0.5 )
+
+    fwhm_str = int( round( fwhm[2][0], 0 ) )
+    fwhm_end = int( round( fwhm[3][0], 0 ) )
+
+    # translate width into motor values
+    indexed_start = stats_df.iloc[ fwhm_str, 0 ]
+    indexed_end = stats_df.iloc[ fwhm_end, 0 ]
+    mid_gauss = stats_df.clen.iloc[ peaks[0] ].values[0]
+
+    # cut df to only include indexed patterns
+    stats_df = stats_df[ ( stats_df.clen < indexed_end ) & ( stats_df.clen > indexed_start ) ]
+
+    # calculate minimum values
     min_alpha_val, min_alpha_clen = find_min_clen('std_alpha')
     min_beta_val, min_beta_clen = find_min_clen('std_beta')
     min_gamma_val, min_gamma_clen = find_min_clen('std_gamma')
     min_c_val, min_c_clen = find_min_clen('std_c')
+
+    # find possible clens
+    suggested_clen = (min_alpha_clen + min_beta_clen + min_gamma_clen )/3
+    suggested_clen = round(suggested_clen, 4)
 
-    print("The value of clen for the minimum alpha value of {} is {}".format(min_alpha_val, min_alpha_clen))
-    print("The value of clen for the minimum beta value of {} is {}".format(min_beta_val, min_beta_clen))
-    print("The value of clen for the minimum gamma value of {} is {}".format(min_gamma_val, min_gamma_clen))
-    print("The value of clen for the minimum c value of {} is {}".format(min_c_val, min_c_clen))
+    print( "middle of indexing gaussian fit of {0} scan = {1}".format( scan, mid_gauss ) )
+    print( "mean minimum of alpha, beta, gamma of {0} scan = {1}".format( scan, suggested_clen ) )
 
-    return min_alpha_clen, min_beta_clen, min_gamma_clen, min_c_clen, min_alpha_val, min_beta_val, min_gamma_val, min_c_val
+    return suggested_clen
 
 
 def plot_indexed_std( stats_df, ax1, ax2 ):
@@ -349,13 +381,13 @@ def scan( cwd, lst, sample, lab6_geom_file, centre_clen, cell_file, threshold, s
 
     # define coarse or fine scan
     if step_size == "coarse":
-        steps = 20
+        steps = 30
         step_size = 0.0005 # m
-        scan_name = "coarse"
+        scan_name = "scan"
     if step_size == "fine":
         steps = 50
         step_size = 0.00005 # m
-        scan_name = "fine"
+        scan_name = "scan"
 
     #make sample list
     sample_h5_file = make_sample(lst, sample)
@@ -404,13 +436,8 @@ def scrub_scan( scan_top_dir, scan ):
 
     stats_df = scrub_helper(scan_top_dir)
 
-    #print clen for minimum alpha, beta, and gamma values
-    min_alpha_clen, min_beta_clen, min_gamma_clen, min_c_clen, min_alpha_val, min_beta_val, min_gamma_val, min_c_val = find_clen_values(stats_df)
-
-    # print suggested clen
-    suggested_clen = (min_alpha_clen + min_beta_clen + min_gamma_clen )/3
-    suggested_clen = round(suggested_clen, 4)
-    print ("The suggested clen = {0}".format(suggested_clen))
+    # calculate suggested clen and make plot
+    suggested_clen = find_clen_values( stats_df, scan )
 
     # plot results
     fig, (ax1, ax3) = plt.subplots(1, 2)
@@ -425,24 +452,25 @@ def scrub_scan( scan_top_dir, scan ):
     return suggested_clen
 
 
-def main( cwd, lst, sample, geom, centre_clen, cell_file, fine_only, threshold ):
+def main( cwd, lst, sample, geom, centre_clen, cell_file, threshold ):
 
-    # if statement to do coarse if fine_only==False
-    if fine_only == False:
-        top_dir_coarse = "{0}/coarse".format( cwd )
+    # run scan
+    if not os.path.isdir( "{0}/scan".format( cwd ) ):
 
-        scan( cwd, lst, sample, geom, centre_clen, cell_file, threshold, step_size="coarse" )
+        # run initial coarse scan
+        scan( cwd, lst, sample, geom, centre_clen, cell_file, threshold, "coarse" )
+        # get approx centre from new results
+        coarse_clen = scrub_scan( cwd, scan="scan" )
+        # perform 2nd scan
+        scan( cwd, lst, sample, geom, coarse_clen, cell_file, threshold, "fine" )
 
-        suggested_clen = scrub_scan( top_dir_coarse, scan="coarse" )
     else:
-        suggested_clen = centre_clen
+        print( "scan already performed" )
 
-    top_dir_fine = "{0}/fine".format( cwd )
+    # check results
+    suggested_clen = scrub_scan( cwd, scan="scan" )
 
-    scan( cwd, lst, sample, geom, suggested_clen, cell_file, threshold, step_size="fine" )
-
-    scrub_scan( top_dir_fine, scan="fine" )
-    plt.show()
+    return suggested_clen
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -481,13 +509,6 @@ if __name__ == "__main__":
         type=int,
         default=500
     )
-    parser.add_argument(
-        "-f",
-        "--fine_only",
help="True = only do fine scan", - type=bool, - default=False - ) parser.add_argument( "-e", "--endstation", @@ -507,5 +528,5 @@ if __name__ == "__main__": # run main cwd = os.getcwd() print( "top working directory = {0}".format( cwd ) ) - main( cwd, args.lst, args.sample, args.geom, args.central_distance, args.cell_file, args.fine_only, threshold ) + main( cwd, args.lst, args.sample, args.geom, args.central_distance, args.cell_file, threshold )