More updates; now runs OK with test data

2025-01-22 12:37:45 +01:00
parent fb32283c9f
commit 6f16f39015
1 changed files with 54 additions and 10 deletions
--- a/reduction_tools/auto_process.py
+++ b/reduction_tools/auto_process.py
@@ -187,7 +187,7 @@ def scrub_index( index_dir_name ):
    index_log = index_log_file.read()

    # regex example = crystals = 0
-    images_pattern = r"images\s=\s(\d+)"
+    images_pattern = r"images?\s=\s(\d+)"
    images = re.search( images_pattern, index_log ).group(1)

    # regex example = crystals = 0
@@ -224,8 +224,22 @@ def scrub_index( index_dir_name ):
                } ]

    stream_df = pd.DataFrame( data )
+
+    index_log_file.close()
    
    return stream_df
+
+def scrub_partialator_log(  ):
+    """Return the "Overall CC" value (as a string) found in the ".log" file."""
+    # read the whole log; 'with' closes the handle even if read() raises
+    with open( ".log" ) as part_log_file:
+        part_log = part_log_file.read()
+
+    # regex example = Overall CC = 0.5970865
+    overcc_pattern = r"Overall\sCC\s=\s(\d\.\d+)"
+    # re.search returns None on no match, so .group raises AttributeError
+    overcc = re.search( overcc_pattern, part_log ).group(1)
+    return overcc
    
 def run_make_mtz( script_dir, hklin_file, project, crystal, dataset, cell, spacegroup, residues, res_range ):

@@ -368,18 +382,45 @@ def main( script_dir, cwd, runs,
                shutil.move( F_file, proc_dir )
            except shutil.Error as e:
                pass
-            print( "done {0}".format( run_name ) )
        
        else:
            print( "partialator not run. < 100 indexed" )
-        
-        data = [ { "run" : run,
-                   "prefix" : prefix,
-                 } ]
+            try:
+                cc = scrub_cc( part_run_name )
+            except AttributeError as e:
+                cc = np.nan
+
+        # get partialator metrics
+        overcc = part.get_overall_cc()
+        overrsplit = part.get_overall_rsplit()
+        try:
+            b_factor = part.get_b()
+        except AttributeError as e:
+            b_factor = np.nan
+
+        # collate meta data
+        run_data = [ { "run" : run,
+                       "prefix" : prefix
+                    } ]
+        run_df = pd.DataFrame( run_data )
+        part_data = [ { "overall_cc" : overcc,
+                        "cc" : cc,
+                        "overall_rsplit" : overrsplit,
+                        "overall_b" : b_factor,
+                    } ]
+        part_df = pd.DataFrame( part_data )
+        df_1 = pd.concat( [ run_df, stream_df, part_df ], axis=1 )
+        df = pd.concat( [ df, df_1 ] )
+
+        print( "done {0}".format( run_name ) )

        # move back to cwd
        os.chdir( cwd )

+    # output stats table
+    print( df )
+    df.to_csv( "auto_process_summary.csv", sep="," )
+
    print( "finished all!" )


@@ -387,10 +428,13 @@ def main( script_dir, cwd, runs,
 #script_dir = "/sf/cristallina/applications/mx/crystfel_tools/reduction_tools/"
 script_dir = "/sf/cristallina/data/p22216/work/processing/bach"
 cwd = os.getcwd()
-#runs = [ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
-#         113, 114, 115, 116, 117, 118, 119, 122, 123, 124, 125, 126, 127,
-#         128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138 ]
-runs = [ 11 ]
+runs = [ 11, 12, 13, 14, 23, 24, 25, 26, 59, 60, 61, 63, 64, 65,
+         67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
+         83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
+         100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+         113, 114, 115, 116, 117, 118, 119, 122, 123, 124, 125, 126, 127,
+         128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138 ]
+#runs = [ 11, 12 ]
 endstation = "cristallina"
 pgroup = "p22216"
 jfj = True