updated to be compatible with jfj/clara and the old pipeline; fixed label bug (removed 'both' label); updated bool handling in argparse

This commit is contained in:
Beale John Henry
2025-01-16 11:56:42 +01:00
parent 58db2c33da
commit c5868683f6


@@ -25,41 +25,55 @@ import pandas as pd
import glob
import os
import numpy as np
from sys import exit
def concatenate_files( input_file_lst, output ):
def concatenate_files( input_file_lst, output, label ):
output_file = "{0}.lst".format( output )
output_file = "{0}_{1}.lst".format( output, label )
lines = 0
# create output file
with open( output_file, "w" ) as output:
# loop through input list - read and write to output file
for lst_file_pwd in input_file_lst:
for lst_file_pwd in input_file_lst.lst_pwd:
# open and write to output file
with open( lst_file_pwd, "r" ) as lst_file:
lines = lines + len( lst_file.readlines() )
output.write( lst_file.read() )
lst_file.close()
def make_pwd( run_no, endstation, pgroup ):
output.close()
# construct lst folder path
lst_pwd = "/sf/{0}/data/{1}/raw/".format( endstation, pgroup ) + "run" + run_no + "*/data"
print( "written {0} images to {1}".format( lines, output_file ) )
def make_pwd( run_no, endstation, pgroup, jfj ):
# determine the lst folder for jfj/clara or the old daq
if jfj == True:
lst_pwd = "/sf/{0}/data/{1}/res/run{2}*".format( endstation, pgroup, run_no )
else:
# construct lst folder path
lst_pwd = "/sf/{0}/data/{1}/raw/run{2}*/data".format( endstation, pgroup, run_no )
return lst_pwd
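# e.g. (illustrative endstation/pgroup values only): make_pwd( "0099", "alvra", "p12345", True )
# gives "/sf/alvra/data/p12345/res/run0099*" for JFJ/Clara output, while jfj = False gives
# "/sf/alvra/data/p12345/raw/run0099*/data" for the old daq layout.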
def find_lst( lst_dir, label ):
# if label = both, i.e. both lights and darks, set label to lst - so it's always found
if label == "both":
label = "lst"
if label == "on" or label == "off":
tail = "{0}.list".format( label )
if label == "light" or label == "dark":
tail = "{0}.lst".format( label )
# create df for all lst
lst_dir_df = pd.DataFrame()
# search for lst with appropriate labels
for path, dirs, files in os.walk( lst_dir ):
for name in files:
if name.endswith( ".lst" ):
if name.endswith( tail ):
# get lst pwd
lst_pwd = os.path.join( path, name )
@@ -76,7 +90,7 @@ def find_lst( lst_dir, label ):
# return df lst from this directory
return lst_dir_df
def generate_lst_df( run_lst, endstation, label, pgroup ):
def generate_lst_df( run_lst, endstation, label, pgroup, jfj ):
# make run number df
cols = [ "run_no" ]
@@ -85,12 +99,11 @@ def generate_lst_df( run_lst, endstation, label, pgroup ):
range_df[ "run_no" ] = range_df.run_no.str.zfill(4)
# make new column of list paths
range_df[ "lst_app_dir" ] = range_df[ "run_no" ].apply( lambda x: make_pwd( x, endstation, pgroup ) )
range_df[ "lst_app_dir" ] = range_df[ "run_no" ].apply( lambda x: make_pwd( x, endstation, pgroup, jfj ) )
# make df of lsts to be concatenated
lst_df = pd.DataFrame()
for index, row in range_df.iterrows():
# get approximate dir pwd
@@ -114,13 +127,18 @@ def generate_lst_df( run_lst, endstation, label, pgroup ):
return lst_df
def main( run_lst, endstation, label, pgroup, output_file ):
def main( run_lst, endstation, label, pgroup, output_file, jfj ):
# make df of lst files
lst_df = generate_lst_df( run_lst, endstation, label, pgroup )
lst_df = generate_lst_df( run_lst, endstation, label, pgroup, jfj )
# check to see if any files have been found
if lst_df.empty:
print( "no {0} lists were found in runs {1}".format( label, run_lst ) )
exit()
# concatenate all lst files in lst_df
concatenate_files( lst_df.lst_pwd, output_file )
concatenate_files( lst_df, output_file, label )
def range_of_runs(arg):
return list(map(int, arg.split(',')))
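# e.g. (illustrative): range_of_runs( "12,18" ) returns [12, 18]; the block under
# "make continuous list from input range limits" below then expands those limits into
# the full run list 12..18 before it is joined with any individually listed runs.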
@@ -153,20 +171,38 @@ if __name__ == "__main__":
help="pgroup the data are collected in",
type=str
)
parser.add_argument(
"-j",
"--jfj",
help="was the Jungfraujoch/Clara data processing pipeline used to process your data. Default = True",
type=bool,
default=False
)
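# NOTE (editorial sketch, not the committed behaviour): argparse's type=bool converts any
# non-empty string to True, so "-j False" still evaluates as True. If a plain on/off
# switch is the intent, a store_true flag is more predictable, e.g.:
#     parser.add_argument( "-j", "--jfj", action="store_true",
#                          help="set if the Jungfraujoch/Clara pipeline processed the data (default: False)" )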
parser.add_argument(
"-l",
"--label",
help="the label of the lst file, i.e. 'light', 'dark' or 'both'",
type=str,
required=True
help="the activation label for the data. Not JFJ the labels should = 'light' or 'dark'. With JFJ the labels should = 'on' or 'off'.",
type=str
)
parser.add_argument(
"-o",
"--output",
help="name of output file",
type=str,
default=None
)
args = parser.parse_args()
# JFJ on/off vs non-JFJ light/dark label checks
if args.label != "off" and args.label != "on" and args.label != "light" and args.label != "dark":
print( "label flag (-l) must be either 'on' or 'off' with JFJ = True, or 'light' or 'dark' with JFJ = False." )
exit()
print( args.jfj )
if ( args.label == "off" or args.label == "on" ) and args.jfj == False:
print( "JFJ uses 'on' and 'off' flags. Please check inputs and whether the new JFJ/Clara processing pipeline was used." )
exit()
if ( args.label == "light" or args.label == "dark" ) and args.jfj == True:
print( "The old daq uses 'light' and 'dark' flags. Please check inputs and whether the newJFJ/Clara processing pipeline was used." )
exit()
# make continuous list from input range limits
range = []
if args.range is not None:
@@ -180,7 +216,12 @@ if __name__ == "__main__":
runs = args.runs
run_lst = range + runs
print( "appending {0} lst files from runs {1}".format( args.label, run_lst ) )
# make default name
if not args.output:
output_name = "-".join( str(e) for e in run_lst )
output_name = "run{0}".format( output_name )
else:
output_name = args.output
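# e.g. (illustrative): run_lst = [12, 13] with no -o flag gives output_name = "run12-13",
# so concatenate_files() writes its output to "run12-13_<label>.lst".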
# run main
main( run_lst, args.endstation, args.label, args.pgroup, args.output )
print( "done" )
main( run_lst, args.endstation, args.label, args.pgroup, output_name, args.jfj )
print( "done" )