#!/usr/bin/python

# author J.Beale

"""
# aim
Concatenate the .lst files written for several runs into a single list file.
Runs can be given as an inclusive range, as individual run numbers, or both.

# usage
python cat_lst.py -r first,last - inclusive range, e.g. 45,50 for runs 45-50 (optional)
                  run [run ...] - individual run numbers, e.g. 45 47 50 (optional)
                  #### note #### a range and individual runs can be combined,
                                 but only one range can be given
                  -e endstation - "alvra" or "cristallina"
                  -p pgroup
                  -l label - i.e. 'light', 'dark' or 'both'
                  -o output file name (".lst" is appended)

# output
a concatenated list file of all the requested runs
"""

import argparse
import glob
import os

import numpy as np
import pandas as pd


def concatenate_files(input_file_lst, output):
    """Write the contents of every input file, in order, to ``<output>.lst``.

    Parameters
    ----------
    input_file_lst : iterable of str
        Paths of the list files to concatenate (a list or a pandas Series).
    output : str
        Output file name without extension; ".lst" is appended.
    """
    output_file = "{0}.lst".format(output)

    # a separate handle name keeps the `output` argument intact
    with open(output_file, "w") as out_fh:
        for lst_file_pwd in input_file_lst:
            with open(lst_file_pwd, "r") as lst_file:
                content = lst_file.read()
            out_fh.write(content)
            # a list file has one entry per line: make sure the last entry of
            # this file is not fused with the first entry of the next one
            if content and not content.endswith("\n"):
                out_fh.write("\n")


def make_pwd(run_no, endstation, pgroup):
    """Return the glob pattern of the data directory of one run.

    Parameters
    ----------
    run_no : str
        Run number, already zero-padded by the caller.
    endstation : str
        Endstation name used as the first path component below /sf.
    pgroup : str
        pgroup the data were collected in.
    """
    # the trailing "*" after the run number is expanded later with glob
    lst_pwd = "/sf/{0}/data/{1}/raw/".format(endstation, pgroup) + "run" + run_no + "*/data"

    return lst_pwd


def find_lst(lst_dir, label):
    """Collect the .lst files below ``lst_dir`` whose name contains ``label``.

    Parameters
    ----------
    lst_dir : str
        Directory that is searched recursively.
    label : str
        Substring the file name has to contain, e.g. 'light' or 'dark'.
        'both' disables the filter so that every .lst file is returned.

    Returns
    -------
    pandas.DataFrame
        One row per file in a single column "lst_pwd". The column exists
        even when no file was found.
    """
    # "both" means lights and darks, i.e. no filtering on the name
    wanted = None if label == "both" else label

    lst_pwds = []
    for path, dirs, files in os.walk(lst_dir):
        # os.walk yields entries in arbitrary order; sorting in place makes
        # the traversal (and therefore the concatenated output) deterministic
        dirs.sort()
        for name in sorted(files):
            if not name.endswith(".lst"):
                continue
            if wanted is not None and wanted not in name:
                continue
            lst_pwds.append(os.path.join(path, name))

    # build the frame once instead of concatenating inside the loop
    return pd.DataFrame(lst_pwds, columns=["lst_pwd"])


def generate_lst_df(run_lst, endstation, label, pgroup):
    """Build a DataFrame of all .lst files belonging to the requested runs.

    Parameters
    ----------
    run_lst : iterable of str or int
        Run numbers; each is zero-padded to four digits.
    endstation : str
        Endstation the data were collected at.
    label : str
        'light', 'dark' or 'both', forwarded to ``find_lst``.
    pgroup : str
        pgroup the data were collected in.

    Returns
    -------
    pandas.DataFrame
        Column "lst_pwd" holding the paths, in the order of ``run_lst``.

    Raises
    ------
    FileNotFoundError
        If no data directory matches one of the requested runs.
    """
    frames = []

    for run in run_lst:
        # add zeros to left hand of number
        run_no = str(run).zfill(4)

        # approximate dir pwd, resolved to the real directory with glob
        lst_app_dir = make_pwd(run_no, endstation, pgroup)
        matches = sorted(glob.glob(lst_app_dir))
        if not matches:
            raise FileNotFoundError(
                "no data directory found for run {0} (searched {1})".format(run_no, lst_app_dir)
            )

        # only the first match is used; sorting makes that choice reproducible
        frames.append(find_lst(matches[0], label))

    # pd.concat refuses an empty list, so handle "no runs" explicitly
    if not frames:
        return pd.DataFrame(columns=["lst_pwd"])

    return pd.concat(frames, ignore_index=True)


def main(run_lst, endstation, label, pgroup, output_file):
    """Find the .lst files of the given runs and concatenate them.

    Raises
    ------
    FileNotFoundError
        If a run directory is missing or no matching .lst file exists.
    """
    # make df of lst files
    lst_df = generate_lst_df(run_lst, endstation, label, pgroup)

    # refuse to silently write an empty output file
    if lst_df.empty:
        raise FileNotFoundError(
            "no {0} lst files found for runs {1}".format(label, list(run_lst))
        )

    # concatenate all lst files in lst_df
    concatenate_files(lst_df.lst_pwd, output_file)


def range_of_runs(arg):
    """Parse a comma separated string such as "45,50" into a list of ints."""
    return list(map(int, arg.split(',')))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r",
        "--range",
        help="list files in a range of run number to concatentate",
        type=range_of_runs
    )
    parser.add_argument(
        "runs",
        help="type in indivdual run numbers for list to be concantenated",
        type=str,
        nargs='*',
        default=[]
    )
    parser.add_argument(
        "-e",
        "--endstation",
        help="which endstation did you collect these data from, e.g., alvra or cristallina",
        type=str,
        default="cristallina"
    )
    parser.add_argument(
        "-p",
        "--pgroup",
        help="pgroup the data are collected in",
        type=str,
        # without a pgroup the data path contains "None" and can never match
        required=True
    )
    parser.add_argument(
        "-l",
        "--label",
        help="the label of the lst file, i.e. 'light', 'dark' or 'both'",
        type=str,
        required=True
    )
    parser.add_argument(
        "-o",
        "--output",
        help="name of output file",
        type=str,
        # avoids writing a file literally called "None.lst"
        default="concatenated"
    )
    args = parser.parse_args()

    # make continuous list from input range limits
    range_runs = []
    if args.range is not None:
        if len(args.range) != 2:
            parser.error("-r/--range expects exactly two run numbers, e.g. 45,50")
        first, last = args.range
        if first > last:
            parser.error("-r/--range: first run must not be larger than last run")
        # inclusive range, converted to a list of strings
        range_runs = [str(run) for run in np.arange(first, last + 1).tolist()]

    # concat range and run lists
    run_lst = range_runs + args.runs
    if not run_lst:
        parser.error("no runs given: use -r first,last and/or individual run numbers")

    print("appending {0} lst files from runs {1}".format(args.label, run_lst))
    # run main
    main(run_lst, args.endstation, args.label, args.pgroup, args.output)
    print("done")