#!/usr/bin/python

# author J.Beale

"""
# aim
script to append lst files from different run locations
so you give the script individual run numbers and/or a range of run numbers

# usage
python cat_lst.py -r <first>,<last>  e.g. 45,50 - for runs 45-50 (inclusive)
                  <runs>             optional positional argument, e.g. 45 47 50 - for these specific runs
                  #### note #### both of these can be used together - but you can't specify two ranges
                  -e endstation - "alvra" or "cristallina"
                  -p pgroup
                  -l label - i.e. 'light', 'dark' or 'both'
                  -o output file name

# output
a concatenated list file of all the requested runs
"""

import argparse
import pandas as pd
import glob
import os
import numpy as np


def concatenate_files( input_file_lst, output ):
    """
    Write the contents of every file in input_file_lst, in order, to "<output>.lst".

    input_file_lst - iterable of paths of the lst files to read
    output         - output file name without extension; ".lst" is appended
    """
    output_file = "{0}.lst".format( output )

    # the handle gets its own name so it no longer shadows the 'output' argument
    with open( output_file, "w" ) as out_fh:
        for lst_file_pwd in input_file_lst:
            with open( lst_file_pwd, "r" ) as lst_file:
                content = lst_file.read()
            out_fh.write( content )
            # a file lacking a final newline would otherwise have its last
            # entry fused with the first entry of the next file
            if content and not content.endswith( "\n" ):
                out_fh.write( "\n" )


def make_pwd( run_no, endstation, pgroup ):
    """
    Return the glob pattern of the data folder of one run:
    /sf/<endstation>/data/<pgroup>/raw/run<run_no>*/data
    """
    return "/sf/{0}/data/{1}/raw/run{2}*/data".format( endstation, pgroup, run_no )


def find_lst( lst_dir, label ):
    """
    Walk lst_dir recursively and collect the .lst files whose file name contains label.

    lst_dir - directory to search
    label   - substring the file name must contain; "both" (or None) selects every .lst file

    returns a DataFrame with a single column "lst_pwd" holding the full paths
    (the column exists even when nothing is found)
    """
    # if label = both, i.e. both lights and darks, set label to lst - so it's always found
    if label is None or label == "both":
        label = "lst"

    lst_pwds = []
    for path, dirs, files in os.walk( lst_dir ):
        for name in files:
            # the label has to be part of the file name, otherwise asking for
            # 'light' would also return the dark lists
            if name.endswith( ".lst" ) and label in name:
                lst_pwds.append( os.path.join( path, name ) )

    # build the frame once instead of concatenating one row at a time
    return pd.DataFrame( { "lst_pwd": lst_pwds } )


def generate_lst_df( run_lst, endstation, label, pgroup ):
    """
    Locate the lst files of every run in run_lst.

    run_lst    - run numbers (str or int); they are zero-padded to 4 digits
    endstation - endstation name used in the data path
    label      - lst label passed on to find_lst
    pgroup     - pgroup used in the data path

    returns a DataFrame with a single column "lst_pwd", in run_lst order
    raises FileNotFoundError if no data directory matches a run
    """
    lst_pwds = []
    for run_no in run_lst:
        # add zeros to left hand of number and build the approximate dir pwd
        lst_app_dir = make_pwd( str( run_no ).zfill( 4 ), endstation, pgroup )

        # find matching directories - sorted so the choice is reproducible
        lst_dirs = sorted( glob.glob( lst_app_dir ) )
        if not lst_dirs:
            raise FileNotFoundError(
                "no data directory found for run {0} - looked for {1}".format( run_no, lst_app_dir )
            )

        # as before, only the first matching directory is searched
        lst_pwds.extend( find_lst( lst_dirs[0], label )[ "lst_pwd" ] )

    return pd.DataFrame( { "lst_pwd": lst_pwds } )


def main( run_lst, endstation, label, pgroup, output_file ):
    """
    Find the lst files of the requested runs and concatenate them into "<output_file>.lst".
    """
    # make df of lst files
    lst_df = generate_lst_df( run_lst, endstation, label, pgroup )

    # concatenate all lst files in lst_df
    concatenate_files( lst_df[ "lst_pwd" ], output_file )


def range_of_runs( arg ):
    """
    argparse type for -r: turn a comma separated string, e.g. "45,50", into a list of ints.
    """
    return list( map( int, arg.split( "," ) ) )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r",
        "--range",
        help="list files in a range of run number to concatentate",
        type=range_of_runs
    )
    parser.add_argument(
        "runs",
        help="type in indivdual run numbers for list to be concantenated",
        type=str,
        nargs='*',
        default=[]
    )
    parser.add_argument(
        "-e",
        "--endstation",
        help="which endstation did you collect these data from, e.g., alvra or cristallina",
        type=str,
        default="cristallina"
    )
    # without a pgroup the data path can never match, so ask for it up front
    parser.add_argument(
        "-p",
        "--pgroup",
        help="pgroup the data are collected in - in form p#####",
        type=str,
        required=True
    )
    parser.add_argument(
        "-l",
        "--label",
        help="the label of the lst file, i.e. 'light', 'dark' or 'both'",
        type=str,
        required=True
    )
    parser.add_argument(
        "-o",
        "--output",
        help="name of output file - script will add .lst",
        type=str,
    )
    args = parser.parse_args()

    # make continuous list from input range limits
    # (named range_runs so the builtin 'range' is not shadowed)
    range_runs = []
    if args.range is not None:
        if len( args.range ) != 2:
            parser.error( "-r/--range expects two comma separated run numbers, e.g. 45,50" )
        first, last = args.range
        # inclusive range, converted to strings to match the positional runs
        range_runs = [ str( run ) for run in np.arange( first, last + 1 ).tolist() ]

    # concat range and run lists
    run_lst = range_runs + args.runs
    if not run_lst:
        parser.error( "no runs requested - give a range with -r and/or individual run numbers" )

    print( "appending {0} lst files from runs {1}".format( args.label, run_lst ) )

    # run main
    main( run_lst, args.endstation, args.label, args.pgroup, args.output )
    print( "done" )