179 lines
5.1 KiB
Python
179 lines
5.1 KiB
Python
#!/usr/bin/python
|
|
|
|
# author J.Beale
|
|
|
|
"""
|
|
# aim
script to append lst files from different run locations
so you give the script individual run numbers and/or a range of run numbers

# usage
python cat_lst.py -r <run_range> e.g. 45,50 - for runs 45-50, optional argument
                  <a series of individual run numbers> e.g., 45 47 50 - for these specific runs
                  #### note #### both of these can be used together - but you can't specify two lists
                  -e endstation - "alvra" or "cristallina"
                  -p pgroup
                  -l label - i.e. 'light', 'dark' or 'both'
                  -o output file name

# output
a concatenated list file of all the requested runs
|
|
"""
|
|
|
|
import argparse
|
|
import pandas as pd
|
|
import glob
|
|
import os
|
|
import numpy as np
|
|
|
|
def concatenate_files( input_file_lst, output ):
    """
    Concatenate a series of lst files into a single "<output>.lst" file.

    input_file_lst : iterable of paths of the lst files to append, in order
    output         : name of the output file without the ".lst" extension

    A newline is written after any non-empty input file that does not end
    with one, so the last entry of one file is never fused with the first
    entry of the next file.
    """

    output_file = "{0}.lst".format( output )

    # create output file - the handle gets its own name so that the
    # output argument is not shadowed
    with open( output_file, "w" ) as out_handle:

        # loop through input list - read and write to output file
        for lst_file_pwd in input_file_lst:

            # read the whole input file
            with open( lst_file_pwd, "r" ) as lst_file:
                contents = lst_file.read()

            out_handle.write( contents )

            # keep one entry per line across file boundaries
            if contents and not contents.endswith( "\n" ):
                out_handle.write( "\n" )
|
|
|
|
def make_pwd( run_no, endstation, pgroup ):
    """
    Build the glob pattern of the data folder of a single run.

    run_no     : run number as a string, spliced in directly after "run"
    endstation : endstation name used in the /sf/<endstation> part of the path
    pgroup     : pgroup name used in the data/<pgroup> part of the path

    returns the pattern "/sf/<endstation>/data/<pgroup>/raw/run<run_no>*/data"
    """

    # raw data area of this endstation and pgroup
    raw_root = "/sf/{0}/data/{1}/raw/".format( endstation, pgroup )

    # the wildcard stands for whatever follows the run number in the folder name
    return "".join( ( raw_root, "run", run_no, "*/data" ) )
|
|
|
|
def find_lst( lst_dir, label ):
    """
    Find the lst files below lst_dir whose file name contains label.

    lst_dir : directory that is searched recursively
    label   : text that must appear in the file name, e.g. "light" or
              "dark"; "both" selects every lst file

    returns a DataFrame with a single "lst_pwd" column holding the full
    path of each matching file. The column exists even when nothing
    matches, so callers can always access lst_pwd.
    """

    # if label = both, i.e. both lights and darks, set label to lst - so it's
    # always found ( every candidate name ends with ".lst" )
    if label == "both":
        label = "lst"

    lst_pwds = []

    # search for lst with appropriate labels
    for path, dirs, files in os.walk( lst_dir ):

        # sort in place so the walk order does not depend on the filesystem
        dirs.sort()

        for name in sorted( files ):

            # the label has to be checked here - otherwise light and dark
            # requests would return exactly the same files as both
            if name.endswith( ".lst" ) and label in name:
                lst_pwds.append( os.path.join( path, name ) )

    # build the df once instead of growing it with one concat per file
    return pd.DataFrame( { "lst_pwd" : lst_pwds } )
|
|
|
|
def generate_lst_df( run_lst, endstation, label, pgroup ):
    """
    Collect the lst files of all requested runs.

    run_lst    : run numbers ( strings or integers ), zero-padded to 4 digits here
    endstation : endstation name passed on to make_pwd
    label      : lst label passed on to find_lst
    pgroup     : pgroup name passed on to make_pwd

    returns a DataFrame with a single "lst_pwd" column listing the lst files
    in run order. The column exists even when no file is found. A run
    without a matching data folder is reported on stdout and skipped.
    """

    lst_pwds = []

    for run_no in run_lst:

        # add zeros to left hand of number
        run_no = str( run_no ).zfill( 4 )

        # get approximate dir pwd and find the matching folders
        lst_app_dir = make_pwd( run_no, endstation, pgroup )
        lst_dirs = sorted( glob.glob( lst_app_dir ) )

        # a run without a data folder is reported and skipped rather than
        # stopping the whole concatenation with an IndexError
        if not lst_dirs:
            print( "warning: no data directory matching {0} - run skipped".format( lst_app_dir ) )
            continue

        # find lsts in lst directory depending on label
        lst_dir_df = find_lst( lst_dirs[0], label )

        # .get copes with a df that carries no lst_pwd column at all
        lst_pwds.extend( lst_dir_df.get( "lst_pwd", [] ) )

    # one df, built once, with a clean 0..n-1 index
    return pd.DataFrame( { "lst_pwd" : lst_pwds } )
|
|
|
|
def main( run_lst, endstation, label, pgroup, output_file ):
    """
    Gather the lst files of the requested runs and write them to one file.

    run_lst     : run numbers to collect lst files for
    endstation  : endstation name, passed on to generate_lst_df
    label       : lst label, passed on to generate_lst_df
    pgroup      : pgroup name, passed on to generate_lst_df
    output_file : output name handed to concatenate_files
    """

    # the lst_pwd column of the collected df is the list of files to join
    concatenate_files(
        generate_lst_df( run_lst, endstation, label, pgroup ).lst_pwd,
        output_file
    )
|
|
|
|
def range_of_runs(arg):
    """
    argparse type for -r/--range: parse "first,last" into [ first, last ].

    arg : comma separated text, e.g. "45,50"

    returns a list holding the two integer limits

    raises argparse.ArgumentTypeError when there are not exactly two
    limits or when first is larger than last. A non-integer limit raises
    ValueError from int(), which argparse reports as an invalid value.
    """

    limits = [ int( value ) for value in arg.split( ',' ) ]

    # a single value or extra values would otherwise fail, or be ignored, later on
    if len( limits ) != 2:
        raise argparse.ArgumentTypeError(
            "expected two comma separated run numbers, e.g. 45,50 - got '{0}'".format( arg )
        )

    # a reversed range would silently select no runs at all
    if limits[0] > limits[1]:
        raise argparse.ArgumentTypeError(
            "first run of the range is larger than the last - got '{0}'".format( arg )
        )

    return limits
|
|
|
|
# command line entry point: parse the arguments, build the list of runs
# and hand it to main
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-r",
        "--range",
        help="list files in a range of run number to concatentate",
        type=range_of_runs
    )
    parser.add_argument(
        "runs",
        help="type in indivdual run numbers for list to be concantenated",
        type=str,
        nargs='*',
        default=[]
    )
    parser.add_argument(
        "-e",
        "--endstation",
        help="which endstation did you collect these data from, e.g., alvra or cristallina",
        type=str,
        default="cristallina"
    )
    # required - without it the text "None" ends up in the data path
    parser.add_argument(
        "-p",
        "--pgroup",
        help="pgroup the data are collected in",
        type=str,
        required=True
    )
    parser.add_argument(
        "-l",
        "--label",
        help="the label of the lst file, i.e. 'light', 'dark' or 'both'",
        type=str,
        required=True
    )
    # required - without it the output would be written to "None.lst"
    parser.add_argument(
        "-o",
        "--output",
        help="name of output file",
        type=str,
        required=True
    )
    args = parser.parse_args()

    # make continuous list from input range limits
    # ( named range_runs so that the builtin range is not shadowed )
    range_runs = []
    if args.range is not None:
        limits = args.range
        if len( limits ) != 2:
            parser.error( "-r/--range needs two comma separated run numbers, e.g. 45,50" )
        # both limits are included - run numbers are converted to strings
        range_runs = [ str( run_no ) for run_no in range( limits[0], limits[1] + 1 ) ]

    # concat range and run lists
    run_lst = range_runs + args.runs

    # nothing to do without at least one run
    if not run_lst:
        parser.error( "no runs given - use -r/--range and/or individual run numbers" )

    print( "appending {0} lst files from runs {1}".format( args.label, run_lst ) )

    # run main
    main( run_lst, args.endstation, args.label, args.pgroup, args.output )
    print( "done" )