#!/usr/bin/python

# author J.Beale

"""
# aim
Take a list file made by the SwissFEL daq and a sample size, and write out a
random selection of that many entries from the list.

# usage
python sample_crystfel-ls.py -l <path-to-list-file> -s <sample-size>

# output
.lst file, written to the current working directory, containing the requested
number of randomly chosen image links - with naming convention
<list-file-name>_<sample-size>.lst
"""

# modules
import pandas as pd
import os
import argparse


def h5_sample(lst, sample):
    """Write a random sample of the entries in a list file to a new .lst file.

    Every line of the input is split on the "<whitespace>//" separator into
    an h5 part and an image part. `sample` rows are drawn at random (without
    replacement, the pandas default), put back into their original file
    order, and written to "<basename of lst>_<sample>.lst" in the current
    working directory, one "<h5> //<image>" entry per line.

    Args:
        lst: path to the list file to sample from.
        sample: number of entries to draw.

    Raises:
        ValueError: if `sample` is larger than the number of entries read
            from `lst`.
    """
    print("reading SwissFEL ls file")
    # only the file name is used for the output, so the sample is written to
    # the current working directory rather than next to the input file
    lst_name = os.path.basename(lst)

    cols = ["h5", "image"]
    # raw string: "\s" is not a valid escape in a normal string literal.
    # The regex separator consumes the "//" in front of the image id.
    # dtype=str keeps the image ids exactly as written in the file instead of
    # letting pandas coerce them to int/float.
    entries_df = pd.read_csv(
        lst, sep=r"\s//", engine="python", names=cols, dtype=str
    )
    print("done")

    print("taking {0} image sample".format(sample))
    available = len(entries_df)
    if sample is not None and sample > available:
        # fail with an explanation instead of the generic pandas error
        raise ValueError(
            "cannot take a sample of {0} images from {1}: "
            "it only contains {2} entries".format(sample, lst, available)
        )

    # draw the random sample, then restore the original file order
    sample_df = entries_df.sample(sample).sort_index()

    # re-add the "//" that the separator removed from the image column
    sample_df["image"] = "//" + sample_df["image"].astype(str)
    print("done")

    print("writing to file")
    sample_file = "{0}_{1}.lst".format(lst_name, sample)
    sample_df.to_csv(sample_file, sep=" ", index=False, header=False)
    print("done")


def main():
    """Parse the command line options and write the sampled list file."""
    parser = argparse.ArgumentParser()
    # both options are required: without them None would be handed on to
    # os.path.basename / pandas and fail with an unhelpful error
    parser.add_argument(
        "-l",
        "--list",
        help="give the path to SwissFEL daq list file",
        type=str,
        required=True,
    )
    parser.add_argument(
        "-s",
        "--sample",
        help="number of random sample images to select",
        type=int,
        required=True,
    )
    args = parser.parse_args()
    h5_sample(args.list, args.sample)


if __name__ == "__main__":
    main()