#!/usr/bin/python

# author J.Beale

"""
# aim
input a list file made by the SwissFEL daq and a sample number
get out a random selection of files of that number

# usage
python sample_crystfel-ls.py -l <path-to-list-file>
                             -s <sample>

# output
.lst file containing the required number of random image links
- written to the current working directory
- with naming convention <sample>_<list-file-name>.lst
"""

# modules
import pandas as pd
import os
import argparse


def h5_sample( lst: str, sample: int ) -> str:
    """
    Write a random sample of the entries of a list file to a new .lst file.

    Each input line is expected to hold an h5 file path and an image
    identifier separated by whitespace followed by "//"
    (e.g. "/path/run.h5 //12"). The sampled lines keep their original
    relative order and are written to "<sample>_<list-file-name>.lst" in
    the current working directory.

    Parameters
    ----------
    lst : path to the input list file
    sample : number of entries to select at random (without replacement)

    Returns
    -------
    name of the file the sample was written to

    Raises
    ------
    ValueError : if sample is negative or larger than the number of entries
    """

    # create sample of images from run
    # read h5.lst - note - removes // from image column
    print( "reading SwissFEL ls file" )

    # scrub file name - keep only the base name without its extension
    lst_name = os.path.splitext( os.path.basename( lst ) )[0]

    cols = [ "h5", "image" ]
    # raw string so that \s reaches the regex engine untouched
    # dtype=str stops pandas from re-typing the image identifiers
    # (e.g. "007" -> 7), so they are written back exactly as read
    sample_df = pd.read_csv( lst, sep=r"\s//", engine="python",
                             names=cols, dtype=str )
    print( "done" )

    total = len( sample_df )
    print( "taking {0} image sample of {1} image lst".format( sample, total ) )

    # fail early with a readable message rather than an opaque pandas error
    if sample < 0 or sample > total:
        raise ValueError(
            "cannot take a sample of {0} images from a list of {1} images".format( sample, total )
        )

    # take defined sample
    sample_df = sample_df.sample( sample )

    # sort list - restores the order the entries had in the input file
    sample_df = sample_df.sort_index()

    # re-add // to image column
    sample_df[ "image" ] = "//" + sample_df.image.astype( str )
    print( "done" )

    # write sample to file
    print( "writing to file" )
    sample_file = "{0}_{1}.lst".format( sample, lst_name )
    sample_df.to_csv( sample_file, sep=" ", index=False, header=False )
    print( "done" )

    return sample_file


def main():
    """Parse the command line arguments and run the sampler."""

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-l",
        "--list",
        help="give the path to SwissFEL daq list file",
        type=str,
        required=True
    )
    parser.add_argument(
        "-s",
        "--sample",
        help="number of random sample images to select",
        type=int,
        required=True
    )
    args = parser.parse_args()

    # run sampler
    h5_sample( args.list, args.sample )


if __name__ == "__main__":
    main()