Files
crystfel_tools/clen_tools/sample_crystfel-ls.py
Beale John Henry 19a5c91043 fixed .lst big
2025-01-24 12:40:37 +01:00

77 lines
1.8 KiB
Python

#!/usr/bin/python
# author J.Beale
"""
# aim
input a list file made by the SwissFEL daq and a sample size; get out a random selection
of that many image entries from the list
# usage
python sample_crystfel-ls.py -l <path-to-list-file> -s <sample-size>
# output
.lst file containing the requested number of randomly selected image entries, written to the
current working directory with the naming convention <sample-size>_<input-lst-name>.lst
"""
# modules
import pandas as pd
import os
import argparse
def h5_sample( lst, sample ):
    """
    Write a random sample of the entries of a SwissFEL list file to a new .lst file.

    Each line of the input is split on the regex "\\s//" (one whitespace character
    followed by "//") into an "h5" part and an "image" part. `sample` rows are drawn
    at random, put back into their original file order, and written space-separated
    to "<sample>_<input-lst-name>.lst" in the current working directory.

    Parameters
    ----------
    lst : str
        path to the input list file
    sample : int
        number of entries to select

    Raises
    ------
    ValueError
        if `sample` is larger than the number of entries in the list file
    """
    # read h5.lst - note - the separator removes the // from the image column
    print( "reading SwissFEL ls file" )

    # scrub file name - keep only the base name without its extension
    lst_name = os.path.splitext( os.path.basename( lst ) )[0]

    cols = [ "h5", "image" ]
    # raw string: "\s" is not a valid escape in a normal string literal
    # dtype=str: keep image ids exactly as written (no int/float conversion,
    # so e.g. leading zeros survive the round trip)
    sample_df = pd.read_csv( lst, sep=r"\s//", engine="python", names=cols, dtype=str )
    print( "done" )

    n_images = len( sample_df )
    print( "taking {0} image sample of {1} image lst".format( sample, n_images ) )

    # fail early with a clear message instead of pandas' generic sampling error
    if sample > n_images:
        raise ValueError(
            "cannot take a sample of {0} from a list of only {1} images".format( sample, n_images )
        )

    # take defined sample, then sort by the original row index to restore file order
    sample_df = sample_df.sample( sample ).sort_index()

    # re-add // to image column
    sample_df[ "image" ] = "//" + sample_df[ "image" ].astype( str )
    print( "done" )

    # write sample to file
    print( "writing to file" )
    sample_file = "{0}_{1}.lst".format( sample, lst_name )
    sample_df.to_csv( sample_file, sep=" ", index=False, header=False )
    print( "done" )
if __name__ == "__main__":
    # command-line interface - both options are required
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-l",
        "--list",
        help="give the path to SwissFEL daq list file",
        type=str,
        required=True
    )
    parser.add_argument(
        "-s",
        "--sample",
        help="number of random sample images to select",
        type=int,
        required=True
    )
    args = parser.parse_args()
    # write the random sample of the list file
    h5_sample( args.list, args.sample )