# crystfel_tools/clen_tools/sample_crystfel-ls.py

#!/usr/bin/python

# author J.Beale

"""
# aim
take a list file made by the SwissFEL daq and a sample size, and write out a random
selection of that many images from the list

# usage
python sample_crystfel-ls.py -l <path-to-list-file> -s <sample-size>

# output
.lst file containing the required number of random image links - with naming convention
<sample-size>_<input-lst-name>.lst
"""

# modules
import pandas as pd
import os
import argparse

def h5_sample( lst, sample ):
    """
    Write a random sample of the entries of a list file to a new .lst file.

    Each line of the input is split into an h5 path and an image number on
    the whitespace + "//" separator. `sample` lines are picked at random, put
    back into the order they had in the input and written to
    "<sample>_<input-lst-name>.lst" (a relative path, so it lands in the
    current working directory) with the "//" prefix restored.

    Parameters
    ----------
    lst : str
        path to the input list file
    sample : int
        number of lines to select

    Raises
    ------
    ValueError
        raised by pandas when `sample` is larger than the number of lines read
    """

    # create sample of images from run
    # read h5.lst - note - splitting on the separator removes // from image column
    print( "reading SwissFEL ls file" )
    # scrub file name - drop the directory and the extension
    lst_name = os.path.splitext( os.path.basename( lst ) )[0]

    cols = [ "h5", "image" ]
    # raw string - \s has to reach the regex engine untouched; in a normal
    # string literal it is an invalid escape sequence and python warns about it
    sample_df = pd.read_csv( lst, sep=r"\s//", engine="python", names=cols )
    print( "done" )

    print( "taking {0} image sample of {1} image lst".format( sample, len( sample_df ) ) )
    # take defined sample
    sample_df = sample_df.sample( n=sample )

    # sort list - restores the line order of the input file
    sample_df = sample_df.sort_index()

    # re-add // to image column
    sample_df[ "image" ] = "//" + sample_df.image.astype( str )
    print( "done" )

    # write sample to file
    print( "writing to file" )
    sample_file = "{0}_{1}.lst".format( sample, lst_name )
    sample_df.to_csv( sample_file, sep=" ", index=False, header=False )
    print( "done" )


if __name__ == "__main__":
    # command line interface - both options are mandatory
    cli = argparse.ArgumentParser()
    cli.add_argument( "-l", "--list", type=str, required=True,
                      help="give the path to SwissFEL daq list file" )
    cli.add_argument( "-s", "--sample", type=int, required=True,
                      help="number of random sample images to select" )
    options = cli.parse_args()

    # write the random sample of the list file
    h5_sample( options.list, options.sample )