diff --git a/reduction_tools/select_xtals.py b/reduction_tools/select_xtals.py new file mode 100644 index 0000000..58a75ed --- /dev/null +++ b/reduction_tools/select_xtals.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + +# author J.Beale + +""" +# aim +to select crystals from a stream file based on the uc distances and angles +based on a tolerance to them + +# usage +python select_crystals.py -s + -o + -c + -a + -t + +# output +.stream file with extracted crystals based on tolerance of uc +""" +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-s", + "--stream", + help="input stream file", + required=True, + type=str, + ) + parser.add_argument( + "-o", + "--output", + help="output stream file with selected crystals", + type=str, + default="selected_crystals.stream" + ) + parser.add_argument( + "-c", + "--cell", + help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces", + type=list_of_floats, + required=True + ) + parser.add_argument( + "-a", + "--angles", + help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces", + type=list_of_floats, + required=True + ) + parser.add_argument( + "-t", + "--tolerance", + help="tolerance for angles and cell lengths", + type=float, + default=0.25 + ) + + +# modules +import re +import argparse + +# Function to check if cell parameters are within the specified range +def is_within_range( params, target_cell_params, target_angles, tolerance ): + for i in range(3): + if abs(params[i] - target_cell_params[i]) > tolerance: + return False + for i in range(3, 6): + if abs(params[i] - target_angles[i - 3]) > tolerance: + return False + return True + +def extract_chunks( input_file, target_cell_params, target_angles, tolerance ): + + # setup + chunks = [] + collect_lines = False + + # Open the input file for reading + with open(input_file, 'r') as f: + for line in f: + # Check for the start condition + if line.strip() == '----- Begin chunk -----': + collect_lines = True + target_chunk = False + chunk_lines = [] + # Collect lines between start and end conditions + if collect_lines: + chunk_lines.append(line) + if line.startswith('Cell parameters'): + match = re.search(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line) + if match: + params = [float(match.group(i)) for i in range(1, 7)] + if is_within_range( params, target_cell_params, target_angles, tolerance ) == True: + target_chunk = True + # Check for the end condition + if line.strip() == '----- End chunk -----': + collect_lines = False # Stop collecting lines + if target_chunk == True: + chunks.append( chunk_lines ) + + return chunks + +def get_header( header, input_file ): + + if header == "geom": + start_keyword = "----- Begin geometry file -----" + end_keyword = "----- End geometry file -----" + if header == "cell": + start_keyword = "----- Begin unit cell -----" + end_keyword = "----- End unit cell -----" + + # setup + collect_lines = False + headers = [] + + # Open the input file for reading + with open(input_file, 'r') as f: + for line in f: + # Check for the start condition + if line.strip() == start_keyword: + collect_lines = True + headers_lines = [] + # Collect lines between start and end conditions + if collect_lines: + headers_lines.append(line) + # Check for the end condition + if line.strip() == end_keyword: + collect_lines = False # Stop collecting lines + headers.append(headers_lines) + + return headers[0] + +def write_to_file( geom, cell, sections, output_file ): + + # Write sections with matching cell parameters to the output file + with open(output_file, 'w') as out_file: + out_file.write('CrystFEL stream format 2.3\n') + out_file.write('Generated by CrystFEL 0.10.2\n') + out_file.writelines(geom) + out_file.writelines(cell) + for section in sections: + out_file.write('--- Begin crystal\n') + # out_file.writelines('--- Begin crystal\n' + section + "\n--- End crystal") + out_file.writelines(section) + out_file.write('--- End crystal\n') + out_file.write('----- End chunk -----\n') + +def main( input_file, output_file, target_cell_params, target_angles, tolerance): + + # get geom and cell file headers + print( "getting header info from .stream file" ) + geom = get_header( "geom", input_file ) + cell = get_header( "cell", input_file ) + print( "done" ) + + # get chunks with defined uc tolerances + print( "finding crystals with unit cells of around:" ) + print( "{0} nm".format( target_cell_params ) ) + print( "{0} deg".format( target_angles ) ) + chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance ) + print( "found {0} crystals".format( len(chunks) ) ) + + # write to file + print( "writing to new file" ) + write_to_file( geom, cell, chunks, output_file ) + print( "done" ) + +def list_of_floats(arg): + return list(map(float, arg.split(','))) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-s", + "--stream", + help="input stream file", + required=True, + type=str, + ) + parser.add_argument( + "-o", + "--output", + help="output stream file with selected crystals", + type=str, + default="selected_crystals.stream" + ) + parser.add_argument( + "-c", + "--cell", + help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces", + type=list_of_floats, + required=True + ) + parser.add_argument( + "-a", + "--angles", + help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces", + type=list_of_floats, + required=True + ) + parser.add_argument( + "-t", + "--tolerance", + help="tolerance for angles and cell lengths", + type=float, + default=0.25 + ) + args = parser.parse_args() + # convert cell into nm + cell = [ round( x/10,4 ) for x in args.cell ] + # run main + main( args.stream, args.output, cell, args.angles, args.tolerance ) + +