#!/usr/bin/env python3 # author J.Beale """ # aim to select crystals from a stream file based on the uc distances and angles based on a tolerance to them # usage python select_crystals.py -s -o -c -a -t # output .stream file with extracted crystals based on tolerance of uc """ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "-s", "--stream", help="input stream file", required=True, type=str, ) parser.add_argument( "-o", "--output", help="output stream file with selected crystals", type=str, default="selected_crystals.stream" ) parser.add_argument( "-c", "--cell", help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces", type=list_of_floats, required=True ) parser.add_argument( "-a", "--angles", help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces", type=list_of_floats, required=True ) parser.add_argument( "-t", "--tolerance", help="tolerance for angles and cell lengths", type=float, default=0.25 ) # modules import re import argparse # Function to check if cell parameters are within the specified range def is_within_range( params, target_cell_params, target_angles, tolerance ): for i in range(3): if abs(params[i] - target_cell_params[i]) > tolerance: return False for i in range(3, 6): if abs(params[i] - target_angles[i - 3]) > tolerance: return False return True def extract_chunks( input_file, target_cell_params, target_angles, tolerance ): # setup chunks = [] collect_lines = False # Open the input file for reading with open(input_file, 'r') as f: for line in f: # Check for the start condition if line.strip() == '----- Begin chunk -----': collect_lines = True target_chunk = False chunk_lines = [] # Collect lines between start and end conditions if collect_lines: chunk_lines.append(line) if line.startswith('Cell parameters'): match = re.search(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line) if match: params = [float(match.group(i)) for i in range(1, 7)] if is_within_range( params, target_cell_params, target_angles, tolerance ) == True: target_chunk = True # Check for the end condition if line.strip() == '----- End chunk -----': collect_lines = False # Stop collecting lines if target_chunk == True: chunks.append( chunk_lines ) return chunks def get_header( header, input_file ): if header == "geom": start_keyword = "----- Begin geometry file -----" end_keyword = "----- End geometry file -----" if header == "cell": start_keyword = "----- Begin unit cell -----" end_keyword = "----- End unit cell -----" # setup collect_lines = False headers = [] # Open the input file for reading with open(input_file, 'r') as f: for line in f: # Check for the start condition if line.strip() == start_keyword: collect_lines = True headers_lines = [] # Collect lines between start and end conditions if collect_lines: headers_lines.append(line) # Check for the end condition if line.strip() == end_keyword: collect_lines = False # Stop collecting lines headers.append(headers_lines) return headers[0] def write_to_file( geom, cell, sections, output_file ): # Write sections with matching cell parameters to the output file with open(output_file, 'w') as out_file: out_file.write('CrystFEL stream format 2.3\n') out_file.write('Generated by CrystFEL 0.10.2\n') out_file.writelines(geom) out_file.writelines(cell) for section in sections: out_file.write('--- Begin crystal\n') # out_file.writelines('--- Begin crystal\n' + section + "\n--- End crystal") out_file.writelines(section) out_file.write('--- End crystal\n') out_file.write('----- End chunk -----\n') def main( input_file, output_file, target_cell_params, target_angles, tolerance): # get geom and cell file headers print( "getting header info from .stream file" ) geom = get_header( "geom", input_file ) cell = get_header( "cell", input_file ) print( "done" ) # get chunks with defined uc tolerances print( "finding crystals with unit cells of around:" ) print( "{0} nm".format( target_cell_params ) ) print( "{0} deg".format( target_angles ) ) chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance ) print( "found {0} crystals".format( len(chunks) ) ) # write to file print( "writing to new file" ) write_to_file( geom, cell, chunks, output_file ) print( "done" ) def list_of_floats(arg): return list(map(float, arg.split(','))) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "-s", "--stream", help="input stream file", required=True, type=str, ) parser.add_argument( "-o", "--output", help="output stream file with selected crystals", type=str, default="selected_crystals.stream" ) parser.add_argument( "-c", "--cell", help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces", type=list_of_floats, required=True ) parser.add_argument( "-a", "--angles", help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces", type=list_of_floats, required=True ) parser.add_argument( "-t", "--tolerance", help="tolerance for angles and cell lengths", type=float, default=0.25 ) args = parser.parse_args() # convert cell into nm cell = [ round( x/10,4 ) for x in args.cell ] # run main main( args.stream, args.output, cell, args.angles, args.tolerance )