diff --git a/reduction_tools/select_xtals.py b/reduction_tools/select_xtals.py deleted file mode 100644 index bf6c0a5..0000000 --- a/reduction_tools/select_xtals.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python3 - -# author J.Beale - -""" -# aim -to select crystals from a stram file based on the uc distances and angles -based on a tolerance to them - -# usage -python - -# output -.stream file with extracted crystals based on tolerance of uc -""" - -# modules -import re -import argparse -import pandas as pd - -def get_tolerence(input_file): - stream = open(input_file, 'r').read() - cell_lst, xtals = scrub_cells( stream ) - # res_df - cols = [ "a", "b", "c", "alpha", "beta", "gamma" ] - df = pd.DataFrame( cell_lst, columns=cols ) - - # convert all to floats - df = df.astype(float) - mean_a, std_a = round( df.a.mean()*10, 2 ), round( df.a.std()*10, 2 ) - mean_b, std_b = round( df.b.mean()*10, 2 ), round( df.b.std()*10, 2 ) - mean_c, std_c = round( df.c.mean()*10, 2 ), round( df.c.std()*10, 2 ) - mean_alpha, std_alpha = round( df.alpha.mean(), 2 ), round( df.alpha.std(), 2 ) - mean_beta, std_beta = round(df.beta.mean(), 2 ), round( df.beta.std(), 2 ) - mean_gamma, std_gamma = round( df.gamma.mean(), 2 ), round( df.gamma.std(), 2 ) - - return [(std_a/10)*2,(std_b/10)*2,(std_c/10)*2,std_alpha*3,std_beta*3,std_gamma*3] - -# Function to check if cell parameters are within the specified range -def is_within_range( params, target_cell_params, target_angles, tolerance_list ): - - returner = False - within_range = [] - - for i in range(3): - if abs(params[i] - target_cell_params[i]) < tolerance_list[i]: - within_range.append(True) - else: - within_range.append(False) - - for i in range(3, 6): - if abs(params[i] - target_angles[i - 3]) < tolerance_list[i]: - within_range.append(True) - else: - within_range.append(False) - - if all(within_range): - returner = True - - return returner - -def scrub_cells( stream ): - - # get uc values from stream file - # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg - # scrub clen and return - else nan - try: - pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg" - cell_lst = re.findall( pattern, stream ) - xtals = len( cell_lst ) - if AttributeError: - return cell_lst, xtals - except AttributeError: - return np.nan - -def extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ): - - # setup - chunks = [] - collect_lines = False - check_crystal=False - # Open the input file for reading - with open(input_file, 'r') as f: - for line in f: - # Check for the start condition - if line.strip() == '----- Begin chunk -----': - collect_lines = True - target_chunk = False - chunk_lines = [] - results = [] - check_crystal=False - crystal=0 - # Collect lines between start and end conditions - # Check for the end condition - if collect_lines: - chunk_lines.append(line) - if line.startswith('--- Begin crystal'): - check_crystal=True - crystal_remover=False - crystal_lines=[] - elif line.startswith('--- End crystal'): - check_crystal=False - if crystal_remover: - chunk_lines = [l for l in chunk_lines if l not in crystal_lines] - if check_crystal: - crystal_lines.append(line) - if line.startswith('Cell parameters'): - cells = re.findall(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line) - crystal+=1 - if cells: - for cell in cells: - cell = list( cell ) - param = [float(i) for i in cell] - result = is_within_range( param, target_cell_params, target_angles, tolerance_list ) - #print(result) - results.append( result ) - if all( results ): - #print(results) - target_chunk = True - else: - crystal_remover=True - #print('removing cell: ', param) - - if line.strip() == '----- End chunk -----': - collect_lines = False # Stop collecting lines - #for line in chunk_lines: - if target_chunk == True: - chunks.append( chunk_lines ) - - return chunks - -def get_header( header, input_file ): - - if header == "geom": - start_keyword = "----- Begin geometry file -----" - end_keyword = "----- End geometry file -----" - if header == "cell": - start_keyword = "----- Begin unit cell -----" - end_keyword = "----- End unit cell -----" - - # setup - collect_lines = False - headers = [] - - # Open the input file for reading - with open(input_file, 'r') as f: - for line in f: - # Check for the start condition - if line.strip() == start_keyword: - collect_lines = True - headers_lines = [] - # Collect lines between start and end conditions - if collect_lines: - headers_lines.append(line) - # Check for the end condition - if line.strip() == end_keyword: - collect_lines = False # Stop collecting lines - headers.append(headers_lines) - - return headers[0] - -def write_to_file( geom, cell, sections, output_file ): - - # Write sections with matching cell parameters to the output file - with open(output_file, 'w') as out_file: - out_file.write('CrystFEL stream format 2.3\n') - out_file.write('Generated by CrystFEL 0.10.2\n') - out_file.writelines(geom) - out_file.writelines(cell) - for section in sections: - out_file.write('--- Begin crystal\n') - # out_file.writelines('--- Begin crystal\n' + section + "\n--- End crystal") - out_file.writelines(section) - out_file.write('--- End crystal\n') - out_file.write('----- End chunk -----\n') - -def main( input_file, output_file, target_cell_params, target_angles ): - tolerance_list = get_tolerence(input_file) - # get geom and cell file headers - print( "getting header info from .stream file" ) - geom = get_header( "geom", input_file ) - cell = get_header( "cell", input_file ) - print( "done" ) - - # get chunks with defined uc tolerances - print( "finding crystals with unit cells of around:" ) - print( "{0} nm".format( target_cell_params ) ) - print( "{0} deg".format( target_angles ) ) - chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ) - print( "found {0} crystals".format( len(chunks) ) ) - - # write to file - print( "writing to new file" ) - write_to_file( geom, cell, chunks, output_file ) - print( "done" ) - -def list_of_floats(arg): - return list(map(float, arg.split(','))) - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-s", - "--stream", - help="input stream file", - required=True, - type=str, - ) - parser.add_argument( - "-o", - "--output", - help="output stream file with selected crystals", - type=str, - default="selected_crystals.stream" - ) - parser.add_argument( - "-c", - "--cell", - help="cell lengths to be found in nm, e.g. 73.4,68.7,75.6 - must be a float/no spaces", - type=list_of_floats, - required=True - ) - parser.add_argument( - "-a", - "--angles", - help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces", - type=list_of_floats, - required=True - ) - args = parser.parse_args() - cell = [ round( x/10,4 ) for x in args.cell ] - # run main - main( args.stream, args.output, cell, args.angles ) - - -