From 9fd4432d75dfd6e0d76d80bc6c65af3e51fdf19a Mon Sep 17 00:00:00 2001 From: Beale John Henry Date: Sun, 10 Dec 2023 15:50:34 +0100 Subject: [PATCH] bug fix for multiple crystals in chunk --- reduction_tools/select_xtals.py | 168 ++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 73 deletions(-) diff --git a/reduction_tools/select_xtals.py b/reduction_tools/select_xtals.py index 58a75ed..bf6c0a5 100644 --- a/reduction_tools/select_xtals.py +++ b/reduction_tools/select_xtals.py @@ -4,78 +4,82 @@ """ # aim -to select crystals from a stream file based on the uc distances and angles +to select crystals from a stram file based on the uc distances and angles based on a tolerance to them # usage -python select_crystals.py -s - -o - -c - -a - -t +python # output .stream file with extracted crystals based on tolerance of uc """ -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "-s", - "--stream", - help="input stream file", - required=True, - type=str, - ) - parser.add_argument( - "-o", - "--output", - help="output stream file with selected crystals", - type=str, - default="selected_crystals.stream" - ) - parser.add_argument( - "-c", - "--cell", - help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces", - type=list_of_floats, - required=True - ) - parser.add_argument( - "-a", - "--angles", - help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces", - type=list_of_floats, - required=True - ) - parser.add_argument( - "-t", - "--tolerance", - help="tolerance for angles and cell lengths", - type=float, - default=0.25 - ) - # modules import re import argparse +import pandas as pd + +def get_tolerence(input_file): + stream = open(input_file, 'r').read() + cell_lst, xtals = scrub_cells( stream ) + # res_df + cols = [ "a", "b", "c", "alpha", "beta", "gamma" ] + df = pd.DataFrame( cell_lst, columns=cols ) + + # convert all to floats + df = df.astype(float) + mean_a, std_a = round( df.a.mean()*10, 2 ), round( df.a.std()*10, 2 ) + mean_b, std_b = round( df.b.mean()*10, 2 ), round( df.b.std()*10, 2 ) + mean_c, std_c = round( df.c.mean()*10, 2 ), round( df.c.std()*10, 2 ) + mean_alpha, std_alpha = round( df.alpha.mean(), 2 ), round( df.alpha.std(), 2 ) + mean_beta, std_beta = round(df.beta.mean(), 2 ), round( df.beta.std(), 2 ) + mean_gamma, std_gamma = round( df.gamma.mean(), 2 ), round( df.gamma.std(), 2 ) + + return [(std_a/10)*2,(std_b/10)*2,(std_c/10)*2,std_alpha*3,std_beta*3,std_gamma*3] # Function to check if cell parameters are within the specified range -def is_within_range( params, target_cell_params, target_angles, tolerance ): - for i in range(3): - if abs(params[i] - target_cell_params[i]) > tolerance: - return False - for i in range(3, 6): - if abs(params[i] - target_angles[i - 3]) > tolerance: - return False - return True +def is_within_range( params, target_cell_params, target_angles, tolerance_list ): + + returner = False + within_range = [] -def extract_chunks( input_file, target_cell_params, target_angles, tolerance ): + for i in range(3): + if abs(params[i] - target_cell_params[i]) < tolerance_list[i]: + within_range.append(True) + else: + within_range.append(False) + + for i in range(3, 6): + if abs(params[i] - target_angles[i - 3]) < tolerance_list[i]: + within_range.append(True) + else: + within_range.append(False) + + if all(within_range): + returner = True + + return returner + +def scrub_cells( stream ): + + # get uc values from stream file + # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg + # scrub clen and return - else nan + try: + pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg" + cell_lst = re.findall( pattern, stream ) + xtals = len( cell_lst ) + if AttributeError: + return cell_lst, xtals + except AttributeError: + return np.nan + +def extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ): # setup chunks = [] collect_lines = False - + check_crystal=False # Open the input file for reading with open(input_file, 'r') as f: for line in f: @@ -84,18 +88,43 @@ def extract_chunks( input_file, target_cell_params, target_angles, tolerance ): collect_lines = True target_chunk = False chunk_lines = [] + results = [] + check_crystal=False + crystal=0 # Collect lines between start and end conditions + # Check for the end condition if collect_lines: chunk_lines.append(line) + if line.startswith('--- Begin crystal'): + check_crystal=True + crystal_remover=False + crystal_lines=[] + elif line.startswith('--- End crystal'): + check_crystal=False + if crystal_remover: + chunk_lines = [l for l in chunk_lines if l not in crystal_lines] + if check_crystal: + crystal_lines.append(line) if line.startswith('Cell parameters'): - match = re.search(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line) - if match: - params = [float(match.group(i)) for i in range(1, 7)] - if is_within_range( params, target_cell_params, target_angles, tolerance ) == True: - target_chunk = True - # Check for the end condition + cells = re.findall(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line) + crystal+=1 + if cells: + for cell in cells: + cell = list( cell ) + param = [float(i) for i in cell] + result = is_within_range( param, target_cell_params, target_angles, tolerance_list ) + #print(result) + results.append( result ) + if all( results ): + #print(results) + target_chunk = True + else: + crystal_remover=True + #print('removing cell: ', param) + if line.strip() == '----- End chunk -----': collect_lines = False # Stop collecting lines + #for line in chunk_lines: if target_chunk == True: chunks.append( chunk_lines ) @@ -146,8 +175,8 @@ def write_to_file( geom, cell, sections, output_file ): out_file.write('--- End crystal\n') out_file.write('----- End chunk -----\n') -def main( input_file, output_file, target_cell_params, target_angles, tolerance): - +def main( input_file, output_file, target_cell_params, target_angles ): + tolerance_list = get_tolerence(input_file) # get geom and cell file headers print( "getting header info from .stream file" ) geom = get_header( "geom", input_file ) @@ -158,7 +187,7 @@ def main( input_file, output_file, target_cell_params, target_angles, tolerance) print( "finding crystals with unit cells of around:" ) print( "{0} nm".format( target_cell_params ) ) print( "{0} deg".format( target_angles ) ) - chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance ) + chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ) print( "found {0} crystals".format( len(chunks) ) ) # write to file @@ -188,7 +217,7 @@ if __name__ == "__main__": parser.add_argument( "-c", "--cell", - help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces", + help="cell lengths to be found in nm, e.g. 73.4,68.7,75.6 - must be a float/no spaces", type=list_of_floats, required=True ) @@ -199,17 +228,10 @@ if __name__ == "__main__": type=list_of_floats, required=True ) - parser.add_argument( - "-t", - "--tolerance", - help="tolerance for angles and cell lengths", - type=float, - default=0.25 - ) args = parser.parse_args() - # convert cell into nm cell = [ round( x/10,4 ) for x in args.cell ] # run main - main( args.stream, args.output, cell, args.angles, args.tolerance ) + main( args.stream, args.output, cell, args.angles ) +