diff --git a/reduction_tools/select_xtals.py b/reduction_tools/select_xtals.py
index 58a75ed..bf6c0a5 100644
--- a/reduction_tools/select_xtals.py
+++ b/reduction_tools/select_xtals.py
@@ -4,78 +4,82 @@
"""
# aim
-to select crystals from a stream file based on the uc distances and angles
+to select crystals from a stram file based on the uc distances and angles
based on a tolerance to them
# usage
-python select_crystals.py -s
- -o
- -c
- -a
- -t
+python
# output
.stream file with extracted crystals based on tolerance of uc
"""
-if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument(
- "-s",
- "--stream",
- help="input stream file",
- required=True,
- type=str,
- )
- parser.add_argument(
- "-o",
- "--output",
- help="output stream file with selected crystals",
- type=str,
- default="selected_crystals.stream"
- )
- parser.add_argument(
- "-c",
- "--cell",
- help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
- type=list_of_floats,
- required=True
- )
- parser.add_argument(
- "-a",
- "--angles",
- help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces",
- type=list_of_floats,
- required=True
- )
- parser.add_argument(
- "-t",
- "--tolerance",
- help="tolerance for angles and cell lengths",
- type=float,
- default=0.25
- )
-
# modules
import re
import argparse
+import pandas as pd
+
+def get_tolerence(input_file):
+ stream = open(input_file, 'r').read()
+ cell_lst, xtals = scrub_cells( stream )
+ # res_df
+ cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
+ df = pd.DataFrame( cell_lst, columns=cols )
+
+ # convert all to floats
+ df = df.astype(float)
+ mean_a, std_a = round( df.a.mean()*10, 2 ), round( df.a.std()*10, 2 )
+ mean_b, std_b = round( df.b.mean()*10, 2 ), round( df.b.std()*10, 2 )
+ mean_c, std_c = round( df.c.mean()*10, 2 ), round( df.c.std()*10, 2 )
+ mean_alpha, std_alpha = round( df.alpha.mean(), 2 ), round( df.alpha.std(), 2 )
+ mean_beta, std_beta = round(df.beta.mean(), 2 ), round( df.beta.std(), 2 )
+ mean_gamma, std_gamma = round( df.gamma.mean(), 2 ), round( df.gamma.std(), 2 )
+
+ return [(std_a/10)*2,(std_b/10)*2,(std_c/10)*2,std_alpha*3,std_beta*3,std_gamma*3]
# Function to check if cell parameters are within the specified range
-def is_within_range( params, target_cell_params, target_angles, tolerance ):
- for i in range(3):
- if abs(params[i] - target_cell_params[i]) > tolerance:
- return False
- for i in range(3, 6):
- if abs(params[i] - target_angles[i - 3]) > tolerance:
- return False
- return True
+def is_within_range( params, target_cell_params, target_angles, tolerance_list ):
+
+ returner = False
+ within_range = []
-def extract_chunks( input_file, target_cell_params, target_angles, tolerance ):
+ for i in range(3):
+ if abs(params[i] - target_cell_params[i]) < tolerance_list[i]:
+ within_range.append(True)
+ else:
+ within_range.append(False)
+
+ for i in range(3, 6):
+ if abs(params[i] - target_angles[i - 3]) < tolerance_list[i]:
+ within_range.append(True)
+ else:
+ within_range.append(False)
+
+ if all(within_range):
+ returner = True
+
+ return returner
+
+def scrub_cells( stream ):
+
+ # get uc values from stream file
+ # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
+ # scrub clen and return - else nan
+ try:
+ pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
+ cell_lst = re.findall( pattern, stream )
+ xtals = len( cell_lst )
+ if AttributeError:
+ return cell_lst, xtals
+ except AttributeError:
+ return np.nan
+
+def extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ):
# setup
chunks = []
collect_lines = False
-
+ check_crystal=False
# Open the input file for reading
with open(input_file, 'r') as f:
for line in f:
@@ -84,18 +88,43 @@ def extract_chunks( input_file, target_cell_params, target_angles, tolerance ):
collect_lines = True
target_chunk = False
chunk_lines = []
+ results = []
+ check_crystal=False
+ crystal=0
# Collect lines between start and end conditions
+ # Check for the end condition
if collect_lines:
chunk_lines.append(line)
+ if line.startswith('--- Begin crystal'):
+ check_crystal=True
+ crystal_remover=False
+ crystal_lines=[]
+ elif line.startswith('--- End crystal'):
+ check_crystal=False
+ if crystal_remover:
+ chunk_lines = [l for l in chunk_lines if l not in crystal_lines]
+ if check_crystal:
+ crystal_lines.append(line)
if line.startswith('Cell parameters'):
- match = re.search(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line)
- if match:
- params = [float(match.group(i)) for i in range(1, 7)]
- if is_within_range( params, target_cell_params, target_angles, tolerance ) == True:
- target_chunk = True
- # Check for the end condition
+ cells = re.findall(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line)
+ crystal+=1
+ if cells:
+ for cell in cells:
+ cell = list( cell )
+ param = [float(i) for i in cell]
+ result = is_within_range( param, target_cell_params, target_angles, tolerance_list )
+ #print(result)
+ results.append( result )
+ if all( results ):
+ #print(results)
+ target_chunk = True
+ else:
+ crystal_remover=True
+ #print('removing cell: ', param)
+
if line.strip() == '----- End chunk -----':
collect_lines = False # Stop collecting lines
+ #for line in chunk_lines:
if target_chunk == True:
chunks.append( chunk_lines )
@@ -146,8 +175,8 @@ def write_to_file( geom, cell, sections, output_file ):
out_file.write('--- End crystal\n')
out_file.write('----- End chunk -----\n')
-def main( input_file, output_file, target_cell_params, target_angles, tolerance):
-
+def main( input_file, output_file, target_cell_params, target_angles ):
+ tolerance_list = get_tolerence(input_file)
# get geom and cell file headers
print( "getting header info from .stream file" )
geom = get_header( "geom", input_file )
@@ -158,7 +187,7 @@ def main( input_file, output_file, target_cell_params, target_angles, tolerance)
print( "finding crystals with unit cells of around:" )
print( "{0} nm".format( target_cell_params ) )
print( "{0} deg".format( target_angles ) )
- chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance )
+ chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance_list )
print( "found {0} crystals".format( len(chunks) ) )
# write to file
@@ -188,7 +217,7 @@ if __name__ == "__main__":
parser.add_argument(
"-c",
"--cell",
- help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
+ help="cell lengths to be found in nm, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
type=list_of_floats,
required=True
)
@@ -199,17 +228,10 @@ if __name__ == "__main__":
type=list_of_floats,
required=True
)
- parser.add_argument(
- "-t",
- "--tolerance",
- help="tolerance for angles and cell lengths",
- type=float,
- default=0.25
- )
args = parser.parse_args()
- # convert cell into nm
cell = [ round( x/10,4 ) for x in args.cell ]
# run main
- main( args.stream, args.output, cell, args.angles, args.tolerance )
+ main( args.stream, args.output, cell, args.angles )
+