bug fix for multiple crystals in chunk
This commit is contained in:
@@ -4,78 +4,82 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
# aim
|
# aim
|
||||||
to select crystals from a stream file based on the uc distances and angles
|
to select crystals from a stram file based on the uc distances and angles
|
||||||
based on a tolerance to them
|
based on a tolerance to them
|
||||||
|
|
||||||
# usage
|
# usage
|
||||||
python select_crystals.py -s <input stream file>
|
python
|
||||||
-o <ouput file name>
|
|
||||||
-c <list of cell lengths, comma separted, no spaces>
|
|
||||||
-a <list of angles, comma separated, no spaces>
|
|
||||||
-t <tolerance on included crystals>
|
|
||||||
|
|
||||||
# output
|
# output
|
||||||
.stream file with extracted crystals based on tolerance of uc
|
.stream file with extracted crystals based on tolerance of uc
|
||||||
"""
|
"""
|
||||||
if __name__ == "__main__":
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument(
|
|
||||||
"-s",
|
|
||||||
"--stream",
|
|
||||||
help="input stream file",
|
|
||||||
required=True,
|
|
||||||
type=str,
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-o",
|
|
||||||
"--output",
|
|
||||||
help="output stream file with selected crystals",
|
|
||||||
type=str,
|
|
||||||
default="selected_crystals.stream"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-c",
|
|
||||||
"--cell",
|
|
||||||
help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
|
|
||||||
type=list_of_floats,
|
|
||||||
required=True
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-a",
|
|
||||||
"--angles",
|
|
||||||
help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces",
|
|
||||||
type=list_of_floats,
|
|
||||||
required=True
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-t",
|
|
||||||
"--tolerance",
|
|
||||||
help="tolerance for angles and cell lengths",
|
|
||||||
type=float,
|
|
||||||
default=0.25
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# modules
|
# modules
|
||||||
import re
|
import re
|
||||||
import argparse
|
import argparse
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def get_tolerence(input_file):
|
||||||
|
stream = open(input_file, 'r').read()
|
||||||
|
cell_lst, xtals = scrub_cells( stream )
|
||||||
|
# res_df
|
||||||
|
cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
|
||||||
|
df = pd.DataFrame( cell_lst, columns=cols )
|
||||||
|
|
||||||
|
# convert all to floats
|
||||||
|
df = df.astype(float)
|
||||||
|
mean_a, std_a = round( df.a.mean()*10, 2 ), round( df.a.std()*10, 2 )
|
||||||
|
mean_b, std_b = round( df.b.mean()*10, 2 ), round( df.b.std()*10, 2 )
|
||||||
|
mean_c, std_c = round( df.c.mean()*10, 2 ), round( df.c.std()*10, 2 )
|
||||||
|
mean_alpha, std_alpha = round( df.alpha.mean(), 2 ), round( df.alpha.std(), 2 )
|
||||||
|
mean_beta, std_beta = round(df.beta.mean(), 2 ), round( df.beta.std(), 2 )
|
||||||
|
mean_gamma, std_gamma = round( df.gamma.mean(), 2 ), round( df.gamma.std(), 2 )
|
||||||
|
|
||||||
|
return [(std_a/10)*2,(std_b/10)*2,(std_c/10)*2,std_alpha*3,std_beta*3,std_gamma*3]
|
||||||
|
|
||||||
# Function to check if cell parameters are within the specified range
|
# Function to check if cell parameters are within the specified range
|
||||||
def is_within_range( params, target_cell_params, target_angles, tolerance ):
|
def is_within_range( params, target_cell_params, target_angles, tolerance_list ):
|
||||||
for i in range(3):
|
|
||||||
if abs(params[i] - target_cell_params[i]) > tolerance:
|
returner = False
|
||||||
return False
|
within_range = []
|
||||||
for i in range(3, 6):
|
|
||||||
if abs(params[i] - target_angles[i - 3]) > tolerance:
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
def extract_chunks( input_file, target_cell_params, target_angles, tolerance ):
|
for i in range(3):
|
||||||
|
if abs(params[i] - target_cell_params[i]) < tolerance_list[i]:
|
||||||
|
within_range.append(True)
|
||||||
|
else:
|
||||||
|
within_range.append(False)
|
||||||
|
|
||||||
|
for i in range(3, 6):
|
||||||
|
if abs(params[i] - target_angles[i - 3]) < tolerance_list[i]:
|
||||||
|
within_range.append(True)
|
||||||
|
else:
|
||||||
|
within_range.append(False)
|
||||||
|
|
||||||
|
if all(within_range):
|
||||||
|
returner = True
|
||||||
|
|
||||||
|
return returner
|
||||||
|
|
||||||
|
def scrub_cells( stream ):
|
||||||
|
|
||||||
|
# get uc values from stream file
|
||||||
|
# example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
|
||||||
|
# scrub clen and return - else nan
|
||||||
|
try:
|
||||||
|
pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
|
||||||
|
cell_lst = re.findall( pattern, stream )
|
||||||
|
xtals = len( cell_lst )
|
||||||
|
if AttributeError:
|
||||||
|
return cell_lst, xtals
|
||||||
|
except AttributeError:
|
||||||
|
return np.nan
|
||||||
|
|
||||||
|
def extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ):
|
||||||
|
|
||||||
# setup
|
# setup
|
||||||
chunks = []
|
chunks = []
|
||||||
collect_lines = False
|
collect_lines = False
|
||||||
|
check_crystal=False
|
||||||
# Open the input file for reading
|
# Open the input file for reading
|
||||||
with open(input_file, 'r') as f:
|
with open(input_file, 'r') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
@@ -84,18 +88,43 @@ def extract_chunks( input_file, target_cell_params, target_angles, tolerance ):
|
|||||||
collect_lines = True
|
collect_lines = True
|
||||||
target_chunk = False
|
target_chunk = False
|
||||||
chunk_lines = []
|
chunk_lines = []
|
||||||
|
results = []
|
||||||
|
check_crystal=False
|
||||||
|
crystal=0
|
||||||
# Collect lines between start and end conditions
|
# Collect lines between start and end conditions
|
||||||
|
# Check for the end condition
|
||||||
if collect_lines:
|
if collect_lines:
|
||||||
chunk_lines.append(line)
|
chunk_lines.append(line)
|
||||||
|
if line.startswith('--- Begin crystal'):
|
||||||
|
check_crystal=True
|
||||||
|
crystal_remover=False
|
||||||
|
crystal_lines=[]
|
||||||
|
elif line.startswith('--- End crystal'):
|
||||||
|
check_crystal=False
|
||||||
|
if crystal_remover:
|
||||||
|
chunk_lines = [l for l in chunk_lines if l not in crystal_lines]
|
||||||
|
if check_crystal:
|
||||||
|
crystal_lines.append(line)
|
||||||
if line.startswith('Cell parameters'):
|
if line.startswith('Cell parameters'):
|
||||||
match = re.search(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line)
|
cells = re.findall(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line)
|
||||||
if match:
|
crystal+=1
|
||||||
params = [float(match.group(i)) for i in range(1, 7)]
|
if cells:
|
||||||
if is_within_range( params, target_cell_params, target_angles, tolerance ) == True:
|
for cell in cells:
|
||||||
target_chunk = True
|
cell = list( cell )
|
||||||
# Check for the end condition
|
param = [float(i) for i in cell]
|
||||||
|
result = is_within_range( param, target_cell_params, target_angles, tolerance_list )
|
||||||
|
#print(result)
|
||||||
|
results.append( result )
|
||||||
|
if all( results ):
|
||||||
|
#print(results)
|
||||||
|
target_chunk = True
|
||||||
|
else:
|
||||||
|
crystal_remover=True
|
||||||
|
#print('removing cell: ', param)
|
||||||
|
|
||||||
if line.strip() == '----- End chunk -----':
|
if line.strip() == '----- End chunk -----':
|
||||||
collect_lines = False # Stop collecting lines
|
collect_lines = False # Stop collecting lines
|
||||||
|
#for line in chunk_lines:
|
||||||
if target_chunk == True:
|
if target_chunk == True:
|
||||||
chunks.append( chunk_lines )
|
chunks.append( chunk_lines )
|
||||||
|
|
||||||
@@ -146,8 +175,8 @@ def write_to_file( geom, cell, sections, output_file ):
|
|||||||
out_file.write('--- End crystal\n')
|
out_file.write('--- End crystal\n')
|
||||||
out_file.write('----- End chunk -----\n')
|
out_file.write('----- End chunk -----\n')
|
||||||
|
|
||||||
def main( input_file, output_file, target_cell_params, target_angles, tolerance):
|
def main( input_file, output_file, target_cell_params, target_angles ):
|
||||||
|
tolerance_list = get_tolerence(input_file)
|
||||||
# get geom and cell file headers
|
# get geom and cell file headers
|
||||||
print( "getting header info from .stream file" )
|
print( "getting header info from .stream file" )
|
||||||
geom = get_header( "geom", input_file )
|
geom = get_header( "geom", input_file )
|
||||||
@@ -158,7 +187,7 @@ def main( input_file, output_file, target_cell_params, target_angles, tolerance)
|
|||||||
print( "finding crystals with unit cells of around:" )
|
print( "finding crystals with unit cells of around:" )
|
||||||
print( "{0} nm".format( target_cell_params ) )
|
print( "{0} nm".format( target_cell_params ) )
|
||||||
print( "{0} deg".format( target_angles ) )
|
print( "{0} deg".format( target_angles ) )
|
||||||
chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance )
|
chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance_list )
|
||||||
print( "found {0} crystals".format( len(chunks) ) )
|
print( "found {0} crystals".format( len(chunks) ) )
|
||||||
|
|
||||||
# write to file
|
# write to file
|
||||||
@@ -188,7 +217,7 @@ if __name__ == "__main__":
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-c",
|
"-c",
|
||||||
"--cell",
|
"--cell",
|
||||||
help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
|
help="cell lengths to be found in nm, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
|
||||||
type=list_of_floats,
|
type=list_of_floats,
|
||||||
required=True
|
required=True
|
||||||
)
|
)
|
||||||
@@ -199,17 +228,10 @@ if __name__ == "__main__":
|
|||||||
type=list_of_floats,
|
type=list_of_floats,
|
||||||
required=True
|
required=True
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"-t",
|
|
||||||
"--tolerance",
|
|
||||||
help="tolerance for angles and cell lengths",
|
|
||||||
type=float,
|
|
||||||
default=0.25
|
|
||||||
)
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
# convert cell into nm
|
|
||||||
cell = [ round( x/10,4 ) for x in args.cell ]
|
cell = [ round( x/10,4 ) for x in args.cell ]
|
||||||
# run main
|
# run main
|
||||||
main( args.stream, args.output, cell, args.angles, args.tolerance )
|
main( args.stream, args.output, cell, args.angles )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user