script to selectively remove unwanted crystals from a stream file based on their unit cell lengths and angles
This commit is contained in:
215
reduction_tools/select_xtals.py
Normal file
215
reduction_tools/select_xtals.py
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# author J.Beale
|
||||||
|
|
||||||
|
"""
|
||||||
|
# aim
|
||||||
|
to select crystals from a stream file based on the uc distances and angles
|
||||||
|
based on a tolerance to them
|
||||||
|
|
||||||
|
# usage
|
||||||
|
python select_crystals.py -s <input stream file>
|
||||||
|
-o <ouput file name>
|
||||||
|
-c <list of cell lengths, comma separted, no spaces>
|
||||||
|
-a <list of angles, comma separated, no spaces>
|
||||||
|
-t <tolerance on included crystals>
|
||||||
|
|
||||||
|
# output
|
||||||
|
.stream file with extracted crystals based on tolerance of uc
|
||||||
|
"""
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument(
|
||||||
|
"-s",
|
||||||
|
"--stream",
|
||||||
|
help="input stream file",
|
||||||
|
required=True,
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-o",
|
||||||
|
"--output",
|
||||||
|
help="output stream file with selected crystals",
|
||||||
|
type=str,
|
||||||
|
default="selected_crystals.stream"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-c",
|
||||||
|
"--cell",
|
||||||
|
help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
|
||||||
|
type=list_of_floats,
|
||||||
|
required=True
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-a",
|
||||||
|
"--angles",
|
||||||
|
help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces",
|
||||||
|
type=list_of_floats,
|
||||||
|
required=True
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-t",
|
||||||
|
"--tolerance",
|
||||||
|
help="tolerance for angles and cell lengths",
|
||||||
|
type=float,
|
||||||
|
default=0.25
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# modules
|
||||||
|
import re
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# Function to check if cell parameters are within the specified range
|
||||||
|
def is_within_range( params, target_cell_params, target_angles, tolerance ):
|
||||||
|
for i in range(3):
|
||||||
|
if abs(params[i] - target_cell_params[i]) > tolerance:
|
||||||
|
return False
|
||||||
|
for i in range(3, 6):
|
||||||
|
if abs(params[i] - target_angles[i - 3]) > tolerance:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def extract_chunks( input_file, target_cell_params, target_angles, tolerance ):
|
||||||
|
|
||||||
|
# setup
|
||||||
|
chunks = []
|
||||||
|
collect_lines = False
|
||||||
|
|
||||||
|
# Open the input file for reading
|
||||||
|
with open(input_file, 'r') as f:
|
||||||
|
for line in f:
|
||||||
|
# Check for the start condition
|
||||||
|
if line.strip() == '----- Begin chunk -----':
|
||||||
|
collect_lines = True
|
||||||
|
target_chunk = False
|
||||||
|
chunk_lines = []
|
||||||
|
# Collect lines between start and end conditions
|
||||||
|
if collect_lines:
|
||||||
|
chunk_lines.append(line)
|
||||||
|
if line.startswith('Cell parameters'):
|
||||||
|
match = re.search(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line)
|
||||||
|
if match:
|
||||||
|
params = [float(match.group(i)) for i in range(1, 7)]
|
||||||
|
if is_within_range( params, target_cell_params, target_angles, tolerance ) == True:
|
||||||
|
target_chunk = True
|
||||||
|
# Check for the end condition
|
||||||
|
if line.strip() == '----- End chunk -----':
|
||||||
|
collect_lines = False # Stop collecting lines
|
||||||
|
if target_chunk == True:
|
||||||
|
chunks.append( chunk_lines )
|
||||||
|
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
def get_header( header, input_file ):
|
||||||
|
|
||||||
|
if header == "geom":
|
||||||
|
start_keyword = "----- Begin geometry file -----"
|
||||||
|
end_keyword = "----- End geometry file -----"
|
||||||
|
if header == "cell":
|
||||||
|
start_keyword = "----- Begin unit cell -----"
|
||||||
|
end_keyword = "----- End unit cell -----"
|
||||||
|
|
||||||
|
# setup
|
||||||
|
collect_lines = False
|
||||||
|
headers = []
|
||||||
|
|
||||||
|
# Open the input file for reading
|
||||||
|
with open(input_file, 'r') as f:
|
||||||
|
for line in f:
|
||||||
|
# Check for the start condition
|
||||||
|
if line.strip() == start_keyword:
|
||||||
|
collect_lines = True
|
||||||
|
headers_lines = []
|
||||||
|
# Collect lines between start and end conditions
|
||||||
|
if collect_lines:
|
||||||
|
headers_lines.append(line)
|
||||||
|
# Check for the end condition
|
||||||
|
if line.strip() == end_keyword:
|
||||||
|
collect_lines = False # Stop collecting lines
|
||||||
|
headers.append(headers_lines)
|
||||||
|
|
||||||
|
return headers[0]
|
||||||
|
|
||||||
|
def write_to_file( geom, cell, sections, output_file ):
|
||||||
|
|
||||||
|
# Write sections with matching cell parameters to the output file
|
||||||
|
with open(output_file, 'w') as out_file:
|
||||||
|
out_file.write('CrystFEL stream format 2.3\n')
|
||||||
|
out_file.write('Generated by CrystFEL 0.10.2\n')
|
||||||
|
out_file.writelines(geom)
|
||||||
|
out_file.writelines(cell)
|
||||||
|
for section in sections:
|
||||||
|
out_file.write('--- Begin crystal\n')
|
||||||
|
# out_file.writelines('--- Begin crystal\n' + section + "\n--- End crystal")
|
||||||
|
out_file.writelines(section)
|
||||||
|
out_file.write('--- End crystal\n')
|
||||||
|
out_file.write('----- End chunk -----\n')
|
||||||
|
|
||||||
|
def main( input_file, output_file, target_cell_params, target_angles, tolerance):
|
||||||
|
|
||||||
|
# get geom and cell file headers
|
||||||
|
print( "getting header info from .stream file" )
|
||||||
|
geom = get_header( "geom", input_file )
|
||||||
|
cell = get_header( "cell", input_file )
|
||||||
|
print( "done" )
|
||||||
|
|
||||||
|
# get chunks with defined uc tolerances
|
||||||
|
print( "finding crystals with unit cells of around:" )
|
||||||
|
print( "{0} nm".format( target_cell_params ) )
|
||||||
|
print( "{0} deg".format( target_angles ) )
|
||||||
|
chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance )
|
||||||
|
print( "found {0} crystals".format( len(chunks) ) )
|
||||||
|
|
||||||
|
# write to file
|
||||||
|
print( "writing to new file" )
|
||||||
|
write_to_file( geom, cell, chunks, output_file )
|
||||||
|
print( "done" )
|
||||||
|
|
||||||
|
def list_of_floats(arg):
|
||||||
|
return list(map(float, arg.split(',')))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument(
|
||||||
|
"-s",
|
||||||
|
"--stream",
|
||||||
|
help="input stream file",
|
||||||
|
required=True,
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-o",
|
||||||
|
"--output",
|
||||||
|
help="output stream file with selected crystals",
|
||||||
|
type=str,
|
||||||
|
default="selected_crystals.stream"
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-c",
|
||||||
|
"--cell",
|
||||||
|
help="cell lengths to be found in A, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
|
||||||
|
type=list_of_floats,
|
||||||
|
required=True
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-a",
|
||||||
|
"--angles",
|
||||||
|
help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces",
|
||||||
|
type=list_of_floats,
|
||||||
|
required=True
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-t",
|
||||||
|
"--tolerance",
|
||||||
|
help="tolerance for angles and cell lengths",
|
||||||
|
type=float,
|
||||||
|
default=0.25
|
||||||
|
)
|
||||||
|
args = parser.parse_args()
|
||||||
|
# convert cell into nm
|
||||||
|
cell = [ round( x/10,4 ) for x in args.cell ]
|
||||||
|
# run main
|
||||||
|
main( args.stream, args.output, cell, args.angles, args.tolerance )
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user