removed - did not work well

This commit is contained in:
Beale John Henry
2025-05-08 00:22:24 +02:00
parent cb25bbcb44
commit 5f23127a2f

View File

@@ -1,237 +0,0 @@
#!/usr/bin/env python3
# author J.Beale
"""
# aim
to select crystals from a stram file based on the uc distances and angles
based on a tolerance to them
# usage
python
# output
.stream file with extracted crystals based on tolerance of uc
"""
# modules
import re
import argparse
import pandas as pd
def get_tolerence(input_file):
stream = open(input_file, 'r').read()
cell_lst, xtals = scrub_cells( stream )
# res_df
cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
df = pd.DataFrame( cell_lst, columns=cols )
# convert all to floats
df = df.astype(float)
mean_a, std_a = round( df.a.mean()*10, 2 ), round( df.a.std()*10, 2 )
mean_b, std_b = round( df.b.mean()*10, 2 ), round( df.b.std()*10, 2 )
mean_c, std_c = round( df.c.mean()*10, 2 ), round( df.c.std()*10, 2 )
mean_alpha, std_alpha = round( df.alpha.mean(), 2 ), round( df.alpha.std(), 2 )
mean_beta, std_beta = round(df.beta.mean(), 2 ), round( df.beta.std(), 2 )
mean_gamma, std_gamma = round( df.gamma.mean(), 2 ), round( df.gamma.std(), 2 )
return [(std_a/10)*2,(std_b/10)*2,(std_c/10)*2,std_alpha*3,std_beta*3,std_gamma*3]
# Function to check if cell parameters are within the specified range
def is_within_range( params, target_cell_params, target_angles, tolerance_list ):
returner = False
within_range = []
for i in range(3):
if abs(params[i] - target_cell_params[i]) < tolerance_list[i]:
within_range.append(True)
else:
within_range.append(False)
for i in range(3, 6):
if abs(params[i] - target_angles[i - 3]) < tolerance_list[i]:
within_range.append(True)
else:
within_range.append(False)
if all(within_range):
returner = True
return returner
def scrub_cells( stream ):
# get uc values from stream file
# example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
# scrub clen and return - else nan
try:
pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
cell_lst = re.findall( pattern, stream )
xtals = len( cell_lst )
if AttributeError:
return cell_lst, xtals
except AttributeError:
return np.nan
def extract_chunks( input_file, target_cell_params, target_angles, tolerance_list ):
# setup
chunks = []
collect_lines = False
check_crystal=False
# Open the input file for reading
with open(input_file, 'r') as f:
for line in f:
# Check for the start condition
if line.strip() == '----- Begin chunk -----':
collect_lines = True
target_chunk = False
chunk_lines = []
results = []
check_crystal=False
crystal=0
# Collect lines between start and end conditions
# Check for the end condition
if collect_lines:
chunk_lines.append(line)
if line.startswith('--- Begin crystal'):
check_crystal=True
crystal_remover=False
crystal_lines=[]
elif line.startswith('--- End crystal'):
check_crystal=False
if crystal_remover:
chunk_lines = [l for l in chunk_lines if l not in crystal_lines]
if check_crystal:
crystal_lines.append(line)
if line.startswith('Cell parameters'):
cells = re.findall(r"(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg", line)
crystal+=1
if cells:
for cell in cells:
cell = list( cell )
param = [float(i) for i in cell]
result = is_within_range( param, target_cell_params, target_angles, tolerance_list )
#print(result)
results.append( result )
if all( results ):
#print(results)
target_chunk = True
else:
crystal_remover=True
#print('removing cell: ', param)
if line.strip() == '----- End chunk -----':
collect_lines = False # Stop collecting lines
#for line in chunk_lines:
if target_chunk == True:
chunks.append( chunk_lines )
return chunks
def get_header( header, input_file ):
if header == "geom":
start_keyword = "----- Begin geometry file -----"
end_keyword = "----- End geometry file -----"
if header == "cell":
start_keyword = "----- Begin unit cell -----"
end_keyword = "----- End unit cell -----"
# setup
collect_lines = False
headers = []
# Open the input file for reading
with open(input_file, 'r') as f:
for line in f:
# Check for the start condition
if line.strip() == start_keyword:
collect_lines = True
headers_lines = []
# Collect lines between start and end conditions
if collect_lines:
headers_lines.append(line)
# Check for the end condition
if line.strip() == end_keyword:
collect_lines = False # Stop collecting lines
headers.append(headers_lines)
return headers[0]
def write_to_file( geom, cell, sections, output_file ):
# Write sections with matching cell parameters to the output file
with open(output_file, 'w') as out_file:
out_file.write('CrystFEL stream format 2.3\n')
out_file.write('Generated by CrystFEL 0.10.2\n')
out_file.writelines(geom)
out_file.writelines(cell)
for section in sections:
out_file.write('--- Begin crystal\n')
# out_file.writelines('--- Begin crystal\n' + section + "\n--- End crystal")
out_file.writelines(section)
out_file.write('--- End crystal\n')
out_file.write('----- End chunk -----\n')
def main( input_file, output_file, target_cell_params, target_angles ):
tolerance_list = get_tolerence(input_file)
# get geom and cell file headers
print( "getting header info from .stream file" )
geom = get_header( "geom", input_file )
cell = get_header( "cell", input_file )
print( "done" )
# get chunks with defined uc tolerances
print( "finding crystals with unit cells of around:" )
print( "{0} nm".format( target_cell_params ) )
print( "{0} deg".format( target_angles ) )
chunks = extract_chunks( input_file, target_cell_params, target_angles, tolerance_list )
print( "found {0} crystals".format( len(chunks) ) )
# write to file
print( "writing to new file" )
write_to_file( geom, cell, chunks, output_file )
print( "done" )
def list_of_floats(arg):
return list(map(float, arg.split(',')))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-s",
"--stream",
help="input stream file",
required=True,
type=str,
)
parser.add_argument(
"-o",
"--output",
help="output stream file with selected crystals",
type=str,
default="selected_crystals.stream"
)
parser.add_argument(
"-c",
"--cell",
help="cell lengths to be found in nm, e.g. 73.4,68.7,75.6 - must be a float/no spaces",
type=list_of_floats,
required=True
)
parser.add_argument(
"-a",
"--angles",
help="anlges to be found, e.g., 90.0,105.6,90.0 - must be a float/no spaces",
type=list_of_floats,
required=True
)
args = parser.parse_args()
cell = [ round( x/10,4 ) for x in args.cell ]
# run main
main( args.stream, args.output, cell, args.angles )