Files
crystfel_tools/clen_tools/distance-scan-analysis.py
2024-01-31 10:28:12 +01:00

264 lines
7.8 KiB
Python

#!/usr/bin/env python3
# author J.Beale, T.Mason
"""
# aim
given a regular array of crystfel folders with different detector distances
- naming convention = #.###/#.###.stream
script will generate a graph analysing the detector distance as a function of the unit-cell constants
# usage
python distance-scan-analysis.py <path-to-scan-folder>
# output
creates plots of the unit cell axis against clen
"""
# modules
import pandas as pd
import regex as re
import os
import numpy as np
import matplotlib.pyplot as plt
import sys
from scipy.optimize import curve_fit
from scipy.signal import peak_widths, find_peaks
def scrub_clen( stream_pwd ):
    """
    Extract the detector distance (clen) encoded in a stream file path.

    The path is expected to contain "<0.###>/<0.###>.stream",
    example - /sf/cristallina/data/p20590/work/process/jhb/detector_refinement/coarse_scan/0.115/0.115.stream
    The value taken from the file name (the captured group) is returned.

    stream_pwd : str - path of the stream file
    returns    : float clen, or np.nan if the path does not follow the naming convention
    """
    pattern = r"0\.\d+/(0\.\d+)\.stream"
    re_search = re.search( pattern, stream_pwd )
    # re.search gives None when nothing matches - check for that directly
    # instead of waiting for None.group() to raise AttributeError
    if re_search is None:
        return np.nan
    return float( re_search.group( 1 ) )
def find_streams( top_dir ):
    """
    Walk top_dir recursively and list every file ending in ".stream".

    top_dir : str - folder to search
    returns : df with columns "stream_pwd" (path of the stream file) and "clen"
              (value from scrub_clen, np.nan when the path does not match),
              sorted by clen with a fresh 0..n-1 index.
              An empty df with the same two columns is returned when no
              stream file is found.
    """
    # gather one record per stream file and build the df in a single step -
    # concatenating inside the loop copies the whole df again for every file
    records = []
    for path, dirs, files in os.walk( top_dir ):
        for name in files:
            if name.endswith( ".stream" ):
                # get stream pwd and scrub clen from it
                stream_pwd = os.path.join( path, name )
                records.append( { "stream_pwd" : stream_pwd,
                                  "clen" : scrub_clen( stream_pwd )
                                  } )
    # naming the columns explicitly keeps them present when nothing was found,
    # so the sort below cannot fail with a KeyError on an empty df
    stream_df = pd.DataFrame( records, columns=[ "stream_pwd", "clen" ] )
    # sort df based on clen
    stream_df = stream_df.sort_values( by="clen" )
    # reset df index
    stream_df = stream_df.reset_index( drop=True )
    # return df of streams and clens
    return stream_df
def scrub_us( stream ):
    """
    Collect every unit cell reported in the text of a stream file.

    example line - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg

    stream  : str - full text of a stream file
    returns : list with one tuple of six strings per matching line, holding the
              three lengths followed by the three angles in the order they are
              written. The list is empty when no line matches.
    """
    pattern = r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
    # findall returns an empty list when nothing matches, so there is no
    # failure case to catch here
    return re.findall( pattern, stream )
def find_clen_values( stats_df, p0=( 30, 0.111, 0.01 ) ):
    """
    Print two estimates of the best detector distance (clen) for a scan.

    1. the clen at the peak of a gaussian fitted to indexed vs clen
    2. the mean of the clens where std_alpha, std_beta and std_gamma are smallest,
       looking only at clens strictly inside the full width at half maximum
       of the fitted gaussian

    stats_df : df with columns "clen", "indexed", "std_alpha", "std_beta" and
               "std_gamma", one row per clen. Peak positions are found by row
               number, so rows are assumed to be ordered by clen.
               The df passed in is not modified.
    p0       : initial guess ( amplitude, centre, sigma ) for the gaussian fit
    returns  : None - the two estimates are printed. If the fitted curve has no
               peak inside the scan a message is printed instead.
    """
    def find_min_clen( col_name ):
        # clen of the first row that holds the smallest value in col_name
        min_val = stats_df[ col_name ].min()
        min_row = stats_df[ stats_df[ col_name ] == min_val ]
        return min_row[ "clen" ].values[0]

    def gauss( x, *p ):
        A, mu, sigma = p
        return A * np.exp( -( x - mu )**2 / ( 2.*sigma**2 ) )

    # work on a copy so the caller's df does not gain the "gaus" column
    stats_df = stats_df.copy()

    # fit gaussian to the number of indexed patterns
    parameters, _ = curve_fit( gauss, stats_df.clen, stats_df.indexed, p0=p0 )

    # Get the fitted curve
    stats_df[ "gaus" ] = gauss( stats_df.clen, *parameters )

    # find peak centre - find_peaks only reports maxima that have a lower
    # neighbour on both sides, so a fit peaking at the edge of the scan gives none
    peaks = find_peaks( stats_df.gaus.values )[0]
    if len( peaks ) == 0:
        print( "no peak found in gaussian fit of indexed patterns - cannot suggest a clen" )
        return

    # find full peak width - positions come back as fractional row numbers
    fwhm = peak_widths( stats_df.gaus.values, peaks, rel_height=0.5 )
    fwhm_str = int( round( fwhm[2][0], 0 ) )
    fwhm_end = int( round( fwhm[3][0], 0 ) )

    # translate width into motor values
    # read from the clen column by name so the column order does not matter
    indexed_start = stats_df.clen.iloc[ fwhm_str ]
    indexed_end = stats_df.clen.iloc[ fwhm_end ]
    mid_gauss = stats_df.clen.iloc[ peaks[0] ]

    # cut df to only include indexed patterns
    # find_min_clen reads stats_df from this scope, so it sees the cut df
    stats_df = stats_df[ ( stats_df.clen < indexed_end ) & ( stats_df.clen > indexed_start ) ]

    # calculate clen of the minimum values
    min_alpha_clen = find_min_clen( "std_alpha" )
    min_beta_clen = find_min_clen( "std_beta" )
    min_gamma_clen = find_min_clen( "std_gamma" )

    # find possible clens
    suggested_clen = ( min_alpha_clen + min_beta_clen + min_gamma_clen )/3
    suggested_clen = round( suggested_clen, 4 )

    print( "middle of indexing gaussion fit of scan = {0}".format( mid_gauss ) )
    print( "mean minimum of alpha, beta, gamma of scan = {0}".format( suggested_clen ) )
def plot_indexed_std( stats_df, ax1, ax2 ):
    """
    Draw indexed vs clen on ax1 and the std_a, std_b, std_c columns vs clen on ax2.

    stats_df : df with columns "clen", "indexed", "std_a", "std_b", "std_c"
    ax1      : axes for the indexed curve (red labels)
    ax2      : axes for the three standard deviation curves (blue labels);
               main() passes ax1.twinx() here
    """
    # indexed images plot
    indexed_colour = "tab:red"
    ax1.set_xlabel( "clen" )
    ax1.set_ylabel( "indexed", color=indexed_colour )
    ax1.plot( stats_df.clen, stats_df.indexed, color=indexed_colour )
    ax1.tick_params( axis="y", labelcolor=indexed_colour )

    # axis label and tick colour for the standard deviations
    label_colour = "tab:blue"
    ax2.set_ylabel( "a,b,c st.deviation", color=label_colour )
    ax2.tick_params( axis="y", labelcolor=label_colour )

    # one line per cell length: ( column, line colour, legend label )
    std_lines = ( ( "std_a", "turquoise", "a" ),
                  ( "std_b", "deepskyblue", "b" ),
                  ( "std_c", "royalblue", "c" ),
                  )
    for column, line_colour, legend_label in std_lines:
        ax2.plot( stats_df.clen, stats_df[ column ], color=line_colour, label=legend_label )
def plot_indexed_std_alpha_beta_gamma( stats_df, ax1, ax2 ):
    """
    Draw indexed vs clen on ax1 and the std_alpha, std_beta, std_gamma columns
    vs clen on ax2.

    stats_df : df with columns "clen", "indexed", "std_alpha", "std_beta", "std_gamma"
    ax1      : axes for the indexed curve (red labels)
    ax2      : axes for the three standard deviation curves (green labels);
               main() passes the twinx() of its first axes argument here
    """
    # indexed images plot
    indexed_colour = "tab:red"
    ax1.set_xlabel( "clen" )
    ax1.set_ylabel( "indexed", color=indexed_colour )
    ax1.plot( stats_df.clen, stats_df.indexed, color=indexed_colour )
    ax1.tick_params( axis="y", labelcolor=indexed_colour )

    # axis label and tick colour for the standard deviations
    label_colour = "tab:green"
    ax2.set_ylabel( "alpha, beta, gamma st.deviation", color=label_colour )
    ax2.tick_params( axis="y", labelcolor=label_colour )

    # one line per cell angle: ( column, line colour, legend label )
    std_lines = ( ( "std_alpha", "yellow", "alpha" ),
                  ( "std_beta", "green", "beta" ),
                  ( "std_gamma", "darkolivegreen", "gamma" ),
                  )
    for column, line_colour, legend_label in std_lines:
        ax2.plot( stats_df.clen, stats_df[ column ], color=line_colour, label=legend_label )
def main( top_dir ):
    """
    Analyse a detector distance scan and plot the result.

    For every stream file below top_dir the unit cells are collected, the number
    of cells (indexed) and the standard deviation of each cell parameter are
    calculated, the suggested clens are printed and two plots are shown:
    indexed + std of a, b, c against clen and indexed + std of alpha, beta,
    gamma against clen.

    top_dir : str - folder holding the scan
    returns : None
    """
    # find stream files from process directory
    print( "finding stream files" )
    stream_df = find_streams( top_dir )
    print( "done" )

    # nothing to analyse - stop here rather than fail in the fit later on
    if stream_df.empty:
        print( "no stream files found in {0}".format( top_dir ) )
        return

    cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]

    # one stats record per stream file - the results df is built once after the
    # loop, concatenating inside the loop copies the whole df on every pass
    stats = []

    # loop through stream files and collect unit_cell information
    print( "looping through stream files to collect unit cell, indexed information" )
    for index, row in stream_df.iterrows():

        stream_pwd, clen = row[ "stream_pwd" ], row[ "clen" ]

        # read stream file - the with block closes the file handle again
        with open( stream_pwd, "r" ) as stream_file:
            stream = stream_file.read()

        # scrub unit cell information and put cells in df
        cells = scrub_us( stream )
        cells_df = pd.DataFrame( cells, columns=cols ).astype( float )

        # calc stats - "clen" stays the first key, then indexed, then the
        # standard deviations in the order of cols
        record = { "clen" : clen,
                   "indexed" : len( cells_df ),
                   }
        for col in cols:
            record[ "std_" + col ] = cells_df[ col ].std()
        stats.append( record )

    print( "done" )

    # put stats in results df
    stats_df = pd.DataFrame( stats )

    # print clen for minimum alpha, beta, and gamma values
    find_clen_values( stats_df )

    # plot results
    fig, ( ax1, ax3 ) = plt.subplots( 1, 2 )
    ax2 = ax1.twinx()
    ax4 = ax3.twinx()
    plot_indexed_std( stats_df, ax1, ax2 )
    plot_indexed_std_alpha_beta_gamma( stats_df, ax3, ax4 )
    fig.legend( loc="upper center" )
    fig.tight_layout()
    plt.show()
if __name__ == "__main__":
    # the scan folder is the first command line argument - give a usage
    # message instead of an IndexError when it is missing
    if len( sys.argv ) < 2:
        sys.exit( "usage: python {0} <path-to-scan-folder>".format( sys.argv[0] ) )
    main( sys.argv[1] )