#!/usr/bin/env python3

# author J.Beale, T.Mason

"""
# aim
given a regular array of crystfel folders with different detector distances
- naming convention = #.###/#.###.stream
the script generates a graph analysing the detector distance (clen) as a
function of the spread of the unit-cell constants

# usage
python update-geom-from-lab6.py <top_dir>

# output
prints two candidate clen values and shows plots of the unit-cell
standard deviations and number of indexed patterns against clen
"""

# modules
import os
import sys

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.signal import peak_widths, find_peaks

try:
    import regex as re
except ImportError:
    # the patterns below only use syntax that the stdlib engine also supports
    import re

# clen is encoded in the path as <clen>/<clen>.stream, e.g. 0.115/0.115.stream
_CLEN_PATTERN = re.compile(r"0\.\d+/(0\.\d+)\.stream")

# example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
_CELL_PATTERN = re.compile(
    r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
)

_CELL_COLUMNS = ["a", "b", "c", "alpha", "beta", "gamma"]


def scrub_clen(stream_pwd):
    """Return the clen encoded in a stream file path, or NaN if absent.

    Example path:
    /sf/cristallina/data/p20590/work/process/jhb/detector_refinement/coarse_scan/0.115/0.115.stream

    Args:
        stream_pwd: path of the stream file.

    Returns:
        The captured ``0.xxx`` file-name value as a float, or ``np.nan``
        when the path does not match ``0.xxx/0.xxx.stream``.
    """
    match = _CLEN_PATTERN.search(stream_pwd)
    if match is None:
        return np.nan
    return float(match.group(1))


def find_streams(top_dir):
    """Walk ``top_dir`` and list every ``*.stream`` file with its clen.

    Args:
        top_dir: directory searched recursively with ``os.walk``.

    Returns:
        DataFrame with columns ``stream_pwd`` and ``clen``, sorted by
        ``clen`` with a fresh 0..n-1 index. The frame is empty (but keeps
        both columns) when no stream file is found.
    """
    records = []
    for path, _dirs, files in os.walk(top_dir):
        for name in files:
            if name.endswith(".stream"):
                stream_pwd = os.path.join(path, name)
                records.append(
                    {"stream_pwd": stream_pwd, "clen": scrub_clen(stream_pwd)}
                )

    # build the frame once; explicit columns keep sort_values valid when
    # no stream files were found
    stream_df = pd.DataFrame(records, columns=["stream_pwd", "clen"])
    return stream_df.sort_values(by="clen").reset_index(drop=True)


def scrub_us(stream):
    """Extract every "Cell parameters" record from stream-file text.

    Args:
        stream: full text of a stream file.

    Returns:
        List of 6-tuples of strings ``(a, b, c, alpha, beta, gamma)``, one
        per matching line; an empty list when nothing matches.
    """
    return _CELL_PATTERN.findall(stream)


def _gauss(x, amplitude, mu, sigma):
    # gaussian model fitted to the indexed-pattern count along the scan
    return amplitude * np.exp(-(x - mu) ** 2 / (2.0 * sigma ** 2))


def _clen_at_min(df, col_name):
    # clen of the first row holding the minimum of col_name
    min_val = df[col_name].min()
    return df.loc[df[col_name] == min_val, "clen"].iloc[0]


def find_clen_values(stats_df, p0=(30, 0.111, 0.01)):
    """Print and return two clen estimates derived from the scan statistics.

    A gaussian is fitted to ``indexed`` against ``clen``. The scan is then
    restricted to the rows strictly inside the full width at half maximum
    of that fit, and the clen values minimising ``std_alpha``, ``std_beta``
    and ``std_gamma`` inside that window are averaged.

    Args:
        stats_df: DataFrame with columns ``clen``, ``indexed``,
            ``std_alpha``, ``std_beta`` and ``std_gamma``. It is not
            modified.
        p0: initial guess ``(amplitude, centre, sigma)`` passed to
            ``curve_fit``.

    Returns:
        Tuple ``(mid_gauss, suggested_clen)``: the clen at the peak of the
        fitted gaussian and the mean of the three minimising clens rounded
        to 4 decimals.

    Raises:
        ValueError: if the fitted curve has no interior peak or no scan
            point lies inside its half-maximum window.
    """
    clen = stats_df.clen.to_numpy(dtype=float)
    indexed = stats_df.indexed.to_numpy(dtype=float)

    parameters, _covariance = curve_fit(_gauss, clen, indexed, p0=p0)

    # evaluate the fit locally instead of adding a column to the caller's frame
    fitted = _gauss(clen, *parameters)

    # find peak centre
    peak_idx, _properties = find_peaks(fitted)
    if len(peak_idx) == 0:
        raise ValueError("gaussian fit of indexed patterns has no peak inside the scan")

    # find full peak width; the interpolated edges are in sample units
    _widths, _heights, left_ips, right_ips = peak_widths(
        fitted, peak_idx, rel_height=0.5
    )
    fwhm_str = int(round(left_ips[0], 0))
    fwhm_end = int(round(right_ips[0], 0))

    # translate width into motor values (by column name, not position)
    indexed_start = clen[fwhm_str]
    indexed_end = clen[fwhm_end]
    mid_gauss = clen[peak_idx[0]]

    # cut df to only include indexed patterns
    window = stats_df[
        (stats_df.clen < indexed_end) & (stats_df.clen > indexed_start)
    ]
    if window.empty:
        raise ValueError("no scan points inside the half-maximum window of the fit")

    # find possible clens
    min_clens = [
        _clen_at_min(window, col) for col in ("std_alpha", "std_beta", "std_gamma")
    ]
    suggested_clen = round(sum(min_clens) / 3, 4)

    print("middle of indexing gaussion fit of scan = {0}".format(mid_gauss))
    print("mean minimum of alpha, beta, gamma of scan = {0}".format(suggested_clen))

    return mid_gauss, suggested_clen


def _plot_indexed_and_std(stats_df, ax1, ax2, ylabel, label_color, series):
    # shared layout: indexed count on ax1, one std curve per entry of
    # series = ((column, line colour, legend label), ...) on ax2
    color = "tab:red"
    ax1.set_xlabel("clen")
    ax1.set_ylabel("indexed", color=color)
    ax1.plot(stats_df.clen, stats_df.indexed, color=color)
    ax1.tick_params(axis="y", labelcolor=color)

    ax2.set_ylabel(ylabel, color=label_color)
    ax2.tick_params(axis="y", labelcolor=label_color)
    for column, color, label in series:
        ax2.plot(stats_df.clen, stats_df[column], color=color, label=label)


def plot_indexed_std(stats_df, ax1, ax2):
    """Plot indexed count (ax1) and the a, b, c standard deviations (ax2).

    Args:
        stats_df: DataFrame with ``clen``, ``indexed``, ``std_a``,
            ``std_b`` and ``std_c`` columns.
        ax1: axes receiving the indexed-count curve.
        ax2: axes receiving the three standard-deviation curves.
    """
    _plot_indexed_and_std(
        stats_df,
        ax1,
        ax2,
        ylabel="a,b,c st.deviation",
        label_color="tab:blue",
        series=(
            ("std_a", "turquoise", "a"),
            ("std_b", "deepskyblue", "b"),
            ("std_c", "royalblue", "c"),
        ),
    )


def plot_indexed_std_alpha_beta_gamma(stats_df, ax1, ax2):
    """Plot indexed count (ax1) and the angle standard deviations (ax2).

    Args:
        stats_df: DataFrame with ``clen``, ``indexed``, ``std_alpha``,
            ``std_beta`` and ``std_gamma`` columns.
        ax1: axes receiving the indexed-count curve.
        ax2: axes receiving the three standard-deviation curves.
    """
    _plot_indexed_and_std(
        stats_df,
        ax1,
        ax2,
        ylabel="alpha, beta, gamma st.deviation",
        label_color="tab:green",
        series=(
            ("std_alpha", "yellow", "alpha"),
            ("std_beta", "green", "beta"),
            ("std_gamma", "darkolivegreen", "gamma"),
        ),
    )


def _stream_stats(stream, clen):
    # one results row: indexed count and std of each cell constant
    cells_df = pd.DataFrame(scrub_us(stream), columns=_CELL_COLUMNS).astype(float)
    stats = {"clen": clen, "indexed": len(cells_df)}
    for column in _CELL_COLUMNS:
        stats["std_" + column] = cells_df[column].std()
    return stats


def main(top_dir):
    """Analyse every stream file under ``top_dir`` and plot the results.

    Prints progress, prints the clen estimates from ``find_clen_values``
    and opens a matplotlib window with the two summary plots.

    Args:
        top_dir: directory containing the ``<clen>/<clen>.stream`` scan.
    """
    # imported here because only main plots; the parsing and fitting
    # helpers then stay importable without matplotlib
    import matplotlib.pyplot as plt

    # find stream files from process directory
    print("finding stream files")
    stream_df = find_streams(top_dir)
    print("done")

    if stream_df.empty:
        sys.exit("no .stream files found under {0}".format(top_dir))

    # loop through stream files and collect unit_cell information
    print("looping through stream files to collect unit cell, indexed information")
    records = []
    for row in stream_df.itertuples(index=False):
        # context manager so each file handle is closed straight away
        with open(row.stream_pwd, "r") as handle:
            stream = handle.read()
        records.append(_stream_stats(stream, row.clen))
    print("done")

    # built in one go, so the index is already 0..n-1
    stats_df = pd.DataFrame(records)

    # print clen for minimum alpha, beta, and gamma values
    find_clen_values(stats_df)

    # plot results
    fig, (ax1, ax3) = plt.subplots(1, 2)
    ax2 = ax1.twinx()
    ax4 = ax3.twinx()
    plot_indexed_std(stats_df, ax1, ax2)
    plot_indexed_std_alpha_beta_gamma(stats_df, ax3, ax4)
    fig.legend(loc="upper center")
    fig.tight_layout()
    plt.show()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: python {0} <top_dir>".format(os.path.basename(sys.argv[0])))
    main(sys.argv[1])