"""Summarise a detector-distance (clen) scan from a tree of .stream files.

Every ``*.stream`` file below a top directory is located, its clen is taken
from its path (``.../<clen>/<clen>.stream``), the "Cell parameters" lines in
the file are parsed, and the number of indexed cells plus the standard
deviation of the a, b and c axes are plotted against clen.
"""

# modules
import os

import numpy as np
import pandas as pd

try:
    # the script was written against the third-party "regex" package
    import regex as re
except ImportError:
    # the patterns below only use syntax that the stdlib module also accepts
    import re


# example - .../coarse_scan/0.115/0.115.stream -> "0.115"
_CLEN_PATTERN = re.compile(r"0\.\d+/(0\.\d+)\.stream")

# example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg
# \d+ (not \d) before the decimal point so axes of 10 nm or more are kept
_CELL_PATTERN = re.compile(
    r"Cell\sparameters\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\snm,"
    r"\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg"
)

_CELL_COLUMNS = ["a", "b", "c", "alpha", "beta", "gamma"]
_STREAM_COLUMNS = ["stream_pwd", "clen"]
_RESULT_COLUMNS = ["clen", "indexed", "std_a", "std_b", "std_c"]


def scrub_clen(stream_pwd):
    """Return the clen encoded in a stream path, or NaN if there is none.

    Args:
        stream_pwd: path of a stream file, e.g. ".../0.115/0.115.stream".

    Returns:
        The clen as a float, or ``np.nan`` when the path does not contain
        the ``<clen>/<clen>.stream`` pattern.
    """
    match = _CLEN_PATTERN.search(stream_pwd)
    if match is None:
        return np.nan
    return float(match.group(1))


def find_streams(top_dir):
    """Collect every ``*.stream`` file below ``top_dir`` with its clen.

    Args:
        top_dir: directory that is walked recursively.

    Returns:
        DataFrame with columns "stream_pwd" and "clen", sorted by clen and
        re-indexed from 0. The frame is empty (but still has both columns)
        when no stream file is found.
    """
    records = []
    for path, _dirs, files in os.walk(top_dir):
        for name in files:
            if name.endswith(".stream"):
                stream_pwd = os.path.join(path, name)
                records.append(
                    {"stream_pwd": stream_pwd, "clen": scrub_clen(stream_pwd)}
                )

    # naming the columns keeps sort_values from failing on an empty result
    stream_df = pd.DataFrame(records, columns=_STREAM_COLUMNS)
    return stream_df.sort_values(by="clen").reset_index(drop=True)


def scrub_us(stream):
    """Extract all unit-cell parameter sets from the text of a stream file.

    Args:
        stream: full text of a stream file.

    Returns:
        List of 6-tuples of strings (a, b, c, alpha, beta, gamma), one per
        "Cell parameters" line; an empty list when nothing matches.
    """
    return _CELL_PATTERN.findall(stream)


def _cell_stats(stream, clen):
    """Return the indexed count and a/b/c standard deviations of one stream."""
    cells_df = pd.DataFrame(scrub_us(stream), columns=_CELL_COLUMNS)
    cells_df = cells_df.astype(float)
    return {
        "clen": clen,
        "indexed": len(cells_df),
        "std_a": cells_df["a"].std(),
        "std_b": cells_df["b"].std(),
        "std_c": cells_df["c"].std(),
    }


def _collect_results(stream_df):
    """Read every stream listed in ``stream_df`` and tabulate its statistics."""
    stats = []
    for row in stream_df.itertuples(index=False):
        print("scrubbing stream for clen={0}".format(row.clen))
        # context manager so the file handle is closed after each stream
        with open(row.stream_pwd, "r") as handle:
            stream = handle.read()
        stats.append(_cell_stats(stream, row.clen))
    return pd.DataFrame(stats, columns=_RESULT_COLUMNS)


def _plot_results(results_df):
    """Plot indexed count (left axis) and a/b/c std. deviation (right axis)."""
    # imported here so the parsing helpers can be used without matplotlib
    import matplotlib.pyplot as plt

    fig, ax1 = plt.subplots()

    # indexed images plot
    color = "tab:red"
    ax1.set_xlabel("clen")
    ax1.set_ylabel("indexed", color=color)
    ax1.plot(results_df.clen, results_df.indexed, color=color)
    ax1.tick_params(axis="y", labelcolor=color)

    # instantiate a second axes that shares the same x-axis
    ax2 = ax1.twinx()

    # std_a, std_b and std_c share the axis colour, so they are told apart
    # by line style and a legend
    color = "tab:blue"
    ax2.set_ylabel("st.deviation", color=color)
    for column, style in (("std_a", "-"), ("std_b", "--"), ("std_c", ":")):
        ax2.plot(
            results_df.clen,
            results_df[column],
            color=color,
            linestyle=style,
            label=column,
        )
    ax2.tick_params(axis="y", labelcolor=color)
    ax2.legend()

    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()


def main(top_dir):
    """Find the stream files below ``top_dir``, analyse them and plot the result.

    Args:
        top_dir: directory holding the scan, one stream file per clen.
    """
    # find stream files from process directory
    print("finding stream files")
    stream_df = find_streams(top_dir)
    print("done")

    # loop through stream files and collect unit_cell information
    print("looping through stream files to collect unit cell, indexed information")
    results_df = _collect_results(stream_df)
    print("done")

    # plot results
    _plot_results(results_df)


# variables
top_dir = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/coarse_scan"


if __name__ == "__main__":
    main(top_dir)