organised tools into directories - made 16M pyfai script work
This commit is contained in:
155
clen_tools/distance-scan-analysis.py
Normal file
155
clen_tools/distance-scan-analysis.py
Normal file
@@ -0,0 +1,155 @@
|
||||
|
||||
|
||||
# modules
|
||||
import pandas as pd
|
||||
import regex as re
|
||||
import os
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def scrub_clen( stream_pwd ):
    """Extract the clen value encoded in a stream file path.

    The path is expected to contain ``<clen>/<clen>.stream``, for example
    ``.../coarse_scan/0.115/0.115.stream``. The value returned is the one
    taken from the file name, not from the directory name.

    Args:
        stream_pwd: path of the stream file, as a string.

    Returns:
        The clen as a float, or ``np.nan`` when the path does not match
        the expected pattern.
    """
    # only values written as 0.<digits> are recognised, both for the
    # directory name and for the file name
    pattern = r"0\.\d+/(0\.\d+)\.stream"
    match = re.search( pattern, stream_pwd )

    # re.search returns None when nothing matches - test for that directly
    # rather than relying on the AttributeError raised by None.group()
    if match is None:
        return np.nan

    return float( match.group( 1 ) )
|
||||
|
||||
def find_streams( top_dir ):
    """Recursively collect every ``.stream`` file found below ``top_dir``.

    Args:
        top_dir: directory to walk.

    Returns:
        A DataFrame with the columns ``stream_pwd`` (path of the stream
        file) and ``clen`` (value returned by ``scrub_clen`` for that path),
        sorted by ``clen`` and re-indexed from 0. When no stream file is
        found the DataFrame is empty but still carries both columns.
    """
    # gather one record per stream file, then build the df in a single step
    # (concatenating a one-row df per file copies the whole df every time)
    records = []
    for path, _dirs, files in os.walk( top_dir ):
        for name in files:
            if not name.endswith( ".stream" ):
                continue

            # get stream pwd and scrub clen from it
            stream_pwd = os.path.join( path, name )
            records.append( { "stream_pwd" : stream_pwd,
                              "clen" : scrub_clen( stream_pwd )
                              } )

    # explicit columns - otherwise sorting by "clen" raises KeyError when
    # no stream file was found
    stream_df = pd.DataFrame( records, columns=[ "stream_pwd", "clen" ] )

    # sort df based on clen and reset df index
    stream_df = stream_df.sort_values( by="clen" )
    stream_df = stream_df.reset_index( drop=True )

    # return df of streams and clens
    return stream_df
|
||||
|
||||
def scrub_us( stream ):
    """Collect every unit cell reported in the text of a stream file.

    Lines of the following form are matched::

        Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg

    Args:
        stream: full text content of a stream file.

    Returns:
        A list with one tuple per matched line, each holding the six values
        ``(a, b, c, alpha, beta, gamma)`` as strings. The list is empty when
        no line matches.
    """
    # axis lengths accept any number of integer digits, same as the angles,
    # so cells with an axis of 10 nm or more are not silently skipped
    number = r"(\d+\.\d+)"
    pattern = ( r"Cell\sparameters\s" + number + r"\s" + number + r"\s" + number
                + r"\snm,\s" + number + r"\s" + number + r"\s" + number + r"\sdeg" )

    # findall returns an empty list when there is no match - it never raises
    # AttributeError, so no try/except is needed here
    return re.findall( pattern, stream )
|
||||
|
||||
def main( top_dir ):
    """Summarise the stream files below ``top_dir`` per clen and plot the result.

    For every stream file returned by ``find_streams`` the unit cells are
    collected with ``scrub_us``. The number of cells found and the standard
    deviation of the a, b and c axes are then plotted against clen, with the
    cell count on the left y-axis and the standard deviations on the right.

    Args:
        top_dir: directory searched for stream files.

    Returns:
        None. Progress is printed and the figure is shown with ``plt.show()``.
    """
    # find stream files from process directory
    print( "finding stream files" )
    stream_df = find_streams( top_dir )
    print( "done" )

    # loop through stream files and collect unit_cell information
    print( "looping through stream files to collect unit cell, indexed information" )
    cols = [ "a", "b", "c", "alpha", "beta", "gamma" ]
    stats = []
    for index, row in stream_df.iterrows():

        stream_pwd, clen = row[ "stream_pwd" ], row[ "clen" ]

        # read stream file - the with block closes the handle again
        print( "scrubbing stream for clen={0}".format( clen ) )
        with open( stream_pwd, "r" ) as stream_file:
            stream = stream_file.read()

        # scrub unit cell information and put cells in df
        cells = scrub_us( stream )
        cells_df = pd.DataFrame( cells, columns=cols )
        cells_df = cells_df.astype( float )

        # calc stats - indexed is the number of unit cells found in the stream
        stats.append( { "clen" : clen,
                        "indexed" : len( cells_df ),
                        "std_a" : cells_df.a.std(),
                        "std_b" : cells_df.b.std(),
                        "std_c" : cells_df.c.std()
                        } )

    print( "done" )

    # build results df in one go - explicit columns keep the attribute
    # access below valid even when no stream file was processed
    results_cols = [ "clen", "indexed", "std_a", "std_b", "std_c" ]
    results_df = pd.DataFrame( stats, columns=results_cols )

    # plot results
    fig, ax1 = plt.subplots()

    # indexed images plot
    color = "tab:red"
    ax1.set_xlabel( "clen" )
    ax1.set_ylabel( "indexed", color=color )
    ax1.plot( results_df.clen, results_df.indexed, color=color )
    ax1.tick_params( axis="y", labelcolor=color )

    # instantiate a second axes that shares the same x-axis
    ax2 = ax1.twinx()

    # std_a, std_b and std_c plots - all on the second axis, same colour
    color = "tab:blue"
    ax2.set_ylabel( "st.deviation", color=color )
    ax2.plot( results_df.clen, results_df.std_a, color=color )
    ax2.plot( results_df.clen, results_df.std_b, color=color )
    ax2.plot( results_df.clen, results_df.std_c, color=color )
    ax2.tick_params( axis="y", labelcolor=color )

    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()
|
||||
|
||||
|
||||
# variables
top_dir = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/coarse_scan"

# run the analysis only when this file is executed as a script, so that
# importing it does not start the scan and open a plot window
if __name__ == "__main__":
    main( top_dir )
|
||||
Reference in New Issue
Block a user