From 1f24af6cea30f8ad9ef0a1871c802736fda0c055 Mon Sep 17 00:00:00 2001 From: Beale John Henry Date: Thu, 16 Mar 2023 16:27:40 +0100 Subject: [PATCH] initial scripts to analyse the detector distance --- distance-refinement.py | 155 ++++++++++++++++++++++++++++++++++++++ distance-scan-analysis.py | 155 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 distance-refinement.py create mode 100644 distance-scan-analysis.py diff --git a/distance-refinement.py b/distance-refinement.py new file mode 100644 index 0000000..f1af2e8 --- /dev/null +++ b/distance-refinement.py @@ -0,0 +1,155 @@ + + +# modules +import pandas as pd +import subprocess +import os, errno +import regex as re +import numpy as np + +def h5_sample( lst, sample ): + + # create sample of images from run + # read h5.lst - note - removes // from imade column + cols = [ "h5", "image" ] + sample_df = pd.read_csv( lst, sep="\s//", engine="python", names=cols ) + + # take defined sample + sample_df = sample_df.sample( sample ) + + # sort list + sample_df = sample_df.sort_index() + + # re-add // to image columm + sample_df[ "image" ] = "//" + sample_df.image.astype(str) + + # write sample to file + sample_file = "h5_{0}_sample.lst".format( sample ) + sample_df.to_csv( sample_file, sep=" ", index=False, header=False ) + + # return sample file name + return sample_file + +def geom_amend( lab6_geom_file, clen ): + + # read lab6 geom + lab6_geom = open( lab6_geom_file, "r" ) + + # use regex to find clen and replace with new + # clen example => clen = 0.1217 + clen_geom = re.sub( "clen = 0\.\d+", "clen = {0}".format( clen ), lab6_geom.read() ) + + # close lab6 geom file + lab6_geom.close() + + # write new clen_geom to file + clen_geom_file = "{0}.geom".format( clen ) + geom = open( clen_geom_file, "w" ) + geom.write( clen_geom ) + geom.close() + + # return clen_geom file name + return clen_geom_file + +def write_crystfel_run( clen, sample_h5_file, clen_geom_file, cell_file ): + + # crystfel file name + cryst_run_file = "{0}_cryst_run.sh".format( clen ) + + # write file + run_sh = open( cryst_run_file, "w" ) + run_sh.write( "#!/bin/sh\n\n" ) + run_sh.write( "module purge\n" ) + run_sh.write( "module load crystfel/0.10.2\n" ) +# run_sh.write( "module use MX unstable\n" ) +# run_sh.write( "module load gcc/4.8.5 hdf5_serial/1.10.3 xds/20210205 DirAx/1.17 pinkindexer/2021.08\n" ) +# run_sh.write( "module load xgandalf/2021.08 fdip/2021.08 mosflm/7.3.0 crystfel/0.10.0 HDF5_bitshuffle/2018.05 HDF5_LZ4/2018.05 ccp4\n\n" ) + run_sh.write( "indexamajig -i {0} \\\n".format( sample_h5_file ) ) + run_sh.write( " --output={0}.stream \\\n".format( clen ) ) + run_sh.write( " --geometry={0}\\\n".format( clen_geom_file ) ) + run_sh.write( " --pdb={0} \\\n".format( cell_file ) ) + run_sh.write( " --indexing=xgandalf-latt-cell --peaks=peakfinder8 \\\n" ) + run_sh.write( " --integration=rings-grad --tolerance=10.0,10.0,10.0,2,3,2 --threshold=10 --min-snr=5 --int-radius=2,3,6 \\\n" ) + run_sh.write( " -j 36 --no-multi --no-retry --check-peaks --max-res=3000 --min-pix-count=1 --local-bg-radius=4 --min-res=85\n\n" ) + run_sh.close() + + # make file executable + subprocess.call( [ "chmod", "+x", "{0}".format( cryst_run_file ) ] ) + + # return crystfel file name + return cryst_run_file + +def main( lst, sample, lab6_geom_file, centre_clen, cell_file, steps, scan_name, step_size ): + + # set current working directory + cwd = os.getcwd() + + # make sample list + print( "making {0} sample of images".format( sample ) ) + sample_h5 = h5_sample( lst, sample) + sample_h5_file = "{0}/{1}".format( cwd, sample_h5 ) + print( "done" ) + + # make list of clen steps above and below the central clen + print( "make clen array around {0}".format( centre_clen ) ) + step_range = step_size*steps + bottom_step = centre_clen-step_range/2 + top_step = bottom_step+step_range + step_range = np.arange( bottom_step, top_step, step_size ) + step_range = step_range.round( 4 ) # important - otherwise np gives your .99999999 instead of 1 somethimes + print( "done" ) + + # make directorys for results + print( "begin CrystFEL anaylsis of different clens" ) + + # loop to cycle through clen steps + for clen in step_range: + + # move back to cwd + os.chdir( cwd ) + + print( "processing clen = {0}".format( clen ) ) + # define process directory + proc_dir = "{0}/{1}/{2}".format( cwd, scan_name, clen ) + + # make process directory + try: + os.makedirs( proc_dir ) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + # move to process directory + os.chdir( proc_dir ) + + # make geom file + print( "amend .geom file" ) + clen_geom_file = geom_amend( lab6_geom_file, clen ) + print( "done" ) + + # make crystfel run file + print( "make crystfel file" ) + cryst_run_file = write_crystfel_run( clen, sample_h5_file, clen_geom_file, cell_file ) + print( "done" ) + + # run crystfel file + print( "run crystFEL" ) + #subprocess.call( [ "./{0}".format( cryst_run_file ) ] ) + subprocess.call( [ "sbatch", "-p", "day", "--cpus-per-task=32", "--", "./{0}".format( cryst_run_file ) ] ) + print( "done" ) + + #subprocess.call( [ "sbatch", "-p", "day", "--cpus-per-task=32", "--", "run{0}.sh".format( run.zfill(4) ) ] ) + +# variables +sample = 500 +lst = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/acq0001.JF17T16V01.dark.lst" +lab6_geom_file = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/8M_p-op_c-op_p20590.geom" +centre_clen = 0.122 # in m +cell_file = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/hewl.cell" +steps = 10 +scan_name = "fine_scan" +step_size = 0.0005 # m - 0.001 = coarse scan, 0.0005 = fine + +main( lst, sample, lab6_geom_file, centre_clen, cell_file, steps, scan_name, step_size ) + + diff --git a/distance-scan-analysis.py b/distance-scan-analysis.py new file mode 100644 index 0000000..7bcbadc --- /dev/null +++ b/distance-scan-analysis.py @@ -0,0 +1,155 @@ + + +# modules +import pandas as pd +import regex as re +import os +import numpy as np +import matplotlib.pyplot as plt + + +def scrub_clen( stream_pwd ): + + # get clen from stream name + # example - /sf/cristallina/data/p20590/work/process/jhb/detector_refinement/coarse_scan/0.115/0.115.stream + # scrub clen and return - else nan + try: + pattern = r"0\.\d+/(0\.\d+)\.stream" + re_search = re.search( pattern, stream_pwd ) + clen = re_search.group( 1 ) + if AttributeError: + return float( clen ) + except AttributeError: + return np.nan + +def find_streams( top_dir ): + + # create df for streams + stream_df = pd.DataFrame() + + # search for all files that end with .stream + + for path, dirs, files in os.walk( top_dir ): + for name in files: + if name.endswith( ".stream" ): + + # get stream pwd + stream_pwd = os.path.join( path, name ) + + # scrub clen from stream + clen = scrub_clen( stream_pwd ) + + # put clen and stream pwd into df + data = [ { "stream_pwd" : stream_pwd, + "clen" : clen + } ] + stream_df_1 = pd.DataFrame( data ) + stream_df = pd.concat( ( stream_df, stream_df_1 ) ) + + # sort df based on clen + stream_df = stream_df.sort_values( by="clen" ) + + # reset df index + stream_df = stream_df.reset_index( drop=True ) + + # return df of streams and clens + return stream_df + +def scrub_us( stream ): + + # get uc values from stream file + # example - Cell parameters 7.71784 7.78870 3.75250 nm, 90.19135 90.77553 90.19243 deg + # scrub clen and return - else nan + try: + pattern = r"Cell\sparameters\s(\d\.\d+)\s(\d\.\d+)\s(\d\.\d+)\snm,\s(\d+\.\d+)\s(\d+\.\d+)\s(\d+\.\d+)\sdeg" + cells = re.findall( pattern, stream ) + if AttributeError: + return cells + except AttributeError: + return np.nan + +def main( top_dir ): + + # find stream files from process directory + print( "finding stream files" ) + stream_df = find_streams( top_dir ) + print( "done" ) + + # making results df for unit cell and index no. + results_df = pd.DataFrame() + + # loop through stream files and collect unit_cell information + print( "looping through stream files to collect unit cell, indexed information" ) + for index, row in stream_df.iterrows(): + + stream_pwd, clen = row[ "stream_pwd" ], row[ "clen" ] + + # open stream file + print( "scrubbing stream for clen={0}".format( clen ) ) + stream = open( stream_pwd, "r" ).read() + + # scrub unit cell information + cells = scrub_us( stream ) + + # put cells in df + cols = [ "a", "b", "c", "alpha", "beta", "gamma" ] + cells_df = pd.DataFrame( cells, columns=cols ) + cells_df = cells_df.astype( float ) + + # calc stats + indexed = len( cells_df ) + std_a = cells_df.a.std() + std_b = cells_df.b.std() + std_c = cells_df.c.std() + + # put stats in results df + stats = [ { "clen" : clen, + "indexed" : indexed, + "std_a" : std_a, + "std_b" : std_b, + "std_c" : std_c + } ] + results_df_1 = pd.DataFrame( stats ) + results_df = pd.concat( ( results_df, results_df_1 ) ) + + print( "done" ) + + # reset index + results_df = results_df.reset_index( drop=True ) + + # plot results + fig, ax1 = plt.subplots() + + # indexed images plot + color = "tab:red" + ax1.set_xlabel( "clen" ) + ax1.set_ylabel( "indexed", color=color ) + ax1.plot( results_df.clen, results_df.indexed, color=color) + ax1.tick_params( axis="y", labelcolor=color) + + # instantiate a second axes that shares the same x-axis + ax2 = ax1.twinx() + + # std_a plot + color = "tab:blue" + ax2.set_ylabel( "st.deviation", color=color ) + ax2.plot( results_df.clen, results_df.std_a, color=color ) + ax2.tick_params(axis='y', labelcolor=color) + + # std_b plot + ax2.plot( results_df.clen, results_df.std_b, color=color ) + ax2.tick_params(axis='y', labelcolor=color) + + # std_b plot + ax2.plot( results_df.clen, results_df.std_c, color=color ) + ax2.tick_params(axis='y', labelcolor=color) + + fig.tight_layout() # otherwise the right y-label is slightly clipped + plt.show() + + +# variables +top_dir = "/sf/cristallina/data/p20590/work/process/jhb/detector_refinement/coarse_scan" + + +main( top_dir )