# --- repository file-view header (scrape residue, kept as comments) ---
# Files
# clara/src/results.py
# 2024-06-20 16:48:20 +02:00
#
# 362 lines
# 15 KiB
# Python
# Author Assman G. (2023)
# execute with
# /das/home/assman_g/p19370/vespa/2023-03-30/something_mergeID/Lyso_12p4keV_1kHz_150mm_run000026_data_000010/Lyso_12p4keV_1kHz_150mm_run000026_data_000010.th6.snr4.0.mpixco1.stream
import contextlib
import json
import sys
import time
# import crystfelparser.crystfelparser as crys # acknowledge P.Gasparotto
import numpy as np
from loguru import logger
import MxdbVdpTools
# import matplotlib.pyplot as plt
# Monthly log file (one file per year+month via strftime); loguru appends
# INFO-and-above records and rotates the file once it reaches 100 MB.
LOG_FILENAME = time.strftime("/sf/cristallina/applications/mx/clara_tools/log/clara_%Y%m.log")
logger.add(LOG_FILENAME, level="INFO", rotation="100MB")
# ========== functions ================
def main():
    """
    Smoke-test entry point: print a greeting and return nothing.

    :return: None
    """
    # NOTE: the redundant trailing ``pass`` of the original was removed;
    # the printed string is kept byte-identical (incl. trailing space).
    print("hello fish ")
def stream_to_dictionary(streamfile):
    """
    Parse a CrystFEL stream file into a dictionary of frames.

    Generates one dictionary per chunk ("frame") with all the peak-search
    parameters; indexing results are appended to a key called "crystals" of
    the frame dictionary.  If there is no lattice, the "crystals" key is
    absent for unindexed frames, otherwise it has one entry (a dictionary)
    per lattice.
    Function adapted from crystfelparser; needs to be merged with
    crystfelparser (acknowledge P. Gasparotto).

    Args:
        streamfile: path to the .stream file to parse.

    Returns:
        A dictionary mapping the zero-based image serial number to a
        per-frame dictionary.
    """
    series = dict()

    def loop_over_next_n_lines(file, n_lines):
        # Consume n_lines lines from the file and return the last one read.
        for cnt_tmp in range(n_lines):
            line = file.readline()
        return line

    with open(streamfile, "r") as text_file:
        # ln tracks the (approximate) line number within the stream file
        ln = -1
        while True:
            ln += 1
            line = text_file.readline()
            if "Begin chunk" in line:
                # temporary dictionary to store the output for one frame
                tmpframe = dict()
                # lines 2 and 3 after the chunk header hold the image number
                line = loop_over_next_n_lines(text_file, 3)
                ln += 3
                # save the image index as zero-based
                im_num = int(line.split()[-1]) - 1
                tmpframe["Image serial number"] = im_num
                # two lines further down: which indexer (if any) succeeded
                line = loop_over_next_n_lines(text_file, 2)
                ln += 2
                indexer_tmp = line.split()[-1]
                # if indexed, there is one additional line before the peak count
                npeaks_lines = 6
                if indexer_tmp == "none":
                    npeaks_lines = 5
                    tmpframe["multiple_lattices"] = 0
                else:
                    tmpframe["multiple_lattices"] = 1
                tmpframe["indexed_by"] = indexer_tmp
                ##### Get the STRONG REFLECTIONS from the spotfinder #####
                # loop over the next 5/6 lines to get the number of peaks
                line = loop_over_next_n_lines(text_file, npeaks_lines)
                ln += npeaks_lines
                num_peaks = int(line.split()[-1])
                tmpframe["num_peaks"] = num_peaks
                # peak-search resolution, stored both in A and in nm^-1
                line = text_file.readline()
                ln += 1
                tmpframe["peak_resolution [A]"] = float(line.split()[-2])
                tmpframe["peak_resolution [nm^-1]"] = float(line.split()[2])
                if num_peaks > 0:
                    # skip the two peak-table header lines
                    for tmpc in range(2):
                        text_file.readline()
                        ln += 1
                    # peak table columns kept: fs/px, ss/px, (1/d)/nm^-1,
                    # Intensity (with dim1 = ss, dim2 = fs)
                    tmpframe["peaks"] = np.asarray(
                        [text_file.readline().split()[:4] for tmpc in range(num_peaks)]
                    ).astype(float)
                ##### Get the PREDICTIONS after indexing #####
                # NOTE(review): the framework for multiple lattices is only
                # partially implemented -- tmp_crystal is reused across
                # lattices, so later lattices overwrite the earlier entries
                # they share; only the lattice/cell bookkeeping is complete.
                multiple = True
                if tmpframe["indexed_by"] != "none":
                    tmpframe["crystals"] = []
                    # set lattice count to 0
                    lattice_count = 0
                    # temp dict reused for every lattice of this frame
                    tmp_crystal = {}
                    # loop over this part to account for multiple lattices
                    while multiple == True:
                        # skip the two header lines, only for the first lattice
                        if lattice_count == 0:
                            for tmpc in range(2):
                                text_file.readline()
                                ln += 1
                        # unit cell: lengths (cols 2:5) and angles (cols 6:9)
                        line = text_file.readline().split()
                        tmp_crystal["Cell parameters"] = np.hstack([line[2:5], line[6:9]]).astype(float)
                        # reciprocal unit cell as a 3x3 matrix
                        reciprocal_cell = []
                        for tmpc in range(3):
                            reciprocal_cell.append(text_file.readline().split()[2:5])
                            ln += 1
                        tmp_crystal["reciprocal_cell_matrix"] = np.asarray(reciprocal_cell).astype(float)
                        # save the lattice type
                        tmp_crystal["lattice_type"] = text_file.readline().split()[-1]
                        ln += 1
                        # five lines further: either the detector-shift record
                        # or already the diffraction resolution limit
                        line = loop_over_next_n_lines(text_file, 5).split()
                        ln += 5
                        if line[0] == "predict_refine/det_shift":
                            tmp_crystal["det_shift_x"] = line[3]
                            tmp_crystal["det_shift_y"] = line[6]
                            # next line then holds the resolution limit
                            line = loop_over_next_n_lines(text_file, 1).split()
                            ln += 1
                        tmp_crystal["diffraction_resolution_limit [nm^-1]"] = float(line[2])
                        tmp_crystal["diffraction_resolution_limit [A]"] = float(line[5])
                        # get the number of predicted reflections
                        num_reflections = int(text_file.readline().split()[-1])
                        tmp_crystal["num_predicted_reflections"] = num_reflections
                        # skip the reflection-table header lines
                        line = loop_over_next_n_lines(text_file, 4)
                        ln += 4
                        if num_reflections > 0:
                            reflections_pos = []
                            for tmpc in range(num_reflections):
                                # columns: h k l I sigma(I) peak background fs/px ss/px
                                line = np.asarray(text_file.readline().split()[:9])
                                # reorder to: fs/px ss/px I sigma(I) h k l
                                reflections_pos.append(line[[7, 8, 3, 4, 0, 1, 2]])
                                ln += 1
                            tmp_crystal["predicted_reflections"] = np.asarray(reflections_pos).astype(float)
                        # read past the crystal footer and peek whether another
                        # crystal block follows for this chunk
                        line = text_file.readline()
                        line = text_file.readline()
                        line = text_file.readline()
                        if "Begin crystal" in line:  # multi lattice
                            lattice_count = lattice_count + 1
                            tmpframe["multiple_lattices"] = tmpframe["multiple_lattices"] + 1
                            # append the lattice to "crystals" in tmpframe
                            tmpframe["crystals"].append(tmp_crystal)
                            ln += 1
                        else:
                            tmpframe["crystals"].append(tmp_crystal)
                            multiple = False
                        if multiple == False:
                            break
                # add the frame to the series, keyed by the frame index
                series[im_num] = tmpframe
            # readline() returns "" only at EOF: exit the while-True cycle
            if "" == line:
                break
    # return the series
    return series
def get_data_from_streamfiles():
    """
    Collect indexing results from the stream file passed as ``sys.argv[1]``
    and merge them into the message loaded from ``msg.json``.

    The following information is added to the message:
      1.) number of images / number of indexed images
      2.) number of spots per frame (list of ints)
      3.) total number of indexed lattices (int)
      4.) number of lattices per indexed image (dict)
      5.) mean/std beam center (detector) shift X and Y in pixels (floats)
      6.) mean/std unit cell constants a, b, c, alpha, beta, gamma (lists)

    :return: old_message dict with the additional result entries
    """
    # load the current message from the processing folder; the with-block
    # closes the handle (the previous version leaked the open file object)
    with open("msg.json", "r") as msg_file:
        old_message = json.load(msg_file)
    # parse the stream into {zero-based frame index: frame dict}
    parsed_stream = stream_to_dictionary(sys.argv[1])
    old_message["numberOfImages"] = len(parsed_stream)
    # frames with more than 7 keys carry indexing results ("crystals" etc.);
    # multiple lattices are not accounted for here
    indexable_frames = np.array(sorted([FR for FR in parsed_stream.keys() if len(parsed_stream[FR].keys()) > 7]))
    old_message["numberOfImagesIndexed"] = len(indexable_frames)
    # spots per frame and total number of indexed lattices
    # NOTE(review): assumes the frame keys are exactly 0..N-1 -- holds as
    # long as every image of the run produced a chunk in the stream file
    spots_per_frame = np.zeros(len(parsed_stream))
    indexable_lattices = 0
    for frame in range(len(parsed_stream)):
        spots_per_frame[frame] = parsed_stream[frame]["num_peaks"]
        indexable_lattices += parsed_stream[frame]["multiple_lattices"]
    # convert to plain python types so the message stays JSON serializable
    old_message["numberOfSpotsPerImage"] = spots_per_frame.astype(int).tolist()
    old_message["numberOfLattices"] = indexable_lattices
    # number of indexed lattices per indexed pattern
    old_message["numberOfLatticesPerImage"] = {
        int(frame): parsed_stream[frame]["multiple_lattices"] for frame in indexable_frames
    }
    # collect detector shift and unit cell constants of every lattice
    list_x = []
    list_y = []
    uc_array = np.zeros((indexable_lattices, 6))
    row = 0
    for frame in indexable_frames:
        for crystal in parsed_stream[frame]["crystals"]:
            # det shift in x and y (stored as strings, converted below)
            list_x.append(crystal["det_shift_x"])
            list_y.append(crystal["det_shift_y"])
            # unit cell constants a, b, c, alpha, beta, gamma
            uc_array[row] = np.asarray(crystal["Cell parameters"])
            row = row + 1
    # ------ DET SHIFT MEAN and STD -------
    mean_x = np.around(np.mean(np.asarray(list_x).astype(float)), 4)
    std_x = np.around(np.std(np.asarray(list_x).astype(float)), 4)
    mean_y = np.around(np.mean(np.asarray(list_y).astype(float)), 4)
    std_y = np.around(np.std(np.asarray(list_y).astype(float)), 4)
    # convert from mm to pixel units: 0.075 mm = 1 pixel = 75 um
    pixel_size_mm = 0.075
    old_message["beamShiftMeanX_pxl"] = np.around(mean_x * (1 / pixel_size_mm), 4)
    old_message["beamShiftMeanY_pxl"] = np.around(mean_y * (1 / pixel_size_mm), 4)
    old_message["beamShiftStdX_pxl"] = np.around(std_x * (1 / pixel_size_mm), 4)
    old_message["beamShiftStdY_pxl"] = np.around(std_y * (1 / pixel_size_mm), 4)
    # ------- UC CONSTANTS MEAN and STD ----
    mean_uc = np.mean(uc_array, 0)
    std_uc = np.std(uc_array, 0)
    # scale the first three entries (cell lengths a, b, c) by 10
    # (presumably nm -> Angstrom; confirm against the stream file units)
    mean_uc[:3] *= 10.0
    std_uc[:3] *= 10.0
    # convert to list to be JSON serializable
    old_message["unitCellIndexingMean"] = np.around(mean_uc, 3).tolist()
    old_message["unitCellIndexingStd"] = np.around(std_uc, 3).tolist()
    return old_message
# ============classes with functions=========
class StreamToLogger:
    """
    Minimal file-like object that forwards everything written to it to the
    loguru logger, one log record per line.  Intended as a target for
    ``contextlib.redirect_stdout`` so that ``print`` output lands in the log.
    """

    def __init__(self, level="INFO"):
        # severity level used for every forwarded line
        self._level = level

    def write(self, buffer):
        # forward each non-empty line; depth=1 makes loguru report the
        # caller's location rather than this method
        for entry in buffer.rstrip().splitlines():
            logger.opt(depth=1).log(self._level, entry.rstrip())

    def flush(self):
        # nothing is buffered locally; method exists to satisfy the
        # file-object protocol expected by redirect_stdout
        pass
# =============MAIN====================
# if executed as main, code runs in the src folder of the git repo;
# needs to have the msg.json file in this folder to test.
if __name__ == "__main__":
    # main()
    # redirect stdout so anything printed while reading the stream file
    # is captured in the loguru log instead of the console
    stream = StreamToLogger()
    with contextlib.redirect_stdout(stream):
        # get results from streamfile (path taken from sys.argv[1])
        results_message = get_data_from_streamfiles()
    logger.info(f"message can be send to DB :{results_message}")
    # send message to database: init the client class, then insert
    mxdb = MxdbVdpTools.MxdbVdpTools()
    _id = mxdb.insert(results_message)
    logger.info("message inserted to DB")
    # retrieve the inserted doc from the database (kept for debugging)
    #doc = mxdb.query(_id=_id["insertID"])
    #logger.info("doc info from DB is: ")
    #logger.info(doc)