# --- repository file-view header (scrape residue, kept as comments) ---
# Files
# clara/src/results.py
# 2024-06-20 16:48:20 +02:00
#
# 362 lines
# 15 KiB
# Python
# Author Assman G. (2023)
# execute with
# /das/home/assman_g/p19370/vespa/2023-03-30/something_mergeID/Lyso_12p4keV_1kHz_150mm_run000026_data_000010/Lyso_12p4keV_1kHz_150mm_run000026_data_000010.th6.snr4.0.mpixco1.stream
import contextlib
import json
import sys
import time
# import crystfelparser.crystfelparser as crys # acknowledge P.Gasparotto
import numpy as np
from loguru import logger
import MxdbVdpTools
# import matplotlib.pyplot as plt
# Monthly log file (one file per year+month via strftime); loguru appends
# INFO-and-above records and rotates the file once it reaches 100 MB.
LOG_FILENAME = time.strftime("/sf/cristallina/applications/mx/clara_tools/log/clara_%Y%m.log")
logger.add(LOG_FILENAME, level="INFO", rotation="100MB")
# ========== functions ================
def main():
    """
    Smoke-test entry point: print a greeting and return nothing.

    :return: None
    """
    # NOTE: the redundant trailing ``pass`` of the original was removed;
    # the printed string is kept byte-identical (incl. trailing space).
    print("hello fish ")
def stream_to_dictionary(streamfile):
    """
    Parse a CrystFEL stream file into a dictionary of frames.

    Generates one dictionary per chunk ("frame") with all the peak-search
    parameters; indexing results are appended to a key called "crystals" of
    the frame dictionary.  If there is no lattice, the "crystals" key is
    absent for unindexed frames, otherwise it has one entry (a dictionary)
    per lattice.
    Function adapted from crystfelparser; needs to be merged with
    crystfelparser (acknowledge P. Gasparotto).

    Args:
        streamfile: path to the .stream file to parse.

    Returns:
        A dictionary mapping the zero-based image serial number to a
        per-frame dictionary.
    """
    series = dict()

    def loop_over_next_n_lines(file, n_lines):
        # Consume n_lines lines from the file and return the last one read.
        for cnt_tmp in range(n_lines):
            line = file.readline()
        return line

    with open(streamfile, "r") as text_file:
        # ln tracks the (approximate) line number within the stream file
        ln = -1
        while True:
            ln += 1
            line = text_file.readline()
            if "Begin chunk" in line:
                # temporary dictionary to store the output for one frame
                tmpframe = dict()
                # lines 2 and 3 after the chunk header hold the image number
                line = loop_over_next_n_lines(text_file, 3)
                ln += 3
                # save the image index as zero-based
                im_num = int(line.split()[-1]) - 1
                tmpframe["Image serial number"] = im_num
                # two lines further down: which indexer (if any) succeeded
                line = loop_over_next_n_lines(text_file, 2)
                ln += 2
                indexer_tmp = line.split()[-1]
                # if indexed, there is one additional line before the peak count
                npeaks_lines = 6
                if indexer_tmp == "none":
                    npeaks_lines = 5
                    tmpframe["multiple_lattices"] = 0
                else:
                    tmpframe["multiple_lattices"] = 1
                tmpframe["indexed_by"] = indexer_tmp
                ##### Get the STRONG REFLECTIONS from the spotfinder #####
                # loop over the next 5/6 lines to get the number of peaks
                line = loop_over_next_n_lines(text_file, npeaks_lines)
                ln += npeaks_lines
                num_peaks = int(line.split()[-1])
                tmpframe["num_peaks"] = num_peaks
                # peak-search resolution, stored both in A and in nm^-1
                line = text_file.readline()
                ln += 1
                tmpframe["peak_resolution [A]"] = float(line.split()[-2])
                tmpframe["peak_resolution [nm^-1]"] = float(line.split()[2])
                if num_peaks > 0:
                    # skip the two peak-table header lines
                    for tmpc in range(2):
                        text_file.readline()
                        ln += 1
                    # peak table columns kept: fs/px, ss/px, (1/d)/nm^-1,
                    # Intensity (with dim1 = ss, dim2 = fs)
                    tmpframe["peaks"] = np.asarray(
                        [text_file.readline().split()[:4] for tmpc in range(num_peaks)]
                    ).astype(float)
                ##### Get the PREDICTIONS after indexing #####
                # NOTE(review): the framework for multiple lattices is only
                # partially implemented -- tmp_crystal is reused across
                # lattices, so later lattices overwrite the earlier entries
                # they share; only the lattice/cell bookkeeping is complete.
                multiple = True
                if tmpframe["indexed_by"] != "none":
                    tmpframe["crystals"] = []
                    # set lattice count to 0
                    lattice_count = 0
                    # temp dict reused for every lattice of this frame
                    tmp_crystal = {}
                    # loop over this part to account for multiple lattices
                    while multiple == True:
                        # skip the two header lines, only for the first lattice
                        if lattice_count == 0:
                            for tmpc in range(2):
                                text_file.readline()
                                ln += 1
                        # unit cell: lengths (cols 2:5) and angles (cols 6:9)
                        line = text_file.readline().split()
                        tmp_crystal["Cell parameters"] = np.hstack([line[2:5], line[6:9]]).astype(float)
                        # reciprocal unit cell as a 3x3 matrix
                        reciprocal_cell = []
                        for tmpc in range(3):
                            reciprocal_cell.append(text_file.readline().split()[2:5])
                            ln += 1
                        tmp_crystal["reciprocal_cell_matrix"] = np.asarray(reciprocal_cell).astype(float)
                        # save the lattice type
                        tmp_crystal["lattice_type"] = text_file.readline().split()[-1]
                        ln += 1
                        # five lines further: either the detector-shift record
                        # or already the diffraction resolution limit
                        line = loop_over_next_n_lines(text_file, 5).split()
                        ln += 5
                        if line[0] == "predict_refine/det_shift":
                            tmp_crystal["det_shift_x"] = line[3]
                            tmp_crystal["det_shift_y"] = line[6]
                            # next line then holds the resolution limit
                            line = loop_over_next_n_lines(text_file, 1).split()
                            ln += 1
                        tmp_crystal["diffraction_resolution_limit [nm^-1]"] = float(line[2])
                        tmp_crystal["diffraction_resolution_limit [A]"] = float(line[5])
                        # get the number of predicted reflections
                        num_reflections = int(text_file.readline().split()[-1])
                        tmp_crystal["num_predicted_reflections"] = num_reflections
                        # skip the reflection-table header lines
                        line = loop_over_next_n_lines(text_file, 4)
                        ln += 4
                        if num_reflections > 0:
                            reflections_pos = []
                            for tmpc in range(num_reflections):
                                # columns: h k l I sigma(I) peak background fs/px ss/px
                                line = np.asarray(text_file.readline().split()[:9])
                                # reorder to: fs/px ss/px I sigma(I) h k l
                                reflections_pos.append(line[[7, 8, 3, 4, 0, 1, 2]])
                                ln += 1
                            tmp_crystal["predicted_reflections"] = np.asarray(reflections_pos).astype(float)
                        # read past the crystal footer and peek whether another
                        # crystal block follows for this chunk
                        line = text_file.readline()
                        line = text_file.readline()
                        line = text_file.readline()
                        if "Begin crystal" in line:  # multi lattice
                            lattice_count = lattice_count + 1
                            tmpframe["multiple_lattices"] = tmpframe["multiple_lattices"] + 1
                            # append the lattice to "crystals" in tmpframe
                            tmpframe["crystals"].append(tmp_crystal)
                            ln += 1
                        else:
                            tmpframe["crystals"].append(tmp_crystal)
                            multiple = False
                        if multiple == False:
                            break
                # add the frame to the series, keyed by the frame index
                series[im_num] = tmpframe
            # readline() returns "" only at EOF: exit the while-True cycle
            if "" == line:
                break
    # return the series
    return series
def get_data_from_streamfiles():
    """
    Collect indexing results from the stream file passed as ``sys.argv[1]``
    and merge them into the message loaded from ``msg.json``.

    The following information is added to the message:
      1.) number of images / number of indexed images
      2.) number of spots per frame (list of ints)
      3.) total number of indexed lattices (int)
      4.) number of lattices per indexed image (dict)
      5.) mean/std beam center (detector) shift X and Y in pixels (floats)
      6.) mean/std unit cell constants a, b, c, alpha, beta, gamma (lists)

    :return: old_message dict with the additional result entries
    """
    # load the current message from the processing folder; the with-block
    # closes the handle (the previous version leaked the open file object)
    with open("msg.json", "r") as msg_file:
        old_message = json.load(msg_file)
    # parse the stream into {zero-based frame index: frame dict}
    parsed_stream = stream_to_dictionary(sys.argv[1])
    old_message["numberOfImages"] = len(parsed_stream)
    # frames with more than 7 keys carry indexing results ("crystals" etc.);
    # multiple lattices are not accounted for here
    indexable_frames = np.array(sorted([FR for FR in parsed_stream.keys() if len(parsed_stream[FR].keys()) > 7]))
    old_message["numberOfImagesIndexed"] = len(indexable_frames)
    # spots per frame and total number of indexed lattices
    # NOTE(review): assumes the frame keys are exactly 0..N-1 -- holds as
    # long as every image of the run produced a chunk in the stream file
    spots_per_frame = np.zeros(len(parsed_stream))
    indexable_lattices = 0
    for frame in range(len(parsed_stream)):
        spots_per_frame[frame] = parsed_stream[frame]["num_peaks"]
        indexable_lattices += parsed_stream[frame]["multiple_lattices"]
    # convert to plain python types so the message stays JSON serializable
    old_message["numberOfSpotsPerImage"] = spots_per_frame.astype(int).tolist()
    old_message["numberOfLattices"] = indexable_lattices
    # number of indexed lattices per indexed pattern
    old_message["numberOfLatticesPerImage"] = {
        int(frame): parsed_stream[frame]["multiple_lattices"] for frame in indexable_frames
    }
    # collect detector shift and unit cell constants of every lattice
    list_x = []
    list_y = []
    uc_array = np.zeros((indexable_lattices, 6))
    row = 0
    for frame in indexable_frames:
        for crystal in parsed_stream[frame]["crystals"]:
            # det shift in x and y (stored as strings, converted below)
            list_x.append(crystal["det_shift_x"])
            list_y.append(crystal["det_shift_y"])
            # unit cell constants a, b, c, alpha, beta, gamma
            uc_array[row] = np.asarray(crystal["Cell parameters"])
            row = row + 1
    # ------ DET SHIFT MEAN and STD -------
    mean_x = np.around(np.mean(np.asarray(list_x).astype(float)), 4)
    std_x = np.around(np.std(np.asarray(list_x).astype(float)), 4)
    mean_y = np.around(np.mean(np.asarray(list_y).astype(float)), 4)
    std_y = np.around(np.std(np.asarray(list_y).astype(float)), 4)
    # convert from mm to pixel units: 0.075 mm = 1 pixel = 75 um
    pixel_size_mm = 0.075
    old_message["beamShiftMeanX_pxl"] = np.around(mean_x * (1 / pixel_size_mm), 4)
    old_message["beamShiftMeanY_pxl"] = np.around(mean_y * (1 / pixel_size_mm), 4)
    old_message["beamShiftStdX_pxl"] = np.around(std_x * (1 / pixel_size_mm), 4)
    old_message["beamShiftStdY_pxl"] = np.around(std_y * (1 / pixel_size_mm), 4)
    # ------- UC CONSTANTS MEAN and STD ----
    mean_uc = np.mean(uc_array, 0)
    std_uc = np.std(uc_array, 0)
    # scale the first three entries (cell lengths a, b, c) by 10
    # (presumably nm -> Angstrom; confirm against the stream file units)
    mean_uc[:3] *= 10.0
    std_uc[:3] *= 10.0
    # convert to list to be JSON serializable
    old_message["unitCellIndexingMean"] = np.around(mean_uc, 3).tolist()
    old_message["unitCellIndexingStd"] = np.around(std_uc, 3).tolist()
    return old_message
# ============classes with functions=========
class StreamToLogger:
    """
    Minimal file-like object that forwards everything written to it to the
    loguru logger, one log record per line.  Intended as a target for
    ``contextlib.redirect_stdout`` so that ``print`` output lands in the log.
    """

    def __init__(self, level="INFO"):
        # severity level used for every forwarded line
        self._level = level

    def write(self, buffer):
        # forward each non-empty line; depth=1 makes loguru report the
        # caller's location rather than this method
        for entry in buffer.rstrip().splitlines():
            logger.opt(depth=1).log(self._level, entry.rstrip())

    def flush(self):
        # nothing is buffered locally; method exists to satisfy the
        # file-object protocol expected by redirect_stdout
        pass
# =============MAIN====================
# if executed as main, code runs in the src folder of the git repo;
# needs to have the msg.json file in this folder to test.
if __name__ == "__main__":
    # main()
    # redirect stdout so anything printed while reading the stream file
    # is captured in the loguru log instead of the console
    stream = StreamToLogger()
    with contextlib.redirect_stdout(stream):
        # get results from streamfile (path taken from sys.argv[1])
        results_message = get_data_from_streamfiles()
    logger.info(f"message can be send to DB :{results_message}")
    # send message to database: init the client class, then insert
    mxdb = MxdbVdpTools.MxdbVdpTools()
    _id = mxdb.insert(results_message)
    logger.info("message inserted to DB")
    # retrieve the inserted doc from the database (kept for debugging)
    #doc = mxdb.query(_id=_id["insertID"])
    #logger.info("doc info from DB is: ")
    #logger.info(doc)