add all files

src/results.py (new file, 389 lines)
@@ -0,0 +1,389 @@
# Author Assman G. (2023)
# execute with a .stream file as the single command-line argument, e.g.:
# /das/home/assman_g/p19370/vespa/2023-03-30/something_mergeID/Lyso_12p4keV_1kHz_150mm_run000026_data_000010/Lyso_12p4keV_1kHz_150mm_run000026_data_000010.th6.snr4.0.mpixco1.stream
import contextlib
import json
import sys
import time

# import crystfelparser.crystfelparser as crys  # acknowledge P.Gasparotto
import numpy as np
from loguru import logger
import MxdbVdpTools

# import matplotlib.pyplot as plt

LOG_FILENAME = time.strftime("/sls/MX/Data10/e20233/log/vdp_%Y%m.log")  # as eaccount at beamline
# LOG_FILENAME = time.strftime("/sls/MX/Data10-staff/e19370/log/vdp_%Y%m.log")  # as eaccount at beamline
# LOG_FILENAME = time.strftime("/home/assman_g/Documents/log/vdp_%Y%m.log")  # as assman_g at beamline
# LOG_FILENAME = time.strftime("/das/home/assman_g/vdp_%Y%m.log")  # on RA
logger.add(LOG_FILENAME, level="INFO", rotation="100MB")


# ========== functions ================


def main():
    """
    Hello-world testing.

    :return: nothing
    """
    print("hello fish ")


def stream_to_dictionary(streamfile):
    """
    Parse a CrystFEL stream file and generate a dictionary with all the parsed
    parameters for every frame. Per-lattice parameter dictionaries are appended
    to the "crystals" key of each frame. If there is no lattice, the "crystals"
    key is an empty list; otherwise it has one entry per lattice, as dictionaries.
    Function from crystfelparser, edited; needs to be merged with crystfelparser.

    Returns:
        A dictionary keyed by zero-based image serial number
    """
    # series = defaultdict(dict)
    series = dict()

    def loop_over_next_n_lines(file, n_lines):
        # advance the file by n_lines and return the last line read
        for _ in range(n_lines):
            line = file.readline()

        return line

with open(streamfile, "r") as text_file:
|
||||
# for ln,line in enumerate(text_file):
|
||||
ln = -1
|
||||
while True:
|
||||
ln += 1
|
||||
line = text_file.readline()
|
||||
# print(line)
|
||||
# if any(x in ["Begin","chunk"] for x in line.split()):
|
||||
if "Begin chunk" in line:
|
||||
# create a temporary dictionary to store the output for a frame
|
||||
# tmpframe = defaultdict(int)
|
||||
tmpframe = dict()
|
||||
|
||||
# loop over the next 3 lines to get the index of the image
|
||||
# line 2 and 3 are where it is stored the image number
|
||||
line = loop_over_next_n_lines(text_file, 3)
|
||||
ln += 3
|
||||
# save the image index and save it as zero-based
|
||||
im_num = int(line.split()[-1]) - 1
|
||||
tmpframe["Image serial number"] = im_num
|
||||
|
||||
# loop over the next 2 lines to see if the indexer worked
|
||||
line = loop_over_next_n_lines(text_file, 2)
|
||||
ln += 2
|
||||
# save who indexed the image
|
||||
indexer_tmp = line.split()[-1]
|
||||
# if indexed, there is an additional line here
|
||||
npeaks_lines = 6
|
||||
if indexer_tmp == "none":
|
||||
npeaks_lines = 5
|
||||
tmpframe["multiple_lattices"] = 0
|
||||
else:
|
||||
tmpframe["multiple_lattices"] = 1
|
||||
tmpframe["indexed_by"] = indexer_tmp
|
||||
|
||||
                ##### Get the STRONG REFLECTIONS from the spotfinder #####

                # loop over the next 5/6 lines to get the number of reflections
                line = loop_over_next_n_lines(text_file, npeaks_lines)
                ln += npeaks_lines
                # get the number of peaks
                num_peaks = int(line.split()[-1])
                tmpframe["num_peaks"] = num_peaks

                # get the resolution
                line = text_file.readline()
                ln += 1
                tmpframe["peak_resolution [A]"] = float(line.split()[-2])
                tmpframe["peak_resolution [nm^-1]"] = float(line.split()[2])

                if num_peaks > 0:
                    # skip the first 2 header lines
                    for _ in range(2):
                        text_file.readline()
                        ln += 1

                    # get the spots, read as:
                    # fs/px, ss/px, (1/d)/nm^-1, Intensity
                    # with dim1 = ss, dim2 = fs
                    tmpframe["peaks"] = np.asarray(
                        [text_file.readline().split()[:4] for _ in range(num_peaks)]
                    ).astype(float)
                # generate empty list for potential indexed lattices

                ##### Get the PREDICTIONS after indexing #####
                # So far the framework for multiple lattices is generated,
                # but not finished: only the "last" lattice is saved completely
                # in terms of reflections etc.; so far only the unit cell
                # constants are all accounted for!
                multiple = True
                if tmpframe["indexed_by"] != "none":
                    tmpframe["crystals"] = []
                    # set lattice count to 0
                    lattice_count = 0
                    # start a loop over this part to account for multiple lattices
                    while multiple:
                        # generate a fresh tmp_crystal dict for every lattice of this
                        # frame (reusing one dict would alias all "crystals" entries)
                        tmp_crystal = {}
                        # skip the first 2 header lines, only if not in the multiple-lattice loop
                        if lattice_count == 0:
                            for _ in range(2):
                                text_file.readline()
                                ln += 1

                        # Get the unit cell -- as cell lengths and angles;
                        # append unit cell constants if multiple lattices exist
                        line = text_file.readline().split()
                        tmp_crystal["Cell parameters"] = np.hstack([line[2:5], line[6:9]]).astype(float)

                        # Get the reciprocal unit cell as a 3x3 matrix
                        # multiple lattices not done yet
                        reciprocal_cell = []
                        for _ in range(3):
                            reciprocal_cell.append(text_file.readline().split()[2:5])
                            ln += 1
                        # print(reciprocal_cell)
                        tmp_crystal["reciprocal_cell_matrix"] = np.asarray(reciprocal_cell).astype(float)

                        # Save the lattice type
                        tmp_crystal["lattice_type"] = text_file.readline().split()[-1]
                        ln += 1

                        # loop over the next 5 lines to get the diffraction resolution
                        line = loop_over_next_n_lines(text_file, 5).split()
                        ln += 5

                        # multiple lattices not done yet
                        if line[0] == "predict_refine/det_shift":
                            tmp_crystal["det_shift_x"] = line[3]
                            tmp_crystal["det_shift_y"] = line[6]
                            line = loop_over_next_n_lines(text_file, 1).split()
                            ln += 1

                        tmp_crystal["diffraction_resolution_limit [nm^-1]"] = float(line[2])
                        tmp_crystal["diffraction_resolution_limit [A]"] = float(line[5])

                        # get the number of predicted reflections
                        num_reflections = int(text_file.readline().split()[-1])
                        tmp_crystal["num_predicted_reflections"] = num_reflections

                        # skip a few lines
                        line = loop_over_next_n_lines(text_file, 4)
                        ln += 4
                        # get the predicted reflections
                        if num_reflections > 0:
                            reflections_pos = []
                            for _ in range(num_reflections):
                                # read as:
                                # h k l I sigma(I) peak background fs/px ss/px
                                line = np.asarray(text_file.readline().split()[:9])
                                # reorder and keep: fs/px ss/px I sigma(I) h k l
                                reflections_pos.append(line[[7, 8, 3, 4, 0, 1, 2]])
                                ln += 1
                            tmp_crystal["predicted_reflections"] = np.asarray(reflections_pos).astype(float)
                        # continue reading
                        line = text_file.readline()
                        line = text_file.readline()
                        line = text_file.readline()
                        # print(line)
                        if "Begin crystal" in line:  # multi lattice
                            lattice_count += 1
                            tmpframe["multiple_lattices"] += 1
                            # append the lattice to the entry "crystals" in tmpframe
                            tmpframe["crystals"].append(tmp_crystal)
                            # print("multiple append")
                            ln += 1
                            # multiple = False
                            # lattice_count = 0
                        else:
                            tmpframe["crystals"].append(tmp_crystal)
                            # print("else append", lattice_count)
                            multiple = False
                # Add the frame to the series, using the frame index as key
                series[im_num] = tmpframe

            # condition to exit the "while True" reading cycle
            if line == "":
                # print("file finished")
                break

    # return the series
    return series

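
# A minimal usage sketch for stream_to_dictionary (the path is illustrative):
#
#   series = stream_to_dictionary("run000026.stream")
#   frame = series[0]
#   if frame["indexed_by"] != "none":
#       print(frame["crystals"][0]["Cell parameters"])  # a, b, c (nm), angles (deg)
#
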
def get_data_from_streamfiles():
    """
    Get results from the stream file.
    The stream file is passed as argument to the python script.
    The following info is grepped:
    1.) # of indexed crystals --> int | indexable_frames
    2.) # of indexed lattices --> int | indexable_lattices
    3.) # of spots per frame --> array of ints | spots_per_frame
    4.) # of lattices per image --> array of ints
    5.) Mean beam center shift X in pixels --> float
    6.) Mean beam center shift Y in pixels --> float
    7.) Mean beam center shift STD X in pixels --> float
    8.) Mean beam center shift STD Y in pixels --> float
    9.) Mean unit cell of indexed images --> floats for a, b, c, alpha, beta, gamma
    10.) STD of mean unit cell of indexed images --> floats for a, b, c, alpha, beta, gamma
    11.) Mean processing time in sec TODO --> float

    :return: old_message with additional entries
    """

    # load current message in processing folder
    with open("msg.json", "r") as tmpfile:
        old_message = json.load(tmpfile)
    # print(old_message)
    # old_message is a dict with all the input message params

    # parse stream into dict
    parsed_stream = stream_to_dictionary(sys.argv[1])
    old_message["numberOfImages"] = len(parsed_stream)

    # get number of indexable frames -- multi-lattice not accounted for
    # (indexed frames carry extra keys such as "crystals", hence the key-count check)
    # print(parsed_stream[0].keys())
    indexable_frames = np.array(sorted([FR for FR in parsed_stream.keys() if len(parsed_stream[FR].keys()) > 7]))
    old_message["numberOfImagesIndexed"] = len(indexable_frames)

    # spots_per_frame = {}  # as dict
    # as array:
    spots_per_frame = np.zeros(len(parsed_stream))
    indexable_lattices = 0
    for i in range(len(parsed_stream)):
        # get spots per frame:
        spots_per_frame[i] = parsed_stream[i]["num_peaks"]
        # get total number of indexable lattices
        indexable_lattices += parsed_stream[i]["multiple_lattices"]

    # put into dict for results, convert to list to be json serializable
    old_message["numberOfSpotsPerImage"] = spots_per_frame.astype(int).tolist()
    old_message["numberOfLattices"] = indexable_lattices

    # get number of indexed lattices per pattern
    lattices_per_indx_frame = {}
    for i in indexable_frames:
        lattices_per_indx_frame[int(i)] = parsed_stream[i]["multiple_lattices"]
    old_message["numberOfLatticesPerImage"] = lattices_per_indx_frame
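
    # Note (assumption about downstream storage): JSON and BSON serialize dict
    # keys as strings, so the int keys of "numberOfLatticesPerImage" will appear
    # as e.g. "42" once the message is stored.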

    # mean beam center shift X and Y
    list_x = []
    list_y = []
    # define np.array for the unit cell constants, one row per lattice
    uc_array = np.zeros((indexable_lattices, 6))
    b = 0
    for i in indexable_frames:
        for x in range(len(parsed_stream[i]["crystals"])):
            # det shift in x and y (stored as strings, cast to float below)
            list_x.append(parsed_stream[i]["crystals"][x]["det_shift_x"])
            list_y.append(parsed_stream[i]["crystals"][x]["det_shift_y"])
            # unit cell constants
            uc_array[b] = np.asarray(parsed_stream[i]["crystals"][x]["Cell parameters"])
            b += 1

    # ------ DET SHIFT MEAN and STD -------

    # plot det shift scatter plot
    # plt.scatter(np.asarray(list_x).astype(float), np.asarray(list_y).astype(float))
    # plt.show()

    mean_x = np.around(np.mean(np.asarray(list_x).astype(float)), 4)
    std_x = np.around(np.std(np.asarray(list_x).astype(float)), 4)
    mean_y = np.around(np.mean(np.asarray(list_y).astype(float)), 4)
    std_y = np.around(np.std(np.asarray(list_y).astype(float)), 4)
    # convert from mm to pixel units:
    # 0.075 mm = 1 pixel = 75 um
    # print(mean_x, mean_x * (1/0.075), std_x, std_x * (1/0.075), "x")
    # print(mean_y, mean_y * (1/0.075), std_y, std_y * (1/0.075), "y")
    old_message["beamShiftMeanX_pxl"] = np.around(mean_x * (1 / 0.075), 4)
    old_message["beamShiftMeanY_pxl"] = np.around(mean_y * (1 / 0.075), 4)
    old_message["beamShiftStdX_pxl"] = np.around(std_x * (1 / 0.075), 4)
    old_message["beamShiftStdY_pxl"] = np.around(std_y * (1 / 0.075), 4)
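    # Worked example of the mm-to-pixel conversion above (numbers illustrative):
    # a mean detector shift of -0.15 mm corresponds to -0.15 / 0.075 = -2.0 px.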
    # ------- UC CONSTANTS MEAN and STD ----
    mean_uc = np.mean(uc_array, 0)
    # convert the axis lengths a, b, c (first 3 entries) from nm to Angstrom
    mean_uc[:3] *= 10.0
    std_uc = np.std(uc_array, 0)
    std_uc[:3] *= 10.0
    # convert to list to be json serializable
    old_message["unitCellIndexingMean"] = np.around(mean_uc, 3).tolist()
    old_message["unitCellIndexingStd"] = np.around(std_uc, 3).tolist()

    # print(old_message)
    return old_message

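
# A usage sketch for get_data_from_streamfiles (assumes msg.json in the working
# directory and a stream path in sys.argv[1], as wired up in __main__ below):
#
#   results = get_data_from_streamfiles()
#   print(results["numberOfImagesIndexed"], "of", results["numberOfImages"], "indexed")
#
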
# ============ classes with functions =========


class StreamToLogger:
    """File-like object that forwards writes to the loguru logger."""

    def __init__(self, level="INFO"):
        self._level = level

    def write(self, buffer):
        for line in buffer.rstrip().splitlines():
            logger.opt(depth=1).log(self._level, line.rstrip())

    def flush(self):
        pass

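
# Usage sketch for StreamToLogger (the same pattern as in __main__ below):
#
#   with contextlib.redirect_stdout(StreamToLogger("INFO")):
#       print("this print is routed into the loguru log")
#
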
# ============= MAIN ====================

# if executed as main, the code runs in the src folder of the git repo;
# it needs the msg.json file in this folder to test.

if __name__ == "__main__":
    # main()
    # get results from streamfile
    stream = StreamToLogger()
    with contextlib.redirect_stdout(stream):
        results_message = get_data_from_streamfiles()
    # logger.info("message can be sent to DB: {}", results_message)
    logger.info(f"message can be sent to DB: {results_message}")

    # send message to database:
    # init the class
    mxdb = MxdbVdpTools.MxdbVdpTools()

    # insert message to DB
    _id = mxdb.insert(results_message)

    # EXAMPLE MESSAGE {
    #     "mergeID": "something_mergeID",
    #     "trackingId": "something_track",
    #     "eaccount": "e19370",
    #     "masterFileName": "Lyso_12p4keV_1kHz_150mm_run000026_master.h5",
    #     "dataFileName": "Lyso_12p4keV_1kHz_150mm_run000026_data_000010.h5",
    #     "filesystemPath": "/das/work/p19/p19607/FromGreta/REDML_indx_data/lyso/processing/",
    #     "detectorDistance_mm": 150.0,
    #     "beamCenterX_pxl": 1103.7,
    #     "beamCenterY_pxl": 1175.1,
    #     "pixelSize_um": 75,
    #     "numberOfImages": 10000,
    #     "imageTime_us": 100,
    #     "enery_kev": 12398.0,
    #     "detectorWidth_pxl": 2067,
    #     "detectorHeight_pxl": 2163,
    #     "underload": -30000,
    #     "overload": 30000,
    #     "unitCell": {"a": 79.5, "b": 79.5, "c": 38.6, "alpha": 90.0, "beta": 90.0, "gamma": 90.0},
    #     "spaceGroupNumber": 96,
    #     "crystfelTreshold": 6.0,
    #     "crystfelMinSNR": 4.0,
    #     "crystfelMinPixCount": 1,
    #     "crystfelMultiCrystal": False,
    # }

logger.info("message inserted to DB")
|
||||
|
||||
# retreive the inserted doc from the database
|
||||
#doc = mxdb.query(_id=_id["insertID"])
|
||||
#logger.info("doc info from DB is: ")
|
||||
#logger.info(doc)
|
||||
|
||||