add all files

2024-05-02 16:39:56 +02:00
parent 1f894ea6e7
commit fb798fc038
12 changed files with 1579 additions and 1 deletions
--- a/src/clara.py
+++ b/src/clara.py
@@ -0,0 +1,581 @@
+# Author: Assmann G. (2023)
+
+import contextlib
+import datetime
+import json
+import os
+import signal
+import subprocess as sub
+import sys
+import time
+from pathlib import Path
+
+from loguru import logger
+
+import receive_msg
+
+# Location of the log file. The month stamp is evaluated once, when the module is loaded,
+# so a process that keeps running across a month boundary continues to use the old name.
+LOG_FILENAME = time.strftime("/sf/cristallina/applications/mx/clara_tools/log/clara_%Y%m.log")
+
+
+# Import-time side effect: registers the file as a loguru sink (INFO and above, rotated at 100 MB).
+logger.add(LOG_FILENAME, level="INFO", rotation="100MB")
+
+# Hard-coded data root for e20233, as VDP is only used by e20233 so far. It is the base of the
+# processing folder (mk_cd_output_dir_bl) and of the paths written to the geometry and list files.
+# To use the e-account from the message instead, see the commented-out lines in mk_cd_output_dir_bl.
+pa = Path("/sls/MX/Data10/e20233")
+
+
+class StreamToLogger:
+    """
+    Minimal write-only stream that forwards every written line to the loguru logger.
+    Meant to be handed to contextlib.redirect_stdout so that print() output lands in the log.
+    """
+
+    def __init__(self, level="INFO"):
+        # loguru level name applied to every forwarded line
+        self._level = level
+
+    def write(self, buffer):
+        """
+        Logs the content of buffer line by line; trailing whitespace is dropped and
+        a buffer that is empty after stripping produces no log record.
+        :param buffer: text written to the stream
+        :return: None
+        """
+        stripped = buffer.rstrip()
+        for entry in stripped.splitlines():
+            # depth=1 makes loguru report the caller of write() as the origin of the record
+            logger.opt(depth=1).log(self._level, entry.rstrip())
+
+    def flush(self):
+        """Nothing is buffered, so there is nothing to flush."""
+        return None
+
+
+# ========== functions ================
+
+
+def main():
+    """
+    Smoke test: prints a greeting to stdout and does nothing else.
+    :return: None
+    """
+    greeting = "hello world"
+    print(greeting)
+
+
+def sigint_handler(signum, frame):
+    """
+    Signal handler: announces the shutdown on stdout and sets the module-level
+    flag TERMINATE_SERVER to True.
+    :param signum: signal number (not used)
+    :param frame: interrupted stack frame (not used)
+    :return: None
+    """
+    global TERMINATE_SERVER
+    notice = "CTRL-C caught --- Terminating VDP now"
+    print(notice)
+    TERMINATE_SERVER = True
+
+
+def to_json(obj):
+    """
+    Serialises an object to a JSON string with an indentation of 4.
+    Anything json cannot encode natively is replaced by its attribute dictionary (__dict__).
+    :param obj: object to serialise
+    :return: JSON string
+    """
+
+    def _attributes(value):
+        # fallback for values the json encoder does not know
+        return value.__dict__
+
+    return json.dumps(obj, indent=4, default=_attributes)
+
+
+# --------class with functions-----
+
+
+class CollectedH5:
+    def __init__(self, mess_in):
+        """
+        :param mess_in: incoming message; the methods of this class read it by key like a dictionary
+        """
+        # stored by reference, not copied
+        self.message = mess_in
+
+    def get_message_dict(self):
+        """
+        Accessor for the stored message.
+        :return: self.message, i.e. the very object handed to the constructor (no copy)
+        """
+        stored = self.message
+        return stored
+
+    def mk_cd_output_dir_bl(self):
+        """
+        Creates the processing directory below the hard-coded data root pa and changes into it:
+        <pa>/vespa_vdp/<mergeID>/<dataFileName without its last 3 characters>_<YYYYmmdd_HHMMSS>
+        Output dir to MX/Data10/exxx/ ... can only be written as e account.
+        Problems while creating or entering the directory are logged as errors and not raised;
+        in that case the working directory of the process is left unchanged.
+        :return: None
+        """
+        now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        merge_id = str(self.message["mergeID"])
+        # Strip ALL leading slashes: pathlib drops everything left of an absolute operand, so a
+        # remaining "/" would silently move the output outside the data root.
+        # The last three characters are cut off (presumably the ".h5" suffix - TODO confirm).
+        relative_name = str(self.message["dataFileName"]).lstrip("/")[:-3]
+        file_name = Path(relative_name + "_" + now)
+        # if you dont want to use the hard coded path anymore, but the eaccount from the message, use:
+        # eaccount = "e" + str(self.message["eaccount"][1:])
+        # out_path = Path("/sls") / "MX" / "Data10" / eaccount / "vespa_vdp" / merge_id / file_name
+        out_path = pa / "vespa_vdp" / merge_id / file_name
+        logger.info(f"processing folder will be created at : {out_path}")
+        try:
+            out_path.mkdir(parents=True, exist_ok=True)
+        except (OSError, ValueError) as e:
+            logger.error("could not create processing directory {}".format(e))
+            # without the directory there is nothing to change into
+            return None
+
+        # change into output dir; all following files are written relative to it
+        try:
+            os.chdir(out_path)
+        except (OSError, ValueError) as e:
+            logger.error("Could not cd into processing directory: {}".format(e))
+
+        return None
+
+    def mk_cd_output_dir_ra(self):
+        """
+        Creates the processing directory on RA and changes into it:
+        /das/work/<pgroup>/<paccount>/vespa/<YYYY-MM-DD>/<mergeID>/<dataFileName without its last 3 characters>
+        pgroup and paccount are derived from the "eaccount" entry of the message.
+        Problems while creating or entering the directory are logged as errors and not raised;
+        in that case the working directory of the process is left unchanged.
+        :return: None
+
+        """
+        account = self.message["eaccount"]
+        pgroup = "p" + str(account[1:3])
+        paccount = "p" + str(account[1:])
+        merge_id = str(self.message["mergeID"])
+        # Strip leading slashes (as mk_cd_output_dir_bl does): an absolute name would make
+        # pathlib discard the /das/work/... prefix.
+        file_name = str(self.message["dataFileName"]).lstrip("/")[:-3]
+        # only the datetime module is imported in this file, a bare date.today() is a NameError
+        today = str(datetime.date.today())
+        out_path = Path("/das") / "work" / pgroup / paccount / "vespa" / today / merge_id / file_name
+        logger.info(f"processing folder is created at : {out_path}")
+        try:
+            out_path.mkdir(parents=True, exist_ok=True)
+        except (OSError, ValueError) as e:
+            logger.error("could not create processing directory {}".format(e))
+            # without the directory there is nothing to change into
+            return None
+
+        # change into output dir; all following files are written relative to it
+        try:
+            os.chdir(out_path)
+        except (OSError, ValueError) as e:
+            logger.error("Could not cd into processing directory: {}".format(e))
+
+        return None
+
+    def convert_spg_num(self, sg: int):
+        """
+        converts space group number to Hermann-Mauguin notation , 65 space groups included
+        96 --> P43212
+        :param sg: space group number; a string consisting of digits only (e.g. "96") is accepted as well
+        :return: sg_HM space group in H-M notation
+        :raises KeyError: if the number is not one of the 65 space groups in the table
+        """
+        space_groups = {
+            # triclinic / monoclinic
+            1: "P1",
+            3: "P2",
+            4: "P21",
+            5: "C2",
+            # orthorhombic
+            16: "P222",
+            17: "P2221",
+            18: "P21212",
+            19: "P212121",
+            20: "C2221",
+            21: "C222",
+            22: "F222",
+            23: "I222",
+            24: "I212121",
+            # tetragonal
+            75: "P4",
+            76: "P41",
+            77: "P42",
+            78: "P43",
+            79: "I4",
+            80: "I41",
+            89: "P422",
+            90: "P4212",
+            91: "P4122",
+            92: "P41212",
+            93: "P4222",
+            94: "P42212",
+            95: "P4322",
+            96: "P43212",
+            97: "I422",
+            98: "I4122",
+            # trigonal
+            143: "P3",
+            144: "P31",
+            145: "P32",
+            146: "R3",
+            149: "P312",
+            150: "P321",
+            151: "P3112",
+            152: "P3121",
+            153: "P3212",
+            154: "P3221",
+            155: "R32",
+            # hexagonal
+            168: "P6",
+            169: "P61",
+            170: "P65",
+            171: "P62",
+            172: "P64",
+            173: "P63",
+            177: "P622",
+            178: "P6122",
+            179: "P6522",
+            180: "P6222",
+            181: "P6422",
+            182: "P6322",
+            # cubic
+            195: "P23",
+            196: "F23",
+            197: "I23",
+            198: "P213",
+            199: "I213",
+            207: "P432",
+            208: "P4232",
+            209: "F432",
+            210: "F4132",
+            211: "I432",
+            212: "P4332",
+            213: "P4132",
+            214: "I4132",
+        }
+
+        key = sg
+        # a number that arrives as text (e.g. from a JSON message) is looked up by its integer value;
+        # any other string falls through to the lookup and raises KeyError as before
+        if isinstance(sg, str) and sg.strip().isdigit():
+            key = int(sg)
+
+        return space_groups[key]
+
+    def get_spaceg_params(self, sg: str):
+        """
+        function to provide the parameters for the cell file besides the actual unit cell constants
+        :param sg: space group in HM notation as a string without blanks, e.g. "P43212";
+                   the first character is taken as the centering
+        :return: lattice, unique axis, centering - e.g. ("L_TETRAGONAL", "c", "P").
+                 The unique axis is "*" if the lattice has none. A symbol that is not understood
+                 is reported on stdout and returned as ("L_TRICLINIC", "*", centering).
+        REMARK: probably not the optimal way to handle things. Maybe no conversion from the space group number needed,
+        rather direct conversion from the number to the lattice. can be improved
+        """
+        cen = sg[0]
+        # symbol without the centering letter; slices (not indices) keep short input from raising
+        symbol = sg[1:]
+        if symbol[:1] == "1":
+            latt = "L_TRICLINIC"
+            ua = "*"
+        elif (
+            symbol[:2] == "23"  # P23, F23, I23
+            or symbol[:3] == "213"  # P213, I213
+            or symbol[2:4] == "32"  # P4232, F4132, P4332, P4132, I4132
+            # exact match only: the tetragonal groups P4322 and P43212 also START with "432"
+            or symbol == "432"  # P432, F432, I432
+        ):
+            latt = "L_CUBIC"
+            ua = "*"
+        elif symbol[:3] in ("222", "212"):
+            latt = "L_ORTHORHOMBIC"
+            ua = "*"
+        elif symbol[:1] == "2" and len(sg) < 4:
+            latt = "L_MONOCLINIC"
+            ua = "b"
+        elif symbol[:1] == "4":
+            latt = "L_TETRAGONAL"
+            ua = "c"
+        elif symbol[:1] == "6":
+            latt = "L_HEXAGONAL"
+            ua = "c"
+        elif symbol[:1] == "3":
+            if cen == "P":
+                latt = "L_HEXAGONAL"
+                ua = "c"
+            else:
+                latt = "L_RHOMBOHEDRAL"
+                ua = "*"
+        else:
+            print("Couldn't understand '{}'\n".format(sg))
+            latt = "L_TRICLINIC"
+            # callers concatenate the unique axis into a string, so it must never be None
+            ua = "*"
+
+        return latt, ua, cen
+
+    def create_cell_file(self):
+        """
+        Creates the CrystFEL unit cell file <mergeID>.cell in the current working directory
+        (the processing folder, once one of the mk_cd_output_dir_* methods has been called).
+        Lattice type, centering and unique axis are derived from "spaceGroupNumber",
+        the cell constants are taken from "unitCell" of the message.
+        Everything is read from the message before the file is opened, so an unknown space
+        group or a missing key does not leave an empty or truncated file behind.
+        :return: None
+        """
+        merge_id = str(self.message["mergeID"])
+        # get lattice params
+        space_group = self.convert_spg_num(self.message["spaceGroupNumber"])
+        lat_type, unique_a, cent = self.get_spaceg_params(space_group)
+        unit_cell = self.message["unitCell"]
+
+        content = [
+            "CrystFEL unit cell file version 1.0\n\n",
+            # "L_TETRAGONAL" -> "tetragonal"
+            "lattice_type = " + lat_type[2:].lower() + "\n",
+            "centering = " + cent + "\n",
+        ]
+        # "*" (or no value at all) means the lattice has no unique axis: only the blank lines are written
+        if unique_a is None or unique_a == "*":
+            content.append("\n\n")
+        else:
+            content.append("unique_axis = " + unique_a + "\n\n")
+
+        # unit cell constants: lengths in Angstrom, angles in degrees
+        cell_entries = (
+            ("a", "a", "A"),
+            ("b", "b", "A"),
+            ("c", "c", "A"),
+            ("al", "alpha", "deg"),
+            ("be", "beta", "deg"),
+            ("ga", "gamma", "deg"),
+        )
+        for label, key, unit in cell_entries:
+            content.append(label + " = " + str(unit_cell[key]) + " " + unit + "\n")
+
+        # the context manager closes the file even if writing fails
+        with open(merge_id + ".cell", "w") as f:
+            f.writelines(content)
+        return None
+
+    def create_geom_from_master(self):
+        """
+        generates the geom file <mergeID>_jf.geom for processing with CrystFEL from the input message
+        and writes it into the current working directory.
+        All values are read from the message before the file is opened, so a missing key does
+        not leave a truncated file behind.
+        :param self:
+        :return: None
+        """
+        msg = self.message
+        merge_id = str(msg["mergeID"])
+
+        # The pixel mask is read from the master file, addressed below the data root pa.
+        master_file = str(msg["masterFileName"])
+        separator = "" if master_file.startswith("/") else "/"
+        mask_file = str(pa.resolve()) + separator + master_file
+
+        content = [
+            "; PSI JF9M \n",
+            "\n",
+            "\n",
+            "; Camera length (in m) and photon energy (eV) \n",
+            # mm -> m
+            "clen = " + str(msg["detectorDistance_mm"] * 0.001) + "\n",
+            # keV -> eV. NOTE(review): the key really is spelled "enery_kev"; it has to match the
+            # sender of the message, so it is left untouched here.
+            "photon_energy = " + str(msg["enery_kev"] * 1000) + "\n",
+            "flag_lessthan = " + str(msg["underload"]) + "\n",
+            "\n",
+            "adu_per_eV = 0.00008065\n",
+            # NOTE(review): res (pixels per metre) is fixed at the value for 75 micron pixels;
+            # the pixel size from the message only shows up in the trailing comment.
+            "res = 13333.3   ; " + str(msg["pixelSize_um"]) + " micron pixel size\n",
+            "\n",
+            "rigid_group_0 = 0 \n",
+            "rigid_group_collection_0 = 0 \n",
+            "\n",
+            "; These lines describe the data layout for the JF native multi-event files \n",
+            "dim0 = % \n",
+            "dim1 = ss \n",
+            "dim2 = fs \n",
+            "data = /entry/data/data \n",
+            "\n",
+            "\n",
+            "mask_file =" + mask_file + "\n",
+            "mask = /entry/instrument/detector/pixel_mask \n",
+            "mask_good = 0x0 \n",
+            "mask_bad = 0xFFFFFFFF\n",
+            "\n",
+            "; corner_{x,y} set the position of the corner of the detector (in pixels) \n",
+            "; relative to the beam \n",
+            "\n",
+            # a single panel "0" covering the whole detector
+            "0/min_fs = 0 \n",
+            "0/min_ss = 0 \n",
+            "0/max_fs =" + str(msg["detectorWidth_pxl"] - 1) + "\n",
+            "0/max_ss =" + str(msg["detectorHeight_pxl"] - 1) + "\n",
+            "0/corner_x = -" + str(msg["beamCenterX_pxl"]) + "\n",
+            "0/corner_y = -" + str(msg["beamCenterY_pxl"]) + "\n",
+            "0/fs = x \n",
+            "0/ss = y \n",
+            "\n",
+        ]
+
+        # write to mergeid_jf.geom file in processing folder; closed even if writing fails
+        with open(merge_id + "_jf.geom", "w") as f2:
+            f2.writelines(content)
+        return None
+
+    def create_list_file(self):
+        """
+        Function to generate the list file <mergeID>.list with the path of the input H5 file.
+        The path is the resolved data root pa followed by "dataFileName" of the message; the file
+        is written into the current working directory and holds exactly that one path, without
+        a trailing newline.
+        :return:None
+        """
+        merge_id = str(self.message["mergeID"])
+        data_file = str(self.message["dataFileName"])
+
+        # add the separator only if the file name does not bring its own leading slash
+        separator = "" if data_file.startswith("/") else "/"
+        entry = str(pa.resolve()) + separator + data_file
+
+        # write to list file in output folder; closed even if writing fails
+        with open(merge_id + ".list", "w") as f:
+            f.write(entry)
+
+        return None
+
+    def create_slurm_script(self):
+        """
+        Creates the SLURM input file "run_SLURM" in the current working directory with:
+        SLURM parameters ( CPUS , nodes, etc)
+        Loading of Modules for indexing with Crystfel
+        Actual indexing job that is executed (indexamajig), its output redirected to <data file>.log
+        Execution of a python script (results.py) that gets the results after processing
+        Every value taken from the message is shell-quoted before it is placed on a command line,
+        so blanks or shell metacharacters cannot alter the script. Plain values (letters, digits
+        and _ . - / etc.) are written unchanged.
+        TODO: So far only a few parameters for crystFEL are sent with the message. Additional parameters might be useful to insert into the message.
+        :return: None
+        """
+        import shlex
+
+        # get data file name without any preceding paths and without its last 3 characters
+        data_file = str(self.message["dataFileName"])
+        data_file_name = data_file[data_file.rfind("/") + 1 : -3]
+        merge_id = str(self.message["mergeID"])
+
+        # file names used on the command lines
+        cell_file = shlex.quote(merge_id + ".cell")
+        list_file = shlex.quote(merge_id + ".list")
+        geom_file = shlex.quote(merge_id + "_jf.geom")
+        stream_file = shlex.quote(data_file_name + ".stream")
+        log_file = shlex.quote(data_file_name + ".log")
+
+        # peak search parameters (the key is spelled "crystfelTreshold" in the message)
+        threshold = str(int(self.message["crystfelTreshold"]))
+        min_snr = shlex.quote(str(self.message["crystfelMinSNR"]))
+        min_pix_count = shlex.quote(str(self.message["crystfelMinPixCount"]))
+        multi_flag = " --multi" if self.message["crystfelMultiCrystal"] else " --no-multi"
+
+        content = [
+            "#!/bin/bash \n",
+            "#SBATCH --job-name=index \n",
+            # on RA additionally: "#SBATCH --partition=hour \n"
+            "#SBATCH --cpus-per-task=32 \n",
+            "#========================================",
+            "\n\n",
+            "# Load modules \n",
+            "module purge \n",
+            "module use MX unstable \n",
+            # TODO ask Leo to install libs on CN for crystfel/0.10.2
+            "module load crystfel/0.10.1-2 xgandalf/2018.01 HDF5_bitshuffle/2018.05 HDF5_LZ4/2018.05 gcc/4.8.5 hdf5_serial/1.10.3 \n",
+            "\n\n",
+            "# Actual Indexing command for crystFEL \n",
+            " indexamajig --peaks=peakfinder8 --indexing=xgandalf --xgandalf-fast-execution --threshold="
+            + threshold
+            + " --int-radius=2,3,5 -p "
+            + cell_file
+            + " --min-snr="
+            + min_snr
+            + " --min-peaks=6 --min-pix-count="
+            + min_pix_count
+            + " -i "
+            + list_file
+            + " -o "
+            + stream_file
+            + " -g "
+            + geom_file
+            + "  -j `nproc` --min-res=75 ",
+            # stdout and stderr of indexamajig go to the log file
+            multi_flag + ">& " + log_file + "\n",
+            "\n\n",
+            # for now loads a fixed conda env. needs to be changed at the beamline
+            # argument is the streamfile that is created by indexing
+            "# Executing results.py to get results and send to Database \n",
+            "module load anaconda \n",
+            "conda activate /sls/MX/applications/conda_envs/vdp \n",
+            "python /sls/MX/applications/git/vdp/src/results.py " + stream_file + " ",
+        ]
+
+        # the context manager closes the file even if writing fails
+        with open("run_SLURM", "w") as f:
+            f.writelines(content)
+
+        return None
+
+    def submit_job_to_slurm(self):
+        """
+        submit job to SLURM (on RA or 6S/6D nodes)
+        needs the slurm input file "run_SLURM" in the current working directory.
+        On success the last word of the sbatch output is logged as the job number and stored
+        in the message under "SlurmJobID". A failed submission (sbatch cannot be started,
+        returns a non-zero exit code or prints nothing) is logged as an error and not raised;
+        the message is left unchanged in that case.
+        :return: None
+        """
+        # some info: sub.run needs either a list with the different args or needs the full command as string,
+        # but then it also needs the arg shell=True!
+        try:
+            slurm_out = sub.run(["sbatch", "run_SLURM"], capture_output=True)
+        except (OSError, sub.SubprocessError) as e:
+            # e.g. sbatch is not installed on this host
+            logger.error("Could not submit SLURM job: {}".format(e))
+            return None
+
+        if slurm_out.returncode != 0:
+            # report what sbatch said instead of failing later on its empty stdout
+            reason = slurm_out.stderr.decode(errors="replace").strip()
+            logger.error("Could not submit SLURM job: {}".format(reason))
+            return None
+
+        txt = slurm_out.stdout.decode(errors="replace").split()
+        if not txt:
+            logger.error("Could not submit SLURM job: {}".format("sbatch produced no output"))
+            return None
+
+        # the slurm job number is the last word of the sbatch output
+        logger.info(f"submitted batch job number: {txt[-1]}")
+        self.message["SlurmJobID"] = str(txt[-1])
+
+        return None
+
+    def create_msg_file(self):
+        """
+        writes message as json to the file "msg.json" in the current working directory.
+        Can be retrieved by results.py to send the message to the database.
+        The message is serialised before the file is opened, so a message that cannot be
+        serialised does not leave an empty file behind.
+        :return:None
+        """
+        # tmp = json.dumps(self.message, indent=4) #RA
+        tmp = to_json(self.message)
+        # the context manager closes the file even if writing fails
+        with open("msg.json", "w") as f:
+            f.write(tmp)
+
+        return None
+
+
+if __name__ == "__main__":
+    # main()
+
+    logger.info("CLARA starting up")
+    # redirect stdout to logging file: everything printed inside the with block is logged
+    stream = StreamToLogger()
+    with contextlib.redirect_stdout(stream):
+        # connection settings for receiving messages
+        vdp_server = "sf-broker-01.psi.ch"
+        vdp_port = 61613
+        vdp_inqueue = "/queue/test_in"
+        logger.info("In_queue is: {}", vdp_inqueue)
+        vdp_outqueue = "not_relevant_atm"
+        vdp_listener = receive_msg.MyListener(vdp_server, vdp_port, vdp_inqueue, vdp_outqueue)
+        vdp_listener.connect()
+        logger.info("connected to in_queue")
+
+        # set to True by sigint_handler, which ends the loop below
+        TERMINATE_SERVER = False
+
+        logger.info("\nWaiting for SIGINT to stop...")
+        signal.signal(signal.SIGINT, sigint_handler)
+
+        try:
+            while not TERMINATE_SERVER:
+                if vdp_listener.incoming_messages_queue.empty():
+                    # nothing to do: poll again in 0.1 s
+                    time.sleep(0.1)
+                else:
+                    # receives message from queue. function from python package queue. same as empty.
+                    logger.info("received message from in_queue, started processing...")
+                    message = vdp_listener.incoming_messages_queue.get()
+                    # Do something with the message
+                    logger.info(f"message is: {message}")
+                    mess_inp = CollectedH5(message)
+                    # The processing steps are switched off at the moment; the message is
+                    # acknowledged below without being processed.
+                    #mess_inp.mk_cd_output_dir_bl()
+                    #logger.info("subfolder created")
+                    #mess_inp.create_cell_file()
+                    #logger.info("cell file created")
+                    #mess_inp.create_geom_from_master()
+                    #logger.info("geom file created")
+                    #mess_inp.create_list_file()
+                    #logger.info("list file created")
+                    #mess_inp.create_slurm_script()
+                    #logger.info("slurm script created")
+                    #mess_inp.submit_job_to_slurm()
+                    #logger.info("job submitted to SLURM")
+                    #mess_inp.create_msg_file()
+                    #logger.info("message file created")
+
+                    vdp_listener.acknowledge(message.headers["ack"])
+                    logger.info("message was acknowledged")
+                    logger.info("waiting for the next message")
+        finally:
+            # close the connection on a regular stop as well as when the loop is left by an exception
+            vdp_listener.disconnect()