add all files

This commit is contained in:
2024-05-02 16:39:56 +02:00
parent 1f894ea6e7
commit fb798fc038
12 changed files with 1579 additions and 1 deletions

581
src/clara.py Normal file
View File

@@ -0,0 +1,581 @@
# Author: Assmann G. (2023)
import contextlib
import datetime
import json
import os
import signal
import subprocess as sub
import sys
import time
from pathlib import Path
from loguru import logger
import receive_msg
# Log file location. The %Y%m fields are filled in once, when this module is
# loaded, so the file name carries the year and month of start-up.
LOG_FILENAME = time.strftime("/sf/cristallina/applications/mx/clara_tools/log/clara_%Y%m.log")
# Register the log file as a loguru sink (INFO level, rotation setting "100MB").
logger.add(LOG_FILENAME, rotation="100MB", level="INFO")
# Hard-coded data root for e20233, as VDP is only used by e20233 so far.
# To switch to the e-account carried in the message instead, see the
# commented-out lines in CollectedH5.mk_cd_output_dir_bl.
pa = Path("/sls/MX/Data10/e20233")
class StreamToLogger:
    """
    Minimal file-like object that forwards everything written to it to the
    loguru ``logger``, one log record per line of text.
    """

    def __init__(self, level="INFO"):
        """
        :param level: loguru level name used for every forwarded line
        """
        self._level = level

    def write(self, buffer):
        """
        Log each line of ``buffer`` at the configured level.
        Trailing whitespace is removed from the buffer and from every line;
        a buffer that is empty after stripping produces no log record.
        :param buffer: text that was written to the stream
        :return: None
        """
        trimmed = buffer.rstrip()
        for entry in trimmed.splitlines():
            # depth=1 is forwarded to loguru's opt(); presumably it attributes the
            # record to the caller of write() instead of this method -- see loguru docs
            logger.opt(depth=1).log(self._level, entry.rstrip())

    def flush(self):
        """
        Nothing is buffered here, so flushing is a no-op.
        :return: None
        """
        return None
# ========== functions ================
def main():
    """
    Smoke-test entry point: prints a greeting to stdout and does nothing else.
    :return: nothing
    """
    print("hello world")
def sigint_handler(signum, frame):
    """
    Signal handler (signature as required by ``signal.signal``): announces the
    shutdown on stdout and sets the module-level flag TERMINATE_SERVER to True.
    :param signum: number of the received signal (not used)
    :param frame: stack frame that was interrupted (not used)
    :return: None
    """
    # the flag lives at module level, so it has to be declared global to be rebound
    global TERMINATE_SERVER
    print("CTRL-C caught --- Terminating VDP now")
    TERMINATE_SERVER = True
def to_json(obj):
    """
    Serialise ``obj`` to a JSON string with an indentation of 4.
    Values that json cannot encode natively are replaced by their ``__dict__``.
    :param obj: object to serialise (e.g. a dict or a class instance)
    :return: JSON formatted string
    """

    def _attributes_of(item):
        # fallback for json.dumps: represent an arbitrary object by its attribute dict
        return item.__dict__

    return json.dumps(obj, indent=4, default=_attributes_of)
# --------class with functions-----
class CollectedH5:
def __init__(self, mess_in):
# dictionary of the json message
self.message = mess_in
def get_message_dict(self):
"""
returns dictionary of the message (json)
:return: self.message
"""
return self.message
def mk_cd_output_dir_bl(self):
"""
mk putput dir with pathlib and change into this dir.
Output dir to MX/Data10/exxx/ ... can only be written as e account
:return: None
"""
# generate output dir
now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
pgroup = "p" + str(self.message["eaccount"][1:3])
eaccount = "e" + str(self.message["eaccount"][1:])
merge_id = str(self.message["mergeID"])
# if first char is a slash, get rid of it
if (str(self.message["dataFileName"][0])) == "/":
file_name = Path(str(self.message["dataFileName"][1:-3]) + "_" + str(now))
# if not use the full path
else:
file_name = Path(str(self.message["dataFileName"][:-3]) + "_" + str(now))
# today = str(date.today())
# if you dont want to use the hard coded path anymore, but the eaccount from the message, uncomment:
# p = Path("/sls")
# out_path = p / "MX" / "Data10" / eaccount / "vespa_vdp" / merge_id / file_name
# TODO add random number or second to processing folder
out_path = pa / "vespa_vdp" / merge_id / file_name
logger.info(f"processing folder will be created at : {out_path}")
try:
out_path.mkdir(parents=True, exist_ok=True)
except Exception as e:
logger.info("could not create processing directory {}".format(e))
# change into output dir
try:
os.chdir(out_path)
except Exception as e:
logger.info("Could not cd into processing directory: {}".format(e))
return None
def mk_cd_output_dir_ra(self):
"""
mk putput dir with pathlib and change into this dir.
:return: None
"""
# generate output dir
pgroup = "p" + str(self.message["eaccount"][1:3])
paccount = "p" + str(self.message["eaccount"][1:])
merge_id = str(self.message["mergeID"])
file_name = str(self.message["dataFileName"][:-3])
today = str(date.today())
p = Path("/das")
out_path = p / "work" / pgroup / paccount / "vespa" / today / merge_id / file_name
logger.info(f"processing folder is created at : {out_path}")
try:
out_path.mkdir(parents=True, exist_ok=True)
except Exception as e:
logger.info("could not create processing directory {}".format(e))
# change into output dir
try:
os.chdir(out_path)
except Exception as e:
logger.info("Could not cd into processing directory: {}".format(e))
return None
def convert_spg_num(self, sg: int):
"""
converts space group number to Hermann-Mauguin notation , 65 space groups included
96 --> P43212
:param sg: space group number
:return: sg_HM space group in H-M notation
"""
space_groups = {
1: "P1",
3: "P2",
4: "P21",
5: "C2",
16: "P222",
17: "P2221",
18: "P2122",
19: "P21212",
20: "C2221",
21: "C222",
22: "F222",
23: "I222",
24: "I212121",
75: "P4",
76: "P41",
77: "P42",
78: "P43",
79: "I4",
80: "I41",
89: "P422",
90: "P4212",
91: "P4122",
92: "P41212",
93: "P4222",
94: "P42212",
95: "P4322",
96: "P43212",
97: "I422",
98: "I4122",
143: "P3",
144: "P31",
145: "P32",
146: "R3",
149: "P312",
150: "P321",
151: "P3112",
152: "P3121",
153: "P3212",
154: "P3221",
155: "R32",
168: "P6",
169: "P61",
170: "P65",
171: "P62",
172: "P64",
173: "P63",
177: "P622",
178: "P6122",
179: "P6522",
180: "P6222",
181: "P6422",
182: "P6322",
195: "P23",
196: "F23",
197: "I23",
198: "P213",
199: "I213",
207: "P432",
208: "P4232",
209: "F432",
210: "F4132",
211: "I432",
212: "P4332",
213: "P4132",
214: "I4132",
}
return space_groups[sg]
def get_spaceg_params(self, sg: int):
"""
function to provide the pararmeters for the cell file besides the actual unit cell constants
:param sg: space group in HM notation as a string
:return: lattice,unique axis, centering
REMARK: probably not the optimal way to handle things. Maybe no conversion from the space group number needed,
rather direct conversion from the number to the lattice. can be improved
"""
latt = None
ua = None
cen = sg[0]
print(len(sg))
if sg[1] == "1":
latt = "L_TRICLINIC"
ua = "*"
elif sg[1:3] == "23":
latt = "L_CUBIC"
ua = "*"
elif sg[1:4] == "213":
latt = "L_CUBIC"
ua = "*"
elif sg[3:5] == "32":
latt = "L_CUBIC"
ua = "*"
elif sg[1:4] == "432":
latt = "L_CUBIC"
ua = "*"
elif sg[1:4] == "222":
latt = "L_ORTHORHOMBIC"
ua = "*"
elif sg[1:4] == "212":
latt = "L_ORTHORHOMBIC"
ua = "*"
elif sg[1] == "2" and len(sg) < 4:
latt = "L_MONOCLINIC"
ua = "b"
elif sg[1] == "4":
latt = "L_TETRAGONAL"
ua = "c"
elif sg[1] == "6":
latt = "L_HEXAGONAL"
ua = "c"
elif sg[1] == "3":
if sg[0] == "P":
latt = "L_HEXAGONAL"
ua = "c"
else:
latt = "L_RHOMBOHEDRAL"
ua = "*"
else:
print("Couldn't understand '{}'\n".format(sg))
latt = "L_TRICLINIC"
return latt, ua, cen
def create_cell_file(self):
"""
Creates cell file with the name mergingID.cell and writes it into the
processing folder with the corresponding mergeID for processing
:return: -
"""
merge_id = str(self.message["mergeID"])
f = open(merge_id + ".cell", "w")
# start writing the cell file
f.write("CrystFEL unit cell file version 1.0\n\n")
# get lattice params and write to file
space_group = self.convert_spg_num(self.message["spaceGroupNumber"])
lat_type, unique_a, cent = self.get_spaceg_params(space_group)
f.write("lattice_type = " + lat_type[2:].lower() + "\n")
f.write("centering = " + cent + "\n")
if unique_a != "*":
f.write("unique_axis = " + unique_a + "\n\n")
else:
f.write("\n\n")
# print unit cell constants
f.write("a = " + str(self.message["unitCell"]["a"]) + " A\n")
f.write("b = " + str(self.message["unitCell"]["b"]) + " A\n")
f.write("c = " + str(self.message["unitCell"]["c"]) + " A\n")
f.write("al = " + str(self.message["unitCell"]["alpha"]) + " deg\n")
f.write("be = " + str(self.message["unitCell"]["beta"]) + " deg\n")
f.write("ga = " + str(self.message["unitCell"]["gamma"]) + " deg\n")
f.close()
return None
def create_geom_from_master(self):
"""
generates the geom file from the input message for processing wih Crystfel .
:param self:
:return: none
"""
merge_id = str(self.message["mergeID"])
# write to mergeid_jf.geom file in processing folder
f2 = open(merge_id + "_jf.geom", "w")
f2.write("; PSI JF9M \n")
f2.write("\n")
f2.write("\n")
f2.write("; Camera length (in m) and photon energy (eV) \n")
f2.write("clen = " + str(self.message["detectorDistance_mm"] * 0.001) + "\n")
f2.write("photon_energy = " + str(self.message["enery_kev"] * 1000) + "\n")
f2.write("flag_lessthan = " + str(self.message["underload"]) + "\n")
f2.write("\n")
f2.write("adu_per_eV = 0.00008065\n")
# f2.write("adu_per_photon = 1\n")
f2.write("res = 13333.3 ; " + str(self.message["pixelSize_um"]) + " micron pixel size\n")
f2.write("\n")
f2.write("rigid_group_0 = 0 \n")
f2.write("rigid_group_collection_0 = 0 \n")
f2.write("\n")
f2.write("; These lines describe the data layout for the JF native multi-event files \n")
f2.write("dim0 = % \n")
f2.write("dim1 = ss \n")
f2.write("dim2 = fs \n")
f2.write("data = /entry/data/data \n")
f2.write("\n")
f2.write("\n")
if str(self.message["masterFileName"])[0] == "/":
f2.write("mask_file =" + str(pa.resolve()) + self.message["masterFileName"] + "\n")
else:
f2.write("mask_file =" + str(pa.resolve()) + "/" + self.message["masterFileName"] + "\n")
f2.write("mask = /entry/instrument/detector/pixel_mask \n")
f2.write("mask_good = 0x0 \n")
f2.write("mask_bad = 0xFFFFFFFF\n")
f2.write("\n")
f2.write("; corner_{x,y} set the position of the corner of the detector (in pixels) \n")
f2.write("; relative to the beam \n")
f2.write("\n")
f2.write("0/min_fs = 0 \n")
f2.write("0/min_ss = 0 \n")
f2.write("0/max_fs =" + str(self.message["detectorWidth_pxl"] - 1) + "\n")
f2.write("0/max_ss =" + str(self.message["detectorHeight_pxl"] - 1) + "\n")
f2.write("0/corner_x = -" + str(self.message["beamCenterX_pxl"]) + "\n")
f2.write("0/corner_y = -" + str(self.message["beamCenterY_pxl"]) + "\n")
f2.write("0/fs = x \n")
f2.write("0/ss = y \n")
f2.write("\n")
# f2.write("badregionA/min_fs = 774 \n")
# f2.write("badregionA/max_fs = 1032 \n")
# f2.write("badregionA/min_ss = 0 \n")
# f2.write("badregionA/max_ss = 256 \n")
# f2.write("\n")
# f2.write("badregionB/min_fs = 256 \n")
# f2.write("badregionB/max_fs = 774 \n")
# f2.write("badregionB/min_ss = 1906 \n")
# f2.write("badregionB/max_ss = 2162 \n")
# f2.write("\n")
f2.close()
return None
def create_list_file(self):
"""
Function to generate a list file with the path of the input H5 file
:return:None
"""
merge_id = str(self.message["mergeID"])
# write to cell file in output folder
f = open(merge_id + ".list", "w")
print(pa.resolve())
if (str(self.message["dataFileName"][0])) == "/":
f.write(str(pa.resolve()) + str(self.message["dataFileName"]))
else:
f.write(str(pa.resolve()) + "/" + str(self.message["dataFileName"]))
"""
if count == 0:
print("count 0")
f.write(str(self.message["filesystemPath"]) + str(self.message["dataFileName"]))
# if count =1 and at beginning
elif count == 1 and (str(self.message["dataFileName"][0])) == "/":
print("count 1 and first char")
# remove first char
f.write(str(self.message["filesystemPath"]) + str(self.message["dataFileName"][1:]))
# else if count >0 and not at beginning
elif count > 0:
print("count more and middle")
# get position of last "/" and remove until then
last_pos = self.message["dataFileName"].rfind("/")
print("last_pos", last_pos)
f.write(str(self.message["filesystemPath"]) + str(self.message["dataFileName"][(last_pos + 1) :]))
"""
f.close()
return None
def create_slurm_script(self):
"""
Creates the input SLURM file with the following info:
SLURM parameters ( CPUS , nodes, etc)
Output Log files
Input parameters for indexing job
Loading of Modules for indexing with Crystfel
Actual indexing job that is executed (indexamajig)
Also executing a python script that gets the results after processing
SLURM Outputs are redirected to the logfile.
TODO: So far only a few parameters for crystFEL are sent with the message. Additional parameters might be useful to insert into the message.
:return: None
"""
# get dat file name without any preceding paths..
last_pos = str(self.message["dataFileName"]).rfind("/")
data_file_name = str(self.message["dataFileName"][(last_pos + 1) : -3])
# write file
f = open("run_SLURM", "w")
f.write("#!/bin/bash \n")
f.write("#SBATCH --job-name=index \n")
# uncomment if on RA
# f.write("#SBATCH --partition=hour \n")
f.write("#SBATCH --cpus-per-task=32 \n")
# f.write("#SBATCH --output=" + LOG_FILENAME + "\n")
# f.write("#SBATCH --open-mode=append \n")
f.write("#========================================")
f.write("\n\n")
f.write("# Load modules \n")
f.write("module purge \n")
f.write("module use MX unstable \n")
# f.write("module load crystfel/0.10.2 \n")
# TODO ask Leo to install libs on CN for crystfel/0.10.2
f.write(
"module load crystfel/0.10.1-2 xgandalf/2018.01 HDF5_bitshuffle/2018.05 HDF5_LZ4/2018.05 gcc/4.8.5 hdf5_serial/1.10.3 \n"
)
f.write("\n\n")
f.write("# Actual Indexing command for crystFEL \n")
f.write(
" indexamajig --peaks=peakfinder8 --indexing=xgandalf --xgandalf-fast-execution --threshold="
+ str(int(self.message["crystfelTreshold"]))
+ " --int-radius=2,3,5 -p "
+ str(self.message["mergeID"])
+ ".cell --min-snr="
+ str(self.message["crystfelMinSNR"])
+ " --min-peaks=6 --min-pix-count="
+ str(self.message["crystfelMinPixCount"])
+ " -i "
+ str(self.message["mergeID"])
+ ".list -o "
+ data_file_name
+ ".stream -g "
+ str(self.message["mergeID"])
+ "_jf.geom "
+ " -j `nproc` --min-res=75 "
)
if self.message["crystfelMultiCrystal"]:
f.write(" --multi" + ">& " + data_file_name + ".log\n")
else:
f.write(" --no-multi" + ">& " + data_file_name + ".log\n")
# Execute the a python script to get the results
# for now loads my conda env. needs to be changed at the beamline
# argument is the streamfile that is created by indexing
f.write("\n\n")
f.write("# Executing results.py to get results and send to Database \n")
f.write(
"module load anaconda \n"
+ "conda activate /sls/MX/applications/conda_envs/vdp \n"
+ "python /sls/MX/applications/git/vdp/src/results.py "
+ data_file_name
+ ".stream "
# + data_file_name
# + ".log "
)
f.close()
return None
def submit_job_to_slurm(self):
"""
submit job to SLURM (on RA or 6S/6D nodes)
needs the slurm input file.
1.) Go to processing folder
2.) execute processing command
:return: None
"""
# some info: sub.run needs either a list with the different args or needs the full command as string,
# but then it also needs the arg shell=True!
# EASY way without grepping slurm job id
# sub.run(["sbatch", "run_SLURM"])
try:
slurm_out = sub.run(["sbatch", "run_SLURM"], capture_output=True)
txt = slurm_out.stdout.decode().split()
# grep the slurm number
logger.info(f"submitted batch job number: {txt[-1]}")
self.message["SlurmJobID"] = str(txt[-1])
except Exception as e:
logger.info("Could not submit SLURM job: {}".format(e))
return None
def create_msg_file(self):
"""
writes message to message file in folder. Can be retrieved by results.py to send the message to the database
:return:None
"""
# write message as json file to folder
f = open("msg.json", "w")
# tmp = json.dumps(self.message, indent=4) #RA
tmp = to_json(self.message)
f.write(tmp)
f.close()
return None
if __name__ == "__main__":
    # Script entry point: connect to the message broker and poll the incoming
    # queue until SIGINT (CTRL-C) sets TERMINATE_SERVER.
    # main()
    logger.info("CLARA starting up")
    # redirect stdout to logging file
    stream = StreamToLogger()
    with contextlib.redirect_stdout(stream):
        # potential message receiving:
        vdp_server = "sf-broker-01.psi.ch"
        vdp_port = 61613
        vdp_inqueue = "/queue/test_in"
        logger.info("In_queue is: {}", vdp_inqueue)
        vdp_outqueue = "not_relevant_atm"
        vdp_listener = receive_msg.MyListener(vdp_server, vdp_port, vdp_inqueue, vdp_outqueue)
        vdp_listener.connect()
        logger.info("connected to in_queue")

        # module-level flag, set to True by sigint_handler
        TERMINATE_SERVER = False
        logger.info("\nWaiting for SIGINT to stop...")
        signal.signal(signal.SIGINT, sigint_handler)

        try:
            while not TERMINATE_SERVER:
                if vdp_listener.incoming_messages_queue.empty():
                    # nothing to do: poll again in 0.1 s
                    time.sleep(0.1)
                    continue
                # receives message from queue. function from python package queue. same as empty.
                logger.info("received message from in_queue, started processing...")
                message = vdp_listener.incoming_messages_queue.get()
                # Do something with the message
                logger.info(f"message is: {message}")
                mess_inp = CollectedH5(message)
                # The processing steps are currently disabled; the message is only
                # received and acknowledged. To enable them, uncomment in this order:
                # mess_inp.mk_cd_output_dir_bl()
                # logger.info("subfolder created")
                # mess_inp.create_cell_file()
                # logger.info("cell file created")
                # mess_inp.create_geom_from_master()
                # logger.info("geom file created")
                # mess_inp.create_list_file()
                # logger.info("list file created")
                # mess_inp.create_slurm_script()
                # logger.info("slurm script created")
                # mess_inp.submit_job_to_slurm()
                # logger.info("job submitted to SLURM")
                # mess_inp.create_msg_file()
                # logger.info("message file created")
                vdp_listener.acknowledge(message.headers["ack"])
                logger.info("message was acknowledged")
                logger.info("waiting for the next message")
        finally:
            # disconnect also when the loop is left through an exception
            vdp_listener.disconnect()