Files
sf_daq_broker/client/check.py
T

225 lines
11 KiB
Python

import argparse
#import datetime
import json
import os
import h5py
def run():
    """Command-line entry point: run the consistency check and print the outcome.

    Parses ``-r/--run_file`` and ``--frequency_reduction_factor`` from the
    command line, forwards them to ``check_consistency`` and prints a summary
    line followed by either the success message or one line per problem.
    """
    cli = argparse.ArgumentParser(description="check consistency of produced files")
    cli.add_argument(
        "-r",
        "--run_file",
        default=None,
        help="JSON file from the retrieve process",
    )
    cli.add_argument(
        "--frequency_reduction_factor",
        type=int,
        default=0,
        help="beam rate, default 1 means 100Hz (2: 50Hz, 4: 25Hz....) (overwrites one from json file)",
    )
    options = cli.parse_args()

    outcome = check_consistency(
        run_file=options.run_file,
        rate_multiplicator=options.frequency_reduction_factor,
    )

    passed = outcome["check"]
    print(f"Result of consistency check (summary) : {passed}")

    if not passed:
        # On failure "reason" is a list with one entry per detected problem.
        for item in outcome["reason"]:
            print(f" Reason : {item}")
        return

    # On success "reason" is a single summary string.
    summary = outcome["reason"]
    print(f" OK : {summary}")
def check_consistency(run_file=None, rate_multiplicator=0):
    """Check that the files produced for one run contain the requested pulse ids.

    The run description is read from the JSON file ``run_file``. The keys
    ``start_pulseid``, ``stop_pulseid``, ``pgroup``, ``beamline`` and
    ``run_number`` are read unconditionally (a missing one raises ``KeyError``).
    The keys ``rate_multiplicator``, ``directory_name``, ``channels_list``,
    ``camera_list`` and ``detectors`` are optional; each of the last three
    enables the check of the corresponding ``run_NNNNNN.<name>.h5`` file(s).

    Args:
        run_file: path to the JSON run file.
        rate_multiplicator: only pulse ids divisible by this value are
            expected. ``0`` means "take the value from the JSON file, or 1
            if the file does not provide a usable one".

    Returns:
        ``{"check": True, "reason": "all tests passed"}`` when no problem was
        found, otherwise ``{"check": False, "reason": [<problem>, ...]}`` with
        one human-readable string per problem.
    """
    problems = []

    if run_file is None:
        problems.append("provide a json run file")
        return {"check": False, "reason": problems}

    if not os.path.exists(run_file):
        problems.append(f"{run_file} does not exist")
        return {"check": False, "reason": problems}

    try:
        with open(run_file) as json_file:
            parameters = json.load(json_file)
    except Exception as e:
        problems.append(f"Cannot read provided run file, may be not json? due to {e}")
        return {"check": False, "reason": problems}

    start_pulse_id = parameters["start_pulseid"]
    stop_pulse_id = parameters["stop_pulseid"]

    if rate_multiplicator == 0:
        # A missing, null or zero value in the run file falls back to 1;
        # a zero would otherwise make the modulo below divide by zero.
        rate_multiplicator = parameters.get("rate_multiplicator") or 1

    pgroup = parameters["pgroup"]
    beamline = parameters["beamline"]
    run_number = parameters["run_number"]

    full_directory = f"/sf/{beamline}/data/{pgroup}/raw/"
    if "directory_name" in parameters:
        directory_name = parameters["directory_name"]
        full_directory = f"{full_directory}{directory_name}"

    # TODO: make this check possible for rates other than 100Hz (not
    # straightforward: start_pulse_id may not be aligned with the rate).
    expected_pulse_id = [
        p for p in range(start_pulse_id, stop_pulse_id + 1)
        if p % rate_multiplicator == 0
    ]

    if "channels_list" in parameters:
        bsread_file = f"{full_directory}/run_{run_number:06}.BSREAD.h5"
        problems.extend(
            _check_bsread_file(
                bsread_file,
                parameters["channels_list"],
                expected_pulse_id,
                rate_multiplicator,
            )
        )

    if "camera_list" in parameters:
        cameras_file = f"{full_directory}/run_{run_number:06}.CAMERAS.h5"
        problems.extend(
            _check_cameras_file(cameras_file, parameters["camera_list"], expected_pulse_id)
        )

    if "detectors" in parameters:
        for detector in parameters["detectors"]:
            detector_file = f"{full_directory}/run_{run_number:06}.{detector}.h5"
            problems.extend(
                _check_detector_file(detector_file, detector, expected_pulse_id)
            )

    if problems:
        return {"check": False, "reason": problems}
    return {"check": True, "reason": "all tests passed"}


def _pulse_ids_match(pulse_id, expected_pulse_id):
    """Return True when two equally long pulse id sequences agree element-wise."""
    return not any(got != expected for got, expected in zip(pulse_id, expected_pulse_id))


def _check_bsread_file(bsread_file, channels, expected_pulse_id, rate_multiplicator):
    """Return the list of problems found for the requested channels of a BSREAD file."""
    if not os.path.exists(bsread_file):
        return [f"bsread file {bsread_file} does not exist"]

    problems = []
    n_expected = len(expected_pulse_id)
    try:
        # The context manager closes the file even when a read below raises.
        with h5py.File(bsread_file, "r") as bsread_h5py:
            inside_file = list(bsread_h5py.keys())
            if "data" not in inside_file:
                problems.append(f"BSREAD file {bsread_file} has bad content {inside_file}")
                return problems

            channels_inside_file = list(bsread_h5py["data"].keys())
            for channel in channels:
                if channel not in channels_inside_file:
                    problems.append(f"channel {channel} requested but not present in bsread file")
                    continue

                pulse_id_raw = bsread_h5py[f"/data/{channel}/pulse_id"][:]
                is_data_present = bsread_h5py[f"/data/{channel}/is_data_present"][:]
                # Keep only pulse ids at the requested rate that carry data.
                pulse_id = [
                    p for n_p, p in enumerate(pulse_id_raw)
                    if p % rate_multiplicator == 0 and is_data_present[n_p]
                ]
                n_pulse_id = len(pulse_id)

                if n_pulse_id != n_expected:
                    problems.append(f"{channel} number of pulse_id is different from expected : {n_pulse_id} vs {n_expected}")
                    continue

                if pulse_id[0] != expected_pulse_id[0] or pulse_id[-1] != expected_pulse_id[-1]:
                    problems.append(f"{channel} start/stop pulse_id are not the one which are requested (requested : {expected_pulse_id[0]},{expected_pulse_id[-1]}, got: {pulse_id[0]},{pulse_id[-1]}) ")
                # This is for 100Hz only. TODO: make it work for a different rate.
                if not _pulse_ids_match(pulse_id, expected_pulse_id):
                    problems.append(f"{channel} pulse_id are not monotonic")
    except Exception as e:
        problems.append(f"Can not read from BSREAD file {bsread_file} may be too early due to {e}")
    return problems


def _check_cameras_file(cameras_file, cameras, expected_pulse_id):
    """Return the list of problems found for the requested cameras of a CAMERAS file."""
    if not os.path.exists(cameras_file):
        return [f"camera file {cameras_file} does not exist"]

    problems = []
    n_expected = len(expected_pulse_id)
    try:
        # The context manager closes the file even when a read below raises.
        with h5py.File(cameras_file, "r") as cameras_h5py:
            cameras_inside_file = list(cameras_h5py.keys())
            for camera in cameras:
                if camera not in cameras_inside_file:
                    problems.append(f"camera {camera} requested but not present in cameras file")
                    continue

                pulse_id = cameras_h5py[f"/{camera}/pulse_id"][:]
                n_pulse_id = len(pulse_id)

                if n_pulse_id != n_expected:
                    problems.append(f"{camera} number of pulse_id is different from expected : {n_pulse_id} vs {n_expected}")
                else:
                    if expected_pulse_id[0] != pulse_id[0] or expected_pulse_id[-1] != pulse_id[-1]:
                        problems.append(f"{camera} start/stop pulse_id are not the one which are requested")
                    # This is for 100Hz only. TODO: make it work for a different rate.
                    if not _pulse_ids_match(pulse_id, expected_pulse_id):
                        problems.append(f"{camera} pulse_id are not monotonic")

                # NOTE(review): the readability scan is assumed to run for every
                # camera found, independent of the count check above - confirm.
                n_images_corrupted = 0
                image_data = cameras_h5py[f"/{camera}/data"]
                for i_image in range(n_pulse_id):
                    try:
                        # Reading the image is the test: a failure marks it corrupted.
                        image_data[i_image]
                    except Exception:
                        n_images_corrupted += 1
                if n_images_corrupted != 0:
                    problems.append(f"{camera} {n_images_corrupted} images (from {n_pulse_id}) corrupted, can not read them")
    except Exception as e:
        problems.append(f"Can not read from cameras file {cameras_file} may be too early due to {e}")
    return problems


def _check_detector_file(detector_file, detector, expected_pulse_id):
    """Return the list of problems found in the file of a single detector."""
    if not os.path.exists(detector_file):
        return [f"detector file {detector_file} does not exist"]

    problems = []
    n_expected = len(expected_pulse_id)
    try:
        # The context manager closes the file even when a read below raises.
        with h5py.File(detector_file, "r") as detector_h5py:
            pulse_id = detector_h5py[f"/data/{detector}/pulse_id"][:]
            n_pulse_id = len(pulse_id)

            # In case of converted data, frame_index, is_good_frame and daq_rec
            # may be missing; substitute neutral values of matching length.
            if f"data/{detector}/frame_index" in detector_h5py:
                frame_index = detector_h5py[f"data/{detector}/frame_index"][:]
            else:
                frame_index = [0] * n_pulse_id

            if f"/data/{detector}/is_good_frame" in detector_h5py:
                is_good_frame = detector_h5py[f"/data/{detector}/is_good_frame"][:]
            else:
                is_good_frame = [1] * n_pulse_id

            if f"/data/{detector}/daq_rec" in detector_h5py:
                daq_rec = detector_h5py[f"/data/{detector}/daq_rec"][:]
            else:
                daq_rec = [0] * n_pulse_id

            if len(frame_index) != n_pulse_id or len(is_good_frame) != n_pulse_id or len(daq_rec) != n_pulse_id:
                problems.append(f"{detector} length of frame_index,is_good_frame,daq_rec is not consistent with pulse_id")

            if n_pulse_id != n_expected:
                problems.append(f"{detector} number of pulse_id is different from expected : {n_pulse_id} vs {n_expected}")
            else:
                if expected_pulse_id[0] != pulse_id[0] or expected_pulse_id[-1] != pulse_id[-1]:
                    problems.append(f"{detector} start/stop pulse_id are not the one which are requested")

                # TODO: check on NANs for pulse_ids
                # NOTE: no monotonicity check of frame_index is performed.
                n_frames_bad = 0
                pulse_id_check = True  # this is for 100Hz only. TODO: make it work for a different rate
                for i in range(n_pulse_id):
                    if is_good_frame[i] != 1:
                        n_frames_bad += 1
                    elif pulse_id[i] != expected_pulse_id[i]:
                        # Pulse ids are only compared for frames flagged as good.
                        pulse_id_check = False

                if n_frames_bad != 0:
                    problems.append(f"{detector} there are bad frames : {n_frames_bad} out of {n_pulse_id}")
                if not pulse_id_check:
                    problems.append(f"{detector} pulse_id are not monotonic")
    except Exception as e:
        problems.append(f"Can not read from detector file {detector_file} may be too early due to {e}")
    return problems
# Run the command-line check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()