Files
Jungfraujoch/receiver/JFJochReceiver.cpp

669 lines
26 KiB
C++

// Copyright (2019-2022) Paul Scherrer Institute
// SPDX-License-Identifier: GPL-3.0-or-later
#include "JFJochReceiver.h"
#include <thread>
#include "../image_analysis/GPUImageAnalysis.h"
#include "../jungfrau/JFPedestalCalc.h"
#include "../image_analysis/IndexerWrapper.h"
#ifdef JFJOCH_USE_NUMA
#include <numa.h>
#endif
// Formats a system_clock time point as an ISO 8601 UTC timestamp,
// "YYYY-MM-DDTHH:MM:SS.ffffffZ".
//
// The input is truncated to millisecond resolution; the fractional part is
// printed as six digits (microsecond field, last three digits always zero)
// so that the width of the output string is unchanged while the value is a
// correct fraction of a second.
inline std::string time_UTC(const std::chrono::time_point<std::chrono::system_clock> &input) {
    const long long total_ms = static_cast<long long>(
            std::chrono::duration_cast<std::chrono::milliseconds>(input.time_since_epoch()).count());

    // Floor division, so that time points before the epoch still give a
    // non-negative fractional part.
    long long seconds = total_ms / 1000;
    long long millis = total_ms % 1000;
    if (millis < 0) {
        millis += 1000;
        --seconds;
    }

    // gmtime_r instead of gmtime: the function is called from several threads
    // and gmtime returns a pointer to shared static storage.
    const time_t time = static_cast<time_t>(seconds);
    struct tm utc{};
    gmtime_r(&time, &utc);

    char date_buf[64];
    strftime(date_buf, sizeof(date_buf), "%FT%T", &utc);

    char fraction_buf[32];
    snprintf(fraction_buf, sizeof(fraction_buf), ".%06lld", millis * 1000);

    return std::string(date_buf) + std::string(fraction_buf) + "Z";
}
// Sets up and starts a complete data collection.
//
// Steps, in order:
//  1. load calibration / pixel mask and validate input parameters,
//  2. prepare all acquisition devices,
//  3. launch one AcquireThread per data stream,
//  4. launch either pedestal threads (pedestal modes) or frame transformation
//     threads (image collection),
//  5. wait until devices report ready, then start FinalizeMeasurement in the
//     background.
//
// Parameters:
//  settings                    - receiver input (experiment settings, optional calibration)
//  in_aq_device                - acquisition devices, at least one per data stream
//  in_image_sender             - sink for start/image/end messages
//  in_logger                   - logger used by all threads
//  in_forward_and_sum_nthreads - number of frame transformation threads (> 0)
//  in_preview_publisher        - optional preview publisher (may be nullptr)
//
// Throws JFJochException (InputParameterInvalid) on inconsistent input.
// All input validation visible in this constructor is done BEFORE any worker
// thread is launched: acquisition threads block on frame_transformation_ready,
// so throwing after they are started would leave them waiting forever.
JFJochReceiver::JFJochReceiver(const JFJochProtoBuf::ReceiverInput &settings,
                               std::vector<AcquisitionDevice *> &in_aq_device,
                               ImagePusher &in_image_sender,
                               Logger &in_logger, int64_t in_forward_and_sum_nthreads,
                               ZMQPreviewPublisher* in_preview_publisher) :
        experiment(settings.jungfraujoch_settings()),
        acquisition_device(in_aq_device),
        logger(in_logger),
        image_pusher(in_image_sender),
        frame_transformation_nthreads(in_forward_and_sum_nthreads),
        preview_publisher(in_preview_publisher),
        rad_int_profile_window(experiment.GetSpotFindingBin())
{
    ndatastreams = experiment.GetDataStreamsNum();

    if (settings.has_calibration()) {
        calib.emplace(settings.calibration());
        one_byte_mask = calib->CalculateOneByteMask(experiment);
    } else {
        // No calibration: every mask entry is set to 1
        one_byte_mask.resize(experiment.GetPixelsNum());
        for (auto &i: one_byte_mask) i = 1;
    }

    if (experiment.GetConversionOnCPU())
        PrepareConversionOnCPU();

    if (!experiment.CheckGitSha1Consistent())
        logger.Warning(experiment.CheckGitSha1Msg());

    push_images_to_writer = (experiment.GetImageNum() > 0) && (!experiment.GetFilePrefix().empty());

    if (acquisition_device.size() < ndatastreams)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Number of acquisition devices has to match data streams");

    if (frame_transformation_nthreads <= 0)
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Number of threads must be more than zero");

    const bool pedestal_mode = (experiment.GetDetectorMode() == DetectorMode::PedestalG0)
                               || (experiment.GetDetectorMode() == DetectorMode::PedestalG1)
                               || (experiment.GetDetectorMode() == DetectorMode::PedestalG2);

    // Checked here, before devices are prepared and threads are launched
    if (pedestal_mode && (experiment.GetImageNum() > 0))
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Saving and calculating pedestal is not supported for the time being");

    preview_stride = experiment.GetPreviewStride();
    spotfinder_stride = experiment.GetSpotFindingStride();

    logger.Info("Image stride for data analysis: preview {}, spot finding/radial integration {}",
                preview_stride, spotfinder_stride);

    if (experiment.GetDetectorMode() == DetectorMode::Conversion) {
        if (preview_publisher != nullptr)
            preview_publisher->Start(experiment, calib.value());

        rad_int_mapping = std::make_unique<RadialIntegrationMapping>(experiment, one_byte_mask.data());

        // calib is optional - dereferencing it when empty is undefined behavior
        if (calib)
            spot_finder_mask = calib->CalculateOneByteMask(experiment);
    }

    // Create latches
    data_acquisition_ready = std::make_unique<Latch>(ndatastreams);

    for (int d = 0; d < ndatastreams; d++) {
        if (calib)
            acquisition_device[d]->InitializeCalibration(experiment, calib.value());
        acquisition_device[d]->PrepareAction(experiment);
        logger.Debug("Acquisition device {} prepared", d);
    }

    // With no images to process there are no frame transformation threads,
    // so the latch must be open from the start
    if (experiment.GetImageNum() > 0)
        frame_transformation_ready = std::make_unique<Latch>(frame_transformation_nthreads);
    else
        frame_transformation_ready = std::make_unique<Latch>(0);

    for (int d = 0; d < ndatastreams; d++)
        data_acquisition_futures.emplace_back(std::async(std::launch::async, &JFJochReceiver::AcquireThread,
                                                         this, d));

    logger.Info("Data acquisition devices ready");

    if (pedestal_mode) {
        if (experiment.GetDetectorMode() == DetectorMode::PedestalG0) {
            // G0: one result and one thread per (storage cell, module)
            pedestal_result.resize(experiment.GetModulesNum() * experiment.GetStorageCellNumber());
            for (int s = 0; s < experiment.GetStorageCellNumber(); s++) {
                for (int d = 0; d < ndatastreams; d++) {
                    for (int m = 0; m < experiment.GetModulesNum(d); m++) {
                        auto handle = std::async(std::launch::async, &JFJochReceiver::MeasurePedestalThread, this, d, m,
                                                 s);
                        frame_transformation_futures.emplace_back(std::move(handle));
                    }
                }
            }
        } else {
            // G1/G2: one result and one thread per module
            pedestal_result.resize(experiment.GetModulesNum());
            for (int d = 0; d < ndatastreams; d++) {
                for (int m = 0; m < experiment.GetModulesNum(d); m++) {
                    auto handle = std::async(std::launch::async, &JFJochReceiver::MeasurePedestalThread, this, d, m, 0);
                    frame_transformation_futures.emplace_back(std::move(handle));
                }
            }
        }
        logger.Info("Pedestal threads ready");
    }

    if (experiment.GetImageNum() > 0) {
        logger.Info("Data file count {}", experiment.GetDataFileCount());

        if (push_images_to_writer) {
            StartMessage message{};
            experiment.FillMessage(message);
            message.arm_date = time_UTC(std::chrono::system_clock::now());
            if (calib)
                message.pixel_mask["sc0"] = calib->CalculateNexusMask(experiment, 0);
            if (rad_int_mapping)
                message.rad_int_bin_number = rad_int_mapping->GetBinNumber();
            else
                message.rad_int_bin_number = 0;
            image_pusher.StartDataCollection(message);
        }

        // Queue of image numbers consumed by the frame transformation threads
        for (int i = 0; i < experiment.GetImageNum(); i++)
            images_to_go.Put(i);

        // Setup frames summation and forwarding
        for (int i = 0; i < frame_transformation_nthreads; i++) {
            auto handle = std::async(std::launch::async, &JFJochReceiver::FrameTransformationThread, this);
            frame_transformation_futures.emplace_back(std::move(handle));
        }

        logger.Info("Image compression/forwarding threads started");
        frame_transformation_ready->Wait();
        logger.Info("Image compression/forwarding threads ready");
    }

    data_acquisition_ready->Wait();
    logger.Info("Acquisition devices ready");

    start_time = std::chrono::system_clock::now();
    logger.Info("Receiving data started");

    measurement = std::async(std::launch::async, &JFJochReceiver::FinalizeMeasurement, this);
}
// Worker for a single acquisition device.
//
// Waits until frame transformation threads are ready, starts the device
// action, signals data_acquisition_ready (also when starting failed, so the
// waiter is never left blocked), and then waits for the action to complete.
// A JFJochException in either phase triggers Abort(e).
//
// Always returns -1, i.e. this thread never contributes a delay value.
int64_t JFJochReceiver::AcquireThread(uint16_t data_stream) {
    PinThreadToDevice(data_stream);

    bool action_started = false;
    try {
        frame_transformation_ready->Wait();
        logger.Debug("Device thread {} start FPGA action", data_stream);
        acquisition_device[data_stream]->StartAction(experiment);
        action_started = true;
    } catch (const JFJochException &e) {
        Abort(e);
    }

    // Counted down on both the success and the failure path
    data_acquisition_ready->CountDown();

    if (!action_started)
        return -1;

    try {
        logger.Debug("Device thread {} wait for FPGA action complete", data_stream);
        acquisition_device[data_stream]->WaitForActionComplete();
    } catch (const JFJochException &e) {
        Abort(e);
    }

    logger.Debug("Device thread {} done", data_stream);
    return -1;
}
int64_t JFJochReceiver::MeasurePedestalThread(uint16_t data_stream, uint16_t module_number, uint16_t storage_cell) {
PinThreadToDevice(data_stream);
JFPedestalCalc pedestal_calc(experiment);
bool storage_cell_G1G2 = (experiment.GetStorageCellNumber() > 1)
&& ((experiment.GetDetectorMode() == DetectorMode::PedestalG1)
|| (experiment.GetDetectorMode() == DetectorMode::PedestalG2));
int64_t delay = 0;
size_t staring_frame;
size_t frame_stride;
size_t offset = experiment.GetFirstModuleOfDataStream(data_stream) + module_number;
if (experiment.GetDetectorMode() == DetectorMode::PedestalG0) {
staring_frame = storage_cell;
frame_stride = experiment.GetStorageCellNumber();
offset += experiment.GetModulesNum() * storage_cell;
} else {
staring_frame = 0;
frame_stride = 1;
}
logger.Debug("Pedestal calculation thread for data stream {} module {} starting", data_stream, module_number);
try {
for (size_t frame = staring_frame; frame < experiment.GetFrameNum(); frame += frame_stride) {
// Frame will be processed only if one already collects frame+2
acquisition_device[data_stream]->WaitForFrame(frame + 2, module_number);
if (!storage_cell_G1G2 || (frame % 2 == 1)) {
// Partial packets will bring more problems, than benefit
if (acquisition_device[data_stream]->IsFullModuleCollected(frame, module_number)) {
pedestal_calc.AnalyzeImage((uint16_t *) acquisition_device[data_stream]->GetFrameBuffer(frame, module_number));
}
}
acquisition_device[data_stream]->FrameBufferRelease(frame, module_number);
delay = std::max(delay, acquisition_device[data_stream]->CalculateDelay(frame, module_number));
UpdateMaxImage(frame);
}
if (experiment.GetDetectorMode() == DetectorMode::PedestalG0)
pedestal_calc.Export(pedestal_result[offset], 2);
else
pedestal_calc.Export(pedestal_result[offset]);
pedestal_result[offset].SetFrameCount(experiment.GetFrameNum());
pedestal_result[offset].SetCollectionTime(start_time.time_since_epoch().count() / 1e9);
} catch (const JFJochException &e) { Abort(e); }
logger.Debug("Pedestal calculation thread for data stream {} module {} done", data_stream, module_number);
return delay;
}
// Image processing worker: takes image numbers from images_to_go and, for
// each image, assembles all modules of all summed frames, optionally runs
// spot finding / radial integration / indexing / preview, and sends the
// serialized image to the image pusher.
//
// Signals frame_transformation_ready exactly once, either after the GPU
// analysis object is set up or when its creation failed.
//
// Returns the largest delay reported by the acquisition devices for frames
// handled by this thread, or -1 if the GPU analysis could not be created.
// A JFJochException raised while processing an image triggers Abort(e); the
// loop then continues with the next queued image.
int64_t JFJochReceiver::FrameTransformationThread() {
    FrameTransformation transformation(experiment);
    JFJochFrameSerializer serializer(experiment.GetPixelsNum()*sizeof(uint32_t)*2);

    std::unique_ptr<GPUImageAnalysis> spot_finder;

    try {
        if (rad_int_mapping)
            spot_finder = std::make_unique<GPUImageAnalysis>(experiment.GetXPixelsNum(),
                                                             experiment.GetYPixelsNum(),
                                                             one_byte_mask, *rad_int_mapping);
        else
            spot_finder = std::make_unique<GPUImageAnalysis>(experiment.GetXPixelsNum(),
                                                             experiment.GetYPixelsNum(),
                                                             one_byte_mask);
        spot_finder->SetInputBuffer(transformation.GetPreview16BitImage());
        spot_finder->RegisterBuffer();
    } catch (const JFJochException& e) {
        // The latch must be released even on failure, otherwise the
        // constructor and the acquisition threads would wait forever
        frame_transformation_ready->CountDown();
        logger.Error("Error creating GPU spot finder");
        Abort(e);
        return -1;
    }

    std::vector<char> writer_buffer(experiment.GetMaxCompressedSize());

    int64_t max_thread_delay = 0;
    uint64_t image_number;

    transformation.SetOutput(writer_buffer.data());

    frame_transformation_ready->CountDown();

    // Indexing is only available when the unit cell is known
    std::unique_ptr<IndexerWrapper> indexer;
    if (experiment.HasUnitCell()) {
        indexer = std::make_unique<IndexerWrapper>();
        indexer->Setup(experiment.GetUnitCell());
    }

    while (images_to_go.Get(image_number) != 0) {
        try {
            DataMessage message{};
            message.number = image_number;

            bool send_preview = false;
            bool send_bkg_estimate = false;
            bool calculate_spots = false;
            bool index = false;

            if ((preview_publisher != nullptr) && (preview_stride > 0) && (image_number % preview_stride == 0))
                send_preview = true;

            bool send_image = false; // We send image if at least one module was collected in full

            if ((spotfinder_stride > 0) && (image_number % spotfinder_stride == 0)) {
                calculate_spots = true;
                if (rad_int_mapping)
                    send_bkg_estimate = true;
                if (indexer)
                    index = true;
            }

            for (int j = 0; j < experiment.GetSummation(); j++) {
                size_t frame_number = image_number * experiment.GetSummation() + j;

                for (int d = 0; d < ndatastreams; d++) {
                    acquisition_device[d]->WaitForFrame(frame_number + 2);

                    for (int m = 0; m < experiment.GetModulesNum(d); m++) {
                        const int16_t *src;
                        if (acquisition_device[d]->IsFullModuleCollected(frame_number, m)) {
                            src = acquisition_device[d]->GetFrameBuffer(frame_number, m);
                            if (!send_image) {
                                // the information is for first module/frame that was collected in full
                                message.bunch_id = acquisition_device[d]->GetBunchID(frame_number, m);
                                message.jf_info = acquisition_device[d]->GetJFInfo(frame_number, m);
                                message.timestamp = acquisition_device[d]->GetTimestamp(frame_number, m);
                            }
                            send_image = true;
                        } else
                            src = acquisition_device[d]->GetErrorFrameBuffer();

                        if (experiment.GetConversionOnCPU()) {
                            auto &conv = fixed_point_conversion.at(experiment.GetFirstModuleOfDataStream(d) + m);
                            transformation.ProcessModule(conv, src, m, d);
                        } else
                            transformation.ProcessModule(src, m, d);

                        acquisition_device[d]->FrameBufferRelease(frame_number, m);
                    }
                    max_thread_delay = std::max(max_thread_delay, acquisition_device[d]->CalculateDelay(frame_number));
                }
            }

            if (send_image) {
                size_t image_size = transformation.PackStandardOutput();

                std::vector<DiffractionSpot> spots;

                // One snapshot of the settings per image: the same values are
                // used to launch the spot finder, to collect its results and
                // for the background estimate, even if the settings are
                // changed concurrently through SetDataProcessingSettings()
                auto local_data_processing_settings = GetDataProcessingSettings();

                // Spot finding is async, so it can be sandwiched between sending image and other tasks
                if (calculate_spots || send_bkg_estimate)
                    spot_finder->LoadDataToGPU(!experiment.GetApplyPixelMaskInFPGA());

                if (calculate_spots)
                    spot_finder->RunSpotFinder(local_data_processing_settings);

                if (send_bkg_estimate)
                    spot_finder->RunRadialIntegration();

                if (send_preview)
                    preview_publisher->Publish(experiment,
                                               transformation.GetPreview16BitImage(),
                                               image_number);

                if (calculate_spots) {
                    spot_finder->GetSpotFinderResults(experiment, local_data_processing_settings, spots);
                    for (const auto & spot : spots)
                        message.spots.push_back(spot);
                    spot_count.AddElement(image_number, spots.size());
                }

                if (index) {
                    std::vector<Coord> recip;
                    for (const auto &i: spots)
                        recip.push_back(i.ReciprocalCoord(experiment));

                    auto indexer_result = indexer->Run(recip);

                    // indexing_result: 0 = not attempted, 1 = attempted without
                    // solution, 2 = solution found
                    if (!indexer_result.empty()) {
                        message.indexing_result = 2;
                        indexing_solution.AddElement(image_number, 1);
                    } else {
                        message.indexing_result = 1;
                        indexing_solution.AddElement(image_number, 0);
                    }
                } else
                    message.indexing_result = 0;

                if (send_bkg_estimate) {
                    uint16_t rad_int_min_bin = std::floor(
                            rad_int_mapping->QToBin(local_data_processing_settings.bkg_estimate_low_q()));
                    uint16_t rad_int_max_bin = std::ceil(
                            rad_int_mapping->QToBin(local_data_processing_settings.bkg_estimate_high_q()));
                    float bkg_estimate_val = spot_finder->GetRadialIntegrationRangeValue(rad_int_min_bin, rad_int_max_bin);
                    bkg_estimate.AddElement(image_number, bkg_estimate_val);
                    spot_finder->GetRadialIntegrationProfile(message.rad_int_profile);
                    AddRadialIntegrationProfile(message.rad_int_profile);
                }

                if (push_images_to_writer) {
                    PrepareCBORImage(message, experiment, writer_buffer.data(), image_size);
                    serializer.SerializeImage(message);
                    image_pusher.SendData(serializer.GetBuffer(), image_number);
                }

                UpdateMaxImage(image_number);
                compressed_size += image_size;
                images_sent++;
            }
        } catch (const JFJochException &e) { Abort(e); }
    }

    spot_finder->UnregisterBuffer();
    logger.Debug("Sum&compression thread done");
    return max_thread_delay;
}
// Fills `ret` with a summary of the data collection: per-device statistics,
// packet efficiency, compression figures, timing, pedestal results (if any),
// cancellation flag and indexing rate.
//
// Per-device statistics are appended to `ret`; the packet totals are read
// from the entries appended here, so the result is correct also when `ret`
// already contains device statistics.
//
// The compression ratio is set only when images were requested and at least
// one byte of compressed data was produced; otherwise the field is left
// untouched instead of being set to NaN/inf from a division by zero.
void JFJochReceiver::GetStatistics(JFJochProtoBuf::ReceiverOutput &ret) const {
    uint64_t expected_packets = 0;
    uint64_t received_packets = 0;

    for (int d = 0; d < ndatastreams; d++) {
        auto *device_statistics = ret.add_device_statistics();
        acquisition_device[d]->SaveStatistics(experiment, *device_statistics);
        expected_packets += device_statistics->packets_expected();
        received_packets += device_statistics->good_packets();
    }

    // Nothing expected counts as fully efficient
    if ((expected_packets == received_packets) || (expected_packets == 0))
        ret.set_efficiency(1.0);
    else
        ret.set_efficiency(received_packets / static_cast<double>(expected_packets));

    ret.set_compressed_size(compressed_size);
    ret.set_max_image_number_sent(max_image_number_sent);

    if ((experiment.GetImageNum() > 0) && (compressed_size > 0)) {
        ret.set_compressed_ratio( static_cast<double> (images_sent
                                                       * experiment.GetPixelDepth()
                                                       * experiment.GetModulesNum()
                                                       * RAW_MODULE_SIZE)
                                  / static_cast<double> (compressed_size));
    }

    if (!max_delay.empty())
        ret.set_max_receive_delay(*std::max_element(max_delay.begin(), max_delay.end()));
    else
        ret.set_max_receive_delay(0);

    ret.set_images_sent(images_sent);
    ret.set_start_time_ms(std::chrono::duration_cast<std::chrono::milliseconds>(start_time.time_since_epoch()).count());
    ret.set_end_time_ms(std::chrono::duration_cast<std::chrono::milliseconds>(end_time.time_since_epoch()).count());

    if (!pedestal_result.empty())
        *ret.mutable_pedestal_result() = {pedestal_result.begin(), pedestal_result.end()};

    ret.set_master_file_name(experiment.GetFilePrefix());
    ret.set_cancelled(cancelled);

    // Mean() is NaN-checked; the field stays unset when it is NaN
    auto tmp = indexing_solution.Mean();
    if (!std::isnan(tmp))
        ret.set_indexing_rate(tmp);
}
// Cancels the data collection on user request.
// Marks the run as cancelled and asks every acquisition device to abort its
// action. The `abort` flag is NOT set here, so CPU-side processing is not
// flagged as failed.
void JFJochReceiver::Cancel() {
    logger.Warning("Cancelling on request");
    cancelled = true;

    for (int stream = 0; stream < ndatastreams; ++stream) {
        acquisition_device[stream]->ActionAbort();
    }
}
// Aborts the data collection on request.
// Unlike Cancel(), this also sets the `abort` flag to 1 (reported by
// GetStatus()) in addition to marking the run as cancelled and asking every
// acquisition device to abort its action.
void JFJochReceiver::Abort() {
    logger.Error("Aborting on request");

    cancelled = true;
    abort = 1;

    for (int stream = 0; stream < ndatastreams; ++stream) {
        acquisition_device[stream]->ActionAbort();
    }
}
// Aborts the data collection because of an exception caught in a worker.
// Logs the exception, then marks the run as cancelled, sets the `abort` flag
// to 1 and asks every acquisition device to abort its action.
void JFJochReceiver::Abort(const JFJochException &e) {
    logger.Error("Aborting data collection due to exception");
    logger.ErrorException(e);

    // Error abort: This tells FPGAs to stop and also prevents deadlock in CPU code, by setting abort to 1
    cancelled = true;
    abort = 1;

    for (int stream = 0; stream < ndatastreams; ++stream) {
        acquisition_device[stream]->ActionAbort();
    }
}
// Returns the current value of the `abort` flag.
// Within this file the flag is set to 1 by both Abort() overloads and by the
// destructor.
int JFJochReceiver::GetStatus() const {
    const int status = abort;
    return status;
}
// Returns the mean of the recorded indexing outcomes (1 recorded for an
// image with an indexing solution, 0 for an image without one).
// NOTE(review): GetStatistics() checks this value for NaN, so callers should
// presumably expect NaN when nothing was recorded - confirm against Mean().
double JFJochReceiver::GetIndexingRate() const {
    const double rate = indexing_solution.Mean();
    return rate;
}
// Returns the progress of the data collection in percent.
// The highest processed image/frame number is related to the number of
// images when images are collected, otherwise to the number of frames
// (pedestal). With neither images nor frames the result is 100.
double JFJochReceiver::GetProgress() const {
    const auto image_total = experiment.GetImageNum();
    if (image_total > 0)
        return static_cast<double>(max_image_number_sent) / static_cast<double>(image_total) * 100.0;

    // Pedestal
    const auto frame_total = experiment.GetFrameNum();
    if (frame_total > 0)
        return static_cast<double>(max_image_number_sent) / static_cast<double>(frame_total) * 100.0;

    return 100.0;
}
void JFJochReceiver::FinalizeMeasurement() {
if (!frame_transformation_futures.empty()) {
for (auto &future: frame_transformation_futures) {
auto val = future.get();
if (val >= 0) max_delay.push_back(val);
}
logger.Info("All processing threads done");
}
if (push_images_to_writer) {
JFJochProtoBuf::ReceiverOutput output;
GetStatistics(output);
EndMessage message{};
message.number_of_images = output.max_image_number_sent();
message.max_receiver_delay = output.max_receive_delay();
message.efficiency = output.efficiency();
message.end_date = time_UTC(std::chrono::system_clock::now());
message.write_master_file = true;
image_pusher.EndDataCollection(message);
logger.Info("Disconnected from writers");
}
if (preview_publisher != nullptr)
preview_publisher->Stop(experiment);
for (int d = 0; d < ndatastreams; d++)
acquisition_device[d]->ActionAbort();
end_time = std::chrono::system_clock::now();
for (auto &future : data_acquisition_futures) {
auto val = future.get();
if (val >= 0) max_delay.push_back(val);
}
logger.Info("Devices stopped");
logger.Info("Receiving data done");
}
// Replaces the data processing settings used by the processing threads.
// The new settings are validated with
// DiffractionExperiment::CheckDataProcessingSettings() before being stored;
// both steps run under data_processing_settings_mutex.
void JFJochReceiver::SetDataProcessingSettings(const JFJochProtoBuf::DataProcessingSettings &in_data_processing_settings) {
    std::lock_guard<std::mutex> guard(data_processing_settings_mutex);

    DiffractionExperiment::CheckDataProcessingSettings(in_data_processing_settings);
    data_processing_settings = in_data_processing_settings;
}
// Blocks until the FinalizeMeasurement task has finished.
// Does nothing if that task was never started or was already collected.
void JFJochReceiver::StopReceiver() {
    if (!measurement.valid())
        return;

    measurement.get();
    logger.Info("Receiver stopped");
}
// Sets the abort flag and waits for the FinalizeMeasurement task, if it was
// started and its result has not been collected by StopReceiver().
//
// wait() is used instead of get(): get() rethrows any exception stored in
// the future, and an exception leaving a destructor terminates the program.
// Callers interested in such errors should call StopReceiver() first.
JFJochReceiver::~JFJochReceiver() {
    abort = 1;
    if (measurement.valid())
        measurement.wait();
}
// Returns a copy of the current data processing settings, taken under
// data_processing_settings_mutex.
JFJochProtoBuf::DataProcessingSettings JFJochReceiver::GetDataProcessingSettings() {
    std::lock_guard<std::mutex> guard(data_processing_settings_mutex);
    return data_processing_settings;
}
// Merges one radial integration profile into the running profile.
// The first profile is stored as-is. Later profiles of the same length move
// every bin towards the new value by 1/rad_int_profile_window of the
// difference. A profile of a different length aborts the data collection.
void JFJochReceiver::AddRadialIntegrationProfile(const std::vector<float> &result) {
    std::unique_lock<std::mutex> ul(rad_int_profile_mutex);

    if (rad_int_profile.empty()) {
        rad_int_profile = result;
        return;
    }

    if (rad_int_profile.size() != result.size()) {
        // Throw exception?
        Abort();
        return;
    }

    for (size_t bin = 0; bin < rad_int_profile.size(); ++bin)
        rad_int_profile[bin] += (result[bin] - rad_int_profile[bin]) / rad_int_profile_window;
}
// Copies the running radial integration profile into `plot`
// (x = bin-to-Q mapping, y = profile values).
//
// `plot` is left untouched when there is no radial integration mapping or no
// profile has been accumulated yet. The mapping is created by the constructor
// only in conversion mode, so it must be checked before being dereferenced.
void JFJochReceiver::GetRadialIntegrationProfile(JFJochProtoBuf::Plot &plot) {
    std::unique_lock<std::mutex> ul(rad_int_profile_mutex);

    if (!rad_int_mapping || rad_int_profile.empty())
        return;

    const auto &bin_to_q = rad_int_mapping->GetBinToQ();
    *plot.mutable_x() = {bin_to_q.begin(), bin_to_q.end()};
    *plot.mutable_y() = {rad_int_profile.begin(), rad_int_profile.end()};
}
// Builds the plot selected by `request`.
// The number of bins is the binning from the request when it is positive,
// otherwise the spot finding bin from the experiment settings. The radial
// integration plot does not use the binning. Unknown plot types give an
// empty plot.
JFJochProtoBuf::Plot JFJochReceiver::GetPlots(const JFJochProtoBuf::PlotRequest &request) {
    JFJochProtoBuf::Plot plot;

    auto bin_count = experiment.GetSpotFindingBin();
    if (request.binning() > 0)
        bin_count = request.binning();

    const auto plot_type = request.type();

    if (plot_type == JFJochProtoBuf::RAD_INT) {
        GetRadialIntegrationProfile(plot);
    } else if (plot_type == JFJochProtoBuf::SPOT_COUNT) {
        spot_count.GetPlot(plot, bin_count);
    } else if (plot_type == JFJochProtoBuf::INDEXING_RATE) {
        indexing_solution.GetPlot(plot, bin_count);
    } else if (plot_type == JFJochProtoBuf::BKG_ESTIMATE) {
        bkg_estimate.GetPlot(plot, bin_count);
    }
    // Any other type: do nothing

    return plot;
}
// Prepares one fixed-point conversion object per module from the gain
// calibration, the three pedestals of storage cell 0 and the photon energy.
//
// Throws JFJochException (InputParameterInvalid) if more than one storage
// cell is configured or if no calibration was provided.
void JFJochReceiver::PrepareConversionOnCPU() {
    if (experiment.GetStorageCellNumber() != 1) {
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "CPU conversion currently doesn't support storage cells");
    }

    if (!calib.has_value()) {
        throw JFJochException(JFJochExceptionCategory::InputParameterInvalid,
                              "Calibration not provided");
    }

    fixed_point_conversion.resize(experiment.GetModulesNum());

    for (int module = 0; module < experiment.GetModulesNum(); ++module) {
        auto &conversion = fixed_point_conversion[module];
        conversion.Setup(calib->GainCalibration(module),
                         calib->Pedestal(module, 0, 0),
                         calib->Pedestal(module, 1, 0),
                         calib->Pedestal(module, 2, 0),
                         experiment.GetPhotonEnergy_keV());
    }
}
// Restricts the calling thread to the NUMA node reported by the acquisition
// device of the given data stream. Does nothing when built without
// JFJOCH_USE_NUMA or when libnuma reports that NUMA is not available.
void JFJochReceiver::PinThreadToDevice(uint16_t data_stream) {
#ifdef JFJOCH_USE_NUMA
    const bool numa_usable = (numa_available() != -1);
    if (numa_usable) {
        numa_run_on_node(acquisition_device[data_stream]->GetNUMANode());
    }
#endif
}
// Raises max_image_number_sent to `image_number` if that is larger than the
// stored value; smaller or equal values are ignored. The update runs under
// max_image_number_sent_mutex.
void JFJochReceiver::UpdateMaxImage(int64_t image_number) {
    std::lock_guard<std::mutex> guard(max_image_number_sent_mutex);

    if (!(image_number > max_image_number_sent))
        return;

    max_image_number_sent = image_number;
}