373 lines
13 KiB
C++
373 lines
13 KiB
C++
// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute <filip.leonarski@psi.ch>
|
|
// SPDX-License-Identifier: CERN-OHL-S-2.0
|
|
|
|
#include "hls_jfjoch.h"
|
|
|
|
#ifdef JFJOCH_HLS_NOSYNTH
|
|
#include <thread>
|
|
#endif
|
|
|
|
// Input stage of the spot finder: converts the external 768-bit stream into the
// internal spot_finder_packet stream.
//
// Packet handling, in order:
//   1. The very first packet is forwarded with mask = 0 and no threshold fields,
//      inside a fixed-protocol region (read, wait, write, wait).
//   2. Every following packet whose `user` flag is clear is forwarded with
//        mask         = keep[31:0]
//        strong_pixel = keep[63:32]
//      and with the current values of the two threshold inputs, which are
//      sampled again for each packet.
//   3. The first packet seen with `user` set ends the loop and is forwarded with
//      mask = 0 and no threshold fields.
//
// data_in            - incoming packet stream
// data_out           - outgoing internal packet stream
// in_count_threshold - count threshold input, attached to each forwarded data packet
// in_snr_threshold   - SNR threshold input (raw 32-bit value), attached likewise
void spot_finder_in_stream(STREAM_768 &data_in,
                           hls::stream<spot_finder_packet> &data_out,
                           volatile ap_int<32> &in_count_threshold,
                           volatile ap_uint<32> &in_snr_threshold) {
    packet_768_t word;

    // Leading packet: passed through with an empty mask.
    {
#pragma HLS PROTOCOL fixed
        word = data_in.read();
        ap_wait();
        data_out.write(spot_finder_packet{
            .data = word.data,
            .strb = word.strb,
            .mask = 0,
            .user = word.user,
            .last = word.last
        });
        ap_wait();
    }

    // Data packets: run until a packet with `user` set arrives.
    word = data_in.read();
    while (!word.user) {
#pragma HLS PIPELINE II=1
        const spot_finder_packet annotated{
            .data = word.data,
            .strb = word.strb,
            .mask = word.keep(31,0),
            .strong_pixel = word.keep(63,32),
            .count_threshold = in_count_threshold,
            .snr_threshold = in_snr_threshold,
            .user = word.user,
            .last = word.last
        };
        data_out.write(annotated);
        word = data_in.read();
    }

    // Terminating packet (user set): passed through with an empty mask.
    data_out.write(spot_finder_packet{
        .data = word.data,
        .strb = word.strb,
        .mask = 0,
        .user = word.user,
        .last = word.last
    });
}
|
|
|
|
// Output stage of the spot finder: converts the internal spot_finder_packet stream
// back into the external 768-bit stream and emits the strong-pixel side stream.
//
// Packet handling, in order:
//   1. The first packet is forwarded (data/strb/user/last only) inside a
//      fixed-protocol region.
//   2. While the packet at a module boundary has `user` clear, one module of
//      RAW_MODULE_SIZE * sizeof(uint16_t) / 64 packets is processed:
//        - each packet is forwarded with keep[31:0] = mask and
//          keep[63:32] = strong_pixel;
//        - each packet's strong_pixel word is written to strong_pixel_out (user = 0).
//      After the module, 16 words are appended to strong_pixel_out: the count
//      threshold and the raw SNR threshold taken from the module's first packet,
//      followed by 14 zero words.
//   3. When a packet with `user` set is found at a module boundary, a single
//      word {data = 0, user = 1} is written to strong_pixel_out and the packet
//      itself is forwarded.
//
// data_in          - internal packet stream
// data_out         - outgoing 768-bit packet stream
// strong_pixel_out - 32-bit side stream with strong-pixel bitmaps and per-module trailer
void spot_finder_out_stream(hls::stream<spot_finder_packet> &data_in,
                            STREAM_768 &data_out,
                            hls::stream<ap_axiu<32,1,1,1>> &strong_pixel_out) {
    spot_finder_packet pkt;

    // Leading packet: plain pass-through.
    {
#pragma HLS PROTOCOL fixed
        pkt = data_in.read();
        ap_wait();
        data_out.write(packet_768_t{.data = pkt.data, .strb = pkt.strb, .user = pkt.user, .last = pkt.last});
        ap_wait();
    }

    pkt = data_in.read();
    while (!pkt.user) {
        // Thresholds reported for this module come from its first packet.
        ap_uint<32> module_count_threshold = pkt.count_threshold;
        ap_uint<32> module_snr_threshold = pkt.snr_threshold;

        for (int word_idx = 0; word_idx < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; word_idx++) {
#pragma HLS PIPELINE II=1

            // Rebuild the 64-bit keep field: strong pixels in the upper half, mask in the lower half.
            ap_uint<64> keep;
            keep(63,32) = pkt.strong_pixel;
            keep(31, 0) = pkt.mask;

            data_out.write(packet_768_t{.data = pkt.data, .keep = keep, .strb = pkt.strb, .user = pkt.user, .last = pkt.last});
            strong_pixel_out.write(ap_axiu<32,1,1,1>{.data = pkt.strong_pixel, .user = 0});
            pkt = data_in.read();
        }

        // Per-module trailer: two threshold words followed by 14 zero words.
        strong_pixel_out.write(ap_axiu<32,1,1,1>{.data = module_count_threshold, .user = 0});
        strong_pixel_out.write(ap_axiu<32,1,1,1>{.data = module_snr_threshold, .user = 0});
        for (int pad_idx = 0; pad_idx < 14; pad_idx++) {
            strong_pixel_out.write(ap_axiu<32,1,1,1>{.data = 0, .user = 0});
        }
    }

    // End marker on the side stream, then forward the terminating packet.
    strong_pixel_out.write(ap_axiu<32,1,1,1>{.data = 0, .user = 1});
    data_out.write(packet_768_t{.data = pkt.data, .strb = pkt.strb, .user = pkt.user, .last = pkt.last});
}
|
|
|
|
// Returns the sum of val[i] over all lanes i (0..31) whose bit is set in `mask`.
// Lanes with a cleared mask bit contribute zero.
ap_int<SUM_BITWIDTH> calc_sum(ap_int<24> val[32], ap_uint<32> mask) {
#pragma HLS PIPELINE II=1
    ap_int<SUM_BITWIDTH> total = 0;
    for (int lane = 0; lane < 32; ++lane) {
        // Select either the lane value or zero, then accumulate.
        total += mask[lane] ? val[lane] : ap_int<24>(0);
    }
    return total;
}
|
|
|
|
// Returns the sum of val[i]^2 over all lanes i (0..31) whose bit is set in `mask`.
ap_int<SUM2_BITWIDTH> calc_sum2(ap_int<24> val[32], ap_uint<32> mask) {
#pragma HLS PIPELINE II=1
    ap_int<SUM2_BITWIDTH> total_sq = 0;
    for (int lane = 0; lane < 32; ++lane) {
        const auto squared = val[lane] * val[lane];
        if (mask[lane])
            total_sq += squared;
    }
    return total_sq;
}
|
|
|
|
// Returns the number of bits set in the 32-bit `mask`.
ap_int<VALID_BITWIDTH> calc_valid(ap_uint<32> mask) {
#pragma HLS PIPELINE II=1
    ap_int<VALID_BITWIDTH> set_bits = 0;
    for (int lane = 0; lane < 32; ++lane) {
        if (mask[lane])
            set_bits += 1;
    }
    return set_bits;
}
|
|
|
|
// Forwards packets unchanged and, for every data packet, emits sliding-window
// statistics (sum, sum of squares, number of unmasked values) on three side streams.
//
// Stream layout handled here:
//   - The first packet is forwarded as-is inside a fixed-protocol region.
//   - While the packet at a module boundary has `user` clear, one module of
//     32*32*16 packets is processed; the packet index j is split into
//     col = j % 32 and line = j / 32 (so line runs 0..511).
//   - The first packet found with `user` set at a module boundary is forwarded
//     and the function returns.
//
// Statistics:
//   - For each packet the masked sum / sum of squares / valid count of its 32
//     values is computed (calc_sum, calc_sum2, calc_valid).
//   - Per column, a running total over at most LINES_PER_GO * 2 + 1 consecutive
//     lines is kept in sum/sum2/valid. The per-line contributions are stored in a
//     circular buffer (sum_old/sum2_old/valid_old, indexed by line_in_mem) so the
//     oldest line can be subtracted once the window is full.
//   - Output starts when line >= LINES_PER_GO, i.e. the first value emitted for a
//     column covers lines 0..LINES_PER_GO. After the last line of the module,
//     LINES_PER_GO * 32 further values are emitted while the oldest lines are
//     removed from the window one by one. In total one value per forwarded data
//     packet is written to each side stream.
//   - NOTE(review): this looks like a window centred on the line that arrived
//     LINES_PER_GO lines earlier, clipped at the module edges - confirm with the
//     consumer of sum_out/sum2_out/valid_out.
//
// data_in   - incoming internal packet stream
// data_out  - packets forwarded unchanged
// sum_out   - windowed sum of unmasked values, per column
// sum2_out  - windowed sum of squares of unmasked values, per column
// valid_out - windowed count of unmasked values, per column
void spot_finder_prepare(hls::stream<spot_finder_packet> &data_in,
                         hls::stream<spot_finder_packet> &data_out,
                         hls::stream<ap_int<SUM_BITWIDTH>> &sum_out,
                         hls::stream<ap_int<SUM2_BITWIDTH>> &sum2_out,
                         hls::stream<ap_int<VALID_BITWIDTH>> &valid_out) {
    spot_finder_packet packet;
    {
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        data_out << packet;
        ap_wait();
    }

    // Running per-column totals over the current window of lines.
    ap_int<SUM_BITWIDTH> sum[32];
    ap_int<SUM2_BITWIDTH> sum2[32];
    ap_int<VALID_BITWIDTH> valid[32];

    // Circular buffer with the individual per-line contributions inside the window.
    ap_int<SUM_BITWIDTH> sum_old[32][LINES_PER_GO * 2 + 1];
    ap_int<SUM2_BITWIDTH> sum2_old[32][LINES_PER_GO * 2 + 1];
    ap_int<VALID_BITWIDTH> valid_old[32][LINES_PER_GO * 2 + 1];

    data_in >> packet;

    while (!packet.user) {
        // Slot of the circular buffer that is written next (and holds the oldest line once full).
        int line_in_mem = 0;

        for (int j = 0; j < 32*32*16; j++) {
#pragma HLS PIPELINE II=1
            data_out << packet;

            int col = j % 32;
            int line = j / 32;

            ap_int<24> val[32];
            unpack32(packet.data, val);
            ap_uint<32> mask = packet.mask;

            // Contribution of this packet alone.
            auto tmp_sum = calc_sum(val, mask);
            auto tmp_sum2 = calc_sum2(val, mask);
            auto tmp_valid = calc_valid(mask);

            // New running total for this column.
            auto tmp2_sum = tmp_sum;
            auto tmp2_sum2 = tmp_sum2;
            auto tmp2_valid = tmp_valid;

            if ((line > 0) && (line < LINES_PER_GO * 2 + 1)) {
                // Window still filling: only accumulate.
                tmp2_sum += sum[col];
                tmp2_sum2 += sum2[col];
                tmp2_valid += valid[col];
            } else if (line >= LINES_PER_GO * 2 + 1) {
                // Window full: accumulate and drop the line about to be overwritten.
                tmp2_sum += sum[col] - sum_old[col][line_in_mem];
                tmp2_sum2 += sum2[col] - sum2_old[col][line_in_mem];
                tmp2_valid += valid[col] - valid_old[col][line_in_mem];
            }
            // line == 0: the running total restarts from this packet's contribution.

            if (line >= LINES_PER_GO) {
                sum_out << tmp2_sum;
                sum2_out << tmp2_sum2;
                valid_out << tmp2_valid;
            }

            sum[col] = tmp2_sum;
            sum2[col] = tmp2_sum2;
            valid[col] = tmp2_valid;

            sum_old[col][line_in_mem] = tmp_sum;
            sum2_old[col][line_in_mem] = tmp_sum2;
            valid_old[col][line_in_mem] = tmp_valid;

            // Advance the circular buffer slot after the last column of a line.
            if (col == 31) {
                if (line_in_mem >= LINES_PER_GO * 2 + 1 - 1)
                    line_in_mem = 0;
                else
                    ++line_in_mem;
            }

            data_in >> packet;
        }

        // Drain: emit the remaining LINES_PER_GO lines of statistics, shrinking the
        // window by removing the oldest stored line each time.
        for (int i = 0; i < LINES_PER_GO * 32; i++) {
#pragma HLS PIPELINE II=1
            int col = i % 32;

            sum[col] -= sum_old[col][line_in_mem];
            sum2[col] -= sum2_old[col][line_in_mem];
            valid[col] -= valid_old[col][line_in_mem];
            sum_out << sum[col];
            sum2_out << sum2[col];
            valid_out << valid[col];

            if (col == 31) {
                if (line_in_mem >= LINES_PER_GO * 2 + 1 - 1)
                    line_in_mem = 0;
                else
                    ++line_in_mem;
            }
        }
    }

    // Forward the terminating packet (user set).
    data_out << packet;
}
|
|
|
|
// Signal-to-noise test for 32 pixel values against windowed statistics.
//
// Returns a 32-bit map with bit j set when val[j] passes the test. If
// snr_threshold_2 is zero the test is treated as disabled and all bits are set.
//
// The intended comparison is (pxl - mean) > threshold * sqrt(variance). To avoid
// a square root and divisions, both sides are squared and scaled by valid_count^2:
//     ((pxl - mean) * valid_count)^2  >  variance * valid_count^2 * threshold^2
// Because squaring loses the sign, (pxl - mean) is additionally required to be
// positive. N and N-1 are treated as equal in the variance.
//
// val             - 32 pixel values
// snr_threshold_2 - squared SNR threshold; 4 fractional bits, i.e. steps of 1/16,
//                   which corresponds to steps of 0.25 in the un-squared threshold
// sum, sum2       - sum and sum of squares of the valid pixels in the window
// valid_count     - number of valid pixels in the window; must exceed 15 * 32 / 2
//                   for any pixel to pass
ap_uint<32> spot_finder_snr_threshold(ap_int<24> val[32],
                                      ap_ufixed<20,16, AP_RND_CONV> snr_threshold_2,
                                      ap_int<SUM_BITWIDTH> sum,
                                      ap_int<SUM2_BITWIDTH> sum2,
                                      ap_int<VALID_BITWIDTH> valid_count) {
#pragma HLS PIPELINE II=1
    if (snr_threshold_2 == 0)
        return UINT32_MAX;

    // variance * valid_count^2
    ap_int<SUM2_BITWIDTH+12> scaled_variance = valid_count * sum2 - sum * sum;
    // right-hand side of the squared comparison
    ap_fixed<SUM2_BITWIDTH+12+16+1, SUM2_BITWIDTH+12+16> limit = scaled_variance * snr_threshold_2;

    // Same for all 32 lanes: enough valid pixels contributed to the statistics.
    const bool enough_valid = (valid_count > 15 * 32 / 2);

    ap_uint<32> strong = 0;
    for (int lane = 0; lane < 32; lane++) {
        // (pxl - mean) * valid_count
        ap_int<SUM_BITWIDTH+1> excess = val[lane] * valid_count - sum;

        const bool above_mean = (excess > 0);
        const bool beyond_limit = (excess * excess > limit);

        if (enough_valid && above_mean && beyond_limit)
            strong[lane] = 1;
        else
            strong[lane] = 0;
    }
    return strong;
}
|
|
|
|
// Count test for 32 pixel values.
//
// Returns a 32-bit map with bit j set when val[j] is strictly greater than
// count_threshold. A threshold of zero or less is treated as "test disabled"
// and yields a map with all bits set.
ap_uint<32> spot_finder_count_threshold(ap_int<24> val[32], ap_int<32> &count_threshold) {
#pragma HLS PIPELINE II=1
    // Default covers the disabled case; every bit is overwritten when the test is active.
    ap_uint<32> above = UINT32_MAX;
    if (count_threshold > 0) {
        for (int lane = 0; lane < 32; lane++) {
            above[lane] = (val[lane] > count_threshold) ? 1 : 0;
        }
    }
    return above;
}
|
|
|
|
// Applies the count and SNR thresholds to every data packet and records the
// resulting strong pixels in the packet.
//
// Stream layout handled here:
//   - The first packet is forwarded as-is inside a fixed-protocol region.
//   - While the packet at a module boundary has `user` clear, one module of
//     RAW_MODULE_SIZE * sizeof(uint16_t) / 64 packets is processed. For each
//     packet one value is consumed from each of sum_in, sum2_in and valid_in.
//   - The first packet found with `user` set at a module boundary is forwarded
//     and the function returns.
//
// Thresholds are taken from the first packet of each module and used for the
// whole module:
//   - count_threshold is used as-is (values <= 0 disable the count test);
//   - snr_threshold is the 32-bit pattern of a float, clamped to [0, 255] and
//     converted to fixed point with 2 fractional bits; 0 disables the SNR test.
//     Any value that is not greater than 0 - including NaN - results in 0, so a
//     NaN written to the register disables the SNR test instead of being pushed
//     through a float-to-fixed conversion.
//
// A pixel is strong when it passes both tests (a disabled test passes all
// pixels), at least one test is enabled, and its mask bit is set. Strong pixels
// are cleared from packet.mask and OR-ed into packet.strong_pixel before the
// packet is forwarded.
//
// data_in   - incoming internal packet stream
// data_out  - outgoing internal packet stream with updated mask / strong_pixel
// sum_in    - windowed sum per packet
// sum2_in   - windowed sum of squares per packet
// valid_in  - windowed valid-pixel count per packet
void spot_finder_apply_threshold(hls::stream<spot_finder_packet> &data_in,
                                 hls::stream<spot_finder_packet> &data_out,
                                 hls::stream<ap_int<SUM_BITWIDTH>> &sum_in,
                                 hls::stream<ap_int<SUM2_BITWIDTH>> &sum2_in,
                                 hls::stream<ap_int<VALID_BITWIDTH>> &valid_in) {
    spot_finder_packet packet_in;
    {
#pragma HLS PROTOCOL fixed
        data_in >> packet_in;
        ap_wait();
        data_out << packet_in;
        ap_wait();
    }

    ap_int<SUM_BITWIDTH> sum;
    ap_int<SUM2_BITWIDTH> sum2;
    ap_int<VALID_BITWIDTH> valid;

    data_in >> packet_in;
    while (!packet_in.user) {
        ap_int<32> count_threshold = packet_in.count_threshold;
        ap_uint<32> snr_threshold_u32 = packet_in.snr_threshold;

        // Reinterpret the 32-bit register value as a float.
        float_uint32 thr;
        thr.u = snr_threshold_u32;

        // Clamp to [0, 255]. Written so that NaN (for which every comparison is
        // false) falls through to 0, i.e. "SNR test disabled".
        ap_ufixed<10,8, AP_RND_CONV> snr_threshold = 0;
        if (thr.f > 255)
            snr_threshold = 255;
        else if (thr.f > 0)
            snr_threshold = thr.f;

        ap_ufixed<20,16, AP_RND_CONV> snr_threshold_2 = snr_threshold * snr_threshold;

        for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
#pragma HLS PIPELINE II=1

            sum_in >> sum;
            sum2_in >> sum2;
            valid_in >> valid;

            ap_int<24> data_unpacked[32];
            unpack32(packet_in.data, data_unpacked);

            // Both tests must pass; a disabled test returns all ones.
            ap_uint<32> strong_pixel = spot_finder_count_threshold(data_unpacked, count_threshold) &
                                       spot_finder_snr_threshold(data_unpacked, snr_threshold_2,
                                                                 sum, sum2, valid);

            // With both tests disabled nothing is reported as strong.
            if ((snr_threshold == 0) && (count_threshold <= 0))
                strong_pixel = 0;

            // Only pixels that are currently unmasked can be strong.
            strong_pixel = strong_pixel & packet_in.mask;

            packet_in.mask = packet_in.mask & ~strong_pixel; // mask strong pixels
            packet_in.strong_pixel |= strong_pixel;          // add strong pixels to the output

            data_out << packet_in;
            data_in >> packet_in;
        }
    }

    // Forward the terminating packet (user set).
    data_out << packet_in;
}
|
|
|
|
// Top-level spot finder: a four-stage dataflow pipeline.
//
//   data_in -> spot_finder_in_stream -> data_0
//           -> spot_finder_prepare   -> data_1 (+ sum_0 / sum2_0 / valid_0)
//           -> spot_finder_apply_threshold -> data_2
//           -> spot_finder_out_stream -> data_out, strong_pixel_out
//
// data_in / data_out   - AXI-Stream packet ports
// strong_pixel_out     - AXI-Stream port with strong-pixel bitmaps and per-module trailer
// in_count_threshold   - count threshold, registered ap_none input
// in_snr_threshold     - SNR threshold (raw 32-bit value), registered ap_none input
//
// When JFJOCH_HLS_NOSYNTH is defined (software build), each stage runs in its own
// std::thread and the function returns after all four have finished; otherwise
// the stages are called directly and scheduled by the DATAFLOW pragma.
void spot_finder(STREAM_768 &data_in,
                 STREAM_768 &data_out,
                 hls::stream<ap_axiu<32,1,1,1>> &strong_pixel_out,
                 volatile ap_int<32> &in_count_threshold,
                 volatile ap_uint<32> &in_snr_threshold) {
#pragma HLS INTERFACE axis port=data_in
#pragma HLS INTERFACE axis port=data_out
#pragma HLS INTERFACE axis port=strong_pixel_out
#pragma HLS INTERFACE ap_none register port=in_count_threshold
#pragma HLS INTERFACE ap_none register port=in_snr_threshold
#pragma HLS DATAFLOW

    hls::stream<spot_finder_packet, 2> data_0;
    // Buffers the packets while spot_finder_prepare delays its statistics output.
    hls::stream<spot_finder_packet, 512> data_1; // 15*32 = 480 should be OK
    hls::stream<spot_finder_packet, 8> data_2;

#pragma HLS BIND_STORAGE variable=data_1 type=fifo impl=bram

    hls::stream<ap_int<SUM_BITWIDTH>, 32> sum_0;
    hls::stream<ap_int<SUM2_BITWIDTH>, 32> sum2_0;
    hls::stream<ap_int<VALID_BITWIDTH>, 32> valid_0;

#ifndef JFJOCH_HLS_NOSYNTH
    spot_finder_in_stream(data_in, data_0, in_count_threshold, in_snr_threshold);
    spot_finder_prepare(data_0, data_1, sum_0, sum2_0, valid_0);
    spot_finder_apply_threshold(data_1, data_2, sum_0, sum2_0, valid_0);
    spot_finder_out_stream(data_2, data_out, strong_pixel_out);
#else
    // Software build: one thread per stage, started in pipeline order. A fixed-size
    // array is enough for the four stages and needs nothing beyond <thread>.
    std::thread spot_finder_cores[] = {
        std::thread([&] {spot_finder_in_stream(data_in, data_0, in_count_threshold,
                                               in_snr_threshold);}),
        std::thread([&] {spot_finder_prepare(data_0, data_1, sum_0, sum2_0, valid_0);}),
        std::thread([&] {spot_finder_apply_threshold(data_1, data_2, sum_0, sum2_0, valid_0);}),
        std::thread([&] {spot_finder_out_stream(data_2, data_out, strong_pixel_out);})
    };

    for (auto &core : spot_finder_cores)
        core.join();
#endif
}
|