// SPDX-FileCopyrightText: 2024 Filip Leonarski, Paul Scherrer Institute
// SPDX-License-Identifier: CERN-OHL-S-2.0

// NOTE(review): in this copy of the file several template argument lists and one
// #include target appear to have been lost (e.g. `hls::stream &data_out`,
// `ap_int calc_sum(...)`, `ap_fixed threshold`, `std::vector spot_finder_cores`).
// The code tokens below are kept exactly as found; restore the missing arguments
// from the authoritative copy before building.

#include "hls_jfjoch.h"

#ifdef JFJOCH_HLS_NOSYNTH
// NOTE(review): header name is missing here; the NOSYNTH branch of spot_finder()
// uses std::vector and objects with join() - presumably <thread>; confirm.
#include
#endif

// Input adapter of the spot finder pipeline.
//
// Converts packets read from `data_in` (packet_768_t) into spot_finder_packet
// records written to `data_out`:
//  * the first packet is forwarded with mask = 0,
//  * every following packet whose `user` flag is clear is forwarded with
//    mask = keep(31,0), strong_pixel = keep(63,32) and the values of
//    `in_count_threshold` / `in_snr_threshold` read at that moment,
//  * the first packet with `user` set ends the loop and is forwarded with mask = 0.
void spot_finder_in_stream(STREAM_768 &data_in,
                           hls::stream &data_out,
                           volatile ap_int<32> &in_count_threshold,
                           volatile ap_uint<32> &in_snr_threshold) {
    packet_768_t packet_in;
    {
        // First packet: read and write are separated by explicit ap_wait() calls
        // inside a fixed-protocol region.
#pragma HLS PROTOCOL fixed
        data_in >> packet_in;
        ap_wait();
        data_out << spot_finder_packet{ .data = packet_in.data, .strb = packet_in.strb, .mask = 0, .user = packet_in.user, .last = packet_in.last };
        ap_wait();
    }

    data_in >> packet_in;
    while (!packet_in.user) {
#pragma HLS PIPELINE II=1
        // The 64-bit keep field carries two 32-bit words: low half is the
        // per-pixel mask, high half is the strong pixel word.
        data_out << spot_finder_packet{ .data = packet_in.data, .strb = packet_in.strb, .mask = packet_in.keep(31,0), .strong_pixel = packet_in.keep(63,32), .count_threshold = in_count_threshold, .snr_threshold = in_snr_threshold, .user = packet_in.user, .last = packet_in.last };
        data_in >> packet_in;
    }

    // Terminating packet (user flag set) is passed on without mask information.
    data_out << spot_finder_packet{ .data = packet_in.data, .strb = packet_in.strb, .mask = 0, .user = packet_in.user, .last = packet_in.last };
}

// Output adapter of the spot finder pipeline.
//
// Converts spot_finder_packet records from `data_in` back into packet_768_t on
// `data_out` and emits the strong pixel words on `strong_pixel_out`.
// For every group of RAW_MODULE_SIZE * sizeof(uint16_t) / 64 packets
// (presumably one detector module - TODO confirm) it writes:
//  * one 32-bit strong pixel word per packet,
//  * then 16 trailer words: count threshold, SNR threshold (raw 32-bit value)
//    and 14 zeros.
// After the packet with `user` set is seen, a single word with user = 1 is written
// to `strong_pixel_out` and the terminating packet is forwarded.
void spot_finder_out_stream(hls::stream &data_in,
                            STREAM_768 &data_out,
                            hls::stream> &strong_pixel_out) {
    spot_finder_packet packet_in;
    {
        // First packet is forwarded without a keep field.
#pragma HLS PROTOCOL fixed
        data_in >> packet_in;
        ap_wait();
        data_out << packet_768_t{.data = packet_in.data, .strb = packet_in.strb, .user = packet_in.user, .last = packet_in.last};
        ap_wait();
    }

    data_in >> packet_in;
    while (!packet_in.user) {
        // Thresholds are latched from the first packet of the group and reported
        // in the trailer below.
        ap_uint<32> count_threshold = packet_in.count_threshold;
        ap_uint<32> snr_threshold_u32 = packet_in.snr_threshold;

        for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
#pragma HLS PIPELINE II=1
            // Re-pack mask (low 32 bits) and strong pixel word (high 32 bits)
            // into the 64-bit keep field - the inverse of spot_finder_in_stream.
            ap_uint<64> keep;
            keep(31, 0) = packet_in.mask;
            keep(63,32) = packet_in.strong_pixel;
            data_out << packet_768_t{.data = packet_in.data, .keep = keep, .strb = packet_in.strb, .user = packet_in.user, .last = packet_in.last};
            strong_pixel_out << ap_axiu<32,1,1,1>{.data = packet_in.strong_pixel, .user = 0};
            data_in >> packet_in;
        }

        // Save module statistics
        strong_pixel_out << ap_axiu<32,1,1,1>{.data = count_threshold, .user = 0};
        strong_pixel_out << ap_axiu<32,1,1,1>{.data = snr_threshold_u32, .user = 0};
        // Pad the trailer to 16 words in total (2 values + 14 zeros).
        for (int i = 0; i < 14;i++)
            strong_pixel_out << ap_axiu<32,1,1,1>{.data = 0, .user = 0};
    }

    // End marker for the strong pixel stream, then the terminating data packet.
    strong_pixel_out << ap_axiu<32,1,1,1>{.data = 0, .user = 1};
    data_out << packet_768_t{.data = packet_in.data, .strb = packet_in.strb, .user = packet_in.user, .last = packet_in.last};
}

// Returns the sum of val[i] over all i (0..31) whose bit is set in `mask`.
ap_int calc_sum(ap_int<24> val[32], ap_uint<32> mask) {
#pragma HLS PIPELINE II=1
    ap_int ret = 0;
    for (int i = 0; i < 32; i++) {
        if (mask[i])
            ret += val[i];
    }
    return ret;
}

// Returns the sum of val[i]^2 over all i (0..31) whose bit is set in `mask`.
ap_int calc_sum2(ap_int<24> val[32], ap_uint<32> mask) {
#pragma HLS PIPELINE II=1
    ap_int ret = 0;
    for (int i = 0; i < 32; i++) {
        if (mask[i])
            ret += val[i] * val[i];
    }
    return ret;
}

// Returns the number of bits set in `mask`.
ap_int calc_valid(ap_uint<32> mask) {
#pragma HLS PIPELINE II=1
    ap_int ret = 0;
    for (int i = 0; i < 32; i++) {
        ret += mask[i];
    }
    return ret;
}

// Computes sliding-window statistics used by the SNR threshold.
//
// Packets are passed through unchanged from `data_in` to `data_out`. Within each
// group of 32*32*16 packets, packet j is treated as column j % 32 of line j / 32
// (512 lines of 32 packets). For each packet the masked sum, sum of squares and
// count of valid pixels are computed and accumulated per column over a window of
// at most LINES_PER_GO * 2 + 1 consecutive lines.
//
// Output on `sum_out` / `sum2_out` / `valid_out` starts once line LINES_PER_GO has
// been reached, so the statistics stream lags the data stream by LINES_PER_GO
// lines; the tail loop emits the remaining LINES_PER_GO * 32 entries so that one
// statistics triple is produced per packet of the group.
void spot_finder_prepare(hls::stream &data_in,
                         hls::stream &data_out,
                         hls::stream> &sum_out,
                         hls::stream> &sum2_out,
                         hls::stream> &valid_out) {
    // NOTE(review): strong_pixel is declared but never used in this function.
    ap_uint<32> strong_pixel;
    spot_finder_packet packet;
    {
        // First packet is forwarded as is.
#pragma HLS PROTOCOL fixed
        data_in >> packet;
        data_out << packet;
        ap_wait();
    }

    // Running per-column window totals. They are not initialised here: on line 0
    // they are written without being read (see the `line > 0` test below).
    ap_int sum[32];
    ap_int sum2[32];
    ap_int valid[32];

    // Per-line contributions currently inside the window, used to subtract the
    // oldest line when the window slides. Indexed [column][slot], with the slot
    // advancing cyclically via line_in_mem.
    ap_int sum_old[32][LINES_PER_GO * 2 + 1];
    ap_int sum2_old[32][LINES_PER_GO * 2 + 1];
    ap_int valid_old[32][LINES_PER_GO * 2 + 1];

    data_in >> packet;
    while (!packet.user) {
        // Slot in the *_old buffers for the line being processed.
        int line_in_mem = 0;

        for (int j = 0; j < 32*32*16; j++) {
#pragma HLS PIPELINE II=1
            data_out << packet;

            int col = j % 32;
            int line = j / 32;

            ap_int<24> val[32];
            unpack32(packet.data, val);
            ap_uint<32> mask = packet.mask;

            // Contribution of this packet alone.
            auto tmp_sum = calc_sum(val, mask);
            auto tmp_sum2 = calc_sum2(val, mask);
            auto tmp_valid = calc_valid(mask);

            // Window total after including this packet.
            auto tmp2_sum = tmp_sum;
            auto tmp2_sum2 = tmp_sum2;
            auto tmp2_valid = tmp_valid;

            if ((line > 0) && (line < LINES_PER_GO * 2 + 1)) {
                // Window still growing: just add to the running total.
                tmp2_sum += sum[col];
                tmp2_sum2 += sum2[col];
                tmp2_valid += valid[col];
            } else if (line >= LINES_PER_GO * 2 + 1) {
                // Window full: add the new line and drop the contribution stored
                // in the slot that is about to be overwritten.
                tmp2_sum += sum[col] - sum_old[col][line_in_mem];
                tmp2_sum2 += sum2[col] - sum2_old[col][line_in_mem];
                tmp2_valid += valid[col] - valid_old[col][line_in_mem];
            }

            if (line >= LINES_PER_GO) {
                sum_out << tmp2_sum;
                sum2_out << tmp2_sum2;
                valid_out << tmp2_valid;
            }

            sum[col] = tmp2_sum;
            sum2[col] = tmp2_sum2;
            valid[col] = tmp2_valid;

            sum_old[col][line_in_mem] = tmp_sum;
            sum2_old[col][line_in_mem] = tmp_sum2;
            valid_old[col][line_in_mem] = tmp_valid;

            // Advance the cyclic slot index once per completed line.
            if (col == 31) {
                if (line_in_mem >= LINES_PER_GO * 2 + 1 - 1)
                    line_in_mem = 0;
                else
                    ++line_in_mem;
            }
            data_in >> packet;
        }

        // Flush: no new lines arrive, so the window only shrinks by removing the
        // oldest stored line for each remaining output line.
        for (int i = 0; i < LINES_PER_GO * 32; i++) {
#pragma HLS PIPELINE II=1
            int col = i % 32;
            sum[col] -= sum_old[col][line_in_mem];
            sum2[col] -= sum2_old[col][line_in_mem];
            valid[col] -= valid_old[col][line_in_mem];

            sum_out << sum[col];
            sum2_out << sum2[col];
            valid_out << valid[col];

            if (col == 31) {
                if (line_in_mem >= LINES_PER_GO * 2 + 1 - 1)
                    line_in_mem = 0;
                else
                    ++line_in_mem;
            }
        }
    }
    // Forward the terminating packet (user flag set).
    data_out << packet;
}

// Signal-to-noise test for 32 pixels.
//
// `snr_threshold_2` is the squared SNR threshold; `sum`, `sum2` and `valid_count`
// are the window statistics for these pixels. Returns a 32-bit word with bit j set
// when pixel j passes the test. If `snr_threshold_2` is zero the test is treated
// as disabled and all bits are returned set, so that AND-ing with another test
// (as done in spot_finder_apply_threshold) leaves that test's result unchanged.
ap_uint<32> spot_finder_snr_threshold(ap_int<24> val[32],
                                      ap_ufixed<20,16, AP_RND_CONV> snr_threshold_2,
                                      ap_int sum,
                                      ap_int sum2,
                                      ap_int valid_count) {
#pragma HLS PIPELINE II=1
    if (snr_threshold_2 == 0)
        return UINT32_MAX;

    ap_int variance = valid_count * sum2 - sum * sum; // This is variance * valid_count^2
    ap_fixed threshold = variance * snr_threshold_2; // snr_threshold is in units of 0.25

    ap_uint<32> ret = 0;

    for (int j = 0; j < 32; j++) {
        ap_int in_minus_mean = val[j] * valid_count - sum; // This is (pxl - mean) * valid_count
        // Aim is to compare pxl-mean with sqrt(variance) * threshold
        // however this would require sqrt and divisions, so
        // it is cheaper to compare ((pxl-mean) * valid_count)^2 with variance * valid_count^2 * threshold^2,
        // but need to make sure that (pxl - mean) is positive
        // Also assume that N ≈ (N-1)
        // NOTE(review): the constant 15 presumably equals LINES_PER_GO * 2 + 1 - confirm.
        if ((in_minus_mean * in_minus_mean > threshold) &&
            (in_minus_mean > 0) &&
            (valid_count > 15 * 32 / 2)) // at least half of the pixels
            ret[j] = 1;
        else
            ret[j] = 0;
    }
    return ret;
}

// Absolute count test for 32 pixels.
//
// Returns a 32-bit word with bit j set when val[j] > count_threshold. A threshold
// of zero or less is treated as "test disabled" and yields all bits set.
ap_uint<32> spot_finder_count_threshold(ap_int<24> val[32],
                                        ap_int<32> &count_threshold) {
#pragma HLS PIPELINE II=1
    if (count_threshold <= 0)
        return UINT32_MAX;

    ap_uint<32> ret = 0;
    for (int j = 0; j < 32; j++) {
        if (val[j] > count_threshold)
            ret[j] = 1;
        else
            ret[j] = 0;
    }
    return ret;
}

// Applies the count and SNR thresholds to every packet and records the result.
//
// For each packet, one (sum, sum2, valid) triple is consumed from the statistics
// streams. A pixel is flagged as strong when it passes both tests and its mask
// bit is set; flagged pixels are cleared from `mask` and OR-ed into
// `strong_pixel` before the packet is written to `data_out`. If both thresholds
// are disabled, no pixel is flagged.
void spot_finder_apply_threshold(hls::stream &data_in,
                                 hls::stream &data_out,
                                 hls::stream> &sum_in,
                                 hls::stream> &sum2_in,
                                 hls::stream> &valid_in) {
    // NOTE(review): strong_pixel_prev is declared but never used.
    ap_uint<32> strong_pixel_prev;
    spot_finder_packet packet_in;
    {
        // First packet is forwarded as is.
#pragma HLS PROTOCOL fixed
        data_in >> packet_in;
        ap_wait();
        data_out << packet_in;
        ap_wait();
    }

    ap_int sum;
    ap_int sum2;
    ap_int valid;

    data_in >> packet_in;
    while (!packet_in.user) {
        // Thresholds are taken from the first packet of the group.
        ap_int<32> count_threshold = packet_in.count_threshold;
        ap_uint<32> snr_threshold_u32 = packet_in.snr_threshold;

        // The 32-bit word is written to thr.u and read back as thr.f
        // (float_uint32 is declared elsewhere - presumably a float/uint32 union).
        float_uint32 thr;
        thr.u = snr_threshold_u32;

        // Clamp to [0, 255] before converting to fixed point with 2 fractional
        // bits. NOTE(review): the initialiser below is always overwritten by the
        // if/else chain that follows.
        ap_ufixed<10,8, AP_RND_CONV> snr_threshold = thr.f;
        if (thr.f > 255)
            snr_threshold = 255;
        else if (thr.f <= 0)
            snr_threshold = 0;
        else
            snr_threshold = thr.f;

        ap_ufixed<20,16, AP_RND_CONV> snr_threshold_2 = snr_threshold * snr_threshold;

        // NOTE(review): strong_pixel_count is initialised but never used.
        ap_uint<32> strong_pixel_count = 0;

        for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
#pragma HLS PIPELINE II=1
            sum_in >> sum;
            sum2_in >> sum2;
            valid_in >> valid;

            ap_int<24> data_unpacked[32];
            unpack32(packet_in.data, data_unpacked);

            // Both tests must pass; a disabled test returns all ones.
            ap_uint<32> strong_pixel = spot_finder_count_threshold(data_unpacked, count_threshold) &
                                       spot_finder_snr_threshold(data_unpacked, snr_threshold_2, sum, sum2, valid);

            // With both tests disabled the AND above would be all ones, so force
            // "no strong pixels" instead.
            if ((snr_threshold == 0) && (count_threshold <= 0))
                strong_pixel = 0;

            // Only pixels whose mask bit is set can be strong.
            strong_pixel = strong_pixel & packet_in.mask;
            packet_in.mask = packet_in.mask & ~strong_pixel; // mask strong pixels
            packet_in.strong_pixel |= strong_pixel; // add strong pixels to the output
            data_out << packet_in;
            data_in >> packet_in;
        }
    }
    // Forward the terminating packet (user flag set).
    data_out << packet_in;
}

// Top-level spot finder.
//
// Chains four stages through internal streams:
//   data_in -> spot_finder_in_stream -> spot_finder_prepare
//           -> spot_finder_apply_threshold -> spot_finder_out_stream -> data_out
// with the window statistics passed from the prepare stage to the threshold stage
// via sum_0 / sum2_0 / valid_0, and strong pixel words written to
// `strong_pixel_out`. `in_count_threshold` and `in_snr_threshold` are sampled by
// the input stage.
//
// When JFJOCH_HLS_NOSYNTH is defined, the stages are instead started through the
// elements of `spot_finder_cores` and joined (presumably one std::thread per
// stage - confirm against the authoritative copy).
void spot_finder(STREAM_768 &data_in,
                 STREAM_768 &data_out,
                 hls::stream> &strong_pixel_out,
                 volatile ap_int<32> &in_count_threshold,
                 volatile ap_uint<32> &in_snr_threshold) {
#pragma HLS INTERFACE axis port=data_in
#pragma HLS INTERFACE axis port=data_out
#pragma HLS INTERFACE axis port=strong_pixel_out
#pragma HLS INTERFACE ap_none register port=in_count_threshold
#pragma HLS INTERFACE ap_none register port=in_snr_threshold

#pragma HLS DATAFLOW
    hls::stream data_0;
    // data_1 has to buffer the packets forwarded by spot_finder_prepare while the
    // corresponding statistics are still delayed by LINES_PER_GO lines.
    hls::stream data_1; // 15*32 = 480 should be OK
    hls::stream data_2;

#pragma HLS BIND_STORAGE variable=data_1 type=fifo impl=bram
    hls::stream, 32> sum_0;
    hls::stream, 32> sum2_0;
    hls::stream, 32> valid_0;

#ifndef JFJOCH_HLS_NOSYNTH
    spot_finder_in_stream(data_in, data_0, in_count_threshold, in_snr_threshold);
    spot_finder_prepare(data_0, data_1, sum_0, sum2_0, valid_0);
    spot_finder_apply_threshold(data_1, data_2, sum_0, sum2_0, valid_0);
    spot_finder_out_stream(data_2, data_out, strong_pixel_out);
#else
    // Software build: run the stages concurrently and wait for all of them.
    std::vector spot_finder_cores;

    spot_finder_cores.emplace_back([&] {spot_finder_in_stream(data_in, data_0, in_count_threshold, in_snr_threshold);});
    spot_finder_cores.emplace_back([&] {spot_finder_prepare(data_0, data_1, sum_0, sum2_0, valid_0);});
    spot_finder_cores.emplace_back([&] {spot_finder_apply_threshold(data_1, data_2, sum_0, sum2_0, valid_0);});
    spot_finder_cores.emplace_back([&] {spot_finder_out_stream(data_2, data_out, strong_pixel_out);});

    for (auto &i : spot_finder_cores)
        i.join();
#endif
}