Patch notes (commentary only; text ahead of the first "diff --git" header is
skipped by patch tools).

Purpose
  Registers a second HLS module, spot_finder_update_sum, built from
  spot_finder.cpp, and adds its implementation, declarations and unit tests.

What the added code does (as visible in this patch)
  calc_mask_diff          For each of 32 lanes stores new_bit - old_bit of the
                          two 32-bit masks and packs the result with pack32().
  calc_mask               For each of 32 16-bit lanes: a value equal to
                          INT16_MAX or INT16_MIN gets mask bit 0 and output 0;
                          any other value gets mask bit 1 and is copied through.
  spot_finder_update_sum  Forwards the first packet, then, until a packet with
                          .user set arrives, processes one frame at a time:
                          clears the 32 column accumulators and the
                          (2 * FPGA_NBX + 1) * 32 entry data/mask caches, runs
                          FPGA_NBX * 32 + RAW_MODULE_SIZE * sizeof(uint16_t) / 64
                          iterations, forwards each input packet to data_out,
                          updates the accumulators with the difference between
                          the incoming word and the cached word it replaces,
                          and writes sum/sum2/valid once i >= FPGA_NBX * 32.
                          The terminating packet is forwarded last.
  spot_finder.h           SUM2_BITWIDTH changes from 15*2 to 16*2 (+ log2 term);
                          MASK_SUM_BITWIDTH is introduced.
  calc_sum, calc_sum2, update_sum, pack32 and unpack32 are defined outside
  this patch; their behaviour is not described here.

Review notes
  NOTE(review): this copy of the patch had lost its line breaks. They are
  restored here from the "+" markers and the hunk counts. Indentation and the
  position of blank context lines are reconstructed and should be checked
  against the repository before applying.
  NOTE(review): template argument lists that start with an identifier appear
  to have been stripped from this copy (for example after hls::stream,
  ap_uint, ap_int, std::vector and template). They are left exactly as found
  rather than guessed; restore them from the repository.
  FIX: in FPGA_spot_finder_update_sum the assertion on valid[1+1024] was
  present twice in the sequence row 1, row 1, row 3. The second copy now checks
  row 2 (valid[1+2*1024] == FPGA_NBX + 1 + 2). The "index" hash of the test
  file therefore no longer matches the new blob.

diff --git a/fpga/hls/CMakeLists.txt b/fpga/hls/CMakeLists.txt
index 2bbf389b..f2d7a0e8 100644
--- a/fpga/hls/CMakeLists.txt
+++ b/fpga/hls/CMakeLists.txt
@@ -61,6 +61,7 @@ MAKE_HLS_MODULE(save_to_hbm.cpp save_to_hbm)
 MAKE_HLS_MODULE(mask_missing.cpp mask_missing)
 MAKE_HLS_MODULE(integration.cpp integration)
 MAKE_HLS_MODULE(spot_finder.cpp spot_finder)
+MAKE_HLS_MODULE(spot_finder.cpp spot_finder_update_sum)
 MAKE_HLS_MODULE(axis_broadcast.cpp axis_broadcast)
 MAKE_HLS_MODULE(axis_256_to_512.cpp axis_256_to_512)
 MAKE_HLS_MODULE(axis_256_to_512.cpp axis_32_to_512)
@@ -82,6 +83,7 @@ SET (HLS_IPS psi_ch_hls_data_collection_fsm_1_0.zip
         psi_ch_hls_save_to_hbm_1_0.zip
         psi_ch_hls_mask_missing_1_0.zip
         psi_ch_hls_frame_generator_1_0.zip
+        psi_ch_hls_spot_finder_update_sum_1_0.zip
         psi_ch_hls_spot_finder_1_0.zip
         psi_ch_hls_integration_1_0.zip
         psi_ch_hls_axis_broadcast_1_0.zip
diff --git a/fpga/hls/spot_finder.cpp b/fpga/hls/spot_finder.cpp
index 531c4ef4..8429a064 100644
--- a/fpga/hls/spot_finder.cpp
+++ b/fpga/hls/spot_finder.cpp
@@ -37,6 +37,40 @@ void calc_sum2(ap_uint &ext_column_sum2,
     ext_column_sum2 = pack32(column_sum2);
 }
 
+void calc_mask_diff(ap_uint &ext_column_valid,
+                    const ap_uint<32> ext_old_value,
+                    const ap_uint<32> ext_new_value) {
+#pragma HLS PIPELINE II=1
+    ap_int column_valid[32];
+
+    for (int i = 0; i < 32; i++)
+        column_valid[i] = ap_int(ext_new_value[i]) - ap_int(ext_old_value[i]);
+
+    ext_column_valid = pack32(column_valid);
+}
+
+void calc_mask(const ap_uint<512> &input, ap_uint<512> &output, ap_uint<32> &mask) {
+#pragma HLS PIPELINE II=1
+    ap_int<16> value[32];
+    ap_uint<32> tmp_mask;
+    ap_uint<512> tmp_output;
+
+    unpack32(input, value);
+
+    for (int i = 0; i < 32; i++) {
+        if ((value[i] == INT16_MAX) || (value[i] == INT16_MIN)) {
+            tmp_mask[i] = 0;
+            tmp_output(i * 16 + 15, i * 16) = 0;
+        } else {
+            tmp_mask[i] = 1;
+            tmp_output(i * 16 + 15, i * 16) = input(i * 16 + 15, i * 16);
+        }
+    }
+    mask = tmp_mask;
+    output = tmp_output;
+}
+
+
 ap_uint<1> check_threshold(ap_int<16> val,
                            ap_int sum,
                            ap_int sum2,
@@ -77,6 +111,99 @@ ap_uint<32> check_threshold(const ap_uint<512> data_packed,
     return tmp_output;
 }
 
+void spot_finder_update_sum(STREAM_512 &data_in,
+                            STREAM_512 &data_out,
+                            hls::stream> &sum_out,
+                            hls::stream> &sum2_out,
+                            hls::stream> &valid_out) {
+#pragma HLS INTERFACE axis port=data_in
+#pragma HLS INTERFACE axis port=data_out
+#pragma HLS INTERFACE axis port=sum_out
+#pragma HLS INTERFACE axis port=sum2_out
+#pragma HLS INTERFACE axis port=valid_out
+
+    ap_uint<512> data_cache[(2 * FPGA_NBX + 1) * 32];
+    ap_uint<32> mask_cache[(2 * FPGA_NBX + 1) * 32];
+
+    ap_uint sum_cache[32];
+    ap_uint sum2_cache[32];
+    ap_uint valid_cache[32];
+
+    packet_512_t packet_in;
+    data_in >> packet_in;
+    data_out << packet_in;
+
+    data_in >> packet_in;
+    while (!packet_in.user) {
+
+        for (int i = 0; i < 32; i++) {
+#pragma HLS unroll
+            sum_cache[i] = 0;
+            sum2_cache[i] = 0;
+            valid_cache[i] = 0;
+        }
+
+        for (int i = 0; i < (2 * FPGA_NBX + 1) * 32; i++) {
+#pragma HLS unroll
+            data_cache[i] = 0;
+            mask_cache[i] = 0;
+        }
+
+        for (int i = 0; i < (FPGA_NBX) * 32 + RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
+#pragma HLS PIPELINE II = 1
+            ap_uint<512> packet_in_data;
+            ap_uint<32> packet_in_mask;
+
+            if (i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64) {
+                data_out << packet_in;
+                calc_mask(packet_in.data, packet_in_data, packet_in_mask);
+            } else {
+                packet_in_data = 0;
+                packet_in_mask = 0;
+            }
+
+            uint16_t cell_number_top = (i / 32) % (2 * FPGA_NBX + 1) * 32 + (i % 32);
+
+            ap_uint<512> top_line = data_cache[cell_number_top];
+            ap_uint<32> top_line_mask = mask_cache[cell_number_top];
+
+            ap_uint diff_sum;
+            ap_uint diff_sum2;
+            ap_uint diff_valid;
+
+            ap_uint column_sum = sum_cache[i % 32];
+            ap_uint column_sum2 = sum2_cache[i % 32];
+            ap_uint column_valid = valid_cache[i % 32];
+
+            calc_sum(diff_sum, top_line, packet_in_data);
+            calc_sum2(diff_sum2, top_line, packet_in_data);
+            calc_mask_diff(diff_valid, top_line_mask, packet_in_mask);
+
+            update_sum(column_sum, diff_sum);
+            update_sum(column_sum2, diff_sum2);
+            update_sum(column_valid, diff_valid);
+
+            if (i >= (FPGA_NBX) * 32) {
+                sum_out << column_sum;
+                sum2_out << column_sum2;
+                valid_out << column_valid;
+            }
+
+            sum_cache[i % 32] = column_sum;
+            sum2_cache[i % 32] = column_sum2;
+            valid_cache[i % 32] = column_valid;
+
+            data_cache[cell_number_top] = packet_in_data;
+            mask_cache[cell_number_top] = packet_in_mask;
+
+            if (i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64)
+                data_in >> packet_in;
+        }
+    }
+    data_out << packet_in;
+}
+
+
 void spot_finder(STREAM_512 &data_in,
                  STREAM_512 &data_out,
                  hls::stream> &strong_pixel_out,
diff --git a/fpga/hls/spot_finder.h b/fpga/hls/spot_finder.h
index 9a2a9537..e4394e0e 100644
--- a/fpga/hls/spot_finder.h
+++ b/fpga/hls/spot_finder.h
@@ -10,7 +10,8 @@
 #define FPGA_NBX 5
 #define FPGA_NBX_CEIL_LOG2 7
 #define SUM_BITWIDTH (16+FPGA_NBX_CEIL_LOG2)
-#define SUM2_BITWIDTH (15*2+FPGA_NBX_CEIL_LOG2)
+#define SUM2_BITWIDTH (16*2+FPGA_NBX_CEIL_LOG2)
+#define MASK_SUM_BITWIDTH (FPGA_NBX_CEIL_LOG2+1)
 
 template
 void update_sum(ap_uint &ext_val1, const ap_uint ext_val2) {
@@ -55,6 +56,10 @@ ap_uint prefix_sum(const ap_uint ext_column_sum,
     return pack32(line_sum);
 }
 
+void calc_mask_diff(ap_uint &ext_column_valid,
+                    const ap_uint<32> ext_old_value,
+                    const ap_uint<32> ext_new_value);
+
 void calc_sum2(ap_uint &ext_column_sum2,
                const ap_uint<512> ext_old_value,
                const ap_uint<512> ext_new_value);
@@ -63,6 +68,14 @@ void calc_sum(ap_uint &ext_column_sum,
               const ap_uint<512> ext_old_value,
               const ap_uint<512> ext_new_value);
 
+void calc_mask(const ap_uint<512> &input, ap_uint<512> &output, ap_uint<32> &mask);
+
+void spot_finder_update_sum(STREAM_512 &data_in,
+                            STREAM_512 &data_out,
+                            hls::stream> &sum_out,
+                            hls::stream> &sum2_out,
+                            hls::stream> &valid_out);
+
 ap_uint<1> check_threshold(ap_int<16> val,
                            ap_int sum,
                            ap_int sum2,
diff --git a/tests/FPGASpotFindingUnitTest.cpp b/tests/FPGASpotFindingUnitTest.cpp
index 384a17f3..e69c5d90 100644
--- a/tests/FPGASpotFindingUnitTest.cpp
+++ b/tests/FPGASpotFindingUnitTest.cpp
@@ -59,6 +59,60 @@ TEST_CASE("FPGA_calc_sum","[FPGA][SpotFinder]") {
     }
 }
 
+TEST_CASE("FPGA_calc_valid","[FPGA][SpotFinder]") {
+    ap_uint<32> old_mask = UINT32_MAX;
+    ap_uint<32> new_mask = UINT32_MAX;
+
+    old_mask[15] = 0;
+    new_mask[13] = 0;
+
+    ap_uint diff_mask;
+    calc_mask_diff(diff_mask, old_mask, new_mask);
+
+    ap_int diff_mask_32[32];
+    unpack32(diff_mask, diff_mask_32);
+
+    REQUIRE(diff_mask_32[0] == 0);
+    REQUIRE(diff_mask_32[1] == 0);
+
+    REQUIRE(diff_mask_32[13] == -1);
+    REQUIRE(diff_mask_32[15] == 1);
+}
+
+TEST_CASE("FPGA_calc_mask","[FPGA][SpotFinder]") {
+    ap_int<16> value_in[32], value_out[32];
+
+    for (int i = 0; i < 32; i++)
+        value_in[i] = 154 + i;
+
+    value_in[15] = INT16_MAX;
+    value_in[0] = INT16_MIN;
+    value_in[1] = INT16_MIN + 1;
+    value_in[2] = INT16_MAX - 1;
+
+    ap_uint<512> input = pack32(value_in);
+    ap_uint<512> output = 0;
+    ap_uint<32> mask = 0;
+
+    calc_mask(input, output, mask);
+
+    REQUIRE(mask[0] == 0);
+    REQUIRE(mask[15] == 0);
+    REQUIRE(mask[1] == 1);
+    REQUIRE(mask[2] == 1);
+    REQUIRE(mask[3] == 1);
+    REQUIRE(mask[4] == 1);
+
+    unpack32(output, value_out);
+
+    REQUIRE(value_out[0] == 0);
+    REQUIRE(value_out[15] == 0);
+    REQUIRE(value_out[1] == value_in[1]);
+    REQUIRE(value_out[2] == value_in[2]);
+    REQUIRE(value_out[3] == value_in[3]);
+    REQUIRE(value_out[4] == value_in[4]);
+}
+
 TEST_CASE("FPGA_update_sum" , "[FPGA][SpotFinder]") {
     ap_int arr_val1[32], arr_val2[32], arr_out[32];
 
@@ -107,3 +161,86 @@ TEST_CASE("FPGA_spot_check_threshold","[FPGA][SpotFinder]") {
     }
 
 }
+
+TEST_CASE("FPGA_spot_finder_update_sum","[FPGA][SpotFinder]") {
+    STREAM_512 input;
+    STREAM_512 output;
+
+    hls::stream> sum_out;
+    hls::stream> sum2_out;
+    hls::stream> valid_out;
+
+    std::vector input_frame(RAW_MODULE_SIZE);
+    for (int i = 0; i < RAW_MODULE_SIZE; i++) {
+        if (i % RAW_MODULE_COLS == 1023)
+            input_frame[i] = INT16_MIN;
+        else
+            input_frame[i] = i % RAW_MODULE_COLS;
+    }
+    auto input_frame_512 = (ap_uint<512> *) input_frame.data();
+
+    input << packet_512_t{.user = 0};
+    for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++)
+        input << packet_512_t{.data = input_frame_512[i], .user = 0};
+
+    input << packet_512_t{.user = 1};
+
+    spot_finder_update_sum(input, output, sum_out, sum2_out, valid_out);
+
+    REQUIRE(input.size() == 0);
+
+    REQUIRE(output.size() == RAW_MODULE_SIZE * sizeof(uint16_t) / 64 + 2);
+    REQUIRE(sum_out.size() == RAW_MODULE_SIZE * sizeof(uint16_t) / 64);
+    REQUIRE(sum2_out.size() == RAW_MODULE_SIZE * sizeof(uint16_t) / 64);
+    REQUIRE(valid_out.size() == RAW_MODULE_SIZE * sizeof(uint16_t) / 64);
+
+    std::vector sum(RAW_MODULE_SIZE);
+    std::vector sum2(RAW_MODULE_SIZE);
+    std::vector valid(RAW_MODULE_SIZE);
+
+    for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++) {
+        ap_uint<32 * SUM_BITWIDTH> tmp_sum;
+        ap_uint<32 * SUM2_BITWIDTH> tmp_sum2;
+        ap_uint<32 * MASK_SUM_BITWIDTH> tmp_valid;
+        sum_out >> tmp_sum;
+        sum2_out >> tmp_sum2;
+        valid_out >> tmp_valid;
+
+        ap_uint tmp_sum_unpacked[32];
+        ap_uint tmp_sum2_unpacked[32];
+        ap_uint tmp_valid_unpacked[32];
+        unpack32(tmp_sum, tmp_sum_unpacked);
+        unpack32(tmp_sum2, tmp_sum2_unpacked);
+        unpack32(tmp_valid, tmp_valid_unpacked);
+
+        for (int j = 0; j < 32; j++) {
+            sum[i * 32 + j] = tmp_sum_unpacked[j];
+            sum2[i * 32 + j] = tmp_sum2_unpacked[j];
+            valid[i * 32 + j] = tmp_valid_unpacked[j];
+        }
+    }
+
+    CHECK(sum[1] == (FPGA_NBX+1) * 1);
+    CHECK(sum[3] == (FPGA_NBX+1) * 3);
+    CHECK(sum[1023] == 0);
+    CHECK(sum[1022+200*1024] == (2 * FPGA_NBX+1) * 1022);
+
+    CHECK(sum2[3] == (FPGA_NBX+1) * 3 * 3);
+    CHECK(sum2[1023] == 0);
+    CHECK(sum2[1022+200*1024] == (2 * FPGA_NBX+1) * 1022 * 1022);
+
+    CHECK(valid[1] == FPGA_NBX + 1);
+    CHECK(valid[3] == FPGA_NBX + 1);
+    CHECK(valid[1023] == 0);
+    CHECK(valid[1023 + 323*1024] == 0);
+
+    CHECK(valid[1+1024] == FPGA_NBX + 1 + 1);
+    CHECK(valid[1+2*1024] == FPGA_NBX + 1 + 2);
+    CHECK(valid[1+3*1024] == FPGA_NBX + 1 + 3);
+    CHECK(valid[1+200*1024] == 2 * FPGA_NBX + 1);
+
+    CHECK(valid[1+509*1024] == FPGA_NBX + 1 + 2);
+    CHECK(valid[1+510*1024] == FPGA_NBX + 1 + 1);
+    CHECK(valid[1+511*1024] == FPGA_NBX + 1);
+}
+