diff --git a/common/Definitions.h b/common/Definitions.h
index 49d7c11f..1a463066 100644
--- a/common/Definitions.h
+++ b/common/Definitions.h
@@ -53,6 +53,7 @@
 
 #define MODE_CONV 0x0001L
 #define MODE_BITSHUFFLE_FPGA 0x0002L
+#define MODE_ADD_MULTIPIXEL 0x0004L
 
 #define TASK_NO_DATA_STREAM UINT16_MAX
 
diff --git a/fpga/hls/CMakeLists.txt b/fpga/hls/CMakeLists.txt
index 5ea722d8..8f6c123e 100644
--- a/fpga/hls/CMakeLists.txt
+++ b/fpga/hls/CMakeLists.txt
@@ -22,7 +22,8 @@ ADD_LIBRARY( HLSSimulation STATIC
         axis_broadcast.cpp
         adu_histo.cpp
         axis_helpers.cpp
-        hls_bitshuffle.cpp)
+        hls_bitshuffle.cpp
+        add_multipixel.cpp)
 
 TARGET_INCLUDE_DIRECTORIES(HLSSimulation PUBLIC ../include)
 TARGET_LINK_LIBRARIES(HLSSimulation CommonFunctions)
@@ -68,6 +69,7 @@ MAKE_HLS_MODULE(axis_broadcast axis_broadcast.cpp "")
 MAKE_HLS_MODULE(axis_128_to_512 axis_helpers.cpp "")
 MAKE_HLS_MODULE(axis_32_to_512 axis_helpers.cpp "")
 MAKE_HLS_MODULE(adu_histo adu_histo.cpp "")
+MAKE_HLS_MODULE(add_multipixel add_multipixel.cpp add_multipixel_tb.cpp)
 
 SET (HLS_IPS ${HLS_IPS} PARENT_SCOPE)
 ADD_CUSTOM_TARGET(hls DEPENDS ${HLS_IPS})
diff --git a/fpga/hls/add_multipixel.cpp b/fpga/hls/add_multipixel.cpp
new file mode 100644
index 00000000..dba21b88
--- /dev/null
+++ b/fpga/hls/add_multipixel.cpp
@@ -0,0 +1,228 @@
+// Copyright (2019-2023) Paul Scherrer Institute
+
+#include "hls_jfjoch.h"
+
+// Packet exchanged between the internal dataflow stages: a 512-bit payload plus
+// the user/last flags copied from the external packet_512_t.
+struct add_multipixel_packet {
+    ap_uint<512> data;
+    ap_uint<1> user;
+    ap_uint<1> last;
+};
+
+// Stage 0: convert the external stream into add_multipixel_packet.
+// The first packet is forwarded unconditionally; after that every packet is
+// forwarded up to and including the first one with user set.
+void add_multipixel_in_stream(STREAM_512 &data_in,
+                              hls::stream<add_multipixel_packet> &data_out) {
+    packet_512_t packet_in;
+    data_in >> packet_in;
+    data_out << add_multipixel_packet{.data = packet_in.data, .user = packet_in.user, .last = packet_in.last};
+    data_in >> packet_in;
+    while (!packet_in.user) {
+#pragma HLS PIPELINE II=1
+        data_out << add_multipixel_packet{.data = packet_in.data, .user = packet_in.user, .last = packet_in.last};
+        data_in >> packet_in;
+    }
+    data_out << add_multipixel_packet{.data = packet_in.data, .user = packet_in.user, .last = packet_in.last};
+}
+
+// Stage 1: widen every line from 32 to 33 words when MODE_ADD_MULTIPIXEL is set
+// in the first packet; otherwise pass everything through unchanged.
+// Per 32 input words (16-bit lanes, as used by the testbench):
+//   - words 0-7   are shifted down by one lane (lane 0 is dropped, top lane of word 7 is zero),
+//   - words 8-15  are shifted up by 1 lane, words 16-23 by 3 lanes, words 24-31 by 5 lanes
+//     (the lanes opened up at each group boundary are zero-filled),
+//   - word 32     carries the 5 lanes shifted out of word 31.
+void add_multipixel_cols(hls::stream<add_multipixel_packet> &data_in,
+                         hls::stream<add_multipixel_packet> &data_out) {
+#pragma HLS INTERFACE axis port=data_in
+#pragma HLS INTERFACE axis port=data_out
+    add_multipixel_packet packet_in, packet_out;
+    data_in >> packet_in;
+    data_out << packet_in;
+    ap_uint<1> add_multipixel = ((ACT_REG_MODE(packet_in.data) & MODE_ADD_MULTIPIXEL)) ? 1 : 0;
+
+    data_in >> packet_in;
+    if (!add_multipixel) {
+        while (!packet_in.user) {
+#pragma HLS PIPELINE II=1
+            data_out << packet_in;
+            data_in >> packet_in;
+        }
+    } else {
+        while (!packet_in.user) {
+#pragma HLS PIPELINE II=33
+
+            packet_out.last = 0;
+            packet_out.user = 0;
+
+            ap_uint<512> save = packet_in.data;
+            data_in >> packet_in;
+            for (int i = 1; i < 8; i++) {
+                // Lower 496 bits come from the previous word, top 16 bits from the current one.
+                packet_out.data(511 - 16, 0) = save(511, 16);
+                packet_out.data(511, 512 - 16) = packet_in.data(15, 0);
+                data_out << packet_out;
+                save = packet_in.data;
+                data_in >> packet_in;
+            }
+            packet_out = packet_in;
+            packet_out.data = save(511,16);
+            data_out << packet_out;
+
+            save = 0;
+            for (int k = 1; k < 4; k++) {
+                for (int i = 0; i < 8; i++) {
+                    // Shift up by (2k-1) lanes; "save" holds the bits carried over from the previous word.
+                    packet_out.data(511, (k * 2 - 1) * 16) = packet_in.data(511 - (k * 2 - 1) * 16, 0);
+                    packet_out.data((k * 2 - 1) * 16 - 1, 0) = save;
+                    save = packet_in.data(511, 512 - (2 * k - 1) * 16);
+                    data_out << packet_out;
+                    data_in >> packet_in;
+                }
+            }
+
+            packet_out.data = save;
+            packet_out.last = 0;
+            packet_out.user = 0;
+            data_out << packet_out;
+        }
+    }
+    data_out << packet_in;
+}
+
+// Stage 2: line handling for frames of 512 lines x 33 words (only in multipixel mode;
+// otherwise pass-through). Per frame: the first and last line are dropped and
+// lines 255 and 256 are each emitted twice, so 512 lines are written again.
+void add_multipixel_line(hls::stream<add_multipixel_packet> &data_in,
+                         hls::stream<add_multipixel_packet> &data_out) {
+    add_multipixel_packet packet_in;
+    data_in >> packet_in;
+    data_out << packet_in;
+    ap_uint<1> add_multipixel = ((ACT_REG_MODE(packet_in.data) & MODE_ADD_MULTIPIXEL)) ? 1 : 0;
+
+    data_in >> packet_in;
+    if (!add_multipixel) {
+        while (!packet_in.user) {
+#pragma HLS PIPELINE II=1
+            data_out << packet_in;
+            data_in >> packet_in;
+        }
+    } else {
+        while (!packet_in.user) {
+            // Lines 0-254: consume all, forward everything except line 0.
+            for (int i = 0; i < 255 * 33; i++) {
+#pragma HLS PIPELINE II=1
+                if (i >= 33)
+                    data_out << packet_in;
+                data_in >> packet_in;
+            }
+
+            // Lines 255 and 256: forward the line, then replay a buffered copy (user/last cleared).
+            for (int k = 0; k < 2; k++) {
+#pragma HLS PIPELINE II=66
+                ap_uint<512> packet[33];
+                for (int i = 0; i < 33; i++) {
+                    packet[i] = packet_in.data;
+                    data_out << packet_in;
+                    data_in >> packet_in;
+                }
+                for (int i = 0; i < 33; i++) {
+                    data_out << add_multipixel_packet{.data = packet[i], .user = 0, .last = 0};
+                }
+            }
+
+            // Lines 257-511: consume all, forward everything except line 511.
+            for (int i = 0; i < 255 * 33; i++) {
+#pragma HLS PIPELINE II=1
+                if (i < 254*33)
+                    data_out << packet_in;
+                data_in >> packet_in;
+            }
+        }
+    }
+    data_out << packet_in;
+}
+
+// Stage 3: convert back to packet_512_t.
+// Pass-through mode: forwards frames of 512 * 32 words and sets last on the final word of each.
+// Multipixel mode: packs each group of 8 lines x 33 words into 257 words, using only the
+// low 64 bits of every 33rd word; 64 groups are handled per outer iteration.
+// NOTE(review): no packet gets last = 1 in multipixel mode, unlike pass-through mode -
+// confirm that downstream does not rely on it.
+void add_multipixel_output(hls::stream<add_multipixel_packet> &data_in,
+                           STREAM_512 &data_out) {
+    add_multipixel_packet packet_in;
+    packet_512_t packet_out;
+    data_in >> packet_in;
+    data_out << packet_512_t{.data = packet_in.data, .user = packet_in.user, .last = packet_in.last};
+
+    ap_uint<1> add_multipixel = ((ACT_REG_MODE(packet_in.data) & MODE_ADD_MULTIPIXEL)) ? 1 : 0;
+
+    data_in >> packet_in;
+    if (!add_multipixel) {
+        while (!packet_in.user) {
+            for (int i = 0; i < 512 * 32; i++) {
+#pragma HLS PIPELINE II=1
+                data_out << packet_512_t{.data = packet_in.data, .user = packet_in.user, .last = (i == 512 * 32 - 1)};
+                data_in >> packet_in;
+            }
+        }
+    } else {
+        while (!packet_in.user) {
+            for (int i = 0; i < 64; i++) {
+                packet_out.user = 0;
+                packet_out.last = 0;
+
+                // First line of the group is word-aligned: 32 words go out unchanged.
+                for (int j = 0; j < 32; j++) {
+#pragma HLS PIPELINE II=1
+                    data_out << packet_512_t{.data = packet_in.data, .user = packet_in.user, .last = 0};
+                    data_in >> packet_in;
+                }
+                ap_uint<512-64> save = 0;
+
+                for (int k = 1; k < 8; k++) {
+#pragma HLS PIPELINE OFF
+                    for (int j = 0; j < 33; j++) {
+#pragma HLS PIPELINE II=1
+                        if (j == 0) {
+                            // 33rd word of the previous line: keep its low 64 bits as carry.
+                            save(64 * k - 1, 64 * (k-1)) = packet_in.data(63, 0);
+                            data_in >> packet_in;
+                        } else {
+                            packet_out.data(64 * k - 1, 0) = save(64 * k - 1, 0);
+                            packet_out.data(511, 64 * k) = packet_in.data(511 - 64 * k, 0);
+                            save(64 * k - 1, 0) = packet_in.data(511, 512 - 64 * k);
+                            data_out << packet_out;
+                            data_in >> packet_in;
+                        }
+                    }
+                }
+                // Last word of the group: 448 carried bits plus the low 64 bits of the final 33rd word.
+                packet_out.data(64 * 7 - 1, 0) = save;
+                packet_out.data(511, 512 - 64) = packet_in.data(63, 0);
+                data_out << packet_out;
+                data_in >> packet_in;
+            }
+        }
+    }
+    data_out << packet_512_t{.data = packet_in.data, .user = packet_in.user, .last = packet_in.last};
+}
+
+// Top level: chains the four stages as an HLS dataflow region.
+// The first packet on data_in is expected to carry the action configuration; its mode field
+// (MODE_ADD_MULTIPIXEL) selects between pass-through and the multipixel layout.
+void add_multipixel(STREAM_512 &data_in, STREAM_512 &data_out) {
+#pragma HLS INTERFACE axis port=data_in
+#pragma HLS INTERFACE axis port=data_out
+#pragma HLS DATAFLOW
+    hls::stream<add_multipixel_packet> stream_0;
+    hls::stream<add_multipixel_packet> stream_1;
+    hls::stream<add_multipixel_packet> stream_2;
+    add_multipixel_in_stream(data_in, stream_0);
+    add_multipixel_cols(stream_0, stream_1);
+    add_multipixel_line(stream_1, stream_2);
+    add_multipixel_output(stream_2, data_out);
+}
diff --git a/fpga/hls/add_multipixel_tb.cpp b/fpga/hls/add_multipixel_tb.cpp
new file mode 100644
index 00000000..4206e32a
--- /dev/null
+++ b/fpga/hls/add_multipixel_tb.cpp
@@ -0,0 +1,86 @@
+// Copyright (2019-2023) Paul Scherrer Institute
+
+#include "hls_jfjoch.h"
+
+// C simulation testbench for add_multipixel() with MODE_ADD_MULTIPIXEL enabled.
+// The checks index the input as lines of 1024 16-bit values and the output as lines of 1028.
+int main() {
+
+    int ret = 0;
+
+    STREAM_512 input;
+    STREAM_512 output;
+
+    size_t nframes = 1;
+
+    std::vector<uint16_t> input_frame(nframes * RAW_MODULE_SIZE), output_frame(nframes * 257 * 64 * 32);
+    for (size_t i = 0; i < nframes * RAW_MODULE_SIZE; i++) {
+        input_frame[i] = i % INT16_MAX;
+    }
+    auto input_frame_512 = (ap_uint<512>*) input_frame.data();
+    auto output_frame_512 = (ap_uint<512>*) output_frame.data();
+
+    ap_uint<512> action_control = 0;
+    ACT_REG_MODE(action_control) = MODE_ADD_MULTIPIXEL;
+
+    // Stream layout: configuration word, frame data, terminating packet with user = 1.
+    input << packet_512_t { .data = action_control, .user = 0 };
+    for (size_t i = 0; i < nframes * RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++)
+        input << packet_512_t { .data = input_frame_512[i], .user = 0 };
+
+    input << packet_512_t { .user = 1 };
+
+    add_multipixel(input, output);
+
+    if (input.size() != 0)
+        ret = 1;
+    if (output.size() != nframes * (257 * 64) + 2)
+        ret = 1;
+
+    output.read();
+    for (size_t i = 0; i < nframes * 257 * 64 ; i++)
+        output_frame_512[i] = output.read().data;
+    output.read();
+
+    // Interior values must show up shifted by two extra positions per 256 lines/columns
+    // (minus one, because the first line and the first column are dropped).
+    size_t diff = 0;
+    for (int line = 1; line < 511; line++) {
+        int new_line = line + (line / 256) * 2 - 1;
+        for (int col = 1; col < 1023; col++) {
+            int new_col = col + (col / 256) * 2 - 1;
+            if (output_frame[new_line * 1028 + new_col] != input_frame[line * 1024 + col])
+                diff++;
+        }
+    }
+
+    // Input line 255 must fill output lines 254 and 255.
+    for (int col = 1; col < 1023; col++) {
+        int new_col = col + (col / 256) * 2 - 1;
+        if (output_frame[254 * 1028 + new_col] != input_frame[255 * 1024 + col])
+            diff++;
+        if (output_frame[255 * 1028 + new_col] != input_frame[255 * 1024 + col])
+            diff++;
+    }
+
+
+    // Input line 256 must fill output lines 256 and 257.
+    for (int col = 1; col < 1023; col++) {
+        int new_col = col + (col / 256) * 2 - 1;
+        if (output_frame[256 * 1028 + new_col] != input_frame[256 * 1024 + col])
+            diff++;
+        if (output_frame[257 * 1028 + new_col] != input_frame[256 * 1024 + col])
+            diff++;
+    }
+    if (diff > 0) {
+        ret = 1;
+        std::cout << diff << std::endl;
+    }
+    if (ret != 0) {
+        printf("Test failed !!!\n");
+    } else {
+        printf("Test passed !\n");
+    }
+
+    return ret;
+}
diff --git a/fpga/hls/hls_jfjoch.h b/fpga/hls/hls_jfjoch.h
index ae0efe7a..82770b34 100644
--- a/fpga/hls/hls_jfjoch.h
+++ b/fpga/hls/hls_jfjoch.h
@@ -328,4 +328,6 @@ void load_calibration(ap_uint<256> *d_hbm_p0,
                       hls::stream > &host_memory_in,
                       uint64_t in_mem_location[(3 * 16 + 3) * MAX_MODULES_FPGA]) ;
 
+void add_multipixel(STREAM_512 &data_in, STREAM_512 &data_out);
+
 #endif