HLS: Added frame_summation core

This commit is contained in:
2023-10-26 21:26:44 +02:00
parent 4011c4541d
commit 2268486824
16 changed files with 479 additions and 40 deletions
+1 -1
View File
@@ -49,7 +49,7 @@
// For FPGA
#define ACTION_TYPE 0x52324158
#define RELEASE_LEVEL 0x0041
#define RELEASE_LEVEL 0x0042
#define MODE_CONV 0x0001L
#define MODE_BITSHUFFLE_FPGA 0x0002L
+14
View File
@@ -47,6 +47,7 @@
`define ADDR_ONE_OVER_ENERGY 16'h0094
`define ADDR_NFRAMES 16'h0098
`define ADDR_NSTORAGE_CELLS 16'h009C
`define ADDR_NSUMMATION 16'h00A0
`define ADDR_SPOT_FINDER_CNT_THR 16'h0100
`define ADDR_SPOT_FINDER_SNR_THR 16'h0104
@@ -90,6 +91,7 @@ module action_config
output reg [31:0] nframes ,
output reg [7:0] nmodules ,
output reg [3:0] nstorage_cells ,
output reg [3:0] nsummation ,
output wire [31:0] hbm_size_bytes ,
output reg [15:0] spot_finder_count_threshold,
output reg [7:0] spot_finder_snr_threshold,
@@ -312,6 +314,9 @@ always @(posedge clk) begin
`ADDR_NSTORAGE_CELLS: begin
rdata <= nstorage_cells;
end
`ADDR_NSUMMATION: begin
rdata <= nsummation;
end
`ADDR_ACTION_TYPE: begin
rdata <= `ACTION_TYPE;
end
@@ -519,6 +524,15 @@ always @(posedge clk) begin
end
end
// NSUMMATION configuration register (4 bits), written through the AXI-Lite
// write channel. It is cleared while resetn is low, and a write is accepted
// only while reg_data_collection_idle is asserted and the write address is
// ADDR_NSUMMATION.
always @(posedge clk) begin
    if (!resetn)
        nsummation <= 0;
    else if (reg_data_collection_idle) begin
        if (w_hs && waddr == `ADDR_NSUMMATION)
            // Bits selected by wmask are taken from the write data, all other
            // bits keep their current value. The mask has to be inverted
            // bitwise (~); logical negation (!) reduces wmask[3:0] to a single
            // bit and would clear nsummation[3:1] on a masked-out write.
            nsummation <= (s_axi_WDATA[3:0] & wmask[3:0]) | (nsummation & ~wmask[3:0]);
    end
end
always @(posedge clk) begin
if (!resetn)
spot_finder_snr_threshold <= 0;
+5 -1
View File
@@ -24,7 +24,9 @@ ADD_LIBRARY( HLSSimulation STATIC
axis_helpers.cpp
hls_bitshuffle.cpp
add_multipixel.cpp
module_upside_down.cpp)
module_upside_down.cpp
frame_summation.cpp
frame_summation_reorder_compl.cpp)
TARGET_INCLUDE_DIRECTORIES(HLSSimulation PUBLIC ../include)
TARGET_LINK_LIBRARIES(HLSSimulation CommonFunctions)
@@ -72,6 +74,8 @@ MAKE_HLS_MODULE(axis_32_to_512 axis_helpers.cpp "")
MAKE_HLS_MODULE(adu_histo adu_histo.cpp "")
MAKE_HLS_MODULE(add_multipixel add_multipixel.cpp add_multipixel_tb.cpp)
MAKE_HLS_MODULE(module_upside_down module_upside_down.cpp module_upside_down_tb.cpp)
MAKE_HLS_MODULE(frame_summation frame_summation.cpp frame_summation_tb.cpp)
MAKE_HLS_MODULE(frame_summation_reorder_compl frame_summation_reorder_compl.cpp frame_summation_reorder_compl_tb.cpp)
SET (HLS_IPS ${HLS_IPS} PARENT_SCOPE)
ADD_CUSTOM_TARGET(hls DEPENDS ${HLS_IPS})
+3 -2
View File
@@ -14,7 +14,7 @@ void adu_histo(STREAM_512 &data_in,
#pragma HLS INTERFACE register both axis port=m_axis_completion
#pragma HLS INTERFACE register both axis port=s_axis_completion
ap_uint<14> count[64][ADU_HISTO_BIN_COUNT]; // log2(512*1024/64) = 13
ap_uint<19> count[64][ADU_HISTO_BIN_COUNT]; // log2(16*512*1024/64) = 17
#pragma HLS BIND_STORAGE variable=count type=ram_t2p impl=bram
#pragma HLS ARRAY_PARTITION variable=count type=complete dim=1
@@ -29,13 +29,14 @@ void adu_histo(STREAM_512 &data_in,
packet_512_t packet_in;
data_in >> packet_in;
ap_uint<4> sum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
data_out << packet_in;
axis_completion cmpl;
s_axis_completion >> cmpl;
while (!cmpl.last) {
m_axis_completion << cmpl;
for (int i = 0; i < RAW_MODULE_SIZE / (32 * 2); i++) {
for (int i = 0; i < (sum + 1) * RAW_MODULE_SIZE / (32 * 2); i++) {
#pragma HLS PIPELINE II=2
for (int k = 0; k < 2; k++) {
data_in >> packet_in;
+4 -1
View File
@@ -14,7 +14,8 @@ void data_collection_fsm(AXI_STREAM &eth_in,
ap_uint<32> one_over_energy,
ap_uint<32> nframes,
ap_uint<8> nmodules,
ap_uint<4> nstorage_cells) {
ap_uint<4> nstorage_cells,
ap_uint<4> nsummation) {
#pragma HLS INTERFACE ap_ctrl_none port=return
#pragma HLS INTERFACE axis register both port=eth_in
@@ -30,6 +31,7 @@ void data_collection_fsm(AXI_STREAM &eth_in,
#pragma HLS INTERFACE ap_none register port=nframes
#pragma HLS INTERFACE ap_none register port=nmodules
#pragma HLS INTERFACE ap_none register port=nstorage_cells
#pragma HLS INTERFACE ap_none register port=nsummation
#pragma HLS PIPELINE II=1 style=flp
@@ -75,6 +77,7 @@ void data_collection_fsm(AXI_STREAM &eth_in,
ACT_REG_NFRAMES(packet_out.data) = nframes;
ACT_REG_NMODULES(packet_out.data) = nmodules;
ACT_REG_NSTORAGE_CELLS(packet_out.data) = nstorage_cells + 1;
ACT_REG_NSUMMATION(packet_out.data) = nsummation;
packet_out.user = 0;
packet_out.last = 0;
+92
View File
@@ -0,0 +1,92 @@
// Copyright (2019-2023) Paul Scherrer Institute
#include "hls_jfjoch.h"
// Sums groups of (NSUMMATION + 1) consecutive frames value-by-value.
//
// Stream protocol, as implemented below:
//   * The first beat on data_in is a header; NSUMMATION is read from it and the
//     header is forwarded unchanged to data_out.
//   * Every non-terminating completion on s_axis_completion is followed by
//     16384 data beats, each holding 32 signed 16-bit values.
//   * A completion with last = 1 terminates the run; it is forwarded to
//     m_axis_completion, followed by one more beat copied from data_in to
//     data_out.
//
// NSUMMATION == 0: completions and data are passed through unchanged.
// NSUMMATION  > 0: for each group one completion and one frame are emitted.
//   The emitted completion is a copy of the group's first completion with
//   frame_number divided by the group size, packet_mask AND-ed and packet_count
//   added over the group.
//
// Value arithmetic: if either operand equals INT16_MIN the result is INT16_MIN;
// otherwise if either operand equals INT16_MAX the result is INT16_MAX;
// otherwise the sum is stored, clamped to INT16_MAX on positive overflow.
void frame_summation(STREAM_512 &data_in, STREAM_512 &data_out,
                     hls::stream<axis_completion > &s_axis_completion,
                     hls::stream<axis_completion > &m_axis_completion) {
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS INTERFACE axis register both port=s_axis_completion
#pragma HLS INTERFACE axis register both port=m_axis_completion

    // Running partial sum for one frame: one 512-bit word per data beat.
    ap_uint<512> memory_0[16384];
#pragma HLS BIND_STORAGE variable=memory_0 type=ram_t2p impl=uram latency=3

    packet_512_t packet_in, packet_out;

    // Header beat: extract the summation setting and pass the header on.
    data_in >> packet_in;
    ap_uint<4> sum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
    data_out << packet_in;

    // Pre-fetch the next beat; both branches below consume packet_in first and
    // read the following beat at the end of each iteration.
    data_in >> packet_in;

    if (sum > 0) {
        axis_completion cmpl, cmpl_out;
        s_axis_completion >> cmpl;
        while (!cmpl.last) {
            later_frames:
            cmpl_out = cmpl;
            // Frame numbers start with 1, so need to do a bit of play here
            cmpl_out.frame_number = cmpl.frame_number / (sum+1);

            for (int s = 0; s <= sum; s++) {
                if (s > 0) {
                    // Merge the metadata of the 2nd..last frame of the group.
                    cmpl_out.packet_mask = (cmpl_out.packet_mask & cmpl.packet_mask);
                    cmpl_out.packet_count += cmpl.packet_count;
                }
                // The merged completion is sent ahead of the summed frame data.
                if (s == sum)
                    m_axis_completion << cmpl_out;

                for (int i = 0; i < 16384; i++) {
#pragma HLS PIPELINE II=1
                    ap_int<16> val_0[32];
                    ap_int<16> val_1[32];
                    unpack32(packet_in.data, val_0);
                    // The first frame of a group starts from zero instead of
                    // whatever the memory holds from the previous group.
                    if (s == 0)
                        unpack32(0, val_1);
                    else
                        unpack32(memory_0[i], val_1);

                    for (int j = 0; j < 32; j++) {
                        if ((val_0[j] == INT16_MIN) || (val_1[j] == INT16_MIN))
                            val_0[j] = INT16_MIN;
                        else if ((val_0[j] == INT16_MAX) || (val_1[j] == INT16_MAX))
                            val_0[j] = INT16_MAX;
                        else {
                            // NOTE(review): only positive overflow is clamped;
                            // a sum below INT16_MIN is narrowed to 16 bits
                            // without a check - confirm inputs cannot get there.
                            ap_int<17> tmp = val_0[j] + val_1[j];
                            if (tmp >= INT16_MAX)
                                val_0[j] = INT16_MAX;
                            else
                                val_0[j] = tmp;
                        }
                    }

                    if (s == sum) {
                        // Last frame of the group: stream out the final sum.
                        packet_out.data = pack32(val_0);
                        packet_out.last = ((i == 16383) ? 1 : 0);
                        packet_out.keep = UINT64_MAX;
                        // Do not emit an uninitialized user flag on summed beats.
                        packet_out.user = 0;
                        data_out << packet_out;
                    } else {
                        memory_0[i] = pack32(val_0);
                    }
                    data_in >> packet_in;
                }
                s_axis_completion >> cmpl;
            }
        }
        // The loop only exits on a completion with last = 1; forward that
        // terminating completion itself. (cmpl_out would be a repeat of the
        // previous group with last = 0, or uninitialized if no frame arrived.)
        m_axis_completion << cmpl;
    } else {
        axis_completion cmpl;
        s_axis_completion >> cmpl;
        while (!cmpl.last) {
            m_axis_completion << cmpl;
            for (int i = 0; i < 16384; i++) {
#pragma HLS PIPELINE II=1
                data_out << packet_in;
                data_in >> packet_in;
            }
            s_axis_completion >> cmpl;
        }
        m_axis_completion << cmpl;
    }

    // Forward the beat that followed the last frame.
    data_out << packet_in;
}
@@ -0,0 +1,77 @@
// Copyright (2019-2023) Paul Scherrer Institute
#include "hls_jfjoch.h"
#define MAX_FPGA_SUMMATION 16
// Buffers completions per module so that completions belonging to one
// summation group (frame_number / (NSUMMATION + 1)) are emitted together, in
// slot order (frame_number % (NSUMMATION + 1)).
//
// Behaviour, as implemented below:
//   * The first beat on data_in is a header; NSUMMATION is read from it and the
//     header is forwarded to data_out.
//   * A completion of a newer group than the one buffered for its module first
//     flushes that module's buffered group, then starts a new group.
//   * A completion of the currently buffered group is stored in its slot.
//   * A completion of an older group is forwarded at once with ignore = 1.
//   * When a group is flushed, only occupied slots are emitted, and every
//     emitted completion gets ignore = 1 unless the group collected exactly
//     NSUMMATION + 1 completions.
//   * On the completion with last = 1 all modules are flushed, the terminating
//     completion is forwarded, and one more beat is copied from data_in to
//     data_out.
void frame_summation_reorder_compl(STREAM_512 &data_in,
STREAM_512 &data_out,
hls::stream<axis_completion > &s_axis_completion,
hls::stream<axis_completion > &m_axis_completion) {
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS INTERFACE axis register both port=s_axis_completion
#pragma HLS INTERFACE axis register both port=m_axis_completion
// Header beat: extract the summation setting and pass the header on.
packet_512_t packet_in;
data_in >> packet_in;
ap_uint<4> sum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
data_out << packet_in;
// Per-module state:
//   completions[module * MAX_FPGA_SUMMATION + slot] - buffered completion
//   completion_mask[module]                         - one bit per occupied slot
//   completion_count[module]                        - completions stored for the group
//   curr_frame_number_prefix[module]                - group currently buffered
axis_completion completions[MAX_FPGA_SUMMATION * MAX_MODULES_FPGA];
ap_uint<MAX_FPGA_SUMMATION> completion_mask[MAX_MODULES_FPGA];
ap_uint<5> completion_count[MAX_MODULES_FPGA];
ap_uint<64> curr_frame_number_prefix[MAX_MODULES_FPGA];
// completions[] itself is not cleared; entries are only emitted when the
// matching mask bit is set.
for (int i = 0; i < MAX_MODULES_FPGA; i++) {
completion_mask[i] = 0;
completion_count[i] = 0;
curr_frame_number_prefix[i] = 0;
}
axis_completion c;
s_axis_completion >> c;
while (!c.last) {
#pragma HLS PIPELINE II=16
// Split the frame number into summation group and slot within the group.
ap_uint<64> frame_number_prefix = c.frame_number / (sum + 1);
ap_uint<5> frame_number_loc = c.frame_number % (sum + 1);
// NOTE(review): c.module indexes the per-module arrays without a range
// check against MAX_MODULES_FPGA - presumably guaranteed upstream.
ap_uint<7> module = c.module;
if (frame_number_prefix > curr_frame_number_prefix[module]) {
// Newer group: emit what was buffered for this module, in slot order.
for (int i = 0; i <= sum; i++) {
axis_completion cmpl = completions[module * MAX_FPGA_SUMMATION + i];
// An incomplete group is still emitted, but flagged to be ignored.
if (completion_count[module] != sum + 1)
cmpl.ignore = 1;
if (completion_mask[module][i])
m_axis_completion << cmpl;
}
// Start the new group with the current completion as its only member.
completions[module * MAX_FPGA_SUMMATION + frame_number_loc] = c;
completion_mask[module] = 1 << frame_number_loc;
completion_count[module] = 1;
curr_frame_number_prefix[module] = frame_number_prefix;
} else if (frame_number_prefix == curr_frame_number_prefix[module]) {
// Same group: store the completion in its slot.
// NOTE(review): the count is incremented even if the slot was already
// occupied, so a repeated frame number would be counted twice.
completions[module * MAX_FPGA_SUMMATION + frame_number_loc] = c;
completion_mask[module][frame_number_loc] = 1;
completion_count[module] += 1;
curr_frame_number_prefix[module] = frame_number_prefix;
} else {
// Older group than the one buffered: pass it on right away, flagged.
c.ignore = 1;
m_axis_completion << c;
}
s_axis_completion >> c;
}
// End of run: flush whatever is still buffered, module by module.
for (int module = 0; module < MAX_MODULES_FPGA; module++) {
#pragma HLS PIPELINE II=16
for (int i = 0; i <= sum; i++) {
axis_completion cmpl = completions[module * MAX_FPGA_SUMMATION + i];
if (completion_count[module] != sum + 1)
cmpl.ignore = 1;
if (completion_mask[module][i])
m_axis_completion << cmpl;
}
}
// Forward the terminating completion (last = 1).
m_axis_completion << c;
// Copy one further beat from data_in to data_out.
data_in >> packet_in;
data_out << packet_in;
}
@@ -0,0 +1,123 @@
// Copyright (2019-2023) Paul Scherrer Institute
#include "hls_jfjoch.h"
void expected(int &ret, hls::stream<axis_completion>& compl_in, size_t frame_number, uint16_t module,
uint16_t packet_count, uint16_t ignore) {
axis_completion cmpl = compl_in.read();
if (cmpl.frame_number != frame_number) {
ret = 1;
std::cerr << "Frame number error " << cmpl.frame_number << " " << frame_number << std::endl;
}
if (cmpl.module != module) {
ret = 1;
std::cerr << "Module number error " << cmpl.module << " " << module << std::endl;
}
if (cmpl.packet_count != packet_count) {
ret = 1;
std::cerr << "Packet count error " << cmpl.packet_count << " " << packet_count << std::endl;
}
if (cmpl.ignore != ignore) {
ret = 1;
std::cerr << "Ignore val error " << cmpl.ignore << " " << ignore << std::endl;
}
}
// Testbench for frame_summation_reorder_compl.
// Feeds a header beat, an end beat and a shuffled sequence of completions for
// modules 3, 4 and 5 (summation group size 5), then checks that the core
// returns the same number of completions in the reference order with the
// reference ignore flags, followed by the terminating completion.
// Returns 0 on success, 1 on failure.
int main() {
    // One row of stimulus / reference data.
    struct completion_row {
        size_t   frame_number;
        uint16_t module;
        uint16_t packet_count;
        uint16_t ignore;
    };

    // Completions in arrival order (ignore is always 0 on input).
    static const completion_row stimulus[] = {
        {100, 4, 128, 0}, {100, 5, 128, 0}, {100, 3, 128, 0},
        {101, 4, 128, 0}, {101, 5, 128, 0},
        {102, 5, 128, 0}, {102, 4, 128, 0}, {102, 3, 128, 0},
        {103, 5, 128, 0}, {103, 4, 128, 0},
        {104, 4, 128, 0}, {104, 5, 128, 0},
        { 80, 5,  35, 0},
        {105, 5, 128, 0}, {105, 4, 128, 0}, {105, 3, 128, 0},
    };

    // Completions in the order the core is required to emit them.
    static const completion_row reference[] = {
        { 80, 5,  35, 1},
        {100, 5, 128, 0}, {101, 5, 128, 0}, {102, 5, 128, 0}, {103, 5, 128, 0}, {104, 5, 128, 0},
        {100, 4, 128, 0}, {101, 4, 128, 0}, {102, 4, 128, 0}, {103, 4, 128, 0}, {104, 4, 128, 0},
        {100, 3, 128, 1}, {102, 3, 128, 1},
        {105, 3, 128, 1}, {105, 4, 128, 1}, {105, 5, 128, 1},
    };

    int ret = 0;

    STREAM_512 input;
    STREAM_512 output;
    hls::stream<axis_completion> compl_in;
    hls::stream<axis_completion> compl_out;

    // Five frames per group: the register holds the group size minus one.
    size_t nframes = 5;
    ap_uint<512> action_control = 0;
    ACT_REG_NSUMMATION(action_control) = nframes - 1;

    input << packet_512_t { .data = action_control, .user = 0 };
    input << packet_512_t { .user = 1 };

    for (const completion_row &row : stimulus)
        compl_in << axis_completion{.frame_number = row.frame_number, .packet_count = row.packet_count,
                                    .module = row.module, .last = 0, .ignore = 0};
    compl_in << axis_completion{.last = 1};

    // Every completion pushed in (terminator included) must come out again.
    size_t size_start = compl_in.size();

    frame_summation_reorder_compl(input, output, compl_in, compl_out);

    if (compl_in.size() != 0) {
        std::cout << "compl_in should be empty: " << compl_in.size() << std::endl;
        ret = 1;
    }

    if (compl_out.size() != size_start) {
        std::cout << "compl_out should be size " << size_start << ": " << compl_out.size() << std::endl;
        ret = 1;
    }

    // Both data beats must have been consumed and forwarded.
    if ((input.size() != 0) || (output.size() != 2))
        ret = 1;

    output.read();
    output.read();

    for (const completion_row &row : reference)
        expected(ret, compl_out, row.frame_number, row.module, row.packet_count, row.ignore);

    // The final element has to be the terminating completion.
    axis_completion cmpl = compl_out.read();
    if (!cmpl.last) {
        std::cerr << "Last completion error" << std::endl;
        ret = 1;
    }

    if (ret == 0) {
        printf("Test passed !\n");
    } else {
        printf("Test failed !!!\n");
        ret = 1;
    }

    return ret;
}
+101
View File
@@ -0,0 +1,101 @@
// Copyright (2019-2023) Paul Scherrer Institute
#include <random>
#include "hls_jfjoch.h"
// Testbench for frame_summation.
// Generates 5 frames of pseudo-random values in 0..5000 (fixed seed), streams
// them through the core with NSUMMATION = 4 and checks that
//   * all inputs are consumed,
//   * one summed frame plus header and end beat come out,
//   * the merged completion carries frame number 100/5, packet count 128*5 and
//     the unchanged full packet mask,
//   * the output frame equals the element-wise sum of the input frames.
// Returns 0 on success, 1 on failure.
int main() {
    int ret = 0;

    STREAM_512 input;
    STREAM_512 output;
    hls::stream<axis_completion> compl_in;
    hls::stream<axis_completion> compl_out;

    size_t nframes = 5;
    std::vector<int16_t> input_frame(nframes * RAW_MODULE_SIZE);
    std::vector<int16_t> output_frame_ref(RAW_MODULE_SIZE, 0);
    std::vector<int16_t> output_frame(RAW_MODULE_SIZE, 0);

    // 5 * 5000 stays below INT16_MAX, so the reference sum needs no saturation.
    std::mt19937 g1(1387);
    std::uniform_int_distribution<uint16_t> dist(0, 5000);
    for (int n = 0; n < nframes * RAW_MODULE_SIZE; n++) {
        input_frame[n] = dist(g1);
        output_frame_ref[n % RAW_MODULE_SIZE] += input_frame[n];
    }

    // View the 16-bit buffers as 512-bit stream words.
    auto input_frame_512 = (ap_uint<512>*) input_frame.data();
    auto output_frame_512 = (ap_uint<512>*) output_frame.data();

    ap_uint<512> action_control = 0;
    ACT_REG_NSUMMATION(action_control) = nframes - 1;

    // Header beat, all frame data, then the end beat.
    input << packet_512_t { .data = action_control, .user = 0 };
    for (int i = 0; i < nframes * RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++)
        input << packet_512_t { .data = input_frame_512[i], .user = 0 };
    input << packet_512_t { .user = 1 };

    ap_uint<128> packet_mask;
    for (int i = 0; i < 128; i++)
        packet_mask[i] = 1;

    // One completion per input frame, followed by the terminating completion.
    for (int i = 0; i < nframes; i++)
        compl_in << axis_completion{.packet_mask = packet_mask, .frame_number = 100 + i, .packet_count = 128, .last = 0};
    compl_in << axis_completion{.last = 1};

    frame_summation(input, output, compl_in, compl_out);

    if (compl_in.size() != 0) {
        std::cout << "compl_in should be empty: " << compl_in.size() << std::endl;
        ret = 1;
    }

    if (compl_out.size() != 2) {
        std::cout << "compl_out should be size 2: " << compl_out.size() << std::endl;
        ret = 1;
    }

    if (input.size() != 0)
        ret = 1;
    if (output.size() != RAW_MODULE_SIZE * sizeof(uint16_t) / 64 + 2)
        ret = 1;

    // Drop the header, collect the summed frame, drop the end beat.
    output.read();
    for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64 ; i++)
        output_frame_512[i] = output.read().data;
    output.read();

    axis_completion cmpl;
    cmpl = compl_out.read();

    if (cmpl.frame_number != 100/5) {
        std::cout << "Wrong output frame number" << std::endl;
        ret = 1;
    }

    if (cmpl.packet_count != 128*5) {
        // Message used to be a copy of the frame-number one above.
        std::cout << "Wrong output packet count" << std::endl;
        ret = 1;
    }

    if (cmpl.packet_mask != packet_mask) {
        std::cout << "Wrong packet mask" << std::endl;
        ret = 1;
    }

    // NOTE(review): the second completion is consumed without checking that
    // its last flag is set.
    compl_out.read();

    if (output_frame != output_frame_ref) {
        std::cout << "Input and output don't match" << std::endl;
        ret = 1;
    }

    if (ret != 0) {
        printf("Test failed !!!\n");
        ret = 1;
    } else {
        printf("Test passed !\n");
    }

    return ret;
}
+11 -2
View File
@@ -52,6 +52,7 @@ typedef hls::stream<packet_512_t> STREAM_512;
#define ACT_REG_NFRAMES(x) ((x)(95 , 64)) // 32 bit
#define ACT_REG_NMODULES(x) ((x)(132, 128)) // 5 bit (0..31)
#define ACT_REG_NSTORAGE_CELLS(x) ((x)(148, 144)) // 5 bit
#define ACT_REG_NSUMMATION(x) ((x)(163, 160)) // 4 bit (0..15)
struct axis_datamover_ctrl {
ap_uint<40+64> data;
@@ -76,7 +77,7 @@ struct axis_completion {
ap_uint<64> bunchid;
ap_uint<32> debug;
ap_uint<16> handle;
ap_uint<8> packet_count;
ap_uint<16> packet_count;
ap_uint<7> module;
ap_uint<1> last;
ap_uint<1> ignore;
@@ -261,7 +262,8 @@ void data_collection_fsm(AXI_STREAM &eth_in,
ap_uint<32> one_over_energy,
ap_uint<32> nframes,
ap_uint<8> nmodules,
ap_uint<4> nstorage_cells);
ap_uint<4> nstorage_cells,
ap_uint<4> nsummation);
void host_writer(STREAM_512 &data_in,
hls::stream<ap_uint<512>> &adu_histo_in,
@@ -332,4 +334,11 @@ void load_calibration(ap_uint<256> *d_hbm_p0,
void add_multipixel(STREAM_512 &data_in, STREAM_512 &data_out);
void module_upside_down(STREAM_512 &data_in, STREAM_512 &data_out);
void frame_summation(STREAM_512 &data_in, STREAM_512 &data_out,
hls::stream<axis_completion > &s_axis_completion,
hls::stream<axis_completion > &m_axis_completion);
void frame_summation_reorder_compl(STREAM_512 &data_in,
STREAM_512 &data_out,
hls::stream<axis_completion > &s_axis_completion,
hls::stream<axis_completion > &m_axis_completion);
#endif
+25 -22
View File
@@ -28,11 +28,11 @@ void integration(STREAM_512 &data_in,
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
ap_fixed<46,30, AP_RND_CONV> sum[64][FPGA_INTEGRATION_BIN_COUNT];
// log2(32768*512*1024/64) = 28 + sign 1 bit
ap_fixed<50,34, AP_RND_CONV> sum[64][FPGA_INTEGRATION_BIN_COUNT];
// log2(16*32768*512*1024/64) = 32 + sign 1 bit
#pragma HLS BIND_STORAGE variable=sum type=ram_t2p impl=bram
#pragma HLS ARRAY_PARTITION variable=sum type=complete dim=1
ap_uint<14> count[64][FPGA_INTEGRATION_BIN_COUNT]; // log2(512*1024/64) = 13
ap_uint<18> count[64][FPGA_INTEGRATION_BIN_COUNT]; // log2(16*512*1024/64) = 17
#pragma HLS BIND_STORAGE variable=count type=ram_t2p impl=bram
#pragma HLS ARRAY_PARTITION variable=count type=complete dim=1
@@ -53,6 +53,7 @@ void integration(STREAM_512 &data_in,
packet_512_t packet_in;
data_in >> packet_in;
ap_uint<4> nsum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
data_out << packet_in;
ap_uint<32> offset_hbm_0 = 16 * hbm_size_bytes / 32;
@@ -63,32 +64,35 @@ void integration(STREAM_512 &data_in,
axis_completion cmpl;
s_axis_completion >> cmpl;
while (!cmpl.last) {
m_axis_completion << cmpl;
for (int i = 0; i < RAW_MODULE_SIZE / 32 / 2; i++) {
for (int s = 0; s < nsum+1; s++) {
m_axis_completion << cmpl;
for (int i = 0; i < RAW_MODULE_SIZE / 32 / 2; i++) {
#pragma HLS PIPELINE II=2
for (int k = 0; k < 2; k++) {
data_in >> packet_in;
data_out << packet_in;
bins_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
bins_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
coeff_0 = d_hbm_p2[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
coeff_1 = d_hbm_p3[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
for (int k = 0; k < 2; k++) {
data_in >> packet_in;
data_out << packet_in;
bins_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
bins_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
coeff_0 = d_hbm_p2[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
coeff_1 = d_hbm_p3[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
unpack_2xhbm_to_32x16bit(bins_0, bins_1, in_bin);
unpack_2xhbm_to_32x16bit(coeff_0, coeff_1, in_coeff);
unpack_2xhbm_to_32x16bit(bins_0, bins_1, in_bin);
unpack_2xhbm_to_32x16bit(coeff_0, coeff_1, in_coeff);
unpack32(packet_in.data, in_val);
unpack32(packet_in.data, in_val);
for (int j = 0; j < 32; j++) {
ap_fixed<32,16, AP_RND_CONV> tmp = in_val[j] * in_coeff[j];
if ((in_val[j] != INT16_MAX) && (in_val[j] != INT16_MIN) && (in_bin[j] < FPGA_INTEGRATION_BIN_COUNT)) {
sum[k * 32 + j][in_bin[j]] += tmp;
count[k * 32 + j][in_bin[j]] += 1;
for (int j = 0; j < 32; j++) {
ap_fixed<32, 16, AP_RND_CONV> tmp = in_val[j] * in_coeff[j];
if ((in_val[j] != INT16_MAX) && (in_val[j] != INT16_MIN) &&
(in_bin[j] < FPGA_INTEGRATION_BIN_COUNT)) {
sum[k * 32 + j][in_bin[j]] += tmp;
count[k * 32 + j][in_bin[j]] += 1;
}
}
}
}
s_axis_completion >> cmpl;
}
for (int i = 0; i < FPGA_INTEGRATION_BIN_COUNT; i++) {
#pragma HLS PIPELINE II=1
ap_axiu<128,1,1,1> res;
@@ -112,7 +116,6 @@ void integration(STREAM_512 &data_in,
res.last = ((i == FPGA_INTEGRATION_BIN_COUNT - 1) ? 1 : 0);
result_out << res;
}
s_axis_completion >> cmpl;
}
m_axis_completion << cmpl;
+1
View File
@@ -19,6 +19,7 @@ struct DataCollectionConfig {
uint32_t one_over_energy;
uint32_t nframes;
uint32_t nstorage_cells;
uint32_t nsummation;
};
struct DataCollectionStatus {
+1
View File
@@ -183,6 +183,7 @@ void FPGAAcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, D
job.one_over_energy = std::lround((1<<20)/ x.GetPhotonEnergy_keV());
job.nstorage_cells = x.GetStorageCellNumber() - 1;
job.mode = data_collection_id << 16;
job.nsummation = 0;
if ((x.GetDetectorMode() == DetectorMode::Conversion) && x.GetConversionOnFPGA())
job.mode |= MODE_CONV;
+2 -1
View File
@@ -306,7 +306,8 @@ void HLSSimulatedDevice::HLSMainThread() {
cfg.one_over_energy,
cfg.nframes,
cfg.nmodules,
cfg.nstorage_cells);
cfg.nstorage_cells,
cfg.nsummation);
run_data_collection = 0;
}
});
+1 -1
View File
@@ -47,7 +47,7 @@ TEST_CASE("ActionStatus") {
TEST_CASE("ActionConfigSize") {
REQUIRE(sizeof(DataCollectionConfig) == 5 * sizeof(uint32_t));
REQUIRE(sizeof(DataCollectionConfig) == 6 * sizeof(uint32_t));
}
TEST_CASE("ActionConfig") {
+18 -9
View File
@@ -734,7 +734,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 1);
REQUIRE(addr1.empty());
REQUIRE(raw1.empty());
@@ -751,7 +752,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.empty());
REQUIRE(raw1.empty());
@@ -766,7 +768,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.empty());
REQUIRE(raw1.empty());
@@ -784,7 +787,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.empty());
REQUIRE(raw1.empty());
@@ -799,7 +803,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.size() == 1);
@@ -816,7 +821,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.size() == 1);
@@ -835,7 +841,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.size() == 1);
@@ -852,7 +859,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 0);
REQUIRE(addr1.size() == 2);
@@ -869,7 +877,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
act_reg.one_over_energy,
act_reg.nframes,
act_reg.nmodules,
act_reg.nstorage_cells);
act_reg.nstorage_cells,
act_reg.nsummation);
REQUIRE(idle_data_collection == 1);
REQUIRE(addr1.size() == 2);