HLS: Added frame_summation core
This commit is contained in:
@@ -49,7 +49,7 @@
|
||||
|
||||
// For FPGA
|
||||
#define ACTION_TYPE 0x52324158
|
||||
#define RELEASE_LEVEL 0x0041
|
||||
#define RELEASE_LEVEL 0x0042
|
||||
|
||||
#define MODE_CONV 0x0001L
|
||||
#define MODE_BITSHUFFLE_FPGA 0x0002L
|
||||
|
||||
@@ -47,6 +47,7 @@
|
||||
`define ADDR_ONE_OVER_ENERGY 16'h0094
|
||||
`define ADDR_NFRAMES 16'h0098
|
||||
`define ADDR_NSTORAGE_CELLS 16'h009C
|
||||
`define ADDR_NSUMMATION 16'h00A0
|
||||
|
||||
`define ADDR_SPOT_FINDER_CNT_THR 16'h0100
|
||||
`define ADDR_SPOT_FINDER_SNR_THR 16'h0104
|
||||
@@ -90,6 +91,7 @@ module action_config
|
||||
output reg [31:0] nframes ,
|
||||
output reg [7:0] nmodules ,
|
||||
output reg [3:0] nstorage_cells ,
|
||||
output reg [3:0] nsummation ,
|
||||
output wire [31:0] hbm_size_bytes ,
|
||||
output reg [15:0] spot_finder_count_threshold,
|
||||
output reg [7:0] spot_finder_snr_threshold,
|
||||
@@ -312,6 +314,9 @@ always @(posedge clk) begin
|
||||
`ADDR_NSTORAGE_CELLS: begin
|
||||
rdata <= nstorage_cells;
|
||||
end
|
||||
`ADDR_NSUMMATION: begin
|
||||
rdata <= nsummation;
|
||||
end
|
||||
`ADDR_ACTION_TYPE: begin
|
||||
rdata <= `ACTION_TYPE;
|
||||
end
|
||||
@@ -519,6 +524,15 @@ always @(posedge clk) begin
|
||||
end
|
||||
end
|
||||
|
||||
// NSUMMATION configuration register (4 bit).
// Cleared to 0 while resetn is low (sampled on the clock edge). It can only be
// written through the AXI-Lite write path (w_hs with waddr == ADDR_NSUMMATION)
// while reg_data_collection_idle is asserted.
always @(posedge clk) begin
    if (!resetn)
        nsummation <= 0;
    else if (reg_data_collection_idle) begin
        if (w_hs && waddr == `ADDR_NSUMMATION)
            // Masked write: take the bits selected by wmask from WDATA and keep the
            // remaining bits of the current value. The keep-term needs the bitwise
            // complement (~); the logical negation (!) yields a single bit and would
            // reduce the retained value to bit 0 only.
            nsummation <= (s_axi_WDATA[3:0] & wmask[3:0]) | (nsummation & ~wmask[3:0]);
    end
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (!resetn)
|
||||
spot_finder_snr_threshold <= 0;
|
||||
|
||||
@@ -24,7 +24,9 @@ ADD_LIBRARY( HLSSimulation STATIC
|
||||
axis_helpers.cpp
|
||||
hls_bitshuffle.cpp
|
||||
add_multipixel.cpp
|
||||
module_upside_down.cpp)
|
||||
module_upside_down.cpp
|
||||
frame_summation.cpp
|
||||
frame_summation_reorder_compl.cpp)
|
||||
|
||||
TARGET_INCLUDE_DIRECTORIES(HLSSimulation PUBLIC ../include)
|
||||
TARGET_LINK_LIBRARIES(HLSSimulation CommonFunctions)
|
||||
@@ -72,6 +74,8 @@ MAKE_HLS_MODULE(axis_32_to_512 axis_helpers.cpp "")
|
||||
MAKE_HLS_MODULE(adu_histo adu_histo.cpp "")
|
||||
MAKE_HLS_MODULE(add_multipixel add_multipixel.cpp add_multipixel_tb.cpp)
|
||||
MAKE_HLS_MODULE(module_upside_down module_upside_down.cpp module_upside_down_tb.cpp)
|
||||
MAKE_HLS_MODULE(frame_summation frame_summation.cpp frame_summation_tb.cpp)
|
||||
MAKE_HLS_MODULE(frame_summation_reorder_compl frame_summation_reorder_compl.cpp frame_summation_reorder_compl_tb.cpp)
|
||||
|
||||
SET (HLS_IPS ${HLS_IPS} PARENT_SCOPE)
|
||||
ADD_CUSTOM_TARGET(hls DEPENDS ${HLS_IPS})
|
||||
|
||||
@@ -14,7 +14,7 @@ void adu_histo(STREAM_512 &data_in,
|
||||
#pragma HLS INTERFACE register both axis port=m_axis_completion
|
||||
#pragma HLS INTERFACE register both axis port=s_axis_completion
|
||||
|
||||
ap_uint<14> count[64][ADU_HISTO_BIN_COUNT]; // log2(512*1024/64) = 13
|
||||
ap_uint<19> count[64][ADU_HISTO_BIN_COUNT]; // log2(16*512*1024/64) = 17
|
||||
#pragma HLS BIND_STORAGE variable=count type=ram_t2p impl=bram
|
||||
#pragma HLS ARRAY_PARTITION variable=count type=complete dim=1
|
||||
|
||||
@@ -29,13 +29,14 @@ void adu_histo(STREAM_512 &data_in,
|
||||
|
||||
packet_512_t packet_in;
|
||||
data_in >> packet_in;
|
||||
ap_uint<4> sum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
|
||||
data_out << packet_in;
|
||||
|
||||
axis_completion cmpl;
|
||||
s_axis_completion >> cmpl;
|
||||
while (!cmpl.last) {
|
||||
m_axis_completion << cmpl;
|
||||
for (int i = 0; i < RAW_MODULE_SIZE / (32 * 2); i++) {
|
||||
for (int i = 0; i < (sum + 1) * RAW_MODULE_SIZE / (32 * 2); i++) {
|
||||
#pragma HLS PIPELINE II=2
|
||||
for (int k = 0; k < 2; k++) {
|
||||
data_in >> packet_in;
|
||||
|
||||
@@ -14,7 +14,8 @@ void data_collection_fsm(AXI_STREAM ð_in,
|
||||
ap_uint<32> one_over_energy,
|
||||
ap_uint<32> nframes,
|
||||
ap_uint<8> nmodules,
|
||||
ap_uint<4> nstorage_cells) {
|
||||
ap_uint<4> nstorage_cells,
|
||||
ap_uint<4> nsummation) {
|
||||
#pragma HLS INTERFACE ap_ctrl_none port=return
|
||||
|
||||
#pragma HLS INTERFACE axis register both port=eth_in
|
||||
@@ -30,6 +31,7 @@ void data_collection_fsm(AXI_STREAM ð_in,
|
||||
#pragma HLS INTERFACE ap_none register port=nframes
|
||||
#pragma HLS INTERFACE ap_none register port=nmodules
|
||||
#pragma HLS INTERFACE ap_none register port=nstorage_cells
|
||||
#pragma HLS INTERFACE ap_none register port=nsummation
|
||||
|
||||
#pragma HLS PIPELINE II=1 style=flp
|
||||
|
||||
@@ -75,6 +77,7 @@ void data_collection_fsm(AXI_STREAM ð_in,
|
||||
ACT_REG_NFRAMES(packet_out.data) = nframes;
|
||||
ACT_REG_NMODULES(packet_out.data) = nmodules;
|
||||
ACT_REG_NSTORAGE_CELLS(packet_out.data) = nstorage_cells + 1;
|
||||
ACT_REG_NSUMMATION(packet_out.data) = nsummation;
|
||||
|
||||
packet_out.user = 0;
|
||||
packet_out.last = 0;
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#include "hls_jfjoch.h"
|
||||
|
||||
// Sums groups of (NSUMMATION + 1) consecutive module frames into one output frame.
//
// Stream protocol as implemented below:
//   * The first packet on data_in carries the action register; NSUMMATION
//     ("sum", 0..15) is taken from it and the packet is forwarded unchanged.
//   * Every non-terminating completion on s_axis_completion is accompanied by
//     16384 512-bit words on data_in (32 x int16 lanes per word).
//   * sum == 0: completions and data are passed through untouched.
//   * sum  > 0: for each group of sum + 1 completions the pixel data is accumulated
//     in memory_0 and written to data_out while the last frame of the group is
//     processed. One completion is emitted per group: frame_number is the first
//     frame's number divided by (sum + 1), packet_mask is the AND of all masks
//     and packet_count is the sum of all counts in the group.
//   * The terminating completion (last == 1) and the packet following the data
//     are forwarded at the end.
//
// Lane arithmetic: INT16_MIN wins over everything, then INT16_MAX; otherwise
// the lanes are added and clamped to INT16_MAX on positive overflow.
// NOTE(review): there is no clamp for negative overflow (sum below INT16_MIN
// wraps when stored back into 16 bit) - confirm this cannot occur in practice.
// NOTE(review): the inner loop assumes completions always arrive in full
// groups of sum + 1; a terminating completion in the middle of a group is
// not detected there - confirm this is guaranteed upstream.
void frame_summation(STREAM_512 &data_in, STREAM_512 &data_out,
                     hls::stream<axis_completion > &s_axis_completion,
                     hls::stream<axis_completion > &m_axis_completion) {
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS INTERFACE axis register both port=s_axis_completion
#pragma HLS INTERFACE axis register both port=m_axis_completion

    // Running sum for one module frame: 16384 words x 512 bit.
    ap_uint<512> memory_0[16384];
#pragma HLS BIND_STORAGE variable=memory_0 type=ram_t2p impl=uram latency=3

    packet_512_t packet_in, packet_out;
    data_in >> packet_in;
    ap_uint<4> sum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
    data_out << packet_in;

    // Pre-fetch: packet_in always holds the next word to be processed.
    data_in >> packet_in;
    if (sum > 0) {
        axis_completion cmpl, cmpl_out;
        // Summed packets are built from scratch; make sure the user flag is
        // defined (data packets in the testbenches carry user = 0).
        packet_out.user = 0;

        s_axis_completion >> cmpl;
        while (!cmpl.last) {
            later_frames:
            cmpl_out = cmpl;
            // Output frame index = input frame number / group size (integer division).
            // NOTE(review): an earlier comment mentioned frame numbers starting
            // at 1, but no offset is applied here - confirm this is intended.
            cmpl_out.frame_number = cmpl.frame_number / (sum+1);

            for (int s = 0; s <= sum; s++) {
                if (s > 0) {
                    cmpl_out.packet_mask = (cmpl_out.packet_mask & cmpl.packet_mask);
                    cmpl_out.packet_count += cmpl.packet_count;
                }
                // The group completion goes out ahead of the summed data.
                if (s == sum)
                    m_axis_completion << cmpl_out;

                for (int i = 0; i < 16384; i++) {
#pragma HLS PIPELINE II=1
                    ap_int<16> val_0[32];
                    ap_int<16> val_1[32];
                    unpack32(packet_in.data, val_0);
                    // The first frame of a group starts from zero instead of
                    // the stale content of memory_0.
                    if (s == 0)
                        unpack32(0, val_1);
                    else
                        unpack32(memory_0[i], val_1);

                    for (int j = 0; j < 32; j++) {
                        if ((val_0[j] == INT16_MIN) || (val_1[j] == INT16_MIN))
                            val_0[j] = INT16_MIN;
                        else if ((val_0[j] == INT16_MAX) || (val_1[j] == INT16_MAX))
                            val_0[j] = INT16_MAX;
                        else {
                            // 17 bit so that the sum of two 16-bit lanes cannot wrap.
                            ap_int<17> tmp = val_0[j] + val_1[j];
                            if (tmp >= INT16_MAX)
                                val_0[j] = INT16_MAX;
                            else
                                val_0[j] = tmp;
                        }
                    }

                    if (s == sum) {
                        // Last frame of the group: stream the result out.
                        packet_out.data = pack32(val_0);
                        packet_out.last = ((i == 16383) ? 1 : 0);
                        packet_out.keep = UINT64_MAX;
                        data_out << packet_out;
                    } else {
                        memory_0[i] = pack32(val_0);
                    }
                    data_in >> packet_in;
                }
                s_axis_completion >> cmpl;
            }
        }
        // Forward the terminating completion itself (last == 1). cmpl_out only
        // holds the previous group's completion (last == 0), or nothing at all
        // if the completion stream was empty.
        m_axis_completion << cmpl;
    } else {
        // No summation requested: pure pass-through.
        axis_completion cmpl;
        s_axis_completion >> cmpl;
        while (!cmpl.last) {
            m_axis_completion << cmpl;
            for (int i = 0; i < 16384; i++) {
#pragma HLS PIPELINE II=1
                data_out << packet_in;
                data_in >> packet_in;
            }
            s_axis_completion >> cmpl;
        }
        m_axis_completion << cmpl;
    }
    // The pre-fetched packet left over after the data is forwarded as is.
    data_out << packet_in;
}
|
||||
@@ -0,0 +1,77 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#include "hls_jfjoch.h"
|
||||
#define MAX_FPGA_SUMMATION 16
|
||||
|
||||
// Groups incoming completions per module so that all completions belonging to
// the same summation group (frame_number / (NSUMMATION + 1)) leave this core
// back to back, ordered by their position inside the group.
//
// Per module, one group is kept open at a time:
//   * a completion of the open group is stored in its slot,
//   * a completion of a newer group first flushes the open group, then opens
//     the new one,
//   * a completion of an older group is forwarded immediately with ignore = 1.
// When a group is flushed and it did not collect exactly NSUMMATION + 1
// completions, every completion of that group is marked ignore = 1.
// After the terminating completion (last == 1) all open groups are flushed in
// module order, followed by the terminating completion itself.
//
// data_in / data_out: the first packet (action register) and one further
// packet are passed through unchanged.
void frame_summation_reorder_compl(STREAM_512 &data_in,
                                   STREAM_512 &data_out,
                                   hls::stream<axis_completion > &s_axis_completion,
                                   hls::stream<axis_completion > &m_axis_completion) {
#pragma HLS INTERFACE axis register both port=data_in
#pragma HLS INTERFACE axis register both port=data_out
#pragma HLS INTERFACE axis register both port=s_axis_completion
#pragma HLS INTERFACE axis register both port=m_axis_completion

    packet_512_t header;
    data_in >> header;
    ap_uint<4> sum = ACT_REG_NSUMMATION(header.data); // 0..15
    data_out << header;

    // Number of frames per summation group (1..16).
    const ap_uint<5> group_size = sum + 1;

    // Per-module bookkeeping for the currently open group.
    axis_completion pending[MAX_FPGA_SUMMATION * MAX_MODULES_FPGA]; // stored completions, one slot per frame
    ap_uint<MAX_FPGA_SUMMATION> slot_valid[MAX_MODULES_FPGA];       // which slots hold a completion
    ap_uint<5> received[MAX_MODULES_FPGA];                          // completions collected so far
    ap_uint<64> open_group[MAX_MODULES_FPGA];                       // index of the open group

    for (int m = 0; m < MAX_MODULES_FPGA; m++) {
        slot_valid[m] = 0;
        received[m] = 0;
        open_group[m] = 0;
    }

    axis_completion incoming;
    s_axis_completion >> incoming;
    while (!incoming.last) {
#pragma HLS PIPELINE II=16
        ap_uint<64> group = incoming.frame_number / group_size;
        ap_uint<5> slot = incoming.frame_number % group_size;
        ap_uint<7> module = incoming.module;

        if (group < open_group[module]) {
            // Arrived after its group was already closed.
            incoming.ignore = 1;
            m_axis_completion << incoming;
        } else {
            if (group > open_group[module]) {
                // A newer group starts: emit whatever the open group collected.
                for (int k = 0; k <= sum; k++) {
                    if (slot_valid[module][k]) {
                        axis_completion out = pending[module * MAX_FPGA_SUMMATION + k];
                        if (received[module] != group_size)
                            out.ignore = 1;
                        m_axis_completion << out;
                    }
                }
                slot_valid[module] = 0;
                received[module] = 0;
                open_group[module] = group;
            }
            // Record the completion in the (possibly just opened) group.
            pending[module * MAX_FPGA_SUMMATION + slot] = incoming;
            slot_valid[module][slot] = 1;
            received[module] += 1;
        }
        s_axis_completion >> incoming;
    }

    // End of stream: flush the open group of every module.
    for (int module = 0; module < MAX_MODULES_FPGA; module++) {
#pragma HLS PIPELINE II=16
        for (int k = 0; k <= sum; k++) {
            if (slot_valid[module][k]) {
                axis_completion out = pending[module * MAX_FPGA_SUMMATION + k];
                if (received[module] != group_size)
                    out.ignore = 1;
                m_axis_completion << out;
            }
        }
    }

    // Terminating completion (last == 1).
    m_axis_completion << incoming;

    packet_512_t trailer;
    data_in >> trailer;
    data_out << trailer;
}
|
||||
@@ -0,0 +1,123 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#include "hls_jfjoch.h"
|
||||
|
||||
void expected(int &ret, hls::stream<axis_completion>& compl_in, size_t frame_number, uint16_t module,
|
||||
uint16_t packet_count, uint16_t ignore) {
|
||||
axis_completion cmpl = compl_in.read();
|
||||
if (cmpl.frame_number != frame_number) {
|
||||
ret = 1;
|
||||
std::cerr << "Frame number error " << cmpl.frame_number << " " << frame_number << std::endl;
|
||||
}
|
||||
if (cmpl.module != module) {
|
||||
ret = 1;
|
||||
std::cerr << "Module number error " << cmpl.module << " " << module << std::endl;
|
||||
}
|
||||
|
||||
if (cmpl.packet_count != packet_count) {
|
||||
ret = 1;
|
||||
std::cerr << "Packet count error " << cmpl.packet_count << " " << packet_count << std::endl;
|
||||
}
|
||||
if (cmpl.ignore != ignore) {
|
||||
ret = 1;
|
||||
std::cerr << "Ignore val error " << cmpl.ignore << " " << ignore << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
||||
int ret = 0;
|
||||
|
||||
STREAM_512 input;
|
||||
STREAM_512 output;
|
||||
hls::stream<axis_completion> compl_in;
|
||||
hls::stream<axis_completion> compl_out;
|
||||
size_t nframes = 5;
|
||||
|
||||
ap_uint<512> action_control = 0;
|
||||
ACT_REG_NSUMMATION(action_control) = nframes - 1;
|
||||
|
||||
input << packet_512_t { .data = action_control, .user = 0 };
|
||||
input << packet_512_t { .user = 1 };
|
||||
|
||||
compl_in << axis_completion{.frame_number = 100, .packet_count = 128, .module = 4, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 100, .packet_count = 128, .module = 5, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 100, .packet_count = 128, .module = 3, .last = 0, .ignore = 0};
|
||||
|
||||
compl_in << axis_completion{.frame_number = 101, .packet_count = 128, .module = 4, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 101, .packet_count = 128, .module = 5, .last = 0, .ignore = 0};
|
||||
|
||||
compl_in << axis_completion{.frame_number = 102, .packet_count = 128, .module = 5, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 102, .packet_count = 128, .module = 4, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 102, .packet_count = 128, .module = 3, .last = 0, .ignore = 0};
|
||||
|
||||
compl_in << axis_completion{.frame_number = 103, .packet_count = 128, .module = 5, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 103, .packet_count = 128, .module = 4, .last = 0, .ignore = 0};
|
||||
|
||||
compl_in << axis_completion{.frame_number = 104, .packet_count = 128, .module = 4, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 104, .packet_count = 128, .module = 5, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 80, .packet_count = 35, .module = 5, .last = 0, .ignore = 0};
|
||||
|
||||
compl_in << axis_completion{.frame_number = 105, .packet_count = 128, .module = 5, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 105, .packet_count = 128, .module = 4, .last = 0, .ignore = 0};
|
||||
compl_in << axis_completion{.frame_number = 105, .packet_count = 128, .module = 3, .last = 0, .ignore = 0};
|
||||
|
||||
compl_in << axis_completion{.last = 1};
|
||||
|
||||
size_t size_start = compl_in.size();
|
||||
|
||||
frame_summation_reorder_compl(input, output, compl_in, compl_out);
|
||||
|
||||
if (compl_in.size() != 0) {
|
||||
std::cout << "compl_in should be empty: " << compl_in.size() << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (compl_out.size() != size_start) {
|
||||
std::cout << "compl_out should be size " << size_start << ": " << compl_out.size() << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (input.size() != 0)
|
||||
ret = 1;
|
||||
|
||||
if (output.size() != 2)
|
||||
ret = 1;
|
||||
|
||||
output.read();
|
||||
output.read();
|
||||
|
||||
expected(ret, compl_out, 80, 5, 35, 1);
|
||||
|
||||
expected(ret, compl_out, 100, 5, 128, 0);
|
||||
expected(ret, compl_out, 101, 5, 128, 0);
|
||||
expected(ret, compl_out, 102, 5, 128, 0);
|
||||
expected(ret, compl_out, 103, 5, 128, 0);
|
||||
expected(ret, compl_out, 104, 5, 128, 0);
|
||||
|
||||
expected(ret, compl_out, 100, 4, 128, 0);
|
||||
expected(ret, compl_out, 101, 4, 128, 0);
|
||||
expected(ret, compl_out, 102, 4, 128, 0);
|
||||
expected(ret, compl_out, 103, 4, 128, 0);
|
||||
expected(ret, compl_out, 104, 4, 128, 0);
|
||||
expected(ret, compl_out, 100, 3, 128, 1);
|
||||
expected(ret, compl_out, 102, 3, 128, 1);
|
||||
expected(ret, compl_out, 105, 3, 128, 1);
|
||||
expected(ret, compl_out, 105, 4, 128, 1);
|
||||
expected(ret, compl_out, 105, 5, 128, 1);
|
||||
|
||||
axis_completion cmpl = compl_out.read();
|
||||
if (!cmpl.last) {
|
||||
std::cerr << "Last completion error" << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
printf("Test failed !!!\n");
|
||||
ret = 1;
|
||||
} else {
|
||||
printf("Test passed !\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
// Copyright (2019-2023) Paul Scherrer Institute
|
||||
|
||||
#include <random>
|
||||
#include "hls_jfjoch.h"
|
||||
|
||||
int main() {
|
||||
|
||||
int ret = 0;
|
||||
|
||||
STREAM_512 input;
|
||||
STREAM_512 output;
|
||||
hls::stream<axis_completion> compl_in;
|
||||
hls::stream<axis_completion> compl_out;
|
||||
size_t nframes = 5;
|
||||
std::vector<int16_t> input_frame(nframes * RAW_MODULE_SIZE);
|
||||
std::vector<int16_t> output_frame_ref(RAW_MODULE_SIZE, 0);
|
||||
std::vector<int16_t> output_frame(RAW_MODULE_SIZE, 0);
|
||||
|
||||
std::mt19937 g1(1387);
|
||||
std::uniform_int_distribution<uint16_t> dist(0, 5000);
|
||||
|
||||
for (int n = 0; n < nframes * RAW_MODULE_SIZE; n++) {
|
||||
input_frame[n] = dist(g1);
|
||||
output_frame_ref[n % RAW_MODULE_SIZE] += input_frame[n];
|
||||
}
|
||||
|
||||
auto input_frame_512 = (ap_uint<512>*) input_frame.data();
|
||||
auto output_frame_512 = (ap_uint<512>*) output_frame.data();
|
||||
|
||||
ap_uint<512> action_control = 0;
|
||||
ACT_REG_NSUMMATION(action_control) = nframes - 1;
|
||||
|
||||
input << packet_512_t { .data = action_control, .user = 0 };
|
||||
for (int i = 0; i < nframes * RAW_MODULE_SIZE * sizeof(uint16_t) / 64; i++)
|
||||
input << packet_512_t { .data = input_frame_512[i], .user = 0 };
|
||||
|
||||
input << packet_512_t { .user = 1 };
|
||||
|
||||
ap_uint<128> packet_mask;
|
||||
for (int i = 0; i < 128; i++)
|
||||
packet_mask[i] = 1;
|
||||
|
||||
for (int i = 0; i < nframes; i++)
|
||||
compl_in << axis_completion{.packet_mask = packet_mask, .frame_number = 100 + i, .packet_count = 128, .last = 0};
|
||||
compl_in << axis_completion{.last = 1};
|
||||
|
||||
frame_summation(input, output, compl_in, compl_out);
|
||||
|
||||
if (compl_in.size() != 0) {
|
||||
std::cout << "compl_in should be empty: " << compl_in.size() << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (compl_out.size() != 2) {
|
||||
std::cout << "compl_out should be size 2: " << compl_out.size() << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (input.size() != 0)
|
||||
ret = 1;
|
||||
|
||||
if (output.size() != RAW_MODULE_SIZE * sizeof(uint16_t) / 64 + 2)
|
||||
ret = 1;
|
||||
|
||||
output.read();
|
||||
for (int i = 0; i < RAW_MODULE_SIZE * sizeof(uint16_t) / 64 ; i++)
|
||||
output_frame_512[i] = output.read().data;
|
||||
output.read();
|
||||
|
||||
axis_completion cmpl;
|
||||
cmpl = compl_out.read();
|
||||
if (cmpl.frame_number != 100/5) {
|
||||
std::cout << "Wrong output frame number" << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (cmpl.packet_count != 128*5) {
|
||||
std::cout << "Wrong output frame number" << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (cmpl.packet_mask != packet_mask) {
|
||||
std::cout << "Wrong packet mask" << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
compl_out.read();
|
||||
|
||||
if (output_frame != output_frame_ref) {
|
||||
std::cout << "Input and output don't match" << std::endl;
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
printf("Test failed !!!\n");
|
||||
ret = 1;
|
||||
} else {
|
||||
printf("Test passed !\n");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
+11
-2
@@ -52,6 +52,7 @@ typedef hls::stream<packet_512_t> STREAM_512;
|
||||
#define ACT_REG_NFRAMES(x) ((x)(95 , 64)) // 32 bit
|
||||
#define ACT_REG_NMODULES(x) ((x)(132, 128)) // 5 bit (0..31)
|
||||
#define ACT_REG_NSTORAGE_CELLS(x) ((x)(148, 144)) // 5 bit
|
||||
#define ACT_REG_NSUMMATION(x) ((x)(163, 160)) // 4 bit (0..15)
|
||||
|
||||
struct axis_datamover_ctrl {
|
||||
ap_uint<40+64> data;
|
||||
@@ -76,7 +77,7 @@ struct axis_completion {
|
||||
ap_uint<64> bunchid;
|
||||
ap_uint<32> debug;
|
||||
ap_uint<16> handle;
|
||||
ap_uint<8> packet_count;
|
||||
ap_uint<16> packet_count;
|
||||
ap_uint<7> module;
|
||||
ap_uint<1> last;
|
||||
ap_uint<1> ignore;
|
||||
@@ -261,7 +262,8 @@ void data_collection_fsm(AXI_STREAM ð_in,
|
||||
ap_uint<32> one_over_energy,
|
||||
ap_uint<32> nframes,
|
||||
ap_uint<8> nmodules,
|
||||
ap_uint<4> nstorage_cells);
|
||||
ap_uint<4> nstorage_cells,
|
||||
ap_uint<4> nsummation);
|
||||
|
||||
void host_writer(STREAM_512 &data_in,
|
||||
hls::stream<ap_uint<512>> &adu_histo_in,
|
||||
@@ -332,4 +334,11 @@ void load_calibration(ap_uint<256> *d_hbm_p0,
|
||||
void add_multipixel(STREAM_512 &data_in, STREAM_512 &data_out);
|
||||
void module_upside_down(STREAM_512 &data_in, STREAM_512 &data_out);
|
||||
|
||||
void frame_summation(STREAM_512 &data_in, STREAM_512 &data_out,
|
||||
hls::stream<axis_completion > &s_axis_completion,
|
||||
hls::stream<axis_completion > &m_axis_completion);
|
||||
void frame_summation_reorder_compl(STREAM_512 &data_in,
|
||||
STREAM_512 &data_out,
|
||||
hls::stream<axis_completion > &s_axis_completion,
|
||||
hls::stream<axis_completion > &m_axis_completion);
|
||||
#endif
|
||||
|
||||
+25
-22
@@ -28,11 +28,11 @@ void integration(STREAM_512 &data_in,
|
||||
#pragma HLS INTERFACE m_axi port=d_hbm_p3 bundle=d_hbm_p3 depth=512 offset=off \
|
||||
max_read_burst_length=16 max_write_burst_length=2 latency=120 num_write_outstanding=2 num_read_outstanding=8
|
||||
|
||||
ap_fixed<46,30, AP_RND_CONV> sum[64][FPGA_INTEGRATION_BIN_COUNT];
|
||||
// log2(32768*512*1024/64) = 28 + sign 1 bit
|
||||
ap_fixed<50,34, AP_RND_CONV> sum[64][FPGA_INTEGRATION_BIN_COUNT];
|
||||
// log2(16*32768*512*1024/64) = 32 + sign 1 bit
|
||||
#pragma HLS BIND_STORAGE variable=sum type=ram_t2p impl=bram
|
||||
#pragma HLS ARRAY_PARTITION variable=sum type=complete dim=1
|
||||
ap_uint<14> count[64][FPGA_INTEGRATION_BIN_COUNT]; // log2(512*1024/64) = 13
|
||||
ap_uint<18> count[64][FPGA_INTEGRATION_BIN_COUNT]; // log2(16*512*1024/64) = 17
|
||||
#pragma HLS BIND_STORAGE variable=count type=ram_t2p impl=bram
|
||||
#pragma HLS ARRAY_PARTITION variable=count type=complete dim=1
|
||||
|
||||
@@ -53,6 +53,7 @@ void integration(STREAM_512 &data_in,
|
||||
|
||||
packet_512_t packet_in;
|
||||
data_in >> packet_in;
|
||||
ap_uint<4> nsum = ACT_REG_NSUMMATION(packet_in.data); // 0..15
|
||||
data_out << packet_in;
|
||||
|
||||
ap_uint<32> offset_hbm_0 = 16 * hbm_size_bytes / 32;
|
||||
@@ -63,32 +64,35 @@ void integration(STREAM_512 &data_in,
|
||||
axis_completion cmpl;
|
||||
s_axis_completion >> cmpl;
|
||||
while (!cmpl.last) {
|
||||
m_axis_completion << cmpl;
|
||||
for (int i = 0; i < RAW_MODULE_SIZE / 32 / 2; i++) {
|
||||
for (int s = 0; s < nsum+1; s++) {
|
||||
m_axis_completion << cmpl;
|
||||
for (int i = 0; i < RAW_MODULE_SIZE / 32 / 2; i++) {
|
||||
#pragma HLS PIPELINE II=2
|
||||
for (int k = 0; k < 2; k++) {
|
||||
data_in >> packet_in;
|
||||
data_out << packet_in;
|
||||
bins_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
bins_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
coeff_0 = d_hbm_p2[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
coeff_1 = d_hbm_p3[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
for (int k = 0; k < 2; k++) {
|
||||
data_in >> packet_in;
|
||||
data_out << packet_in;
|
||||
bins_0 = d_hbm_p0[offset_hbm_0 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
bins_1 = d_hbm_p1[offset_hbm_1 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
coeff_0 = d_hbm_p2[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
coeff_1 = d_hbm_p3[offset_hbm_2 + cmpl.module * RAW_MODULE_SIZE * sizeof(int16_t) / 64 + i * 2 + k];
|
||||
|
||||
unpack_2xhbm_to_32x16bit(bins_0, bins_1, in_bin);
|
||||
unpack_2xhbm_to_32x16bit(coeff_0, coeff_1, in_coeff);
|
||||
unpack_2xhbm_to_32x16bit(bins_0, bins_1, in_bin);
|
||||
unpack_2xhbm_to_32x16bit(coeff_0, coeff_1, in_coeff);
|
||||
|
||||
unpack32(packet_in.data, in_val);
|
||||
unpack32(packet_in.data, in_val);
|
||||
|
||||
for (int j = 0; j < 32; j++) {
|
||||
ap_fixed<32,16, AP_RND_CONV> tmp = in_val[j] * in_coeff[j];
|
||||
if ((in_val[j] != INT16_MAX) && (in_val[j] != INT16_MIN) && (in_bin[j] < FPGA_INTEGRATION_BIN_COUNT)) {
|
||||
sum[k * 32 + j][in_bin[j]] += tmp;
|
||||
count[k * 32 + j][in_bin[j]] += 1;
|
||||
for (int j = 0; j < 32; j++) {
|
||||
ap_fixed<32, 16, AP_RND_CONV> tmp = in_val[j] * in_coeff[j];
|
||||
if ((in_val[j] != INT16_MAX) && (in_val[j] != INT16_MIN) &&
|
||||
(in_bin[j] < FPGA_INTEGRATION_BIN_COUNT)) {
|
||||
sum[k * 32 + j][in_bin[j]] += tmp;
|
||||
count[k * 32 + j][in_bin[j]] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
s_axis_completion >> cmpl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < FPGA_INTEGRATION_BIN_COUNT; i++) {
|
||||
#pragma HLS PIPELINE II=1
|
||||
ap_axiu<128,1,1,1> res;
|
||||
@@ -112,7 +116,6 @@ void integration(STREAM_512 &data_in,
|
||||
res.last = ((i == FPGA_INTEGRATION_BIN_COUNT - 1) ? 1 : 0);
|
||||
result_out << res;
|
||||
}
|
||||
s_axis_completion >> cmpl;
|
||||
}
|
||||
m_axis_completion << cmpl;
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ struct DataCollectionConfig {
|
||||
uint32_t one_over_energy;
|
||||
uint32_t nframes;
|
||||
uint32_t nstorage_cells;
|
||||
uint32_t nsummation;
|
||||
};
|
||||
|
||||
struct DataCollectionStatus {
|
||||
|
||||
@@ -183,6 +183,7 @@ void FPGAAcquisitionDevice::FillActionRegister(const DiffractionExperiment& x, D
|
||||
job.one_over_energy = std::lround((1<<20)/ x.GetPhotonEnergy_keV());
|
||||
job.nstorage_cells = x.GetStorageCellNumber() - 1;
|
||||
job.mode = data_collection_id << 16;
|
||||
job.nsummation = 0;
|
||||
|
||||
if ((x.GetDetectorMode() == DetectorMode::Conversion) && x.GetConversionOnFPGA())
|
||||
job.mode |= MODE_CONV;
|
||||
|
||||
@@ -306,7 +306,8 @@ void HLSSimulatedDevice::HLSMainThread() {
|
||||
cfg.one_over_energy,
|
||||
cfg.nframes,
|
||||
cfg.nmodules,
|
||||
cfg.nstorage_cells);
|
||||
cfg.nstorage_cells,
|
||||
cfg.nsummation);
|
||||
run_data_collection = 0;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -47,7 +47,7 @@ TEST_CASE("ActionStatus") {
|
||||
|
||||
|
||||
// Guards the size of DataCollectionConfig. Only the six-word assertion is
// kept: the block also asserted five words, and the two cannot both hold.
TEST_CASE("ActionConfigSize") {
    REQUIRE(sizeof(DataCollectionConfig) == 6 * sizeof(uint32_t));
}
|
||||
|
||||
TEST_CASE("ActionConfig") {
|
||||
|
||||
@@ -734,7 +734,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
REQUIRE(idle_data_collection == 1);
|
||||
REQUIRE(addr1.empty());
|
||||
REQUIRE(raw1.empty());
|
||||
@@ -751,7 +752,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.empty());
|
||||
REQUIRE(raw1.empty());
|
||||
@@ -766,7 +768,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.empty());
|
||||
REQUIRE(raw1.empty());
|
||||
@@ -784,7 +787,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.empty());
|
||||
REQUIRE(raw1.empty());
|
||||
@@ -799,7 +803,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.size() == 1);
|
||||
@@ -816,7 +821,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.size() == 1);
|
||||
@@ -835,7 +841,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.size() == 1);
|
||||
@@ -852,7 +859,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
|
||||
REQUIRE(idle_data_collection == 0);
|
||||
REQUIRE(addr1.size() == 2);
|
||||
@@ -869,7 +877,8 @@ TEST_CASE("HLS_DataCollectionFSM","[OpenCAPI]") {
|
||||
act_reg.one_over_energy,
|
||||
act_reg.nframes,
|
||||
act_reg.nmodules,
|
||||
act_reg.nstorage_cells);
|
||||
act_reg.nstorage_cells,
|
||||
act_reg.nsummation);
|
||||
|
||||
REQUIRE(idle_data_collection == 1);
|
||||
REQUIRE(addr1.size() == 2);
|
||||
|
||||
Reference in New Issue
Block a user