From 697e727a167ce9f7fa441b41da9a8f9a8a85a8a0 Mon Sep 17 00:00:00 2001
From: Filip Leonarski <filip.leonarski@psi.ch>
Date: Tue, 13 Jun 2023 20:58:35 +0200
Subject: [PATCH] JFConversionFixedPoint: restructure ConvertModule loop for vectorization (second attempt)

---
 jungfrau/JFConversionFixedPoint.cpp | 60 +++++++++++++++--------------
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/jungfrau/JFConversionFixedPoint.cpp b/jungfrau/JFConversionFixedPoint.cpp
index 98197a61..e9eb83e1 100644
--- a/jungfrau/JFConversionFixedPoint.cpp
+++ b/jungfrau/JFConversionFixedPoint.cpp
@@ -51,7 +51,12 @@ void JFConversionFixedPoint::Setup(const JFModuleGainCalibration &gain_calibrati
 
 inline int32_t jf_round(int32_t in) {
     const int32_t half = (1L << (FIXED_PRECISION-1));
-    if (in > 0)
+
+    if (in <= INT16_MIN * (1L << FIXED_PRECISION))
+        return INT16_MIN * (1L << FIXED_PRECISION);
+    else if (in >= INT16_MAX * (1L << FIXED_PRECISION))
+        return INT16_MAX * (1L << FIXED_PRECISION);
+    else if (in > 0)
         return in + half;
     else
         return in - half;
@@ -65,38 +70,37 @@ void JFConversionFixedPoint::ConvertModule(int16_t *dest, const uint16_t *source
     auto pedestal_g1_aligned = std::assume_aligned<64>(pedestal_g1);
     auto pedestal_g2_aligned = std::assume_aligned<64>(pedestal_g2);
 
+#pragma ivdep
     for (int i = 0; i < RAW_MODULE_SIZE; i++) {
         uint16_t gainbits = source[i] & 0xc000;
         int32_t adc = source[i] & 0x3fff;
-        int32_t val = INT32_MIN;
 
-        switch (gainbits) {
-            case 0:
-                [[likely]]
-                        val = (adc - pedestal_g0_aligned[i]) * gain_g0_aligned[i];
-                break;
-            case 0x4000:
-                if (source[i] != 0x4000)
-                    val = (adc - pedestal_g1_aligned[i]) * gain_g1_aligned[i];
-                break;
-            case 0xc000:
-                if (source[i] == 0xc000)
-                    val = INT32_MAX;
-                else if (source[i] != 0xffff)
-                    val = (adc - pedestal_g2_aligned[i]) * gain_g2_aligned[i];
-                break;
-            default:
-                break;
-        }
+        dest[i] = static_cast<int16_t>(jf_round((adc - pedestal_g0_aligned[i]) * gain_g0_aligned[i])
+                                       / (1L << FIXED_PRECISION));
 
-        if (val <= INT16_MIN * (1L << FIXED_PRECISION))
-            [[unlikely]]
-                    dest[i] = INT16_MIN;
-        else if (val >= INT16_MAX * (1L << FIXED_PRECISION))
-            [[unlikely]]
-                    dest[i] = INT16_MAX;
-        else
-            dest[i] = static_cast<int16_t>(jf_round(val)/ (1L << FIXED_PRECISION));
+        int16_t val_1 = jf_round((adc - pedestal_g1_aligned[i]) * gain_g1_aligned[i])
+                        / (1L << FIXED_PRECISION);
+
+        int16_t val_2 = jf_round((adc - pedestal_g2_aligned[i]) * gain_g2_aligned[i])
+                        / (1L << FIXED_PRECISION);
+
+        if (gainbits == 0x4000)
+            dest[i] = val_1;
+
+        if (gainbits == 0xc000)
+            dest[i] = val_2;
+
+        if (gainbits == 0x8000)
+            dest[i] = INT16_MIN;
+
+        if (source[i] == 0xffff)
+            dest[i] = INT16_MIN;
+
+        if (source[i] == 0x4000)
+            dest[i] = INT16_MIN;
+
+        if (source[i] == 0xc000)
+            dest[i] = INT16_MAX;
     }
 }