Add normalize, noiseThreshold options; add rms x/y outputs

2026-03-18 08:32:35 +01:00
parent 9d7970856e
commit 572d798b72
2 changed files with 46 additions and 9 deletions
@@ -16,7 +16,7 @@ torch.backends.cudnn.deterministic = True
 torch.backends.cudnn.benchmark = False

 modelVersion = '251022'  # '250909' or '251020'
-Energy = '15.3keV'
+Energy = '15keV'
 TrainLosses, ValLosses = [], []
 LearningRates = []
 TestLoss = -1
@@ -24,7 +24,9 @@ model = models.get_model_class(modelVersion)().cuda()
 # summary(model, input_size=(128, 1, 3, 3))
 LearningRate = 1e-3
 Noise = 0.13 # in keV
-numberOfAugOps = 8 # 1 (no augmentation) or (1,8] (with augmentation)
+NoiseThreshold = 0 * Noise  # in keV, set values below this threshold to zero
+numberOfAugOps = 1 # 1 (no augmentation) or (1,8] (with augmentation)
+flag_normalize = False

 TrainLosses, TestLosses = [], []
 def weighted_loss(pred, target, alpha=7.0):
@@ -46,6 +48,7 @@ LossFunction = weighted_loss
 def train(model, trainLoader, optimizer):
    model.train()
    batchLoss = 0
+    rms_x, rms_y = 0, 0
    for batch_idx, (sample, label) in enumerate(trainLoader):
        sample, label = sample.cuda(), label.cuda()
        x, y, z, e = label[:,0], label[:,1], label[:,2], label[:,3]
@@ -55,10 +58,14 @@ def train(model, trainLoader, optimizer):
        loss.backward()
        optimizer.step()
        batchLoss += loss.item() * sample.shape[0]
+        rms_x += torch.sum((output[:,0] - x)**2).item()
+        rms_y += torch.sum((output[:,1] - y)**2).item()
    avgLoss = batchLoss / len(trainLoader.dataset)
+    rms_x = np.sqrt(rms_x / len(trainLoader.dataset))
+    rms_y = np.sqrt(rms_y / len(trainLoader.dataset))

    datasetName = trainLoader.dataset.datasetName
-    print(f"[{datasetName}]\t Average Loss: {avgLoss:.6f} (sigma = {np.sqrt(avgLoss):.6f})")
+    print(f"[{datasetName}]\t Average Loss: {avgLoss:.6f} (sigma = {np.sqrt(avgLoss):.6f}) \t RMS X: {rms_x:.6f} \t RMS Y: {rms_y:.6f}")
    TrainLosses.append(avgLoss)

 def test(model, testLoader):
@@ -89,6 +96,8 @@ trainDataset = singlePhotonDataset(
    datasetName='Train',
    noiseKeV = Noise,
    numberOfAugOps=numberOfAugOps,
+    normalize=flag_normalize,
+    noiseThreshold=NoiseThreshold
    )
 valDataset = singlePhotonDataset(
    [f'{sampleFolder}/{Energy}_Moench040_150V_{i}.npz' for i in range(13,14)],
@@ -96,12 +105,17 @@ valDataset = singlePhotonDataset(
    datasetName='Val',
    noiseKeV = Noise,
    numberOfAugOps=numberOfAugOps,
+    normalize=flag_normalize,
+    noiseThreshold=NoiseThreshold
    )
 testDataset = singlePhotonDataset(
    [f'{sampleFolder}/{Energy}_Moench040_150V_{i}.npz' for i in range(15,16)],
    sampleRatio=1.0,
    datasetName='Test',
    noiseKeV = Noise,
+    numberOfAugOps=numberOfAugOps,
+    normalize=flag_normalize,
+    noiseThreshold=NoiseThreshold
    )
 trainLoader = torch.utils.data.DataLoader(
    trainDataset,
@@ -133,11 +147,19 @@ if __name__ == "__main__":
        test(model, valLoader)
        scheduler.step(ValLosses[-1])
        print(f"Learning Rate: {optimizer.param_groups[0]['lr']}")
-        if epoch in [20, 50, 100, 200, 300, 500, 750, 1000]:
-            torch.save(model.state_dict(), f'Models/singlePhoton{modelVersion}_{Energy}_Noise{Noise}keV_E{epoch}_aug{numberOfAugOps}.pth')
+        if epoch in [20, 30, 50, 100, 200, 300, 500, 750, 1000]:
+            modelName = f'singlePhoton{modelVersion}_{Energy}_Noise{Noise}keV_E{epoch}_aug{numberOfAugOps}'
+            if flag_normalize == True:
+                modelName += '_normalized'
+            torch.save(model.state_dict(), f'Models/{modelName}.pth')
+            print(f"Saved model checkpoint: {modelName}.pth")

 test(model, testLoader)
-torch.save(model.state_dict(), f'Models/singlePhoton{modelVersion}_{Energy}_Noise{Noise}keV_aug{numberOfAugOps}.pth')
+modelName = f'singlePhoton{modelVersion}_{Energy}_Noise{Noise}keV_E{epoch}_aug{numberOfAugOps}'
+if flag_normalize == True:
+    modelName += '_normalized'
+torch.save(model.state_dict(), f'Models/{modelName}.pth')
+print(f"Saved final model checkpoint: {modelName}.pth")

 def plot_loss_curve(TrainLosses, ValLosses, TestLoss, modelVersion):
    import matplotlib.pyplot as plt
@@ -151,6 +173,8 @@ def plot_loss_curve(TrainLosses, ValLosses, TestLoss, modelVersion):
    plt.ylabel('MSE Loss')
    plt.legend()
    plt.grid()
-    plt.savefig(f'Results/loss_curve_singlePhoton_{modelVersion}.png', dpi=300)
-
+    plotName = f'loss_curve_singlePhoton_{modelVersion}'
+    if flag_normalize:
+        plotName += '_normalized'
+    plt.savefig(f'Results/{plotName}.png')
 plot_loss_curve(TrainLosses, ValLosses, TestLoss, modelVersion=modelVersion)
@@ -3,11 +3,13 @@ import torch
 import numpy as np

 class singlePhotonDataset(Dataset):
-    def __init__(self, sampleList, sampleRatio, datasetName, noiseKeV=0, numberOfAugOps=1):
+    def __init__(self, sampleList, sampleRatio, datasetName, noiseKeV=0, numberOfAugOps=1, normalize=False, noiseThreshold=0):
        self.sampleFileList = sampleList
        self.sampleRatio = sampleRatio
        self.datasetName = datasetName
        self.numberOfAugOps = numberOfAugOps
+        self.normalize = normalize
+        self.noiseThreshold = noiseThreshold
        self._init_coords()

        all_samples = []
@@ -45,12 +47,23 @@ class singlePhotonDataset(Dataset):
            print(f'Adding Gaussian noise with sigma = {noiseKeV} keV to samples in {self.datasetName} dataset')
            noise = np.random.normal(loc=0.0, scale=noiseKeV, size=self.samples.shape)
            self.samples = self.samples + noise
+        if self.noiseThreshold != 0 and noiseKeV != 0:
+            print(f'[{self.datasetName} dataset] \t Setting values below noise threshold ({self.noiseThreshold} keV) to zero')
+            self.samples[self.samples < self.noiseThreshold] = 0  ### set values below threshold to zero
        self.labels = np.concatenate(all_labels, axis=0)
        self.referencePoint = np.concatenate(all_ref_pts, axis=0) if all_ref_pts else None
+
+        if self.normalize:
+            print(f'Normalizing samples in {self.datasetName} dataset by total charge')
+            total_charge = np.sum(self.samples, axis=(1,2), keepdims=True)  # (B, 1, 1)
+            total_charge[total_charge == 0] = 1  # avoid division by zero
+            self.samples = self.samples / total_charge * 15.  # normalize each sample by its total charge
        
        if self.samples.shape[1] == 5: ### if sample size is 5x5, remove border pixels to make it 3x3
            self.samples = self.samples[:, 1:-1, 1:-1]  ### remove border pixels
            self.labels = self.labels - np.array([1, 1, 0, 0])  ### adjust labels accordingly
+            if self.referencePoint is not None:
+                self.referencePoint = self.referencePoint + np.array([1, 1])  ### adjust reference points accordingly
        self.samples = np.expand_dims(self.samples, axis=1)
        self.labels -= np.array([self.samples.shape[-1]/2., self.samples.shape[-1]/2., 0, 0])  ### B,D,3,3 adjust labels to be centered at (0,0)
        self.labels[:, 2] /= 650. ### normalize z coordinate (depth) to [0, 1]