From 73bbcb404b255361f560d86927eb660b80456eb9 Mon Sep 17 00:00:00 2001 From: "xiangyu.xie" Date: Wed, 6 May 2026 16:33:36 +0200 Subject: [PATCH] Add normalization and noise cut options --- src/datasets.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/datasets.py b/src/datasets.py index e79492d..5acc0b7 100644 --- a/src/datasets.py +++ b/src/datasets.py @@ -114,12 +114,13 @@ class singlePhotonDataset(Dataset): return self.effectiveLength class doublePhotonDataset(Dataset): - def __init__(self, sampleList, sampleRatio, datasetName, reuselFactor=1, noiseKeV=0, nSize=6): + def __init__(self, sampleList, sampleRatio, datasetName, reuselFactor=1, noiseKeV=0, nSize=6, noiseThreshold=0, normalize=False): self.sampleFileList = sampleList self.sampleRatio = sampleRatio self.datasetName = datasetName self.noiseKeV = noiseKeV self.nSize = nSize + self.normalize = normalize self._init_coords() all_samples = [] @@ -143,6 +144,14 @@ class doublePhotonDataset(Dataset): #### add noise only to pixels that not zero noise[self.samples == 0] = 0 self.samples = self.samples + noise + if noiseThreshold != 0: + print(f'[{self.datasetName} dataset] \t Setting values below noise threshold ({noiseThreshold} keV) to zero') + self.samples[self.samples < noiseThreshold] = 0 ### set values below threshold to zero + if self.normalize: + print(f'Normalizing samples in {self.datasetName} dataset by total charge') + total_charge = np.sum(self.samples, axis=(1,2), keepdims=True) # (B, 1, 1) + total_charge[total_charge == 0] = 1 # avoid division by zero + self.samples = self.samples / total_charge * 30. # rescale each sample so its pixel sum becomes 30; zero-sum samples are divided by 1 instead self.labels = np.concatenate(all_labels, axis=0) ### total number of samples