mirror of
https://github.com/slsdetectorgroup/aare.git
synced 2026-04-21 17:24:36 +02:00
8f8173feb6
To improve codebase quality and reduce human error, this PR introduces the pre-commit framework. This ensures that all code adheres to project standards before it is even committed, maintaining a consistent style and catching common mistakes early. Key Changes: - Code Formatting: Automated C++ formatting using clang-format (based on the project's .clang-format file). - Syntax Validation: Basic checks for file integrity and syntax. - Spell Check: Automated scanning for typos in source code and comments. - CMake Formatting: Standardization of CMakeLists.txt and .cmake configuration files. - GitHub Workflow: Added a CI action that validates every Pull Request against the pre-commit configuration to ensure compliance. The configuration includes a [ci] block to handle automated fixes within the PR. Currently, this is disabled. If we want the CI to automatically commit formatting fixes back to the PR branch, this can be toggled to true in .pre-commit-config.yaml. ```yaml ci: autofix_commit_msg: [pre-commit] auto fixes from pre-commit hooks autofix_prs: false autoupdate_schedule: monthly ``` The last large commit with the fit functions, for example, was not formatted according to the clang-format rules. This PR would allow to avoid similar mistakes in the future. Python fomat with `ruff` for tests and sanitiser for `.ipynb` notebooks can be added as well.
830 KiB
830 KiB
In [1]:
from aare import Interpolator import numpy as np import matplotlib.pyplot as plt
In [ ]:
def calculate_rosenblatt_lookup_table(etacube, etabins_x, etabins_y, etabins_e): """ Rosenblatt transform: transforms joint probability P_X,Y to uniform probability between [0,1] U U_0 = F_X(x) = P(x' < x) marginal CDF of X U_1 = F_Y|X(y|x) = P(y' < y | x) conditional CDF """ #marginal CDF for eta_x pEtaX = etacube.sum(axis=1) # marginal distribution eta_x CDF_EtaX = np.concatenate([np.zeros(shape=[1, len(etabins_e)]), np.cumsum(pEtaX, axis=0)], axis=0) CDF_EtaX = CDF_EtaX / CDF_EtaX[-1] #normalize # conditional CDF F_Y|X (a CDF in y direction for each etaX bin) CDF_EtaY_cond = np.cumsum(etacube, axis=1) np.divide(CDF_EtaY_cond, np.repeat(pEtaX[:, None, :], len(etabins_y), axis=1), out=np.zeros_like(CDF_EtaY_cond, dtype=np.float64), where=np.repeat(pEtaX[:, None, :], len(etabins_y), axis=1)!=0) CDF_EtaY_cond = np.concatenate([np.zeros(shape=[len(etabins_x), 1, len(etabins_e)]), CDF_EtaY_cond], axis=1) #CDF_EtaY_cond = CDF_EtaY_cond / np.where(CDF_EtaY_cond[:,-1:,:] == 0, 1, CDF_EtaY_cond[:,-1:,:]) #normalize # calculate (u,v) at the center of each original bin """ Xc_1d = 0.5 * (F_EtaX_edges[:-1] + F_EtaX_edges[1:]) # why the average between two values? Yc_2d = 0.5 * (F_EtaY_cond_edges[:, :-1] + F_EtaY_cond_edges[:, 1:]) X_tab = np.repeat(Xc_1d[:, None], nBinY, axis=1) # (nBinX, nBinY) Y_tab = Yc_2d # (nBinX, nBinY) """ X_tab = np.repeat(CDF_EtaX[:, None, :], etabins_y.shape[0], axis=1) #TODO: actually does not depend on eta_y at all Y_tab = CDF_EtaY_cond print("shape X_tab: ", X_tab.shape) print("shape Y_tab: ", Y_tab.shape) return X_tab, Y_tab
In [3]:
## joint conditional distribution rng = np.random.default_rng(42) n = 10_000_000 ## circular distribution x,y dependant theta = rng.uniform(0, 4*np.pi, size=n) r = rng.uniform(0.3, 0.4) #r = 0.3 + 0.4 * (theta / (4*np.pi)) # 螺旋半径从 0.3 到 0.7 x_raw = r * np.cos(theta) + 0.1 * rng.normal(size=n) y_raw = r * np.sin(theta) + 0.1 * rng.normal(size=n) # scale to [0, 1] x = (x_raw - x_raw.min()) / (x_raw.max() - x_raw.min()) y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min()) plt.scatter(x[:5000], y[:5000], s=1, alpha=0.5) plt.title("Original Distribution (x, y)") plt.gca().set_aspect('equal') plt.xlabel("x") plt.ylabel("y")
Out[3]:
Text(0, 0.5, 'y')
In [4]:
def plot(x, y, CDF_X, CDF_Y, u, v): #plot fig, axs = plt.subplots(1, 4, figsize=(15, 4)) axs[0].scatter(x[:5000], y[:5000], s=1, alpha=0.5) axs[0].set_title("Original (x, y)") axs[0].set_aspect('equal') axs[0].set_xlabel("x") axs[0].set_ylabel("y") axs[1].imshow(CDF_X[:,:,0], origin='lower', extent=[0,1,0,1], cmap='viridis') axs[1].set_title("normalized CDF_X (u = F_X(x))") axs[1].set_xlabel("y") axs[1].set_ylabel("P_X(x < x')") axs[2].imshow(CDF_Y[:,:,0], origin='lower', extent=[0,1,0,1], cmap='viridis') axs[2].set_title("normalized conditional CDF_Y (u = F_Y|X(y|x))") axs[2].set_xlabel("P_Y|X(y < y'|x)") axs[2].set_ylabel("x") axs[3].scatter(u[:5000], v[:5000], s=1, alpha=0.5, color='blue') axs[3].set_title("Mapped (u, v)") axs[3].set_aspect('equal') axs[3].set_xlim(0,1); axs[2].set_ylim(0,1) axs[3].set_xlabel("u") axs[3].set_ylabel("v") plt.tight_layout() plt.show()
In [5]:
#calculate joint probability density function nBins = 300 range = np.array([[x.min(),x.max()], [y.min(),y.max()]]) bin_width = (range[:,0] - range[:,1])/nBins H, xedges, yedges = np.histogram2d(x, y, bins=nBins, range=range) H = H.astype(int) H = np.repeat(H[:,:,None], 2, axis=2) #add energy axis print(H.shape) ebins = [0,1,2] #dummy energy bins
(300, 300, 2)
In [7]:
## check that interpolator transforms to uniform distribution interpolator = Interpolator(H, xedges[:-1], yedges[:-1], ebins[:-1]) #dummy energy bins CDF_X = interpolator.get_ietax() #lookup table x CDF_Y = interpolator.get_ietay() #lookup table y x_indices = ((x - range[0,0])/bin_width[0]).astype(int) y_indices = ((y - range[1,0])/bin_width[1]).astype(int) u = CDF_X[x_indices, y_indices, 0] #transformed to uniform coordinates v = CDF_Y[x_indices, y_indices, 0] #transformed to uniform coordinates plot(x,y,CDF_X,CDF_Y, u,v)
In [9]:
x_indices = ((x - range[0,0])/bin_width[0]).astype(int) y_indices = ((y - range[1,0])/bin_width[1]).astype(int) CDF_X, CDF_Y = calculate_rosenblatt_lookup_table(H, xedges[:-1], yedges[:-1],ebins[:-1]) u = CDF_X[x_indices, y_indices, 0] #transformed to uniform coordinates v = CDF_Y[x_indices, y_indices, 0] #transformed to uniform coordinates plot(x,y,CDF_X,CDF_Y, u,v)
shape X_tab: (301, 300, 2) shape Y_tab: (300, 301, 2)
In [6]:
## check that cpp rosenblatt transforms to uniform distribution interpolator = Interpolator(xedges[:-1], yedges[:-1], ebins[:-1]) #dummy energy bins interpolator.rosenblatttransform(H) CDF_X = interpolator.get_ietax() #lookup table x CDF_Y = interpolator.get_ietay() #lookup table y x_indices = ((x - range[0,0])/bin_width[0]).astype(int) y_indices = ((y - range[1,0])/bin_width[1]).astype(int) u = CDF_X[x_indices, y_indices, 0] #transformed to uniform coordinates v = CDF_Y[x_indices, y_indices, 0] #transformed to uniform coordinates plot(x,y,CDF_X,CDF_Y, u,v)
worked: could normalize CDF eta_x construction worked calculation of conditional CDF etay worked assignment m_ietay worked assignment m_ietax worked
In [ ]: