Source code for tf_pwa.sweights

"""
sWeights implementation.

sWeights are used to statistically separate signal and background contributions
in a data sample when the distributions overlap.
"""

import numpy as np


[docs] def cal_sweights(pdfs, N=None): """ Calculate sWeights for a set of PDF components. Given normalized PDFs for each component (signal, background, etc.), computes weights such that the weighted sum of events gives the yield for each component. Parameters ---------- pdfs : list of array-like List of PDF values evaluated at each data point. Each element should be an array of shape (n_events,). Two calling conventions are supported: 1. Normalized PDFs with N parameter: .. code-block:: python >>> import numpy as np >>> from scipy.stats import norm >>> np.random.seed(42) >>> x_sig = np.random.normal(5.0, 0.5, 50) >>> x_bkg = np.random.uniform(0, 10, 100) >>> x = np.concatenate([x_sig, x_bkg]) >>> f_sig = norm.pdf(x, 5.0, 0.5) >>> f_bkg = np.ones_like(x) / 10.0 >>> sw = cal_sweights([f_sig, f_bkg], N=[50, 100]) >>> bool(np.isclose(np.sum(sw[0]), 50)) True 2. Unnormalized PDFs without N parameter: .. code-block:: python >>> sw2 = cal_sweights([f_sig * 50, f_bkg * 100]) >>> bool(np.allclose(np.sum(sw2[0]), 50, rtol=0.1)) True N : array-like, optional Expected yields for each component. If None, yields are estimated from the PDFs assuming the PDFs are already scaled by yields. Returns ------- sweights : ndarray Array of shape (n_components, n_events) containing sWeights for each component at each event. Raises ------ LinAlgError If the covariance matrix is singular and cannot be inverted. Notes ----- The sWeights are computed using Cowan's formula: .. math:: p(x) = \\sum_i N_i f_i(x) .. math:: V_{ij}^{-1} = \\sum_k \\frac{f_i(x_k) f_j(x_k)}{p(x_k)^2} .. math:: w_i(x_k) = \\frac{\\sum_j V_{ij} f_j(x_k)}{p(x_k)} where: - :math:`N_i` is the yield for component i - :math:`f_i(x)` is the normalized PDF for component i - :math:`p(x)` is the total PDF - :math:`V` is the covariance matrix of the yields Reference: `sPlot paper <https://doi.org/10.1016/j.nima.2005.08.106>`_ """ pdfs = np.stack(pdfs, axis=0) if N is None: # Estimate yields from unnormalized PDFs p = np.sum(pdfs, axis=0) # N = int c_i f_i(x) dx # = int c_i f_i(x)/p(x) * p(x) dx # = E [ c_i f_i(x) / p(x) ] ratio = np.sum(pdfs / p, axis=-1) N = p.shape[0] * ratio / np.sum(ratio) pdfs = pdfs / N[:, None] else: N = np.array(N) p = np.sum(N[:, None] * pdfs, axis=0) Vij_inv = np.sum(pdfs[:, None, :] * pdfs[None, :, :] / p**2, axis=-1) Vij = np.linalg.inv(Vij_inv) sw = np.matmul(Vij, pdfs) / p return sw