Source code for tf_pwa.histogram

import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import UnivariateSpline, interp1d


[docs] def plot_hist(binning, count, ax=plt, **kwargs): n = count.shape[0] a = np.zeros((n + 2,)) b = np.zeros((n + 2,)) a[:-1] = binning a[-1] = binning[-1] + (binning[-1] - binning[-2]) / 20 b[1:-1] = count return ax.step(a, b, **kwargs)
[docs] def interp_hist(binning, y, num=1000, kind="UnivariateSpline"): """interpolate data from hostgram into a line""" x = (binning[:-1] + binning[1:]) / 2 if kind == "UnivariateSpline": func = UnivariateSpline(x, y, s=2) else: func = interp1d(x, y, kind=kind, fill_value="extrapolate") x_new = np.linspace( np.min(binning), np.max(binning), num=num, endpoint=True ) y_new = func(x_new) return x_new, y_new
[docs] def gauss(x): return np.exp(-(x**2) / 2) / np.sqrt(2 * np.pi)
[docs] def cauchy(x): return 1 / (x**2 + 1) / np.pi
[docs] def epanechnikov(x): return np.where((x < 1) & (x > -1), (1 - x**2) / 4 * 3, 0)
[docs] def uniform(x): return np.where((x < 1) & (x > -1), 0.5, 0)
[docs] def weighted_kde(m, w, bw, kind="gauss"): n = w.shape[0] kind_map = { "gauss": gauss, "cauchy": cauchy, "epanechnikov": epanechnikov, "uniform": uniform, } if isinstance(kind, str): kernel = kind_map[kind] else: kernel = kind def f(x): ret = [] for i in x: y = (i - m) / bw tmp = w * kernel(y) ret.append(np.sum(tmp)) return np.array(ret) # ret = np.zeros_like(x) # for i in range(n): # y = (x - m[i]) / bw[i] # tmp = w[i] * kernel(y) # ret += tmp # return ret return f
[docs] class Hist1D: def __init__(self, binning, count, error=None): if error is None: error = np.sqrt(count) self.binning = binning self.count = count self.error = error self._cached_color = None
[docs] def draw(self, ax=plt, **kwargs): draw_type = kwargs.pop("type", "hist") if "+" in draw_type: ret = [] for i in draw_type.split("+"): ret.append(self.draw(ax=ax, type=i, **kwargs)) elif draw_type in [ "hist", "bar", "kde", "error", "line", "fill", "stepfill", ]: draw_fun = getattr(self, "draw_" + draw_type) ret = draw_fun(ax=ax, **kwargs) else: raise NotImplementedError() return ret
[docs] def draw_hist(self, ax=plt, **kwargs): a = plot_hist(self.binning, self.count, ax=ax, **kwargs) self._cached_color = a[0].get_color() return a
[docs] def draw_bar(self, ax=plt, **kwargs): return ax.bar( self.bin_center, self.count, width=self.bin_width, **kwargs, )
[docs] def draw_kde(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs): color = kwargs.pop("color", self._cached_color) m = self.bin_center bw = self.bin_width * bin_scale kde = weighted_kde(m, self.count, bw, kind) x = np.linspace( self.binning[0], self.binning[-1], self.count.shape[0] * 10 ) if "fmt" in kwargs: fmt = kwargs.pop("fmt") return ax.plot(x, kde(x), fmt, color=color, **kwargs) else: return ax.plot(x, kde(x), color=color, **kwargs)
[docs] def draw_fill(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs): color = kwargs.pop("color", self._cached_color) m = self.bin_center bw = self.bin_width * bin_scale kde = weighted_kde(m, self.count, bw, kind) x = np.linspace( self.binning[0], self.binning[-1], self.count.shape[0] * 10 ) return ax.fill_between( x, kde(x), np.zeros_like(x), color=color, **kwargs )
[docs] def draw_stepfill(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs): color = kwargs.pop("color", self._cached_color) x = np.repeat(self.binning, 2) y = np.concatenate([[0], np.repeat(self.count, 2), [0]]) return ax.fill_between(x, y, np.zeros_like(x), color=color, **kwargs)
[docs] def draw_pull(self, ax=plt, **kwargs): with np.errstate(divide="ignore", invalid="ignore"): y_error = np.where(self.error == 0, 0, self.count / self.error) return ax.bar( self.bin_center, y_error, width=self.bin_width, **kwargs, )
def chi2(self): with np.errstate(divide="ignore", invalid="ignore"): y_error = np.where(self.error == 0, 0, self.count / self.error) print(y_error) print((y_error**2).astype(np.int)) print(np.sum((y_error**2).astype(np.int))) return np.sum(y_error**2)
[docs] def draw_line(self, ax=plt, num=1000, kind="UnivariateSpline", **kwargs): x_new, y_new = interp_hist(self.binning, self.count, num, kind) return ax.plot(x_new, y_new, **kwargs)
[docs] def draw_error(self, ax=plt, fmt="none", **kwargs): color = kwargs.pop("color", self._cached_color) return ax.errorbar( self.bin_center, y=self.count, xerr=self.bin_width / 2, yerr=self.error, fmt=fmt, color=color, **kwargs, )
@property def bin_center(self): return (self.binning[:-1] + self.binning[1:]) / 2 @property def bin_width(self): return self.binning[1:] - self.binning[:-1]
[docs] def get_bin_weight(self): return (self.binning[-1] - self.binning[0]) / ( self.binning.shape[0] - 1 )
def __mul__(self, other): if isinstance(other, (float, int)): return Hist1D(self.binning, self.count * other, self.error * other) raise NotImplementedError __rmul__ = __mul__ def __add__(self, other): assert np.allclose( self.binning, other.binning ), "need to be the same binning" return Hist1D( self.binning, self.count + other.count, np.sqrt(self.error**2 + other.error**2), ) def __sub__(self, other): assert np.allclose( self.binning, other.binning ), "need to be the same binning" return Hist1D( self.binning, self.count - other.count, np.sqrt(self.error**2 + other.error**2), )
[docs] @staticmethod def histogram(m, *args, weights=None, mask_error=np.inf, **kwargs): if weights is None: count, binning = np.histogram(m, *args, **kwargs) count2, _ = np.histogram(m, *args, **kwargs) mask_count = count else: weights = np.asarray(weights) count, binning = np.histogram(m, *args, weights=weights, **kwargs) count2, _ = np.histogram(m, *args, weights=weights**2, **kwargs) mask_count, _ = np.histogram(m, *args, **kwargs) count2 = np.where(mask_count == 0, mask_error, count2) return Hist1D(binning, count, np.sqrt(count2))
[docs] def chi2(self): cut = ~np.isinf(self.error) return np.sum((self.count[cut] / self.error[cut]) ** 2)
[docs] def ndf(self): return np.sum(~np.isinf(self.error))
[docs] def scale_to(self, other): scale_factor = other.get_count() / self.get_count() bin_width_factor = np.mean(other.bin_width) / np.mean(self.bin_width) scale = scale_factor * bin_width_factor self.count *= scale self.error *= scale return scale
[docs] def get_count(self): return np.sum(self.count)
[docs] class WeightedData(Hist1D): def __init__(self, m, *args, weights=None, **kwargs): if weights is None: weights = np.ones_like(m) count, binning = np.histogram(m, *args, weights=weights, **kwargs) count2, _ = np.histogram(m, *args, weights=weights**2, **kwargs) self.value = m self.weights = weights super().__init__(binning, count, np.sqrt(count2))
[docs] def draw_kde(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs): color = kwargs.pop("color", self._cached_color) bw = np.mean(self.bin_width) * bin_scale * np.ones_like(self.value) kde = weighted_kde(self.value, self.weights, bw, kind) x = np.linspace( self.binning[0], self.binning[-1], self.count.shape[0] * 10 ) return ax.plot(x, kde(x), color=color, **kwargs)
def __add__(self, other): assert np.allclose( self.binning, other.binning ), "need to be the same binning" ret = WeightedData( np.concatenate([self.value, other.value]), weights=np.concatenate([self.weights, other.weights]), ) ret.binning = self.binning ret.count = self.count + other.count ret.error = np.sqrt(self.error**2 + other.error**2) return ret def __mul__(self, other): if isinstance(other, (float, int)): ret = WeightedData(self.value, weights=self.weights * other) ret.binning = self.binning ret.error = self.error * other ret.count = self.count * other return ret raise NotImplementedError
[docs] def scale_to(self, other): scale = super().scale_to(other) self.weights *= scale return scale