Source code for tf_pwa.histogram

import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import UnivariateSpline, interp1d



[docs]
def plot_hist(binning, count, ax=plt, **kwargs):
    n = count.shape[0]
    a = np.zeros((n + 2,))
    b = np.zeros((n + 2,))
    a[:-1] = binning
    a[-1] = binning[-1] + (binning[-1] - binning[-2]) / 20
    b[1:-1] = count
    return ax.step(a, b, **kwargs)




[docs]
def interp_hist(binning, y, num=1000, kind="UnivariateSpline"):
    """interpolate data from hostgram into a line"""
    x = (binning[:-1] + binning[1:]) / 2
    if kind == "UnivariateSpline":
        func = UnivariateSpline(x, y, s=2)
    else:
        func = interp1d(x, y, kind=kind, fill_value="extrapolate")
    x_new = np.linspace(
        np.min(binning), np.max(binning), num=num, endpoint=True
    )
    y_new = func(x_new)
    return x_new, y_new




[docs]
def gauss(x):
    return np.exp(-(x**2) / 2) / np.sqrt(2 * np.pi)




[docs]
def cauchy(x):
    return 1 / (x**2 + 1) / np.pi




[docs]
def epanechnikov(x):
    return np.where((x < 1) & (x > -1), (1 - x**2) / 4 * 3, 0)




[docs]
def uniform(x):
    return np.where((x < 1) & (x > -1), 0.5, 0)




[docs]
def weighted_kde(m, w, bw, kind="gauss"):
    n = w.shape[0]

    kind_map = {
        "gauss": gauss,
        "cauchy": cauchy,
        "epanechnikov": epanechnikov,
        "uniform": uniform,
    }
    if isinstance(kind, str):
        kernel = kind_map[kind]
    else:
        kernel = kind

    def f(x):
        ret = []
        for i in x:
            y = (i - m) / bw
            tmp = w * kernel(y)
            ret.append(np.sum(tmp))
        return np.array(ret)
        # ret = np.zeros_like(x)
        # for i in range(n):
        #    y = (x - m[i]) / bw[i]
        #    tmp = w[i] * kernel(y)
        #    ret += tmp
        # return ret

    return f




[docs]
class Hist1D:
    def __init__(self, binning, count, error=None):
        if error is None:
            error = np.sqrt(count)
        self.binning = binning
        self.count = count
        self.error = error
        self._cached_color = None


[docs]
    def draw(self, ax=plt, **kwargs):
        draw_type = kwargs.pop("type", "hist")
        if "+" in draw_type:
            ret = []
            for i in draw_type.split("+"):
                ret.append(self.draw(ax=ax, type=i, **kwargs))
        elif draw_type in [
            "hist",
            "bar",
            "kde",
            "error",
            "line",
            "fill",
            "stepfill",
        ]:
            draw_fun = getattr(self, "draw_" + draw_type)
            ret = draw_fun(ax=ax, **kwargs)
        else:
            raise NotImplementedError()
        return ret



[docs]
    def draw_hist(self, ax=plt, **kwargs):
        a = plot_hist(self.binning, self.count, ax=ax, **kwargs)
        self._cached_color = a[0].get_color()
        return a



[docs]
    def draw_bar(self, ax=plt, **kwargs):
        return ax.bar(
            self.bin_center,
            self.count,
            width=self.bin_width,
            **kwargs,
        )



[docs]
    def draw_kde(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs):
        color = kwargs.pop("color", self._cached_color)
        m = self.bin_center
        bw = self.bin_width * bin_scale
        kde = weighted_kde(m, self.count, bw, kind)
        x = np.linspace(
            self.binning[0], self.binning[-1], self.count.shape[0] * 10
        )
        if "fmt" in kwargs:
            fmt = kwargs.pop("fmt")
            return ax.plot(x, kde(x), fmt, color=color, **kwargs)
        else:
            return ax.plot(x, kde(x), color=color, **kwargs)



[docs]
    def draw_fill(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs):
        color = kwargs.pop("color", self._cached_color)
        m = self.bin_center
        bw = self.bin_width * bin_scale
        kde = weighted_kde(m, self.count, bw, kind)
        x = np.linspace(
            self.binning[0], self.binning[-1], self.count.shape[0] * 10
        )
        return ax.fill_between(
            x, kde(x), np.zeros_like(x), color=color, **kwargs
        )



[docs]
    def draw_stepfill(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs):
        color = kwargs.pop("color", self._cached_color)
        x = np.repeat(self.binning, 2)
        y = np.concatenate([[0], np.repeat(self.count, 2), [0]])
        return ax.fill_between(x, y, np.zeros_like(x), color=color, **kwargs)



[docs]
    def draw_pull(self, ax=plt, **kwargs):
        with np.errstate(divide="ignore", invalid="ignore"):
            y_error = np.where(self.error == 0, 0, self.count / self.error)
        return ax.bar(
            self.bin_center,
            y_error,
            width=self.bin_width,
            **kwargs,
        )


    def chi2(self):
        with np.errstate(divide="ignore", invalid="ignore"):
            y_error = np.where(self.error == 0, 0, self.count / self.error)
        print(y_error)
        print((y_error**2).astype(np.int))
        print(np.sum((y_error**2).astype(np.int)))
        return np.sum(y_error**2)


[docs]
    def draw_line(self, ax=plt, num=1000, kind="UnivariateSpline", **kwargs):
        x_new, y_new = interp_hist(self.binning, self.count, num, kind)
        return ax.plot(x_new, y_new, **kwargs)



[docs]
    def draw_error(self, ax=plt, fmt="none", **kwargs):
        color = kwargs.pop("color", self._cached_color)
        return ax.errorbar(
            self.bin_center,
            y=self.count,
            xerr=self.bin_width / 2,
            yerr=self.error,
            fmt=fmt,
            color=color,
            **kwargs,
        )


    @property
    def bin_center(self):
        return (self.binning[:-1] + self.binning[1:]) / 2

    @property
    def bin_width(self):
        return self.binning[1:] - self.binning[:-1]


[docs]
    def get_bin_weight(self):
        return (self.binning[-1] - self.binning[0]) / (
            self.binning.shape[0] - 1
        )


    def __mul__(self, other):
        if isinstance(other, (float, int)):
            return Hist1D(self.binning, self.count * other, self.error * other)
        raise NotImplementedError

    __rmul__ = __mul__

    def __add__(self, other):
        assert np.allclose(
            self.binning, other.binning
        ), "need to be the same binning"
        return Hist1D(
            self.binning,
            self.count + other.count,
            np.sqrt(self.error**2 + other.error**2),
        )

    def __sub__(self, other):
        assert np.allclose(
            self.binning, other.binning
        ), "need to be the same binning"
        return Hist1D(
            self.binning,
            self.count - other.count,
            np.sqrt(self.error**2 + other.error**2),
        )


[docs]
    @staticmethod
    def histogram(m, *args, weights=None, mask_error=np.inf, **kwargs):
        if weights is None:
            count, binning = np.histogram(m, *args, **kwargs)
            count2, _ = np.histogram(m, *args, **kwargs)
            mask_count = count
        else:
            weights = np.asarray(weights)
            count, binning = np.histogram(m, *args, weights=weights, **kwargs)
            count2, _ = np.histogram(m, *args, weights=weights**2, **kwargs)
            mask_count, _ = np.histogram(m, *args, **kwargs)
        count2 = np.where(mask_count == 0, mask_error, count2)
        return Hist1D(binning, count, np.sqrt(count2))



[docs]
    def chi2(self):
        cut = ~np.isinf(self.error)
        return np.sum((self.count[cut] / self.error[cut]) ** 2)



[docs]
    def ndf(self):
        return np.sum(~np.isinf(self.error))



[docs]
    def scale_to(self, other):
        scale_factor = other.get_count() / self.get_count()
        bin_width_factor = np.mean(other.bin_width) / np.mean(self.bin_width)
        scale = scale_factor * bin_width_factor
        self.count *= scale
        self.error *= scale
        return scale



[docs]
    def get_count(self):
        return np.sum(self.count)





[docs]
class WeightedData(Hist1D):
    def __init__(self, m, *args, weights=None, **kwargs):
        if weights is None:
            weights = np.ones_like(m)
        count, binning = np.histogram(m, *args, weights=weights, **kwargs)
        count2, _ = np.histogram(m, *args, weights=weights**2, **kwargs)
        self.value = m
        self.weights = weights
        super().__init__(binning, count, np.sqrt(count2))


[docs]
    def draw_kde(self, ax=plt, kind="gauss", bin_scale=1.0, **kwargs):
        color = kwargs.pop("color", self._cached_color)
        bw = np.mean(self.bin_width) * bin_scale * np.ones_like(self.value)
        kde = weighted_kde(self.value, self.weights, bw, kind)
        x = np.linspace(
            self.binning[0], self.binning[-1], self.count.shape[0] * 10
        )
        return ax.plot(x, kde(x), color=color, **kwargs)


    def __add__(self, other):
        assert np.allclose(
            self.binning, other.binning
        ), "need to be the same binning"
        ret = WeightedData(
            np.concatenate([self.value, other.value]),
            weights=np.concatenate([self.weights, other.weights]),
        )
        ret.binning = self.binning
        ret.count = self.count + other.count
        ret.error = np.sqrt(self.error**2 + other.error**2)
        return ret

    def __mul__(self, other):
        if isinstance(other, (float, int)):
            ret = WeightedData(self.value, weights=self.weights * other)
            ret.binning = self.binning
            ret.error = self.error * other
            ret.count = self.count * other
            return ret
        raise NotImplementedError


[docs]
    def scale_to(self, other):
        scale = super().scale_to(other)
        self.weights *= scale
        return scale