Source code for tf_pwa.model.model

"""
This module provides methods to calculate NLL(Negative Log-Likelihood) as well as its derivatives.
"""

import math
import warnings
from itertools import repeat as _loop_generator

import numpy as np

from ..config import create_config, get_config
from ..data import (
    EvalLazy,
    data_merge,
    data_replace,
    data_shape,
    data_split,
    split_generator,
)
from ..tensorflow_wrapper import tf
from ..utils import time_print
from ..variable import Variable



[docs]
def get_shape(x):
    if hasattr(x, "shape"):
        return x.shape
    return ()



def _resolution_shape(x):
    shape = get_shape(x)
    if shape:
        return shape[-1]
    return 1


set_nll_model, get_nll_model, register_nll_model = create_config()


def _batch_sum(f, data_i, weight_i, trans, resolution_size, args, kwargs):
    weight_shape = (-1, min(_resolution_shape(weight_i), resolution_size))
    part_y = f(data_i, *args, **kwargs)
    weight_i = tf.cast(weight_i, part_y.dtype)
    rw = tf.reshape(weight_i, weight_shape)
    part_y = weight_i * part_y
    part_y = tf.reshape(part_y, (-1, resolution_size))
    part_y = tf.reduce_sum(part_y, axis=-1)
    event_w = tf.reduce_sum(rw, axis=-1)
    dom_event_w = tf.where(event_w == 0, tf.ones_like(event_w), event_w)
    part_y = part_y / dom_event_w  # event_w
    part_y = trans(part_y)
    y_i = tf.reduce_sum(event_w * part_y)
    return y_i



[docs]
def sum_gradient(
    f,
    data,
    var,
    weight=1.0,
    trans=tf.identity,
    resolution_size=1,
    args=(),
    kwargs=None,
):
    """
    NLL is the sum of trans(f(data)):math:`*`weight; gradient is the derivatives for each variable in ``var``.

    :param f: Function. The amplitude PDF.
    :param data: Data array
    :param var: List of strings. Names of the trainable variables in the PDF.
    :param weight: Weight factor for each data point. It's either a real number or an array of the same shape with ``data``.
    :param trans: Function. Transformation of ``data`` before multiplied by ``weight``.
    :param kwargs: Further arguments for ``f``.
    :return: Real number NLL, list gradient
    """
    kwargs = kwargs if kwargs is not None else {}
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    ys = []
    gs = []
    for data_i, weight_i in zip(data, weight):
        with tf.GradientTape() as tape:
            y_i = _batch_sum(
                f, data_i, weight_i, trans, resolution_size, args, kwargs
            )
        g_i = tape.gradient(y_i, var, unconnected_gradients="zero")
        ys.append(y_i)
        gs.append(g_i)
    nll = sum(ys)
    # print("ll0:,", nll)
    g = list(map(sum, zip(*gs)))
    return nll, g




[docs]
def sum_hessian(
    f,
    data,
    var,
    weight=1.0,
    trans=tf.identity,
    resolution_size=1,
    args=(),
    kwargs=None,
):
    """
    The parameters are the same with ``sum_gradient()``, but this function will return hessian as well,
    which is the matrix of the second-order derivative.

    :return: Real number NLL, list gradient, 2-D list hessian
    """
    kwargs = kwargs if kwargs is not None else {}
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    y_s = []
    g_s = []
    h_s = []
    for data_i, weight_i in zip(data, weight):
        with tf.GradientTape(persistent=True) as tape0:
            with tf.GradientTape() as tape:
                y_i = _batch_sum(
                    f, data_i, weight_i, trans, resolution_size, args, kwargs
                )
            g_i = tape.gradient(y_i, var, unconnected_gradients="zero")
        h_s_i = []
        # h_s_i  = tape0.jacobian(tf.stack(g_i), var, unconnected_gradients="zero")
        for gi in g_i:
            # 2nd order derivative
            h_s_i.append(tape0.gradient(gi, var, unconnected_gradients="zero"))
        del tape0
        y_s.append(y_i)
        g_s.append(g_i)
        h_s.append(h_s_i)
    nll = tf.reduce_sum(y_s)
    # print("ll: ", nll)
    g = tf.reduce_sum(g_s, axis=0)
    h = tf.reduce_sum(h_s, axis=0)
    # h = [[sum(j) for j in zip(*i)] for i in h_s]
    return nll, g, h




[docs]
def sum_grad_hessp(
    f,
    p,
    data,
    var,
    weight=1.0,
    trans=tf.identity,
    resolution_size=1,
    args=(),
    kwargs=None,
):
    """
    The parameters are the same with ``sum_gradient()``, but this function will return hessian as well,
    which is the matrix of the second-order derivative.

    :return: Real number NLL, list gradient, 2-D list hessian
    """
    kwargs = kwargs if kwargs is not None else {}
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    y_s = []
    g_s = []
    h_s = []
    from tensorflow.python.eager import forwardprop

    for data_i, weight_i in zip(data, weight):
        with forwardprop.ForwardAccumulator(var, list(p)) as acc:
            with tf.GradientTape() as tape:
                y_i = _batch_sum(
                    f, data_i, weight_i, trans, resolution_size, args, kwargs
                )
            g_i = tape.gradient(y_i, var, unconnected_gradients="zero")
        hessp = acc.jvp(g_i, unconnected_gradients="zero")
        y_s.append(y_i)
        g_s.append(g_i)
        h_s.append(hessp)
        # print(hessp)
    nll = tf.reduce_sum(y_s)
    # print("ll: ", nll)
    g = tf.reduce_sum(g_s, axis=0)
    h = tf.reduce_sum(h_s, axis=0)
    # print(h)
    # h = [[sum(j) for j in zip(*i)] for i in h_s]
    return nll, g, h




[docs]
def sum_gradient_new(
    amp,
    data,
    mcdata,
    weight,
    mcweight,
    var,
    trans=tf.math.log,
    w_flatmc=lambda: 0,
    args=(),
    kwargs=None,
):
    """
    NLL is the sum of trans(f(data)):math:`*`weight; gradient is the derivatives for each variable in ``var``.

    :param f: Function. The amplitude PDF.
    :param data: Data array
    :param var: List of strings. Names of the trainable variables in the PDF.
    :param weight: Weight factor for each data point. It's either a real number or an array of the same shape with ``data``.
    :param trans: Function. Transformation of ``data`` before multiplied by ``weight``.
    :param kwargs: Further arguments for ``f``.
    :return: Real number NLL, list gradient
    """
    kwargs = kwargs if kwargs is not None else {}
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    ys = []
    # gs = []
    ymc = []
    with tf.GradientTape() as tape:
        for mcdata_i, mcweight_i in zip(mcdata, mcweight):
            part_y = amp(mcdata_i, *args, **kwargs)
            y_i = tf.reduce_sum(tf.cast(mcweight_i, part_y.dtype) * part_y)
            ymc.append(y_i)
        int_dt = tf.reduce_sum(ymc)
        for data_i, weight_i in zip(data, weight):
            wmc = w_flatmc()
            part_y = (amp(data_i, *args, **kwargs) / int_dt + wmc) / (1 + wmc)
            part_y = trans(part_y)
            y_i = tf.reduce_sum(tf.cast(weight_i, part_y.dtype) * part_y)
            ys.append(y_i)
        nll = -tf.reduce_sum(ys)
    g = tape.gradient(nll, var, unconnected_gradients="zero")
    return nll, g




[docs]
def sum_hessian_new(
    amp,
    data,
    mcdata,
    weight,
    mcweight,
    var,
    trans=tf.math.log,
    w_flatmc=lambda: 0,
    args=(),
    kwargs=None,
):
    """
    The parameters are the same with ``sum_gradient()``, but this function will return hessian as well,
    which is the matrix of the second-order derivative.

    :return: Real number NLL, list gradient, 2-D list hessian
    """
    kwargs = kwargs if kwargs is not None else {}
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    ys = []
    ymc = []
    with tf.GradientTape(persistent=True) as tape0:
        with tf.GradientTape() as tape:
            for mcdata_i, mcweight_i in zip(mcdata, mcweight):
                part_y = amp(mcdata_i, *args, **kwargs)
                y_i = tf.reduce_sum(tf.cast(mcweight_i, part_y.dtype) * part_y)
                ymc.append(y_i)
            int_dt = tf.reduce_sum(ymc)
            for data_i, weight_i in zip(data, weight):
                wmc = w_flatmc()
                part_y = (amp(data_i, *args, **kwargs) / int_dt + wmc) / (
                    1 + wmc
                )
                part_y = trans(part_y)
                y_i = tf.reduce_sum(tf.cast(weight_i, part_y.dtype) * part_y)
                ys.append(y_i)
            nll = -tf.reduce_sum(ys)
        gradient = tape.gradient(nll, var, unconnected_gradients="zero")
    hessian = []
    for gi in gradient:
        # 2nd order derivative
        hessian.append(tape0.gradient(gi, var, unconnected_gradients="zero"))
    del tape0
    return nll, gradient, hessian




[docs]
def clip_log(x, _epsilon=1e-6):
    """clip log to allowed large value"""
    x_cut = tf.where(x > _epsilon, x, tf.ones_like(x) * _epsilon)
    b_t = tf.math.log(x_cut)
    delta_x = x - _epsilon
    b_f = (
        np.log(_epsilon) + delta_x / _epsilon - (delta_x / _epsilon) ** 2 / 2.0
    )
    # print("$$$", tf.where(x < _epsilon).numpy().tolist())
    return tf.where(x > _epsilon, b_t, b_f)




[docs]
class BaseModel(object):
    """
    This class implements methods to calculate NLL as well as its derivatives for an amplitude model. It may include
    data for both signal and background.

    :param signal: Signal Model
    """

    def __init__(self, signal, resolution_size=1, extended=False):
        self.signal = EvalLazy(signal)
        self.Amp = signal
        self.vm = signal.vm
        self.resolution_size = resolution_size
        self.extended = extended
        if extended:
            self.int_f = lambda x: x
            self.int_g = lambda x: 1
            self.int_h = lambda x: 0
        else:
            self.int_f = tf.math.log
            self.int_g = lambda x: 1 / x
            self.int_h = lambda x: -1 / x**2


[docs]
    def sum_resolution(self, w):
        w = tf.reshape(w, (-1, self.resolution_size))
        return tf.reduce_sum(w, axis=-1)



[docs]
    def nll(self, data, mcdata):
        """Negative log-Likelihood"""
        weight = data.get("weight", tf.ones((data_shape(data),)))
        sw = tf.reduce_sum(weight)
        rw = tf.reshape(weight, (-1, self.resolution_size))
        amp_s2 = self.signal(data) * weight
        amp_s2 = self.sum_resolution(amp_s2)
        weight = tf.reduce_sum(rw, axis=-1)
        dom_weight = tf.where(
            weight == 0, tf.constant(1.0, dtype=weight.dtype), weight
        )
        ln_data = clip_log(amp_s2 / dom_weight)
        mc_weight = mcdata.get("weight", tf.ones((data_shape(mcdata),)))
        int_mc = tf.reduce_sum(
            mc_weight * self.signal(mcdata)
        ) / tf.reduce_sum(mc_weight)
        alpha = sw / tf.reduce_sum(weight**2)
        return -alpha * (
            tf.reduce_sum(weight * ln_data) - sw * self.int_f(int_mc)
        )



[docs]
    def sum_nll_grad_bacth(self, data):
        weight = [i.get("weight", tf.ones((data_shape(i),))) for i in data]
        ln_data, g_ln_data = sum_gradient(
            self.signal,
            data,
            self.signal.trainable_variables,
            weight=weight,
            trans=clip_log,
            resolution_size=self.resolution_size,
        )
        return -ln_data, [-i for i in g_ln_data]



[docs]
    def sum_log_integral_grad_batch(self, mcdata, ndata):
        mc_weight = [i["weight"] for i in mcdata]
        int_mc, g_int_mc = sum_gradient(
            self.signal,
            mcdata,
            self.signal.trainable_variables,
            weight=mc_weight,
        )
        return self.int_f(int_mc) * ndata, [
            ndata * self.int_g(int_mc) * i for i in g_int_mc
        ]



[docs]
    def nll_grad(self, data, mcdata, batch=65000):
        weight = data.get("weight", tf.ones((data_shape(data),)))
        weight_rw = tf.reduce_sum(
            tf.reshape(weight, (-1, self.resolution_size)), axis=-1
        )
        alpha = tf.reduce_sum(weight_rw) / tf.reduce_sum(weight_rw**2)
        weight = alpha * weight
        assert (
            batch % self.resolution_size == 0
        ), "batch size should be the multiple of resolution_size"
        ln_data, g_ln_data = sum_gradient(
            self.signal,
            split_generator(data, batch),
            self.signal.trainable_variables,
            weight=split_generator(weight, batch),
            trans=clip_log,
            resolution_size=self.resolution_size,
        )
        mc_weight = mcdata.get("weight", tf.ones((data_shape(mcdata),)))
        mc_weight = mc_weight / tf.reduce_sum(mc_weight)
        int_mc, g_int_mc = sum_gradient(
            self.signal,
            split_generator(mcdata, batch),
            self.signal.trainable_variables,
            weight=data_split(mc_weight, batch),
        )

        sw = tf.cast(tf.reduce_sum(weight), ln_data.dtype)

        g = list(
            map(
                lambda x: -x[0] + sw * x[1] * self.int_g(int_mc),
                zip(g_ln_data, g_int_mc),
            )
        )
        nll = -ln_data + sw * self.int_f(int_mc)
        return nll, g


    @property
    def trainable_variables(self):
        return self.signal.trainable_variables


[docs]
    def nll_grad_batch(self, data, mcdata, weight, mc_weight):
        """
        ``self.nll_grad()`` is replaced by this one???

        .. math::
          - \\frac{\\partial \\ln L}{\\partial \\theta_k } =
            -\\sum_{x_i \\in data } w_i \\frac{\\partial}{\\partial \\theta_k} \\ln f(x_i;\\theta_k)
            + (\\sum w_j ) \\left( \\frac{ \\partial }{\\partial \\theta_k} \\sum_{x_i \\in mc} f(x_i;\\theta_k) \\right)
              \\frac{1}{ \\sum_{x_i \\in mc} f(x_i;\\theta_k) }

        :param data:
        :param mcdata:
        :param weight:
        :param mc_weight:
        :return:
        """
        weight = list(weight)
        sw = tf.reduce_sum([tf.reduce_sum(i) for i in weight])
        ln_data, g_ln_data = sum_gradient(
            self.signal,
            data,
            self.signal.trainable_variables,
            weight=weight,
            trans=clip_log,
            resolution_size=self.resolution_size,
        )
        int_mc, g_int_mc = sum_gradient(
            self.signal,
            mcdata,
            self.signal.trainable_variables,
            weight=mc_weight,
        )

        sw = tf.cast(sw, ln_data.dtype)

        g = list(
            map(
                lambda x: -x[0] + sw * x[1] * self.int_g(int_mc),
                zip(g_ln_data, g_int_mc),
            )
        )
        nll = -ln_data + sw * self.int_f(int_mc)
        return nll, g



[docs]
    def grad_hessp_batch(self, p, data, mcdata, weight, mc_weight):
        """
        ``self.nll_grad()`` is replaced by this one???

        .. math::
          - \\frac{\\partial \\ln L}{\\partial \\theta_k } =
            -\\sum_{x_i \\in data } w_i \\frac{\\partial}{\\partial \\theta_k} \\ln f(x_i;\\theta_k)
            + (\\sum w_j ) \\left( \\frac{ \\partial }{\\partial \\theta_k} \\sum_{x_i \\in mc} f(x_i;\\theta_k) \\right)
              \\frac{1}{ \\sum_{x_i \\in mc} f(x_i;\\theta_k) }

        :param data:
        :param mcdata:
        :param weight:
        :param mc_weight:
        :return:
        """
        if not hasattr(self, "hess_product_vector_i"):
            self.hess_product_vector_i = [tf.Variable(i) for i in p]
        for i, j in zip(self.hess_product_vector_i, p):
            i.assign(j)
        weight = list(weight)
        sw = tf.reduce_sum([tf.reduce_sum(i) for i in weight])
        ln_data, g_ln_data, hessp_ln_data = sum_grad_hessp(
            self.signal,
            self.hess_product_vector_i,
            data,
            self.signal.trainable_variables,
            weight=weight,
            trans=clip_log,
            resolution_size=self.resolution_size,
        )

        # print("hessp_ln_data",hessp_ln_data)

        int_mc, g_int_mc, hessp_int_mc = sum_grad_hessp(
            self.signal,
            self.hess_product_vector_i,
            mcdata,
            self.signal.trainable_variables,
            weight=mc_weight,
        )

        # print("hessp_int_mc", hessp_int_mc)

        sw = tf.cast(sw, ln_data.dtype)

        g = list(
            map(
                lambda x: -x[0] + sw * x[1] * self.int_g(int_mc),
                zip(g_ln_data, g_int_mc),
            )
        )

        g_int_mc = np.array(g_int_mc)
        hessp2 = sw * (
            hessp_int_mc * self.int_g(int_mc)
            + g_int_mc * np.dot(p, g_int_mc) * self.int_h(int_mc)
        )
        # print("hessp2", hessp2)
        # print("ret", g, hessp2 - hessp_ln_data)
        return g, hessp2 - hessp_ln_data



[docs]
    def nll_grad_hessian(self, data, mcdata, batch=25000):
        """
        The parameters are the same with ``self.nll()``, but it will return Hessian as well.

        :return NLL: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        :return Hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
        """
        assert (
            batch % self.resolution_size == 0
        ), "batch size should be the multiple of resolution_size"
        weight = data.get("weight", tf.ones((data_shape(data),)))
        mc_weight = mcdata.get("weight", tf.ones((data_shape(mcdata),)))
        mc_weight = mc_weight / tf.reduce_sum(mc_weight)
        weight_rw = tf.reduce_sum(
            tf.reshape(weight, (-1, self.resolution_size)), axis=-1
        )
        alpha = tf.reduce_sum(weight_rw) / tf.reduce_sum(weight_rw**2)
        weight = alpha * weight
        sw = tf.reduce_sum(weight)
        ln_data, g_ln_data, h_ln_data = sum_hessian(
            self.signal,
            split_generator(data, batch),
            self.signal.trainable_variables,
            weight=split_generator(weight, batch),
            trans=clip_log,
            resolution_size=self.resolution_size,
        )
        int_mc, g_int_mc, h_int_mc = sum_hessian(
            self.signal,
            split_generator(mcdata, batch),
            self.signal.trainable_variables,
            weight=split_generator(mc_weight, batch),
        )

        n_var = len(g_ln_data)
        nll = -ln_data + sw * self.int_f(int_mc)
        g = -g_ln_data + sw * g_int_mc * self.int_g(int_mc)

        g_int_mc = g_int_mc
        g_outer = (
            tf.reshape(g_int_mc, (-1, 1))
            * tf.reshape(g_int_mc, (1, -1))
            * self.int_h(int_mc)
        )

        h = -h_ln_data + sw * g_outer + sw * h_int_mc * self.int_g(int_mc)
        # print("nll: ", nll)
        return nll, g, h



[docs]
    def set_params(self, var):
        """
        It has interface to ``Amplitude.set_params()``.
        """
        self.Amp.set_params(var)



[docs]
    def get_params(self, trainable_only=False):
        """
        It has interface to ``Amplitude.get_params()``.
        """
        return self.Amp.get_params(trainable_only)





[docs]
@register_nll_model("default")
class Model(object):
    """
    This class implements methods to calculate NLL as well as its derivatives for an amplitude model. It may include
    data for both signal and background.

    :param amp: ``AllAmplitude`` object. The amplitude model.
    :param w_bkg: Real number. The weight of background.
    """

    def __init__(
        self, amp, w_bkg=1.0, resolution_size=1, extended=False, **kwargs
    ):
        for k, v in kwargs.items():
            setattr(self, k, v)
        self.model = BaseModel(
            amp, resolution_size=resolution_size, extended=extended
        )
        self.Amp = amp
        self.w_bkg = w_bkg
        self.vm = amp.vm
        self.resolution_size = self.model.resolution_size


[docs]
    def sum_resolution(self, w):
        w = tf.reshape(w, (-1, self.resolution_size))
        return tf.reduce_sum(w, axis=-1)



[docs]
    def get_weight_data(self, data, weight=None, bg=None, alpha=True):
        """
        Blend data and background data together multiplied by their weights.

        :param data: Data array
        :param weight: Weight for data
        :param bg: Data array for background
        :param alpha: Boolean. If it's true, ``weight`` will be multiplied by a factor :math:`\\alpha=`???
        :return: Data, weight. Their length both equals ``len(data)+len(bg)``.
        """
        if weight is None:
            weight = data.get("weight", 1.0)
        if isinstance(weight, float):
            n_data = data_shape(data)
            weight = tf.convert_to_tensor(
                [weight] * n_data, dtype=get_config("dtype")
            )
        if bg is not None:
            n_bg = data_shape(bg)
            data = data_merge(data, bg)
            bg_weight = bg.get("weight", None)
            if bg_weight is None:
                bg_weight = tf.convert_to_tensor(
                    [-self.w_bkg] * n_bg, dtype=get_config("dtype")
                )
            else:
                bg_weight = tf.convert_to_tensor(
                    bg_weight, dtype=get_config("dtype")
                )
            weight = tf.concat([weight, bg_weight], axis=0)
        # print(weight.shape)
        if alpha:
            weight_r = tf.reshape(weight, (-1, self.resolution_size))
            weight_r = tf.reduce_sum(weight_r, axis=-1)
            alpha = tf.reduce_sum(weight_r) / tf.reduce_sum(
                weight_r * weight_r
            )
            return data, alpha * weight
        return data, weight



[docs]
    def mix_data_bakcground(self, data, bg):
        ret, weight = self.get_weight_data(
            data, weight=data.get_weight(), bg=bg, alpha=True
        )
        ret["weight"] = weight
        return ret



[docs]
    def sum_nll_grad_bacth(self, data):
        return self.model.sum_nll_grad_bacth(data)



[docs]
    def sum_log_integral_grad_batch(self, mcdata, ndata):
        return self.model.sum_log_integral_grad_batch(mcdata, ndata)



[docs]
    def nll(
        self,
        data,
        mcdata,
        weight: tf.Tensor = 1.0,
        batch=None,
        bg=None,
        mc_weight=1.0,
    ):
        """
        Calculate NLL.

        .. math::
          -\\ln L = -\\sum_{x_i \\in data } w_i \\ln f(x_i;\\theta_k) +  (\\sum w_j ) \\ln \\sum_{x_i \\in mc } f(x_i;\\theta_k)

        :param data: Data array
        :param mcdata: MCdata array
        :param weight: Weight of data???
        :param batch: The length of array to calculate as a vector at a time. How to fold the data array may depend on the GPU computability.
        :param bg: Background data array. It can be set to ``None`` if there is no such thing.
        :return: Real number. The value of NLL.
        """
        data, weight = self.get_weight_data(data, weight, bg=bg)
        if isinstance(mc_weight, float):
            mc_weight = tf.convert_to_tensor(
                [mc_weight] * data_shape(mcdata), dtype="float64"
            )
        return self.model.nll(
            {**data, "weight": weight}, {**mcdata, "weight": mc_weight}
        )



[docs]
    def nll_grad(
        self, data, mcdata, weight=1.0, batch=65000, bg=None, mc_weight=1.0
    ):
        """
        Calculate NLL and its gradients.

        .. math::
          - \\frac{\\partial \\ln L}{\\partial \\theta_k } =
            -\\sum_{x_i \\in data } w_i \\frac{\\partial}{\\partial \\theta_k} \\ln f(x_i;\\theta_k)
            + (\\sum w_j ) \\left( \\frac{ \\partial }{\\partial \\theta_k} \\sum_{x_i \\in mc} f(x_i;\\theta_k) \\right)
              \\frac{1}{ \\sum_{x_i \\in mc} f(x_i;\\theta_k) }

        The parameters are the same with ``self.nll()``, but it will return gradients as well.

        :return NLL: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        data, weight = self.get_weight_data(data, weight, bg=bg)
        if isinstance(mc_weight, float):
            mc_weight = tf.convert_to_tensor(
                [mc_weight] * data_shape(mcdata), dtype="float64"
            )
        return self.model.nll_grad(
            {**data, "weight": weight},
            {**mcdata, "weight": mc_weight},
            batch=batch,
        )


    # @tf.function

[docs]
    def grad_hessp_batch(self, p, data, mcdata, weight, mc_weight):
        """
        ``self.nll_grad()`` is replaced by this one???

        .. math::
          - \\frac{\\partial \\ln L}{\\partial \\theta_k } =
            -\\sum_{x_i \\in data } w_i \\frac{\\partial}{\\partial \\theta_k} \\ln f(x_i;\\theta_k)
            + (\\sum w_j ) \\left( \\frac{ \\partial }{\\partial \\theta_k} \\sum_{x_i \\in mc} f(x_i;\\theta_k) \\right)
              \\frac{1}{ \\sum_{x_i \\in mc} f(x_i;\\theta_k) }

        :param data:
        :param mcdata:
        :param weight:
        :param mc_weight:
        :return:
        """
        data_i = ({**i, "weight": j} for i, j in zip(data, weight))
        mcdata_i = ({**i, "weight": j} for i, j in zip(mcdata, mc_weight))
        return self.model.grad_hessp_batch(
            p, data_i, mcdata_i, weight, mc_weight
        )


    # @tf.function

[docs]
    def nll_grad_batch(self, data, mcdata, weight, mc_weight):
        """
        batch version of ``self.nll_grad()``

        .. math::
          - \\frac{\\partial \\ln L}{\\partial \\theta_k } =
            -\\sum_{x_i \\in data } w_i \\frac{\\partial}{\\partial \\theta_k} \\ln f(x_i;\\theta_k)
            + (\\sum w_j ) \\left( \\frac{ \\partial }{\\partial \\theta_k} \\sum_{x_i \\in mc} f(x_i;\\theta_k) \\right)
              \\frac{1}{ \\sum_{x_i \\in mc} f(x_i;\\theta_k) }

        :param data:
        :param mcdata:
        :param weight:
        :param mc_weight:
        :return:
        """
        data_i = data  # ({**i, "weight": j} for i, j in zip(data, weight))
        mcdata_i = (
            mcdata  # ({**i, "weight": j} for i, j in zip(mcdata, mc_weight))
        )
        return self.model.nll_grad_batch(data_i, mcdata_i, weight, mc_weight)



[docs]
    def nll_grad_hessian(
        self, data, mcdata, weight=1.0, batch=24000, bg=None, mc_weight=1.0
    ):
        """
        The parameters are the same with ``self.nll()``, but it will return Hessian as well.

        :return NLL: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        :return Hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
        """
        data, weight = self.get_weight_data(data, weight, bg=bg)
        if isinstance(mc_weight, float):
            mc_weight = tf.convert_to_tensor(
                [mc_weight] * data_shape(mcdata), dtype="float64"
            )
        data_i = data_replace(data, "weight", weight)
        mcdata_i = data_replace(mcdata, "weight", mc_weight)
        return self.model.nll_grad_hessian(data_i, mcdata_i, batch=batch)



[docs]
    def set_params(self, var):
        """
        It has interface to ``Amplitude.set_params()``.
        """
        self.Amp.set_params(var)



[docs]
    def get_params(self, trainable_only=False):
        """
        It has interface to ``Amplitude.get_params()``.
        """
        return self.Amp.get_params(trainable_only)





[docs]
@register_nll_model("inject_mc")
class Model_new(Model):
    """
    This class implements methods to calculate NLL as well as its derivatives for an amplitude model. It may include
    data for both signal and background.

    :param amp: ``AllAmplitude`` object. The amplitude model.
    :param w_bkg: Real number. The weight of background.
    """

    def __init__(self, amp, w_bkg=1.0, w_inmc=0, float_wmc=False):
        super(Model_new, self).__init__(amp, w_bkg)
        # self.w_inmc = w_inmc
        self.w_inmc = Variable(
            "weight_injectMC", value=w_inmc, vm=self.Amp.vm, overwrite=False
        )
        if not float_wmc:
            self.w_inmc.fixed()


[docs]
    def get_weight_data(
        self, data, weight=1.0, bg=None, inmc=None, alpha=True
    ):
        """
        Blend data and background data together multiplied by their weights.

        :param data: Data array
        :param weight: Weight for data
        :param bg: Data array for background
        :param alpha: Boolean. If it's true, ``weight`` will be multiplied by a factor :math:`\\alpha=`???
        :return: Data, weight. Their length both equals ``len(data)+len(bg)``.
        """
        n_data = data_shape(data)
        if isinstance(weight, float):
            weight = tf.convert_to_tensor(
                [weight] * n_data, dtype=get_config("dtype")
            )
        if bg is not None:
            n_bg = data_shape(bg)
            data = data_merge(data, bg)
            bg_weight = tf.convert_to_tensor(
                [-self.w_bkg] * n_bg, dtype=get_config("dtype")
            )
            weight = tf.concat([weight, bg_weight], axis=0)
        if inmc is not None:
            n_inmc = data_shape(inmc)
            data = data_merge(data, inmc)
            wmc = self.w_inmc() * n_data / n_inmc
            inmc_weight = tf.convert_to_tensor(
                [wmc] * n_inmc, dtype=get_config("dtype")
            )
            weight = tf.concat([weight, inmc_weight], axis=0)

        if alpha:
            alpha = tf.reduce_sum(weight) / tf.reduce_sum(
                weight * weight
            )  # correct with inject MC?
            return data, alpha * weight
        return data, weight



[docs]
    def nll(self, data, mcdata, weight: tf.Tensor = 1.0, batch=None, bg=None):
        """
        Calculate NLL.

        .. math::
          -\\ln L = -\\sum_{x_i \\in data } w_i \\ln f(x_i;\\theta_k) +  (\\sum w_j ) \\ln \\sum_{x_i \\in mc } f(x_i;\\theta_k)

        :param data: Data array
        :param mcdata: MCdata array
        :param weight: Weight of data???
        :param batch: The length of array to calculate as a vector at a time. How to fold the data array may depend on the GPU computability.
        :param bg: Background data array. It can be set to ``None`` if there is no such thing.
        :return: Real number. The value of NLL.
        """
        data, weight = self.get_weight_data(data, weight, bg=bg)
        sw = tf.reduce_sum(weight)
        ln_data = tf.math.log(self.Amp(data))
        int_mc = tf.math.log(tf.reduce_mean(self.Amp(mcdata)))
        nll_0 = -tf.reduce_sum(tf.cast(weight, ln_data.dtype) * ln_data)
        return nll_0 + tf.cast(sw, int_mc.dtype) * int_mc



[docs]
    def nll_grad_batch(self, data, mcdata, weight, mc_weight):
        """
        ``self.nll_grad_new``
        """
        # N_data = 2525
        # N_flatmc = 3106
        # w_flatmc = 0#878/2525#2889/2525#3106/2525
        nll, g = sum_gradient_new(
            self.Amp,
            data,
            mcdata,
            weight,
            mc_weight,
            self.Amp.trainable_variables,
            clip_log,
            self.w_inmc,
        )
        # print("@@@",nll,np.array(g).tolist())
        return nll, g



[docs]
    def nll_grad_hessian(self, data, mcdata, weight, mc_weight):
        """
        The parameters are the same with ``self.nll()``, but it will return Hessian as well.

        :return NLL: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        :return Hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
        """
        nll, g, h = sum_hessian_new(
            self.Amp,
            data,
            mcdata,
            weight,
            mc_weight,
            self.Amp.trainable_variables,
            clip_log,
            self.w_inmc,
        )
        h = tf.stack(h)
        return nll, g, h





[docs]
class GaussianConstr(object):
    def __init__(self, vm, constraint={}):
        self.vm = vm
        self.constraint = {}
        self.update(constraint)


[docs]
    def update(self, constraint={}):
        for i in constraint:
            if not i in self.vm.trainable_vars:
                warnings.warn(
                    "Constraint {} is useless to fitting because it's not trainable".format(
                        i
                    )
                )
        self.constraint.update(constraint)



[docs]
    def get_constrain_term(self):
        r"""
        constraint: Gauss(mean,sigma)
          by add a term :math:`\frac{(\theta_i-\bar{\theta_i})^2}{2\sigma^2}`
        """
        term = 0.0
        for i in self.constraint:
            pi = self.constraint[i]
            assert isinstance(pi, tuple) or isinstance(pi, list)
            assert len(pi) == 2
            mean, sigma = pi
            var = self.vm.variables[i]
            term += (var - mean) ** 2 / (sigma**2) / 2
        return term



[docs]
    def get_constrain_grad(self):
        r"""
        constraint: Gauss(mean,sigma)
          by add a term :math:`\frac{d}{d\theta_i}\frac{(\theta_i-\bar{\theta_i})^2}{2\sigma^2} = \frac{\theta_i-\bar{\theta_i}}{\sigma^2}`
        """
        g_dict = {}
        for i in self.constraint:
            if not i in self.vm.trainable_vars:
                continue
            pi = self.constraint[i]
            assert isinstance(pi, tuple) or isinstance(pi, list)
            assert len(pi) == 2
            mean, sigma = pi
            var = self.vm.variables[i]
            g_dict[i] = (var - mean) / (sigma**2)  # 1st differentiation
        grad = []
        for i in self.vm.trainable_vars:
            if i in g_dict:
                grad.append(g_dict[i])
            else:
                grad.append(0.0)
        return np.array(grad)



[docs]
    def get_constrain_hessian(self):
        """the constrained parameter's 2nd differentiation"""
        h_dict = {}
        for i in self.constraint:
            if not i in self.vm.trainable_vars:
                continue
            pi = self.constraint[i]
            assert isinstance(pi, tuple) or isinstance(pi, list)
            assert len(pi) == 2
            mean, sigma = pi
            var = self.vm.variables[i]
            h_dict[i] = 1 / (sigma**2)  # 2nd differentiation
        nv = len(self.vm.trainable_vars)
        hessian = np.zeros([nv, nv])
        for v, i in zip(self.vm.trainable_vars, range(nv)):
            if v in h_dict:
                hessian[i, i] = h_dict[v]
        return np.array(hessian)





[docs]
class ConstrainModel(Model):
    """
    negative log likelihood model with constrains

    """

    def __init__(self, amp, w_bkg=1.0, constrain={}):
        super(ConstrainModel, self).__init__(amp, w_bkg)
        self.constrain = (
            constrain  # priori gauss constrain for the fitting parameters
        )


[docs]
    def get_constrain_term(self):  # the priori constrain term added to NLL
        r"""
        constrain: Gauss(mean,sigma)
          by add a term :math:`\frac{(\theta_i-\bar{\theta_i})^2}{2\sigma^2}`

        """
        t_var = self.Amp.trainable_variables
        t_var_name = [i.name for i in t_var]
        var_dict = dict(zip(t_var_name, t_var))
        nll = 0.0
        for i in self.constrain:
            if not i in var_dict:
                break
            pi = self.constrain[i]
            if isinstance(pi, tuple) and len(pi) == 2:
                mean, sigma = pi
                var = var_dict[i]
                nll += (var - mean) ** 2 / (sigma**2) / 2
        return nll



[docs]
    def get_constrain_grad(
        self,
    ):  # the constrained parameter's 1st differentiation
        r"""
        constrain: Gauss(mean,sigma)
          by add a term :math:`\frac{d}{d\theta_i}\frac{(\theta_i-\bar{\theta_i})^2}{2\sigma^2} = \frac{\theta_i-\bar{\theta_i}}{\sigma^2}`

        """
        t_var = self.Amp.trainable_variables
        t_var_name = [i.name for i in t_var]
        var_dict = dict(zip(t_var_name, t_var))
        g_dict = {}
        for i in self.constrain:
            if not i in var_dict:
                break
            pi = self.constrain[i]
            if isinstance(pi, tuple) and len(pi) == 2:
                mean, sigma = pi
                var = var_dict[i]
                g_dict[i] = (var - mean) / (sigma**2)  # 1st differentiation
        nll_g = []
        for i in t_var_name:
            if i in g_dict:
                nll_g.append(g_dict[i])
            else:
                nll_g.append(0.0)
        return nll_g



[docs]
    def get_constrain_hessian(self):
        """the constrained parameter's 2nd differentiation"""
        t_var = self.Amp.trainable_variables
        t_var_name = [i.name for i in t_var]
        var_dict = dict(zip(t_var_name, t_var))
        g_dict = {}
        for i in self.constrain:
            if not i in var_dict:
                break
            pi = self.constrain[i]
            if isinstance(pi, tuple) and len(pi) == 2:
                mean, sigma = pi
                var = var_dict[i]
                g_dict[i] = 1 / (sigma**2)  # 2nd differentiation
        nll_g = []
        for i in t_var_name:
            if i in g_dict:
                nll_g.append(g_dict[i])
            else:
                nll_g.append(0.0)
        return np.diag(nll_g)



[docs]
    def nll(self, data, mcdata, weight=1.0, bg=None, batch=None):
        r"""
        calculate negative log-likelihood

        .. math::
          -\ln L = -\sum_{x_i \in data } w_i \ln f(x_i;\theta_i) +  (\sum w_i ) \ln \sum_{x_i \in mc } f(x_i;\theta_i) + cons

        """
        nll_0 = super(ConstrainModel, self).nll(
            data, mcdata, weight=weight, batch=batch, bg=bg
        )
        cons = self.get_constrain_term()
        return nll_0 + cons



[docs]
    def nll_gradient(self, data, mcdata, weight=1.0, batch=None, bg=None):
        r"""
        calculate negative log-likelihood with gradient

        .. math::
          \frac{\partial }{\partial \theta_i }(-\ln L) = -\sum_{x_i \in data } w_i \frac{\partial }{\partial \theta_i } \ln f(x_i;\theta_i) +
          \frac{\sum w_i }{\sum_{x_i \in mc }f(x_i;\theta_i)} \sum_{x_i \in mc } \frac{\partial }{\partial \theta_i } f(x_i;\theta_i) + cons

        """
        cons_grad = self.get_constrain_grad()  # the constrain term
        cons = self.get_constrain_term()
        nll0, g0 = super(ConstrainModel, self).nll_grad(
            data, mcdata, weight=weight, batch=batch, bg=bg
        )
        nll = nll0 + cons
        g = [cons_grad[i] + g0[i] for i in range(len(g0))]
        return nll, g




def _convert_batch(data, batch, cached_file=None, name=""):
    from tf_pwa.data import LazyCall

    if isinstance(data, LazyCall):
        if cached_file is not None:
            return data.as_dataset(batch, cached_file + name)
        else:
            return data.as_dataset(batch)
    return list(split_generator(data, batch))



[docs]
class FCN(object):
    """
    This class implements methods to calculate the NLL as well as its derivatives for a general function.

    :param model: Model object.
    :param data: Data array.
    :param mcdata: MCdata array.
    :param bg: Background array.
    :param batch: The length of array to calculate as a vector at a time. How to fold the data array may depend on the GPU computability.
    """

    def __init__(
        self,
        model,
        data,
        mcdata,
        bg=None,
        batch=65000,
        inmc=None,
        gauss_constr={},
    ):
        self.model = model
        self.vm = model.vm
        self.n_call = 0
        self.n_grad = 0
        self.cached_nll = None
        if inmc is None:
            data, weight = self.model.get_weight_data(data, bg=bg)
            print("Using Model")
        else:
            data, weight = self.model.get_weight_data(data, bg=bg, inmc=inmc)
            print("Using Model with inmc")
        n_mcdata = data_shape(mcdata)
        self.alpha = tf.reduce_sum(weight) / tf.reduce_sum(weight * weight)
        self.weight = weight
        self.data = data
        self.batch_data = self._convert_batch(data, batch)
        self.mcdata = mcdata
        self.batch_mcdata = self._convert_batch(mcdata, batch)
        self.batch = batch
        if mcdata.get("weight", None) is not None:
            mc_weight = tf.convert_to_tensor(mcdata["weight"], dtype="float64")
            self.mc_weight = mc_weight / tf.reduce_sum(mc_weight)
        else:
            self.mc_weight = tf.convert_to_tensor(
                [1 / n_mcdata] * n_mcdata, dtype="float64"
            )

        self.batch_weight = self._convert_batch(self.weight, self.batch)
        self.batch_mc_weight = self._convert_batch(self.mc_weight, self.batch)
        self.gauss_constr = GaussianConstr(self.vm, gauss_constr)
        self.cached_mc = {}

    def _convert_batch(self, data, batch):
        ret = _convert_batch(data, batch)
        if self.vm.strategy is not None:
            ret = self._distribute_multi_gpu(ret)
        return ret

    def _distribute_multi_gpu(self, data):
        strategy = self.vm.strategy
        if isinstance(data, tf.data.Dataset):
            data = strategy.experimental_distribute_dataset(data)
        elif isinstance(data, list):
            ret = []
            n_p = strategy.num_replicas_in_sync
            for ia in data:
                ia = list(
                    split_generator(
                        ia, batch_size=(data_shape(ia) + n_p - 1) // n_p
                    )
                )

                def _tmp_fun(ctx):
                    return ia[ctx.replica_id_in_sync_group]

                tmp = strategy.experimental_distribute_values_from_function(
                    _tmp_fun
                )
                ret.append(tmp)
            data = ret
        return data


[docs]
    def get_params(self, trainable_only=False):
        return self.vm.get_all_dic(trainable_only)



[docs]
    def get_nll(self, x={}):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        """
        self.model.set_params(x)
        if type(self.model) == Model_new:
            nll, g = self.get_nll_grad(x)
        else:
            nll = self.model.nll(
                self.data,
                self.mcdata,
                weight=self.weight,
                mc_weight=self.mc_weight,
            )
            self.n_call += 1
        return nll


    def __call__(self, x={}):
        self.cached_nll = (
            self.get_nll(x) + self.gauss_constr.get_constrain_term()
        )
        return self.cached_nll


[docs]
    def get_grad(self, x={}):
        """
        :param x: List. Values of variables.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        nll, g = self.get_nll_grad(x)
        return g



[docs]
    def grad(self, x={}):
        return self.get_grad(x) + self.gauss_constr.get_constrain_grad()



[docs]
    def get_nll_grad(self, x={}):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        self.model.set_params(x)
        nll, g = self.model.nll_grad_batch(
            self.batch_data,
            self.batch_mcdata,
            weight=self.batch_weight,
            mc_weight=self.batch_mc_weight,
        )
        self.n_call += 1
        return nll, g



[docs]
    @time_print
    def nll_grad(self, x={}):
        nll, g = self.get_nll_grad(x)
        constr = self.gauss_constr.get_constrain_term()
        constr_grad = self.gauss_constr.get_constrain_grad()
        self.cached_nll = nll + constr
        return float(self.cached_nll), g + constr_grad



[docs]
    def get_nll_grad_hessian(self, x={}, batch=None):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        :return hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
        """
        if batch is None:
            batch = self.batch
        self.model.set_params(x)
        if type(self.model) == Model_new:
            nll, g, h = self.model.nll_grad_hessian(
                self.batch_data,
                self.batch_mcdata,
                weight=list(data_split(self.weight, self.batch)),
                mc_weight=data_split(self.mc_weight, self.batch),
            )
        else:
            nll, g, h = self.model.nll_grad_hessian(
                self.data,
                self.mcdata,
                weight=self.weight,
                batch=batch,
                mc_weight=self.mc_weight,
            )
        return nll, g, h



[docs]
    def nll_grad_hessian(self, x={}, batch=None):
        if batch is None:
            batch = self.batch
        nll, g, h = self.get_nll_grad_hessian(x, batch)
        constr = self.gauss_constr.get_constrain_term()
        constr_grad = self.gauss_constr.get_constrain_grad()
        constr_hessian = self.gauss_constr.get_constrain_hessian()
        return nll + constr, g + constr_grad, h + constr_hessian



[docs]
    @time_print
    def grad_hessp(self, x, p, batch=None):
        if batch is None:
            batch = self.batch
        g, h = self.get_grad_hessp(x, p, batch)
        constr_grad = self.gauss_constr.get_constrain_grad()
        constr_hessian = 0.0  # self.gauss_constr.get_constrain_hessp(p)
        return g + constr_grad, h + constr_hessian



[docs]
    def get_grad_hessp(self, x, p, batch):
        self.model.set_params(x)
        grad, hessp = self.model.grad_hessp_batch(
            p,
            self.batch_data,
            self.batch_mcdata,
            weight=list(data_split(self.weight, self.batch)),
            mc_weight=self.batch_mc_weight,
        )
        return grad, hessp





[docs]
class CombineFCN(object):
    """
    This class implements methods to calculate the NLL as well as its derivatives for a general function.

    :param model: List of model object.
    :param data: List of data array.
    :param mcdata: list of MCdata array.
    :param bg: list of Background array.
    :param batch: The length of array to calculate as a vector at a time. How to fold the data array may depend on the GPU computability.
    """

    def __init__(
        self,
        model=None,
        data=None,
        mcdata=None,
        bg=None,
        fcns=None,
        batch=65000,
        gauss_constr={},
    ):
        if fcns is None:
            assert model is not None, "model required"
            assert data is not None, "data required"
            assert mcdata is not None, "mcdata required"
            self.fcns = []
            self.cached_nll = 0.0
            if bg is None:
                bg = _loop_generator(None)
            for model_i, data_i, mcdata_i, bg_i in zip(
                model, data, mcdata, bg
            ):
                self.fcns.append(FCN(model_i, data_i, mcdata_i, bg_i))
        else:
            self.fcns = list(fcns)
        self.vm = self.fcns[0].vm
        self.gauss_constr = GaussianConstr(self.vm, gauss_constr)


[docs]
    def get_params(self, trainable_only=False):
        return self.vm.get_all_dic(trainable_only)



[docs]
    def get_nll(self, x={}):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        """
        nlls = []
        for i in self.fcns:
            nlls.append(i.get_nll(x))
        return sum(nlls)


    def __call__(self, x={}):
        self.cached_nll = (
            self.get_nll(x) + self.gauss_constr.get_constrain_term()
        )
        return self.cached_nll


[docs]
    def get_grad(self, x={}):
        """
        :param x: List. Values of variables.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        gs = []
        for i in self.fcns:
            g = i.get_grad(x)
            gs.append(g)
        return sum(gs)



[docs]
    def grad(self, x={}):
        return self.get_grad(x) + self.gauss_constr.get_constrain_grad()



[docs]
    def get_nll_grad(self, x={}):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        nlls = []
        gs = []
        for i in self.fcns:
            nll, g = i.get_nll_grad(x)
            nlls.append(nll)
            gs.append(g)
        return sum(nlls), tf.reduce_sum(gs, axis=0)



[docs]
    @time_print
    def nll_grad(self, x={}):
        nll, g = self.get_nll_grad(x)
        constr = self.gauss_constr.get_constrain_term()
        constr_grad = self.gauss_constr.get_constrain_grad()
        self.cached_nll = nll + constr
        return float(self.cached_nll), g + constr_grad



[docs]
    def get_nll_grad_hessian(self, x={}, batch=None):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        :return hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
        """
        nlls = []
        gs = []
        hs = []
        for i in self.fcns:
            nll, g, h = i.get_nll_grad_hessian(x)
            nlls.append(nll)
            gs.append(g)
            hs.append(h)
        # print("NLL list: ",nlls)
        # print("Gradient List: ",tf.transpose(gs))
        return (
            tf.reduce_sum(nlls, axis=0),
            tf.reduce_sum(gs, axis=0),
            tf.reduce_sum(hs, axis=0),
        )



[docs]
    def nll_grad_hessian(self, x={}, batch=None):
        nll, g, h = self.get_nll_grad_hessian(x, batch)
        constr = self.gauss_constr.get_constrain_term()
        constr_grad = self.gauss_constr.get_constrain_grad()
        constr_hessian = self.gauss_constr.get_constrain_hessian()
        return nll + constr, g + constr_grad, h + constr_hessian



[docs]
    def get_grad_hessp(self, x, p, batch):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        hs = []
        gs = []
        for i in self.fcns:
            g, h = i.get_grad_hessp(x, p, batch)
            hs.append(h)
            gs.append(g)
        return tf.reduce_sum(gs, axis=0), tf.reduce_sum(hs, axis=0)



[docs]
    def grad_hessp(self, x, p, batch=None):
        grad, hessp = self.get_grad_hessp(x, p, batch)
        constr_grad = self.gauss_constr.get_constrain_grad()
        return grad + constr_grad, hessp





[docs]
class MixLogLikehoodFCN(CombineFCN):
    """
    This class implements methods to calculate the NLL as well as its derivatives for a general function.

    :param model: List of model object.
    :param data: List of data array.
    :param mcdata: list of MCdata array.
    :param bg: list of Background array.
    :param batch: The length of array to calculate as a vector at a time. How to fold the data array may depend on the GPU computability.
    """

    def __init__(
        self,
        model,
        data,
        mcdata,
        bg=None,
        batch=65000,
        gauss_constr={},
    ):
        self.cached_nll = 0.0
        assert model is not None, "model required"
        assert data is not None, "data required"
        assert mcdata is not None, "mcdata required"
        self.fcns = []
        self.cached_nll = 0.0
        if bg is None:
            bg = _loop_generator(None)
        self.datas = []
        self.weight_phsps = []
        self.n_datas = []
        self.model = model
        for model_i, data_i, mcdata_i, bg_i in zip(model, data, mcdata, bg):
            data_s = model_i.mix_data_bakcground(data_i, bg_i)
            self.datas.append(data_s)
            weight_phsp = type(mcdata_i)(
                {k: v for k, v in mcdata_i.items()}
            )  # simple copy
            w = weight_phsp.get_weight()
            weight_phsp["weight"] = w / tf.reduce_sum(w)
            self.n_datas.append(tf.reduce_sum(data_s.get_weight()))
            self.weight_phsps.append(list(split_generator(weight_phsp, batch)))
            self.fcns.append(FCN(model_i, data_i, mcdata_i, bg_i))
        self.data_merge = list(split_generator(data_merge(*self.datas), batch))
        self.vm = self.model[0].vm
        self.gauss_constr = GaussianConstr(self.vm, gauss_constr)


[docs]
    def get_nll_grad(self, x={}):
        """
        :param x: List. Values of variables.
        :return nll: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        """
        [i.set_params(x) for i in self.model]
        nlls = []
        gs = []
        nll, g = self.model[0].sum_nll_grad_bacth(self.data_merge)
        nlls.append(nll)
        gs.append(g)
        for i, k, l in zip(self.model, self.weight_phsps, self.n_datas):
            nll, g = i.sum_log_integral_grad_batch(k, l)
            nlls.append(nll)
            gs.append(g)
        print(sum(nlls))
        return sum(nlls), tf.reduce_sum(gs, axis=0)