# GoF from binned cost functions

The builtin cost functions for binned data, BinnedNLL and ExtendedBinnedNLL have a minimum value which is asymptotically chi2-distributed and thus can be used as a goodness-of-fit statistic. This example shows, that one still needs a large number of entries in each bin to reach the asymptotic regime.

[1]:

from iminuit import Minuit
from iminuit.cost import BinnedNLL, ExtendedBinnedNLL, LeastSquares
import numpy as np
from numba_stats import norm, expon
import matplotlib.pyplot as plt
import joblib
from scipy.stats import chi2

[2]:

def generate(n, seed):
rng = np.random.default_rng(seed)
s = rng.normal(1, 0.1, size=rng.poisson(n))
b = rng.exponential(size=rng.poisson(n))
x = np.append(s, b)
return x[(x > 0) & (x < 2)]

x = generate(1000, 1)
plt.hist(x, bins=20, range=(0, 2));

[3]:

@joblib.delayed
def run(n, seed):
x = generate(n, seed)
xrange = (0, 2)
w, xe = np.histogram(x, bins=20, range=xrange)

def model1(x, z, mu, sigma, tau):
return z * norm.cdf(x, mu, sigma) / np.diff(norm.cdf(xrange, mu, sigma)) + (1 - z) * expon.cdf(x, 0, tau) / np.diff(expon.cdf(xrange, 0, tau))

def model2(x, s, b, mu, sigma, tau):
return s * n * norm.cdf(x, mu, sigma) + b * n * expon.cdf(x, 0, tau)

m = [
Minuit(BinnedNLL(w, xe, model1), z=0.5, mu=0.5, sigma=0.5, tau=0.5),
Minuit(ExtendedBinnedNLL(w, xe, model2), s=1, b=1, mu=0.5, sigma=0.5, tau=0.5),
]
for mi in m:
mi.limits["mu"] = (0, 2)
mi.limits["sigma", "tau"] = (0.1, None)
m[0].limits["z"] = (0, 1)
m[1].limits["s", "b"] = (0, None)
r = []
for mi in m:
if mi.valid:
pvalue = 1 - chi2(mi.fcn._fcn.ndata - mi.nfit).cdf(mi.fval)
r.append(pvalue)
else:
r.append(np.nan)
return r

pvalues = {}
for n in (20, 100, 1000, 10000):
pvalues[n] = np.array(joblib.Parallel(-1)(run(n, i) for i in range(500)))

[4]:

fig, ax = plt.subplots(2, 4, figsize=(15, 6), constrained_layout=True)
for i, (ni, vi) in enumerate(pvalues.items()):
ax[0, i].hist(vi[:, 0])
ax[1, i].hist(vi[:, 1])
ax[0, i].set_title(f"n = {ni}, failed fits = {np.sum(np.isnan(vi[:, 0]))}")
ax[1, i].set_title(f"n = {ni}, failed fits = {np.sum(np.isnan(vi[:, 1]))}")
fig.supxlabel("pvalue");