109: chi-square residuals and pulls¶

This example corresponds to RF109.

[1]:

import numpy as np
from numba_stats import norm
from iminuit.cost import BinnedNLL
import boost_histogram as bh
from matplotlib import pyplot as plt

# generate a sample of 1000 events with sigma=3
rng = np.random.default_rng(1)
x = rng.normal(scale=3, size=10000)

# make histogram
h = bh.Histogram(bh.axis.Regular(50, -10, 10))
h.fill(x)
cx = h.axes[0].centers
xe = h.axes[0].edges

# compute residuals and pulls for wrong Gaussian with sigma = 3.15
pars = [
    0,  # mu
    3.15,  # sigma
]

# data model, this is a cdf for a binned analysis
def model(x, mu, sigma):
    return norm.cdf(x, mu, sigma)

# pulls can be computed from the cost functions in iminuit.cost
cost = BinnedNLL(h.values(), xe, model)
pulls = cost.pulls(pars)
# expected count per bin from the model
m = cost.prediction(pars)
# residuals are not generally useful, so there is no library routine and
# we compute them "by hand"
residuals = cost.n - m

# value returned by BinnedNLL functor is chi-square distributed
chi_square = cost(*pars)
# normally, we would subtract fitted degrees of freedom from cost.ndata, but
# in this example nothing no parameters are fitted
print(f"𝜒²/ndof = {chi_square / cost.ndata:.2f}")

fig, ax = plt.subplots(1, 3, figsize=(10, 3),
                       sharex=True, constrained_layout=True)

# plot data and gaussian model with sigma = 3.15
plt.sca(ax[0])
plt.title("data with distorted gaussian pdf")
plt.errorbar(cx, h.values(), h.variances() ** 0.5, fmt="ok")
plt.stairs(m, xe, fill=True)

plt.sca(ax[1])
plt.title("residual distribution")
plt.errorbar(cx, residuals, h.variances() ** 0.5, fmt="ok")

plt.sca(ax[2])
plt.title("pull distribution")
plt.errorbar(cx, pulls, 1, fmt="ok");

𝜒²/ndof = 2.77

../../_images/notebooks_roofit_rf109_chi2residpull_1_1.svg