{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 109: chi-square residuals and pulls\n",
    "\n",
    "This example corresponds to [RF109](https://root.cern.ch/doc/master/rf109__chi2residpull_8py.html).\n",
    "\n",
    "A Gaussian sample is histogrammed and compared with a deliberately distorted Gaussian model; the residuals and pulls of the data with respect to that model are then computed and plotted."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boost_histogram as bh\n",
    "import numpy as np\n",
    "from iminuit.cost import BinnedNLL\n",
    "from matplotlib import pyplot as plt\n",
    "from numba_stats import norm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate and histogram the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate a sample of 10000 events with sigma=3 (fixed seed for reproducibility)\n",
    "rng = np.random.default_rng(1)\n",
    "x = rng.normal(scale=3, size=10000)\n",
    "\n",
    "# make histogram\n",
    "h = bh.Histogram(bh.axis.Regular(50, -10, 10))\n",
    "h.fill(x)\n",
    "cx = h.axes[0].centers\n",
    "xe = h.axes[0].edges"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model, pulls and residuals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# compute residuals and pulls for wrong Gaussian with sigma = 3.15\n",
    "pars = [\n",
    "    0,  # mu\n",
    "    3.15,  # sigma\n",
    "]\n",
    "\n",
    "\n",
    "def model(x, mu, sigma):\n",
    "    \"\"\"Data model: this is a cdf, as needed for a binned analysis.\"\"\"\n",
    "    return norm.cdf(x, mu, sigma)\n",
    "\n",
    "\n",
    "# pulls can be computed from the cost functions in iminuit.cost\n",
    "cost = BinnedNLL(h.values(), xe, model)\n",
    "pulls = cost.pulls(pars)\n",
    "# expected count per bin from the model\n",
    "m = cost.prediction(pars)\n",
    "# residuals are not generally useful, so there is no library routine and\n",
    "# we compute them \"by hand\"\n",
    "residuals = cost.n - m\n",
    "\n",
    "# value returned by BinnedNLL functor is chi-square distributed\n",
    "chi_square = cost(*pars)\n",
    "# normally, we would subtract fitted degrees of freedom from cost.ndata, but\n",
    "# in this example no parameters are fitted\n",
    "print(f\"𝜒²/ndof = {chi_square / cost.ndata:.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot data, residuals and pulls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(1, 3, figsize=(10, 3), sharex=True, constrained_layout=True)\n",
    "\n",
    "# square root of the histogram variances, used as error bars on the data\n",
    "data_err = h.variances() ** 0.5\n",
    "\n",
    "# plot data and gaussian model with sigma = 3.15\n",
    "ax[0].set(title=\"data with distorted gaussian pdf\", xlabel=\"x\", ylabel=\"events per bin\")\n",
    "ax[0].errorbar(cx, h.values(), data_err, fmt=\"ok\")\n",
    "ax[0].stairs(m, xe, fill=True)\n",
    "\n",
    "ax[1].set(title=\"residual distribution\", xlabel=\"x\", ylabel=\"data - model\")\n",
    "ax[1].errorbar(cx, residuals, data_err, fmt=\"ok\")\n",
    "\n",
    "ax[2].set(title=\"pull distribution\", xlabel=\"x\", ylabel=\"pull\")\n",
    "ax[2].errorbar(cx, pulls, 1, fmt=\"ok\");"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py310",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}