3 files changed, 99 insertions, 50 deletions
diff --git a/utils/chi2 b/utils/chi2
index 72ad89b..f1ae974 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -121,9 +121,6 @@ def make_nll(data, mc_hists):
         return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY)
     return nll
 
-def chi2(samples,expected):
-    return np.sum((samples-expected)**2/expected,axis=-1)
-
 def get_mc_hist(data,x,bins):
     hist = np.histogram(data,bins=bins)[0]
     return sample_mc_hist(hist,norm=x[0]/100.0)
diff --git a/utils/plot-muons b/utils/plot-muons
index 11d2d2e..f130444 100755
--- a/utils/plot-muons
+++ b/utils/plot-muons
@@ -25,38 +25,8 @@ and michel electrons.
 from __future__ import print_function, division
 import numpy as np
 from scipy.stats import iqr, poisson
-from matplotlib.lines import Line2D
 from scipy.stats import iqr, norm, beta
-from scipy.special import spence
-from itertools import izip_longest
-
-particle_id = {20: 'e', 22: r'\mu'}
-
-def plot_hist2(df, muons=False, norm=1.0, label=None, color=None):
-    for id, df_id in sorted(df.groupby('id')):
-        if id == 20:
-            plt.subplot(2,3,1)
-        elif id == 22:
-            plt.subplot(2,3,2)
-        elif id == 2020:
-            plt.subplot(2,3,4)
-        elif id == 2022:
-            plt.subplot(2,3,5)
-        elif id == 2222:
-            plt.subplot(2,3,6)
-
-        if muons:
-            plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step', weights=np.tile(norm,len(df_id.ke.values)), label=label, color=color)
-            plt.xlabel("log10(Energy (GeV))")
-        else:
-            bins = np.logspace(np.log10(20),np.log10(10e3),21)
-            plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)), label=label, color=color)
-            plt.gca().set_xscale("log")
-            plt.xlabel("Energy (MeV)")
-        plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
-
-    if len(df):
-        plt.tight_layout()
+from sddm.stats import *
 
 if __name__ == '__main__':
     import argparse
@@ -134,9 +104,15 @@ if __name__ == '__main__':
                Line2D([0], [0], color='C1')]
     labels = ('Data','Monte Carlo')
 
+    # For the Michel energy plot, we only look at events where the
+    # corresponding muon had less than 2500 nhit.  The reason for only looking
+    # at Michel electrons from muons with less than 2500 nhit is because there
+    # is significant ringing and afterpulsing after a large muon which can
+    # cause the reconstruction to overestimate the energy.
+    michel_low_nhit = michel[michel.muon_nhit < 2500]
+
     fig = plt.figure()
-    plot_hist2(michel,color='C0')
-    plot_hist2(michel_mc,norm=len(michel)/len(michel_mc),color='C1')
+    plot_hist2_data_mc(michel_low_nhit,michel_mc)
     despine(fig,trim=True)
     fig.legend(handles,labels,loc='upper right')
     if args.save:
@@ -146,8 +122,7 @@ if __name__ == '__main__':
         plt.suptitle("Michel Electrons")
 
     fig = plt.figure()
-    plot_hist2(muons,muons=True,color='C0')
-    plot_hist2(muons_mc,norm=len(muons)/len(muons_mc),muons=True,color='C1')
+    plot_hist2_data_mc(muons,muons_mc)
     despine(fig,trim=True)
     if len(muons):
         plt.tight_layout()
@@ -186,7 +161,7 @@ if __name__ == '__main__':
     stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.nhit_cal < 4000]
 
     stopping_muons = stopping_muons[stopping_muons.udotr < -0.5]
-    stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.nhit_cal < -0.5]
+    stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.udotr < -0.5]
 
     if len(stopping_muons):
         # project muon to PSUP
@@ -254,18 +229,18 @@ if __name__ == '__main__':
         else:
             plt.title("Stopping Muon Energy Resolution")
 
-    # For the Michel energy plot, we only look at the single particle electron
-    # fit and events where the corresponding muon had less than 2500 nhit.  The
-    # reason for only looking at Michel electrons from muons with less than
-    # 2500 nhit is because there is significant ringing and afterpulsing after
-    # a large muon which can cause the reconstruction to overestimate the
-    # energy.
-    michel = michel[(michel.muon_nhit < 2500) & (michel.id == 20)]
-
     fig = plt.figure()
     bins = np.linspace(0,100,41)
-    plt.hist(michel.ke.values, bins=bins, histtype='step', color='C0', label="Data")
-    plt.hist(michel_mc.ke.values, weights=np.tile(len(michel)/len(michel_mc),len(michel_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo")
+    plt.hist(michel_low_nhit.ke.values, bins=bins, histtype='step', color='C0', label="Data")
+    plt.hist(michel_mc.ke.values, weights=np.tile(len(michel_low_nhit)/len(michel_mc),len(michel_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo")
+    hist = np.histogram(michel_low_nhit.ke.values,bins=bins)[0]
+    hist_mc = np.histogram(michel_mc.ke.values,bins=bins)[0]
+    if len(michel_mc) > 0:
+        norm = len(michel)/len(michel_mc)
+    else:
+        norm = 1.0
+    p = get_multinomial_prob(hist,hist_mc,norm)
+    plt.text(0.95,0.95,"p = %.2f" % p,horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
     despine(fig,trim=True)
     plt.xlabel("Energy (MeV)")
     plt.tight_layout()
diff --git a/utils/sddm/stats.py b/utils/sddm/stats.py
index 96ca87a..9c5e827 100644
--- a/utils/sddm/stats.py
+++ b/utils/sddm/stats.py
@@ -1,5 +1,9 @@
+from __future__ import print_function, division
 import numpy as np
-from scipy.stats import gamma, dirichlet
+from scipy.stats import gamma, dirichlet, multinomial
+from . import grouper
+
+particle_id = {20: 'e', 22: r'\mu'}
 
 def chi2(samples,expected):
     return np.sum((samples-expected)**2/expected,axis=-1)
@@ -8,3 +12,76 @@ def sample_mc_hist(hist, norm):
     alpha = np.ones_like(hist) + hist
     N = gamma.rvs(np.sum(hist)+1e-10,scale=1)
     return dirichlet.rvs(alpha)[0]*N*norm
+
+def get_multinomial_prob(data, mc, norm, size=10000):
+    """
+    Returns the p-value that the histogram of the data is drawn from the MC
+    histogram.
+
+    Arguments:
+
+        data: histogram array of KE values
+        mc: histogram array of MC KE values
+        norm: Normalization constant for the MC
+        size: number of values to compute
+    """
+    chi2_data = []
+    chi2_samples = []
+    for i in range(size):
+        mc_hist = sample_mc_hist(mc,norm=norm)
+        N = mc_hist.sum()
+        # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+        mc_hist = mc_hist + 1e-10
+        p = mc_hist/mc_hist.sum()
+        chi2_data.append(chi2(data,mc_hist))
+        n = np.random.poisson(N)
+        samples = multinomial.rvs(n,p)
+        chi2_samples.append(chi2(samples,mc_hist))
+    chi2_samples = np.array(chi2_samples)
+    chi2_data = np.array(chi2_data)
+    return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)
+
+def plot_hist2_data_mc(data, mc, bins=None, norm=None):
+    import matplotlib.pyplot as plt
+    from matplotlib.lines import Line2D
+
+    if bins is None:
+        bins = np.logspace(np.log10(20),np.log10(10e3),21)
+
+    if norm is None:
+        if len(mc) > 0:
+            norm = len(data)/len(mc)
+        else:
+            norm = 1.0
+
+    handles = [Line2D([0], [0], color='C0'),
+               Line2D([0], [0], color='C1')]
+    labels = ('Data','Monte Carlo')
+
+    for id in (20,22,2020,2022,2222):
+        if id == 20:
+            plt.subplot(2,3,1)
+        elif id == 22:
+            plt.subplot(2,3,2)
+        elif id == 2020:
+            plt.subplot(2,3,4)
+        elif id == 2022:
+            plt.subplot(2,3,5)
+        elif id == 2222:
+            plt.subplot(2,3,6)
+
+        hist = np.histogram(data[data.id == id].ke.values,bins=bins)[0]
+        hist_mc = np.histogram(mc[mc.id == id].ke.values,bins=bins)[0]
+
+        plt.hist(data[data.id == id].ke.values, bins=bins, histtype='step', label="Data", color="C0")
+        plt.hist(mc[mc.id == id].ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(mc[mc.id == id].ke.values)), label="Monte Carlo", color="C1")
+        plt.gca().set_xscale("log")
+        plt.xlabel("Energy (MeV)")
+        p = get_multinomial_prob(hist,hist_mc,norm)
+        plt.text(0.95,0.95,"p = %.2f" % p,horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
+        plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
+
+    plt.gcf().legend(handles,labels,loc='upper right')
+
+    if len(data):
+        plt.tight_layout()