aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xutils/chi23
-rwxr-xr-xutils/plot-muons67
-rw-r--r--utils/sddm/stats.py79
3 files changed, 99 insertions, 50 deletions
diff --git a/utils/chi2 b/utils/chi2
index 72ad89b..f1ae974 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -121,9 +121,6 @@ def make_nll(data, mc_hists):
return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY)
return nll
-def chi2(samples,expected):
- return np.sum((samples-expected)**2/expected,axis=-1)
-
def get_mc_hist(data,x,bins):
hist = np.histogram(data,bins=bins)[0]
return sample_mc_hist(hist,norm=x[0]/100.0)
diff --git a/utils/plot-muons b/utils/plot-muons
index 11d2d2e..f130444 100755
--- a/utils/plot-muons
+++ b/utils/plot-muons
@@ -25,38 +25,8 @@ and michel electrons.
from __future__ import print_function, division
import numpy as np
from scipy.stats import iqr, poisson
-from matplotlib.lines import Line2D
from scipy.stats import iqr, norm, beta
-from scipy.special import spence
-from itertools import izip_longest
-
-particle_id = {20: 'e', 22: r'\mu'}
-
-def plot_hist2(df, muons=False, norm=1.0, label=None, color=None):
- for id, df_id in sorted(df.groupby('id')):
- if id == 20:
- plt.subplot(2,3,1)
- elif id == 22:
- plt.subplot(2,3,2)
- elif id == 2020:
- plt.subplot(2,3,4)
- elif id == 2022:
- plt.subplot(2,3,5)
- elif id == 2222:
- plt.subplot(2,3,6)
-
- if muons:
- plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step', weights=np.tile(norm,len(df_id.ke.values)), label=label, color=color)
- plt.xlabel("log10(Energy (GeV))")
- else:
- bins = np.logspace(np.log10(20),np.log10(10e3),21)
- plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)), label=label, color=color)
- plt.gca().set_xscale("log")
- plt.xlabel("Energy (MeV)")
- plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
-
- if len(df):
- plt.tight_layout()
+from sddm.stats import *
if __name__ == '__main__':
import argparse
@@ -134,9 +104,15 @@ if __name__ == '__main__':
Line2D([0], [0], color='C1')]
labels = ('Data','Monte Carlo')
+ # For the Michel energy plot, we only look at events where the
+ # corresponding muon had less than 2500 nhit. The reason for only looking
+ # at Michel electrons from muons with less than 2500 nhit is because there
+ # is significant ringing and afterpulsing after a large muon which can
+ # cause the reconstruction to overestimate the energy.
+ michel_low_nhit = michel[michel.muon_nhit < 2500]
+
fig = plt.figure()
- plot_hist2(michel,color='C0')
- plot_hist2(michel_mc,norm=len(michel)/len(michel_mc),color='C1')
+ plot_hist2_data_mc(michel_low_nhit,michel_mc)
despine(fig,trim=True)
fig.legend(handles,labels,loc='upper right')
if args.save:
@@ -146,8 +122,7 @@ if __name__ == '__main__':
plt.suptitle("Michel Electrons")
fig = plt.figure()
- plot_hist2(muons,muons=True,color='C0')
- plot_hist2(muons_mc,norm=len(muons)/len(muons_mc),muons=True,color='C1')
+ plot_hist2_data_mc(muons,muons_mc)
despine(fig,trim=True)
if len(muons):
plt.tight_layout()
@@ -186,7 +161,7 @@ if __name__ == '__main__':
stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.nhit_cal < 4000]
stopping_muons = stopping_muons[stopping_muons.udotr < -0.5]
- stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.nhit_cal < -0.5]
+ stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.udotr < -0.5]
if len(stopping_muons):
# project muon to PSUP
@@ -254,18 +229,18 @@ if __name__ == '__main__':
else:
plt.title("Stopping Muon Energy Resolution")
- # For the Michel energy plot, we only look at the single particle electron
- # fit and events where the corresponding muon had less than 2500 nhit. The
- # reason for only looking at Michel electrons from muons with less than
- # 2500 nhit is because there is significant ringing and afterpulsing after
- # a large muon which can cause the reconstruction to overestimate the
- # energy.
- michel = michel[(michel.muon_nhit < 2500) & (michel.id == 20)]
-
fig = plt.figure()
bins = np.linspace(0,100,41)
- plt.hist(michel.ke.values, bins=bins, histtype='step', color='C0', label="Data")
- plt.hist(michel_mc.ke.values, weights=np.tile(len(michel)/len(michel_mc),len(michel_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo")
+ plt.hist(michel_low_nhit.ke.values, bins=bins, histtype='step', color='C0', label="Data")
+ plt.hist(michel_mc.ke.values, weights=np.tile(len(michel_low_nhit)/len(michel_mc),len(michel_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo")
+ hist = np.histogram(michel_low_nhit.ke.values,bins=bins)[0]
+ hist_mc = np.histogram(michel_mc.ke.values,bins=bins)[0]
+ if len(michel_mc) > 0:
+ norm = len(michel)/len(michel_mc)
+ else:
+ norm = 1.0
+ p = get_multinomial_prob(hist,hist_mc,norm)
+ plt.text(0.95,0.95,"p = %.2f" % p,horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
despine(fig,trim=True)
plt.xlabel("Energy (MeV)")
plt.tight_layout()
diff --git a/utils/sddm/stats.py b/utils/sddm/stats.py
index 96ca87a..9c5e827 100644
--- a/utils/sddm/stats.py
+++ b/utils/sddm/stats.py
@@ -1,5 +1,9 @@
+from __future__ import print_function, division
import numpy as np
-from scipy.stats import gamma, dirichlet
+from scipy.stats import gamma, dirichlet, multinomial
+from . import grouper
+
+particle_id = {20: 'e', 22: r'\mu'}
def chi2(samples,expected):
return np.sum((samples-expected)**2/expected,axis=-1)
@@ -8,3 +12,76 @@ def sample_mc_hist(hist, norm):
alpha = np.ones_like(hist) + hist
N = gamma.rvs(np.sum(hist)+1e-10,scale=1)
return dirichlet.rvs(alpha)[0]*N*norm
+
+def get_multinomial_prob(data, mc, norm, size=10000):
+ """
+ Returns the p-value that the histogram of the data is drawn from the MC
+ histogram.
+
+ Arguments:
+
+ data: histogram array of KE values
+ mc: histogram array of MC KE values
+ norm: Normalization constant for the MC
+ size: number of values to compute
+ """
+ chi2_data = []
+ chi2_samples = []
+ for i in range(size):
+ mc_hist = sample_mc_hist(mc,norm=norm)
+ N = mc_hist.sum()
+ # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+ mc_hist = mc_hist + 1e-10
+ p = mc_hist/mc_hist.sum()
+ chi2_data.append(chi2(data,mc_hist))
+ n = np.random.poisson(N)
+ samples = multinomial.rvs(n,p)
+ chi2_samples.append(chi2(samples,mc_hist))
+ chi2_samples = np.array(chi2_samples)
+ chi2_data = np.array(chi2_data)
+ return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)
+
+def plot_hist2_data_mc(data, mc, bins=None, norm=None):
+ import matplotlib.pyplot as plt
+ from matplotlib.lines import Line2D
+
+ if bins is None:
+ bins = np.logspace(np.log10(20),np.log10(10e3),21)
+
+ if norm is None:
+ if len(mc) > 0:
+ norm = len(data)/len(mc)
+ else:
+ norm = 1.0
+
+ handles = [Line2D([0], [0], color='C0'),
+ Line2D([0], [0], color='C1')]
+ labels = ('Data','Monte Carlo')
+
+ for id in (20,22,2020,2022,2222):
+ if id == 20:
+ plt.subplot(2,3,1)
+ elif id == 22:
+ plt.subplot(2,3,2)
+ elif id == 2020:
+ plt.subplot(2,3,4)
+ elif id == 2022:
+ plt.subplot(2,3,5)
+ elif id == 2222:
+ plt.subplot(2,3,6)
+
+ hist = np.histogram(data[data.id == id].ke.values,bins=bins)[0]
+ hist_mc = np.histogram(mc[mc.id == id].ke.values,bins=bins)[0]
+
+ plt.hist(data[data.id == id].ke.values, bins=bins, histtype='step', label="Data", color="C0")
+ plt.hist(mc[mc.id == id].ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(mc[mc.id == id].ke.values)), label="Monte Carlo", color="C1")
+ plt.gca().set_xscale("log")
+ plt.xlabel("Energy (MeV)")
+ p = get_multinomial_prob(hist,hist_mc,norm)
+ plt.text(0.95,0.95,"p = %.2f" % p,horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
+ plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
+
+ plt.gcf().legend(handles,labels,loc='upper right')
+
+ if len(data):
+ plt.tight_layout()