diff options
-rwxr-xr-x | utils/chi2 | 3 | ||||
-rwxr-xr-x | utils/plot-muons | 67 | ||||
-rw-r--r-- | utils/sddm/stats.py | 79 |
3 files changed, 99 insertions, 50 deletions
@@ -121,9 +121,6 @@ def make_nll(data, mc_hists): return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY) return nll -def chi2(samples,expected): - return np.sum((samples-expected)**2/expected,axis=-1) - def get_mc_hist(data,x,bins): hist = np.histogram(data,bins=bins)[0] return sample_mc_hist(hist,norm=x[0]/100.0) diff --git a/utils/plot-muons b/utils/plot-muons index 11d2d2e..f130444 100755 --- a/utils/plot-muons +++ b/utils/plot-muons @@ -25,38 +25,8 @@ and michel electrons. from __future__ import print_function, division import numpy as np from scipy.stats import iqr, poisson -from matplotlib.lines import Line2D from scipy.stats import iqr, norm, beta -from scipy.special import spence -from itertools import izip_longest - -particle_id = {20: 'e', 22: r'\mu'} - -def plot_hist2(df, muons=False, norm=1.0, label=None, color=None): - for id, df_id in sorted(df.groupby('id')): - if id == 20: - plt.subplot(2,3,1) - elif id == 22: - plt.subplot(2,3,2) - elif id == 2020: - plt.subplot(2,3,4) - elif id == 2022: - plt.subplot(2,3,5) - elif id == 2222: - plt.subplot(2,3,6) - - if muons: - plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step', weights=np.tile(norm,len(df_id.ke.values)), label=label, color=color) - plt.xlabel("log10(Energy (GeV))") - else: - bins = np.logspace(np.log10(20),np.log10(10e3),21) - plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)), label=label, color=color) - plt.gca().set_xscale("log") - plt.xlabel("Energy (MeV)") - plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$') - - if len(df): - plt.tight_layout() +from sddm.stats import * if __name__ == '__main__': import argparse @@ -134,9 +104,15 @@ if __name__ == '__main__': Line2D([0], [0], color='C1')] labels = ('Data','Monte Carlo') + # For the Michel energy plot, we only look at events where the + # corresponding muon had less than 2500 nhit. The reason for only looking + # at Michel electrons from muons with less than 2500 nhit is because there + # is significant ringing and afterpulsing after a large muon which can + # cause the reconstruction to overestimate the energy. + michel_low_nhit = michel[michel.muon_nhit < 2500] + fig = plt.figure() - plot_hist2(michel,color='C0') - plot_hist2(michel_mc,norm=len(michel)/len(michel_mc),color='C1') + plot_hist2_data_mc(michel_low_nhit,michel_mc) despine(fig,trim=True) fig.legend(handles,labels,loc='upper right') if args.save: @@ -146,8 +122,7 @@ if __name__ == '__main__': plt.suptitle("Michel Electrons") fig = plt.figure() - plot_hist2(muons,muons=True,color='C0') - plot_hist2(muons_mc,norm=len(muons)/len(muons_mc),muons=True,color='C1') + plot_hist2_data_mc(muons,muons_mc) despine(fig,trim=True) if len(muons): plt.tight_layout() @@ -186,7 +161,7 @@ if __name__ == '__main__': stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.nhit_cal < 4000] stopping_muons = stopping_muons[stopping_muons.udotr < -0.5] - stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.nhit_cal < -0.5] + stopping_muons_mc = stopping_muons_mc[stopping_muons_mc.udotr < -0.5] if len(stopping_muons): # project muon to PSUP @@ -254,18 +229,18 @@ if __name__ == '__main__': else: plt.title("Stopping Muon Energy Resolution") - # For the Michel energy plot, we only look at the single particle electron - # fit and events where the corresponding muon had less than 2500 nhit. The - # reason for only looking at Michel electrons from muons with less than - # 2500 nhit is because there is significant ringing and afterpulsing after - # a large muon which can cause the reconstruction to overestimate the - # energy. - michel = michel[(michel.muon_nhit < 2500) & (michel.id == 20)] - fig = plt.figure() bins = np.linspace(0,100,41) - plt.hist(michel.ke.values, bins=bins, histtype='step', color='C0', label="Data") - plt.hist(michel_mc.ke.values, weights=np.tile(len(michel)/len(michel_mc),len(michel_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo") + plt.hist(michel_low_nhit.ke.values, bins=bins, histtype='step', color='C0', label="Data") + plt.hist(michel_mc.ke.values, weights=np.tile(len(michel_low_nhit)/len(michel_mc),len(michel_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo") + hist = np.histogram(michel_low_nhit.ke.values,bins=bins)[0] + hist_mc = np.histogram(michel_mc.ke.values,bins=bins)[0] + if len(michel_mc) > 0: + norm = len(michel)/len(michel_mc) + else: + norm = 1.0 + p = get_multinomial_prob(hist,hist_mc,norm) + plt.text(0.95,0.95,"p = %.2f" % p,horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) despine(fig,trim=True) plt.xlabel("Energy (MeV)") plt.tight_layout() diff --git a/utils/sddm/stats.py b/utils/sddm/stats.py index 96ca87a..9c5e827 100644 --- a/utils/sddm/stats.py +++ b/utils/sddm/stats.py @@ -1,5 +1,9 @@ +from __future__ import print_function, division import numpy as np -from scipy.stats import gamma, dirichlet +from scipy.stats import gamma, dirichlet, multinomial +from . import grouper + +particle_id = {20: 'e', 22: r'\mu'} def chi2(samples,expected): return np.sum((samples-expected)**2/expected,axis=-1) @@ -8,3 +12,76 @@ def sample_mc_hist(hist, norm): alpha = np.ones_like(hist) + hist N = gamma.rvs(np.sum(hist)+1e-10,scale=1) return dirichlet.rvs(alpha)[0]*N*norm + +def get_multinomial_prob(data, mc, norm, size=10000): + """ + Returns the p-value that the histogram of the data is drawn from the MC + histogram. + + Arguments: + + data: histogram array of KE values + mc: histogram array of MC KE values + norm: Normalization constant for the MC + size: number of values to compute + """ + chi2_data = [] + chi2_samples = [] + for i in range(size): + mc_hist = sample_mc_hist(mc,norm=norm) + N = mc_hist.sum() + # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). + mc_hist = mc_hist + 1e-10 + p = mc_hist/mc_hist.sum() + chi2_data.append(chi2(data,mc_hist)) + n = np.random.poisson(N) + samples = multinomial.rvs(n,p) + chi2_samples.append(chi2(samples,mc_hist)) + chi2_samples = np.array(chi2_samples) + chi2_data = np.array(chi2_data) + return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples) + +def plot_hist2_data_mc(data, mc, bins=None, norm=None): + import matplotlib.pyplot as plt + from matplotlib.lines import Line2D + + if bins is None: + bins = np.logspace(np.log10(20),np.log10(10e3),21) + + if norm is None: + if len(mc) > 0: + norm = len(data)/len(mc) + else: + norm = 1.0 + + handles = [Line2D([0], [0], color='C0'), + Line2D([0], [0], color='C1')] + labels = ('Data','Monte Carlo') + + for id in (20,22,2020,2022,2222): + if id == 20: + plt.subplot(2,3,1) + elif id == 22: + plt.subplot(2,3,2) + elif id == 2020: + plt.subplot(2,3,4) + elif id == 2022: + plt.subplot(2,3,5) + elif id == 2222: + plt.subplot(2,3,6) + + hist = np.histogram(data[data.id == id].ke.values,bins=bins)[0] + hist_mc = np.histogram(mc[mc.id == id].ke.values,bins=bins)[0] + + plt.hist(data[data.id == id].ke.values, bins=bins, histtype='step', label="Data", color="C0") + plt.hist(mc[mc.id == id].ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(mc[mc.id == id].ke.values)), label="Monte Carlo", color="C1") + plt.gca().set_xscale("log") + plt.xlabel("Energy (MeV)") + p = get_multinomial_prob(hist,hist_mc,norm) + plt.text(0.95,0.95,"p = %.2f" % p,horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) + plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$') + + plt.gcf().legend(handles,labels,loc='upper right') + + if len(data): + plt.tight_layout() |