aboutsummaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rwxr-xr-xutils/chi2213
1 files changed, 110 insertions, 103 deletions
diff --git a/utils/chi2 b/utils/chi2
index 3b519e2..444638b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -36,13 +36,15 @@ from sddm.stats import *
# Uncertainty on the energy scale
# FIXME: Should get real number from stopping muons
-ENERGY_SCALE_MEAN = 1.0
-ENERGY_SCALE_UNCERTAINTY = 0.1
+ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0}
+ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1}
+ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0}
+ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1}
particle_id = {20: 'e', 22: r'\mu'}
-def plot_hist2(df, norm=1.0, scale=1.0, color=None):
- for id, df_id in sorted(df.groupby('id')):
+def plot_hist2(hists, bins, color=None):
+ for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)
elif id == 22:
@@ -54,8 +56,8 @@ def plot_hist2(df, norm=1.0, scale=1.0, color=None):
elif id == 2222:
plt.subplot(2,3,6)
- bins = np.logspace(np.log10(20),np.log10(10e3),21)
- plt.hist(df_id.ke.values*scale, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color)
+ bincenters = (bins[1:] + bins[:-1])/2
+ plt.hist(bincenters, bins=bins, histtype='step', weights=hists[id],color=color)
plt.gca().set_xscale("log")
plt.xlabel("Energy (MeV)")
plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
@@ -63,69 +65,85 @@ def plot_hist2(df, norm=1.0, scale=1.0, color=None):
if len(df):
plt.tight_layout()
-def plot_hist(df, muons=False):
- for id, df_id in sorted(df.groupby('id')):
- if id == 20:
- plt.subplot(3,4,1)
- elif id == 22:
- plt.subplot(3,4,2)
- elif id == 2020:
- plt.subplot(3,4,5)
+def get_mc_hists(data,x,bins,apply_norm=False):
+ mc_hists = {}
+
+ # FIXME: May need to increase number of bins here
+ bins2 = np.logspace(np.log10(20),np.log10(10e3),1000)
+ bincenters2 = (bins2[1:] + bins2[:-1])/2
+
+ for id in (20,22,2020,2022,2222):
+ ke = data[data.id == id].ke.values
+
+ if id in (20,2020):
+ ke = ke*x[1]
+ scale = bincenters2*x[2]
+ elif id in (22,2222):
+ ke = ke*x[3]
+ scale = bincenters2*x[4]
elif id == 2022:
- plt.subplot(3,4,6)
- elif id == 2222:
- plt.subplot(3,4,7)
- elif id == 202020:
- plt.subplot(3,4,9)
- elif id == 202022:
- plt.subplot(3,4,10)
- elif id == 202222:
- plt.subplot(3,4,11)
- elif id == 222222:
- plt.subplot(3,4,12)
-
- if muons:
- plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
- plt.xlabel("log10(Energy (GeV))")
- else:
- plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
- plt.xlabel("Energy (MeV)")
- plt.title(str(id))
+ ke = ke*x[5]
+ scale = bincenters2*x[6]
- if len(df):
- plt.tight_layout()
+ hist = np.histogram(ke,bins=bins2)[0]
+
+ cdf = norm.cdf(bins[:,np.newaxis],bincenters2,scale)*hist
+ mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1)
+ if apply_norm:
+ mc_hists[id] *= x[0]
+ return mc_hists
+
+def get_data_hists(data,bins):
+ data_hists = {}
+ for id in (20,22,2020,2022,2222):
+ df_id = data[data.id == id]
+ data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ return data_hists
+
+# Likelihood Fit Parameters
+# 0 - Atmospheric Neutrino Flux Scale
+# 1 - Electron energy bias
+# 2 - Electron energy resolution
+# 3 - Muon energy bias
+# 4 - Muon energy resolution
+# 5 - Electron + Muon energy bias
+# 6 - Electron + Muon energy resolution
+
+def make_nll(data, mc, bins):
+ data_hists = get_data_hists(data,bins)
-def make_nll(data, mc_hists):
def nll(x, grad=None):
- if x[0] < 0 or x[1] < 0:
+ if any(x[i] < 0 for i in range(len(x))):
return np.inf
- data_hists = {}
- for id in (20,22,2020,2022,2222):
- df_id = data[data.id == id]
- if len(df_id):
- data_hists[id] = np.histogram(df_id.ke.values*x[1],bins=bins)[0]
- else:
- data_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+ mc_hists = get_mc_hists(mc,x,bins,apply_norm=True)
nll = 0
for id in data_hists:
- oi = data_hists[id].sum()
+ oi = data_hists[id]
ei = mc_hists[id] + EPSILON
- N = oi.sum()
- n = ei.sum()
+ N = ei.sum()
nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))
- return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY)
+ nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e'])
+ nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u'])
+ nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu'])
+ nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e'])
+ nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u'])
+ nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu'])
+ print("nll = %.2f" % nll)
+ return nll
return nll
-def get_mc_hist(data_mc,data_hist,x,bins):
- hist = np.histogram(data_mc,bins=bins)[0]
- return get_mc_hist_posterior(hist,data_hist,norm=x[0]/100.0)
+def get_mc_hists_posterior(data,data_hists,x,bins):
+ mc_hists = get_mc_hists(data,x,bins)
+ for id in (20,22,2020,2022,2222):
+ mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
+ return mc_hists
def get_data_hist(data,x,bins):
- return np.histogram(data*x[1],bins=bins)[0]
+ return np.histogram(data,bins=bins)[0]
-def get_multinomial_prob(data, data_mc, x_samples, bins, percentile=99.0, size=10000):
+def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, size=10000):
"""
Returns the p-value that the histogram of the data is drawn from the MC
histogram.
@@ -145,16 +163,17 @@ def get_multinomial_prob(data, data_mc, x_samples, bins, percentile=99.0, size=1
bins: bins used to bin the mc histogram
size: number of values to compute
"""
+ data_hists = get_data_hists(data,bins)
+
ps = []
for i in range(size):
x = x_samples[np.random.randint(x_samples.shape[0])]
- data_hist = get_data_hist(data,x,bins)
- mc = get_mc_hist(data_mc,data_hist,x,bins)
+ mc = get_mc_hists_posterior(data_mc,data_hists,x,bins)[id]
N = mc.sum()
# Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
mc = mc + 1e-10
p = mc/mc.sum()
- chi2_data = nllr(data_hist,mc)
+ chi2_data = nllr(data_hists[id],mc)
# To draw the multinomial samples we first draw the expected number of
# events from a Poisson distribution and then loop over the counts and
# unique values. The reason we do this is that you can't call
@@ -254,59 +273,28 @@ if __name__ == '__main__':
mc = prompt_mc
bins = np.logspace(np.log10(20),np.log10(10e3),21)
- data_hists = {}
- mc_hists = {}
- for id in (20,22,2020,2022,2222):
- df_id = data[data.id == id]
- if len(df_id):
- data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
- else:
- data_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
- for id in (20,22,2020,2022,2222):
- df_id = mc[mc.id == id]
- if len(df_id):
- mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100
- else:
- mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
-
- data_atm_hists = {}
- mc_atm_hists = {}
- for id in (20,22,2020,2022,2222):
- df_id = atm[atm.id == id]
- if len(df_id):
- data_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
- else:
- data_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+ nll = make_nll(data,mc,bins)
- for id in (20,22,2020,2022,2222):
- df_id = atm_mc[atm_mc.id == id]
- if len(df_id):
- mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100
- else:
- mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
-
- nll = make_nll(data,mc_hists)
-
- x0 = np.array([1.0,1.0])
+ x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON])
opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
opt.set_min_objective(nll)
- low = np.array([1e-10,1e-10])
- high = np.array([10,10])
+ low = np.array([EPSILON]*len(x0))
+ high = np.array([10]*len(x0))
opt.set_lower_bounds(low)
opt.set_upper_bounds(high)
opt.set_ftol_abs(1e-10)
- opt.set_initial_step([0.01,0.01])
+ opt.set_initial_step([0.01]*len(x0))
xopt = opt.optimize(x0)
print("xopt = ", xopt)
nll_xopt = nll(xopt)
print("nll(xopt) = ", nll(xopt))
- pos = np.empty((10, len(x0)),dtype=np.double)
+ pos = np.empty((20, len(x0)),dtype=np.double)
for i in range(pos.shape[0]):
pos[i] = xopt + np.random.randn(len(x0))*0.1
- pos[i,:] = np.clip(pos[i,:],1e-10,10)
+ pos[i,:] = np.clip(pos[i,:],low,high)
nwalkers, ndim = pos.shape
@@ -324,21 +312,36 @@ if __name__ == '__main__':
samples = sampler.chain.reshape((-1,len(x0)))
plt.figure()
- plt.subplot(2,2,1)
+ plt.subplot(3,3,1)
plt.hist(samples[:,0],bins=100,histtype='step')
plt.xlabel("Atmospheric Flux Scale")
- plt.subplot(2,2,2)
+ plt.subplot(3,3,2)
+ plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.xlabel("Electron Energy Scale")
+ plt.subplot(3,3,3)
+ plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.xlabel("Electron Energy Resolution")
+ plt.subplot(3,3,4)
+ plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.xlabel("Muon Energy Scale")
+ plt.subplot(3,3,5)
+ plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.xlabel("Muon Energy Resolution")
+ plt.subplot(3,3,6)
+ plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.xlabel("Electron + Muon Energy Scale")
+ plt.subplot(3,3,7)
plt.hist(samples[:,1],bins=100,histtype='step')
- plt.xlabel("Energy Scale")
+ plt.xlabel("Electron + Muon Energy Resolution")
prob = {}
for id in (20,22,2020,2022,2222):
- prob[id] = get_multinomial_prob(data[data.id == id].ke.values,mc[mc.id == id].ke.values,samples,bins)
+ prob[id] = get_multinomial_prob(data,mc,id,samples,bins)
print(id, prob[id])
prob_atm = {}
for id in (20,22,2020,2022,2222):
- prob_atm[id] = get_multinomial_prob(atm[atm.id == id].ke.values,atm_mc[atm_mc.id == id].ke.values,samples,bins)
+ prob_atm[id] = get_multinomial_prob(atm,atm_mc,id,samples,bins)
print(id, prob_atm[id])
handles = [Line2D([0], [0], color='C0'),
@@ -346,8 +349,10 @@ if __name__ == '__main__':
labels = ('Data','Monte Carlo')
fig = plt.figure()
- plot_hist2(prompt,scale=xopt[1],color='C0')
- plot_hist2(prompt_mc,norm=xopt[0]/100,color='C1')
+ hists = get_data_hists(prompt,bins)
+ hists_mc = get_mc_hists(prompt_mc,xopt,bins,apply_norm=True)
+ plot_hist2(hists,bins=bins,color='C0')
+ plot_hist2(hists_mc,bins=bins,color='C1')
for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)
@@ -369,8 +374,10 @@ if __name__ == '__main__':
else:
plt.suptitle("Without Neutron Follower")
fig = plt.figure()
- plot_hist2(atm,scale=xopt[1],color='C0')
- plot_hist2(atm_mc,norm=xopt[0]/100,color='C1')
+ hists = get_data_hists(atm,bins)
+ hists_mc = get_mc_hists(atm_mc,xopt,bins,apply_norm=True)
+ plot_hist2(hists,bins=bins,color='C0')
+ plot_hist2(hists_mc,bins=bins,color='C1')
for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)