aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xutils/chi273
1 files changed, 38 insertions, 35 deletions
diff --git a/utils/chi2 b/utils/chi2
index 30a0fbe..be99626 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -71,21 +71,21 @@ FIT_PARS = [
# - For the electron + muon energy resolution, I don't have any real good way
# to estimate this, so we are conservative and set the error to 10%.
PRIORS = [
- 0.01, # Atmospheric Neutrino Scale
+ 1.0, # Atmospheric Neutrino Scale
0.0, # Electron energy scale
0.0, # Electron energy resolution
0.066, # Muon energy scale
0.0, # Muon energy resolution
- 0.01, # Muon scale
+ 0.0, # Muon scale
]
PRIOR_UNCERTAINTIES = [
- 0.002, # Atmospheric Neutrino Scale
    0.2,  # Atmospheric Neutrino Scale
0.1, # Electron energy scale
0.1, # Electron energy resolution
0.011, # Muon energy scale
0.014, # Muon energy resolution
- 0.001, # Muon scale
+ 10.0, # Muon scale
]
# Lower bounds for the fit parameters
@@ -201,7 +201,7 @@ def get_data_hists(data,bins,scale=1.0):
data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale
return data_hists
-def make_nll(data, muons, mc, bins, print_nll=False):
+def make_nll(data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False):
df_dict = {}
for id in (20,22,2020,2022,2222):
df_dict[id] = mc[mc.id == id]
@@ -219,8 +219,8 @@ def make_nll(data, muons, mc, bins, print_nll=False):
# Get the Monte Carlo histograms. We need to do this within the
# likelihood function since we apply the energy resolution parameters
# to the Monte Carlo.
- mc_hists = get_mc_hists_fast(df_dict,x,bins)
- muon_hists = get_mc_hists_fast(df_dict_muon,x,bins)
+ mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor)
+ muon_hists = get_mc_hists_fast(df_dict_muon,x,bins,scale=1/muon_scale_factor)
# Calculate the negative log of the likelihood of observing the data
# given the fit parameters
@@ -242,7 +242,7 @@ def make_nll(data, muons, mc, bins, print_nll=False):
return nll
return nll
-def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
+def get_mc_hists_posterior(data_mc,muon_hists,data_hists,atmo_scale_factor,muon_scale_factor,x,bins):
"""
Returns the posterior on the Monte Carlo histograms.
@@ -268,12 +268,12 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
"""
mc_hists = get_mc_hists(data_mc,x,bins,reweight=True)
for id in (20,22,2020,2022,2222):
- mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
+ mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]/atmo_scale_factor)
        # FIXME: does the ordering of when we add the muons matter here?
- mc_hists[id] += muon_hists[id]*x[5]
+ mc_hists[id] += muon_hists[id]*x[5]/muon_scale_factor
return mc_hists
-def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins, percentile=50.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, weights, atmo_scale_factor, muon_scale_factor, id, x_samples, bins, percentile=50.0, size=10000):
"""
Returns the p-value that the histogram of the data is drawn from the MC
histogram.
@@ -318,7 +318,7 @@ def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins,
else:
data_mc_with_weights = data_mc.copy()
data_mc['weight'] = 1.0
- mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,x,bins)[id]
+ mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,atmo_scale_factor,muon_scale_factor,x,bins)[id]
N = mc.sum()
# Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
mc = mc + 1e-10
@@ -340,14 +340,14 @@ def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins,
ps.append(np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples))
return np.percentile(ps,percentile)
-def get_prob(data,muon,mc,weights,samples,bins,size):
+def get_prob(data,muon,mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size):
prob = {}
for id in (20,22,2020,2022,2222):
- prob[id] = get_multinomial_prob(data,muon,mc,weights,id,samples,bins,size=size)
+ prob[id] = get_multinomial_prob(data,muon,mc,weights,atmo_scale_factor,muon_scale_factor,id,samples,bins,size=size)
print(id, prob[id])
return prob
-def do_fit(data,muon,data_mc,bins,steps,print_nll=False,walkers=100,thin=10):
+def do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
"""
Run the fit and return the minimum along with samples from running an MCMC
starting near the minimum.
@@ -364,7 +364,7 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False,walkers=100,thin=10):
Returns a tuple (xopt, samples) where samples is an array of shape (steps,
number of parameters).
"""
- nll = make_nll(data,muon,data_mc,bins,print_nll)
+ nll = make_nll(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll)
pos = np.empty((walkers, len(PRIORS)),dtype=np.double)
for i in range(pos.shape[0]):
@@ -487,6 +487,9 @@ if __name__ == '__main__':
bins = np.logspace(np.log10(20),np.log10(10e3),21)
+ atmo_scale_factor = 100.0
+ muon_scale_factor = len(muon) + len(muon_atm)
+
if args.coverage:
p_values = {id: [] for id in (20,22,2020,2022,2222)}
p_values_atm = {id: [] for id in (20,22,2020,2022,2222)}
@@ -501,10 +504,10 @@ if __name__ == '__main__':
for i in range(args.coverage):
# Calculate expected number of events
- N = len(data_mc)*xtrue[0]
- N_atm = len(data_atm_mc)*xtrue[0]
- N_muon = len(muon)*xtrue[5]
- N_muon_atm = len(muon_atm)*xtrue[5]
+ N = len(data_mc)*xtrue[0]/atmo_scale_factor
+ N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
+ N_muon = len(muon)*xtrue[5]/muon_scale_factor
+ N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
# Calculate observed number of events
n = np.random.poisson(N)
@@ -529,10 +532,10 @@ if __name__ == '__main__':
data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
- xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
- prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
for id in (20,22,2020,2022,2222):
p_values[id].append(prob[id])
@@ -596,10 +599,10 @@ if __name__ == '__main__':
xtrue = truncnorm_scaled(PRIORS_LOW,PRIORS_HIGH,PRIORS,PRIOR_UNCERTAINTIES)
# Calculate expected number of events
- N = len(data_mc)*xtrue[0]
- N_atm = len(data_atm_mc)*xtrue[0]
- N_muon = len(muon)*xtrue[5]
- N_muon_atm = len(muon_atm)*xtrue[5]
+ N = len(data_mc)*xtrue[0]/atmo_scale_factor
+ N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
+ N_muon = len(muon)*xtrue[5]/muon_scale_factor
+ N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
# Calculate observed number of events
n = np.random.poisson(N)
@@ -624,7 +627,7 @@ if __name__ == '__main__':
data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
- xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
for i in range(len(FIT_PARS)):
# The "pull plots" we make here are actually produced via a
@@ -652,10 +655,10 @@ if __name__ == '__main__':
sys.exit(0)
- xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
- prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
plt.figure()
for i in range(len(FIT_PARS)):
@@ -679,8 +682,8 @@ if __name__ == '__main__':
fig = plt.figure()
hists = get_data_hists(data,bins)
- hists_muon = get_mc_hists(muon,xopt,bins,scale=xopt[5])
- hists_mc = get_mc_hists(data_mc,xopt,bins,scale=xopt[0])
+ hists_muon = get_mc_hists(muon,xopt,bins,scale=xopt[5]/muon_scale_factor)
+ hists_mc = get_mc_hists(data_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor)
plot_hist2(hists,bins=bins,color='C0')
plot_hist2(hists_mc,bins=bins,color='C1')
plot_hist2(hists_muon,bins=bins,color='C2')
@@ -706,8 +709,8 @@ if __name__ == '__main__':
plt.suptitle("Without Neutron Follower")
fig = plt.figure()
hists = get_data_hists(data_atm,bins)
- hists_muon = get_mc_hists(muon_atm,xopt,bins,scale=xopt[5])
- hists_mc = get_mc_hists(data_atm_mc,xopt,bins,scale=xopt[0])
+ hists_muon = get_mc_hists(muon_atm,xopt,bins,scale=xopt[5]/muon_scale_factor)
+ hists_mc = get_mc_hists(data_atm_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor)
plot_hist2(hists,bins=bins,color='C0')
plot_hist2(hists_mc,bins=bins,color='C1')
plot_hist2(hists_muon,bins=bins,color='C2')