Diffstat (limited to 'utils/chi2')
-rwxr-xr-x  utils/chi2  43
1 file changed, 18 insertions(+), 25 deletions(-)
diff --git a/utils/chi2 b/utils/chi2
index be6b3e6..9835ba3 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -273,7 +273,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
mc_hists[id] += muon_hists[id]*x[5]
return mc_hists
-def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins, percentile=50.0, size=10000):
"""
Returns the p-value for the hypothesis that the data histogram is drawn
from the MC histogram.
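
(For reference: the new `weights` argument is the GENIE reweight table that is merged onto the MC events per universe further down in this diff. Judging from the merge keys and columns used below, it is assumed to hold one row per (run, evn, universe) with a multiplicative weight; the frame here is a made-up illustration of that shape, not real data.)

    import pandas as pd

    # Hypothetical example of the reweight table: one row per event and universe,
    # keyed on run number and event number, carrying a multiplicative weight.
    weights = pd.DataFrame({
        'run':      [10000, 10000, 10001],
        'evn':      [1,     1,     7],
        'universe': [0,     1,     0],
        'weight':   [0.98,  1.05,  1.02],
    })
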
@@ -301,7 +301,7 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
# Get the total number of "universes" simulated in the GENIE reweight tool
if len(data_mc):
- nuniverses = data_mc['universe'].max()+1
+ nuniverses = weights['universe'].max()+1
else:
nuniverses = 0
@@ -313,9 +313,12 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
if nuniverses > 0:
universe = np.random.randint(nuniverses)
+ data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn'])
+ data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0)
else:
- universe = 0
- mc = get_mc_hists_posterior(data_mc[data_mc.universe == universe],muon_hists,data_hists,x,bins)[id]
+ data_mc_with_weights = data_mc.copy()
+ data_mc_with_weights['weight'] = 1.0
+ mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,x,bins)[id]
N = mc.sum()
# Fix a bug in scipy. See https://github.com/scipy/scipy/issues/8235 (I think).
mc = mc + 1e-10
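
(The left merge plus fillna(1.0) added in this hunk is the heart of the change: MC events with no entry in the randomly chosen universe keep a unit weight instead of being dropped. A minimal standalone sketch of that pattern, using toy frames rather than the script's real data:)

    import pandas as pd

    universe = 1
    data_mc = pd.DataFrame({'run': [1, 1, 2], 'evn': [3, 4, 5], 'energy1': [100., 200., 300.]})
    weights = pd.DataFrame({'run': [1, 2], 'evn': [3, 5], 'universe': [1, 1], 'weight': [0.9, 1.1]})

    # Left merge keeps every MC event; events missing from this universe get NaN weights...
    data_mc_with_weights = pd.merge(data_mc, weights[weights.universe == universe],
                                    how='left', on=['run', 'evn'])
    # ...which are then replaced by a unit weight.
    data_mc_with_weights['weight'] = data_mc_with_weights['weight'].fillna(1.0)
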
@@ -337,10 +340,10 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
ps.append(np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples))
return np.percentile(ps,percentile)
-def get_prob(data,muon,mc,samples,bins,size):
+def get_prob(data,muon,mc,weights,samples,bins,size):
prob = {}
for id in (20,22,2020,2022,2222):
- prob[id] = get_multinomial_prob(data,muon,mc,id,samples,bins,size=size)
+ prob[id] = get_multinomial_prob(data,muon,mc,weights,id,samples,bins,size=size)
print(id, prob[id])
return prob
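
(For context, the multinomial p-value computed by get_multinomial_prob follows the usual pseudo-experiment recipe: compare a chi-square-style statistic for the data against the same statistic for multinomial draws from the MC expectation, and quote the fraction of draws at least as extreme. The sketch below shows only that core idea; the real function additionally loops over posterior samples of the fit parameters and over GENIE universes, and the 1e-10 offset mirrors the scipy workaround mentioned above.)

    import numpy as np
    from scipy.stats import multinomial

    def multinomial_pvalue(data_hist, mc_hist, size=10000):
        """Fraction of multinomial pseudo-experiments with chi2 >= the data's chi2."""
        mc_hist = mc_hist + 1e-10                    # avoid zero bins (scipy issue 8235)
        N = int(data_hist.sum())                     # total observed events
        p = mc_hist/mc_hist.sum()                    # expected bin probabilities
        expected = N*p
        chi2_data = np.sum((data_hist - expected)**2/expected)
        samples = multinomial.rvs(N, p, size=size)   # pseudo-experiments, shape (size, nbins)
        chi2_samples = np.sum((samples - expected)**2/expected, axis=1)
        return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)
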
@@ -496,19 +499,6 @@ if __name__ == '__main__':
data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
- # Merge the reweight scales only after we've done all the data cleaning and
- # high level cuts
- data_mc_with_weights = pd.merge(data_mc,weights,how='left',on=['run','evn'])
- data_atm_mc_with_weights = pd.merge(data_atm_mc,weights,how='left',on=['run','evn'])
-
- # Make sure we are only using data with weights
- #
- # FIXME: There is probably a better way to do this. For example, we could
- # use the full dataset for the fit and then scale the normalization factor
- # by the ratio of events with weights to the total number of events.
- data_mc = data_mc_with_weights[data_mc_with_weights.universe == 0]
- data_atm_mc = data_atm_mc_with_weights[data_atm_mc_with_weights.universe == 0]
-
bins = np.logspace(np.log10(20),np.log10(10e3),21)
if args.coverage:
@@ -520,6 +510,9 @@ if __name__ == '__main__':
xtrue = truncnorm_scaled(PRIORS_LOW,PRIORS_HIGH,PRIORS,PRIOR_UNCERTAINTIES)
+ data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == 0],how='left',on=['run','evn'])
+ data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == 0],how='left',on=['run','evn'])
+
for i in range(args.coverage):
# Calculate expected number of events
N = len(data_mc)*xtrue[0]
@@ -534,8 +527,8 @@ if __name__ == '__main__':
n_muon_atm = np.random.poisson(N_muon_atm)
# Sample data from Monte Carlo
- data = pd.concat((data_mc.sample(n=n,replace=True,weights='weight'), muon.sample(n=n_muon,replace=True)))
- data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True,weights='weight'), muon_atm.sample(n=n_muon_atm,replace=True)))
+ data = pd.concat((data_mc_with_weights.sample(n=n,replace=True,weights='weight'), muon.sample(n=n_muon,replace=True)))
+ data_atm = pd.concat((data_atm_mc_with_weights.sample(n=n_atm,replace=True,weights='weight'), muon_atm.sample(n=n_muon_atm,replace=True)))
# Smear the energies by the additional energy resolution
data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
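
(The two pd.concat lines above build the fake data sets by weighted resampling: passing weights='weight' to DataFrame.sample draws each MC event with probability proportional to its universe-0 weight, so the pseudo-data reflects the reweighting. A toy illustration of that behaviour, with made-up numbers:)

    import pandas as pd

    toy = pd.DataFrame({'energy1': [100., 200., 300.],
                        'weight':  [0.5,  1.0,  2.0]})
    # Rows are drawn with probability proportional to 'weight', so the 2.0-weight
    # event appears about four times as often as the 0.5-weight one.
    fake = toy.sample(n=1000, replace=True, weights='weight')
    print(fake['energy1'].value_counts(normalize=True))
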
@@ -552,8 +545,8 @@ if __name__ == '__main__':
xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
- prob = get_prob(data,muon,data_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
for id in (20,22,2020,2022,2222):
p_values[id].append(prob[id])
@@ -675,8 +668,8 @@ if __name__ == '__main__':
xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
- prob = get_prob(data,muon,data_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
plt.figure()
for i in range(len(FIT_PARS)):