-rwxr-xr-x | utils/chi2 | 43
1 file changed, 18 insertions(+), 25 deletions(-)
@@ -273,7 +273,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
         mc_hists[id] += muon_hists[id]*x[5]
     return mc_hists
 
-def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins, percentile=50.0, size=10000):
     """
     Returns the p-value that the histogram of the data is drawn from the MC
     histogram.
@@ -301,7 +301,7 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
 
     # Get the total number of "universes" simulated in the GENIE reweight tool
     if len(data_mc):
-        nuniverses = data_mc['universe'].max()+1
+        nuniverses = weights['universe'].max()+1
     else:
         nuniverses = 0
 
@@ -313,9 +313,12 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
 
         if nuniverses > 0:
            universe = np.random.randint(nuniverses)
+            data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn'])
+            data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0)
         else:
-            universe = 0
-        mc = get_mc_hists_posterior(data_mc[data_mc.universe == universe],muon_hists,data_hists,x,bins)[id]
+            data_mc_with_weights = data_mc.copy()
+            data_mc['weight'] = 1.0
+        mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,x,bins)[id]
         N = mc.sum()
         # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
         mc = mc + 1e-10
@@ -337,10 +340,10 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
         ps.append(np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples))
     return np.percentile(ps,percentile)
 
-def get_prob(data,muon,mc,samples,bins,size):
+def get_prob(data,muon,mc,weights,samples,bins,size):
     prob = {}
     for id in (20,22,2020,2022,2222):
-        prob[id] = get_multinomial_prob(data,muon,mc,id,samples,bins,size=size)
+        prob[id] = get_multinomial_prob(data,muon,mc,weights,id,samples,bins,size=size)
         print(id, prob[id])
     return prob
 
@@ -496,19 +499,6 @@ if __name__ == '__main__':
     data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
     data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
 
-    # Merge the reweight scales only after we've done all the data cleaning and
-    # high level cuts
-    data_mc_with_weights = pd.merge(data_mc,weights,how='left',on=['run','evn'])
-    data_atm_mc_with_weights = pd.merge(data_atm_mc,weights,how='left',on=['run','evn'])
-
-    # Make sure we are only using data with weights
-    #
-    # FIXME: There is probably a better way to do this. For example, we could
-    # use the full dataset for the fit and then scale the normalization factor
-    # by the ratio of events with weights to the total number of events.
-    data_mc = data_mc_with_weights[data_mc_with_weights.universe == 0]
-    data_atm_mc = data_atm_mc_with_weights[data_atm_mc_with_weights.universe == 0]
-
     bins = np.logspace(np.log10(20),np.log10(10e3),21)
 
     if args.coverage:
@@ -520,6 +510,9 @@ if __name__ == '__main__':
 
         xtrue = truncnorm_scaled(PRIORS_LOW,PRIORS_HIGH,PRIORS,PRIOR_UNCERTAINTIES)
 
+        data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == 0],how='left',on=['run','evn'])
+        data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == 0],how='left',on=['run','evn'])
+
         for i in range(args.coverage):
             # Calculate expected number of events
             N = len(data_mc)*xtrue[0]
@@ -534,8 +527,8 @@ if __name__ == '__main__':
             n_muon_atm = np.random.poisson(N_muon_atm)
 
             # Sample data from Monte Carlo
-            data = pd.concat((data_mc.sample(n=n,replace=True,weights='weight'), muon.sample(n=n_muon,replace=True)))
-            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True,weights='weight'), muon_atm.sample(n=n_muon_atm,replace=True)))
+            data = pd.concat((data_mc_with_weights.sample(n=n,replace=True,weights='weight'), muon.sample(n=n_muon,replace=True)))
+            data_atm = pd.concat((data_atm_mc_with_weights.sample(n=n_atm,replace=True,weights='weight'), muon_atm.sample(n=n_muon_atm,replace=True)))
 
             # Smear the energies by the additional energy resolution
             data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
@@ -552,8 +545,8 @@ if __name__ == '__main__':
 
             xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
 
-            prob = get_prob(data,muon,data_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
-            prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
+            prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
+            prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
 
             for id in (20,22,2020,2022,2222):
                 p_values[id].append(prob[id])
@@ -675,8 +668,8 @@ if __name__ == '__main__':
 
     xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
 
-    prob = get_prob(data,muon,data_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
-    prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
+    prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
+    prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
 
     plt.figure()
     for i in range(len(FIT_PARS)):
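Note on the change above: the GENIE reweight scales are no longer merged once at startup and restricted to universe 0; instead get_multinomial_prob does a per-universe left merge and treats events without a weight entry as weight 1.0. The snippet below is a minimal, self-contained sketch of that merge-and-fill pattern. Only the column names (run, evn, universe, weight) and the pandas calls come from the patch; the toy dataframes and their values are hypothetical, for illustration only.

    import numpy as np
    import pandas as pd

    # Toy stand-ins for the real tables (hypothetical values).
    # data_mc: one row per simulated event, keyed by (run, evn).
    data_mc = pd.DataFrame({'run': [100, 100, 101],
                            'evn': [1, 2, 7],
                            'energy1': [25.0, 40.0, 310.0]})
    # weights: one row per (run, evn, universe) with the reweight scale.
    weights = pd.DataFrame({'run':      [100, 100, 101, 100],
                            'evn':      [1,   2,   7,   1],
                            'universe': [0,   0,   0,   1],
                            'weight':   [0.9, 1.1, 1.05, 0.8]})

    # Pick a random universe, as in the updated get_multinomial_prob()
    nuniverses = weights['universe'].max() + 1
    universe = np.random.randint(nuniverses)

    # Left merge keeps every MC event; events with no weight entry in this
    # universe come back as NaN and are treated as weight 1.0.
    data_mc_with_weights = pd.merge(data_mc, weights[weights.universe == universe],
                                    how='left', on=['run', 'evn'])
    data_mc_with_weights['weight'] = data_mc_with_weights['weight'].fillna(1.0)

    print(data_mc_with_weights[['run', 'evn', 'weight']])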
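The coverage loop likewise now bootstraps its fake data sets from the weighted MC table using DataFrame.sample(weights='weight') after Poisson-fluctuating the expected count. A short sketch of that sampling step, again with a hypothetical toy table and a stand-in normalization value:

    import numpy as np
    import pandas as pd

    # Hypothetical weighted MC table (values for illustration only).
    data_mc_with_weights = pd.DataFrame({'energy1': [25.0, 40.0, 310.0, 55.0],
                                         'weight':  [0.9, 1.1, 1.05, 1.0]})

    norm = 1.0                         # stand-in for xtrue[0], the normalization parameter
    N = len(data_mc_with_weights)*norm
    n = np.random.poisson(N)           # Poisson-fluctuate the expected number of events

    # Sample events with replacement, with probability proportional to 'weight',
    # mirroring the data/data_atm construction in the coverage loop above.
    fake_data = data_mc_with_weights.sample(n=n, replace=True, weights='weight')
    print(len(fake_data))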