diff options
author | tlatorre <tlatorre@uchicago.edu> | 2020-12-08 08:59:15 -0600 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2020-12-08 08:59:15 -0600 |
commit | ed6302abb7bf4c3304f2e8f7cb6c7462c7caaf4f (patch) | |
tree | e4636affbac86adcf746a500cbc704708f9b202f /utils | |
parent | 3716b099fe18ff458012084d344fce5f439d3532 (diff) | |
download | sddm-ed6302abb7bf4c3304f2e8f7cb6c7462c7caaf4f.tar.gz sddm-ed6302abb7bf4c3304f2e8f7cb6c7462c7caaf4f.tar.bz2 sddm-ed6302abb7bf4c3304f2e8f7cb6c7462c7caaf4f.zip |
update chi2 script to pick the best universe
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/chi2 | 79 |
1 file changed, 46 insertions, 33 deletions
@@ -205,7 +205,7 @@ def get_data_hists(data,bins,scale=1.0): data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale return data_hists -def make_nll(data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False): +def make_nll(data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, reweight=False, print_nll=False): df_dict = {} for id in (20,22,2020,2022,2222): df_dict[id] = mc[mc.id == id] @@ -223,7 +223,7 @@ def make_nll(data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_ # Get the Monte Carlo histograms. We need to do this within the # likelihood function since we apply the energy resolution parameters # to the Monte Carlo. - mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor) + mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor,reweight=reweight) muon_hists = get_mc_hists_fast(df_dict_muon,x,bins,scale=1/muon_scale_factor) # Calculate the negative log of the likelihood of observing the data @@ -277,7 +277,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,atmo_scale_factor,muon_ mc_hists[id] += muon_hists[id]*x[5]/muon_scale_factor return mc_hists -def get_multinomial_prob(data, data_muon, data_mc, weights, atmo_scale_factor, muon_scale_factor, id, x_samples, bins, percentile=50.0, size=10000): +def get_multinomial_prob(data, data_muon, data_mc, atmo_scale_factor, muon_scale_factor, id, x_samples, bins, percentile=50.0, size=10000): """ Returns the p-value that the histogram of the data is drawn from the MC histogram. 
@@ -303,26 +303,13 @@ def get_multinomial_prob(data, data_muon, data_mc, weights, atmo_scale_factor, m data_hists = get_data_hists(data,bins) - # Get the total number of "universes" simulated in the GENIE reweight tool - if len(data_mc): - nuniverses = weights['universe'].max()+1 - else: - nuniverses = 0 - ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] muon_hists = get_mc_hists_fast(df_dict_muon,x,bins) - if nuniverses > 0: - universe = np.random.randint(nuniverses) - data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn']) - data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) - else: - data_mc_with_weights = data_mc.copy() - data_mc['weight'] = 1.0 - mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,atmo_scale_factor,muon_scale_factor,x,bins)[id] + mc = get_mc_hists_posterior(data_mc,muon_hists,data_hists,atmo_scale_factor,muon_scale_factor,x,bins)[id] N = mc.sum() # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). 
mc = mc + 1e-10 @@ -344,14 +331,14 @@ def get_multinomial_prob(data, data_muon, data_mc, weights, atmo_scale_factor, m ps.append(np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)) return np.percentile(ps,percentile) -def get_prob(data,muon,mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size): +def get_prob(data,muon,mc,atmo_scale_factor,muon_scale_factor,samples,bins,size): prob = {} for id in (20,22,2020,2022,2222): - prob[id] = get_multinomial_prob(data,muon,mc,weights,atmo_scale_factor,muon_scale_factor,id,samples,bins,size=size) + prob[id] = get_multinomial_prob(data,muon,mc,atmo_scale_factor,muon_scale_factor,id,samples,bins,size=size) print(id, prob[id]) return prob -def do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10): +def do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10): """ Run the fit and return the minimum along with samples from running an MCMC starting near the minimum. @@ -362,13 +349,14 @@ def do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,prin external muons - data_mc: pandas dataframe representing the expected background from atmospheric neutrino events + - weights: pandas dataframe with the GENIE weights - bins: an array of bins to use for the fit - steps: the number of MCMC steps to run - Returns a tuple (xopt, samples) where samples is an array of shape (steps, - number of parameters). + Returns a tuple (xopt, universe, samples) where samples is an array of + shape (steps, number of parameters). 
""" - nll = make_nll(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll) + nll = make_nll(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll=print_nll) pos = np.empty((walkers, len(PRIORS)),dtype=np.double) for i in range(pos.shape[0]): @@ -409,7 +397,20 @@ def do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,prin opt.set_initial_step([0.01]*len(x0)) xopt = opt.optimize(x0) - return xopt, samples + # Get the total number of "universes" simulated in the GENIE reweight tool + nuniverses = weights['universe'].max()+1 + + nlls = [] + for universe in range(nuniverses): + data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) + + nll = make_nll(data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,bins,reweight=True,print_nll=print_nll) + nlls.append(nll(xopt)) + + universe = np.argmin(nlls) + + return xopt, universe, samples if __name__ == '__main__': import argparse @@ -570,10 +571,16 @@ if __name__ == '__main__': data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4]) data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0) - xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) + xopt, universe, samples = do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) + + data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) + + data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_atm_mc_with_weights.weight = 
data_atm_mc_with_weights.weight.fillna(1.0) - prob = get_prob(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) - prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) + prob = get_prob(data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) + prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) for id in (20,22,2020,2022,2222): p_values[id].append(prob[id]) @@ -665,7 +672,7 @@ if __name__ == '__main__': data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4]) data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0) - xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) + xopt, universe, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) for i in range(len(FIT_PARS)): # The "pull plots" we make here are actually produced via a @@ -696,10 +703,16 @@ if __name__ == '__main__': sys.exit(0) - xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) + xopt, universe, samples = do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) + + data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) + + data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_atm_mc_with_weights.weight = 
data_atm_mc_with_weights.weight.fillna(1.0) - prob = get_prob(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) - prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) + prob = get_prob(data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) + prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size) plt.figure() for i in range(len(FIT_PARS)): @@ -724,7 +737,7 @@ if __name__ == '__main__': fig = plt.figure() hists = get_data_hists(data,bins) hists_muon = get_mc_hists(muon,xopt,bins,scale=xopt[5]/muon_scale_factor) - hists_mc = get_mc_hists(data_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor) + hists_mc = get_mc_hists(data_mc_with_weights,xopt,bins,scale=xopt[0]/atmo_scale_factor,reweight=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') plot_hist2(hists_muon,bins=bins,color='C2') @@ -751,7 +764,7 @@ if __name__ == '__main__': fig = plt.figure() hists = get_data_hists(data_atm,bins) hists_muon = get_mc_hists(muon_atm,xopt,bins,scale=xopt[5]/muon_scale_factor) - hists_mc = get_mc_hists(data_atm_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor) + hists_mc = get_mc_hists(data_atm_mc_with_weights,xopt,bins,scale=xopt[0]/atmo_scale_factor,reweight=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') plot_hist2(hists_muon,bins=bins,color='C2') |