diff options
author | tlatorre <tlatorre@uchicago.edu> | 2020-09-07 13:39:53 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2020-09-07 13:39:53 -0500 |
commit | 96220c7a7e9a07456d64915f88e46f3468c3d2bc (patch) | |
tree | 0bebc9360959f1f855c54a5701f3bda675192ca0 | |
parent | 8c2dba79a018e91dc8cf607499cea4d975776cd1 (diff) | |
download | sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.tar.gz sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.tar.bz2 sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.zip |
update chi2
This commit updates the chi2 analysis with two big changes:
- I now apply the energy bias correction in the likelihood per particle
and not per fit. So there is no longer an energy bias parameter for
electron + muon fits, instead we just apply the energy bias correction
for electrons to the electron and the energy bias correction for muons
to the muon and then add the two kinetic energies.
- I now apply the energy bias correction terms to the data instead of
the Monte Carlo. This does introduce an issue with discontinuities in
the likelihood but it makes everything easier to interpret. The
discontinuities *should* be correctly taken into account by the MCMC.
-rwxr-xr-x | utils/chi2 | 117 |
1 files changed, 80 insertions, 37 deletions
@@ -47,10 +47,10 @@ def printoptions(*args, **kwargs): # Uncertainty on the energy scale # FIXME: These are just placeholders! Should get real number from stopping # muons. -ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0} -ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 1.1, 'eu': 0.1} +ENERGY_SCALE_MEAN = {'e': 0.0, 'u': -0.066} +ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.011} ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0} -ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} +ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.014, 'eu': 0.1} # Absolute tolerance for the minimizer. # Since we're minimizing the negative log likelihood, we really only care about @@ -139,14 +139,11 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None): ke = ke_dict[id] if id in (20,2020): - ke = ke*x[1] scale = bincenters2*max(EPSILON,x[2]) elif id in (22,2222): - ke = ke*x[3] scale = bincenters2*max(EPSILON,x[4]) elif id == 2022: - ke = ke*x[5] - scale = bincenters2*max(EPSILON,x[6]) + scale = bincenters2*max(EPSILON,x[5]) if weights_dict: hist = np.histogram(ke,bins=bins2,weights=weights_dict[id])[0] @@ -159,15 +156,32 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None): mc_hists[id] *= x[0] return mc_hists -def get_data_hists(data,bins,scale=1.0): +def get_data_hists_fast(ke_dict,x,bins,scale=1.0): + data_hists = {} + for id in (20,22,2020,2022,2222): + if id in (20,2020): + data_hists[id] = np.histogram(ke_dict[id]*(1+x[1]),bins=bins)[0]*scale + elif id in (22,2222): + data_hists[id] = np.histogram(ke_dict[id]*(1+x[3]),bins=bins)[0]*scale + elif id == 2022: + data_hists[id] = np.histogram((ke_dict[id]*[1+x[1],1+x[3]]).sum(axis=-1),bins=bins)[0]*scale + return data_hists + +def get_data_hists(data,x,bins,scale=1.0): """ Returns the data histogrammed into `bins`. """ - data_hists = {} + ke_dict = {} for id in (20,22,2020,2022,2222): - df_id = data[data.id == id] - data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale - return data_hists + if id in (20,2020): + ke_dict[id] = data[data.id == id].ke.values + elif id in (22,2222): + ke_dict[id] = data[data.id == id].ke.values + elif id == 2022: + ke_dict[id] = np.array([data.loc[data.id == id,'energy1'].values, + data.loc[data.id == id,'energy2'].values]).T + + return get_data_hists_fast(ke_dict,x,bins,scale) # Likelihood Fit Parameters # 0 - Atmospheric Neutrino Flux Scale @@ -175,9 +189,8 @@ def get_data_hists(data,bins,scale=1.0): # 2 - Electron energy resolution # 3 - Muon energy bias # 4 - Muon energy resolution -# 5 - Electron + Muon energy bias -# 6 - Electron + Muon energy resolution -# 7 - External Muon scale +# 5 - Electron + Muon energy resolution +# 6 - External Muon scale FIT_PARS = [ 'Atmospheric Neutrino Flux Scale', @@ -185,13 +198,10 @@ FIT_PARS = [ 'Electron energy resolution', 'Muon energy bias', 'Muon energy resolution', - 'Electron + Muon energy bias', 'Electron + Muon energy resolution', 'External Muon scale'] def make_nll(data, muons, mc, bins, print_nll=False): - data_hists = get_data_hists(data,bins) - ke_dict = {} for id in (20,22,2020,2022,2222): ke_dict[id] = mc[mc.id == id].ke.values @@ -200,10 +210,22 @@ def make_nll(data, muons, mc, bins, print_nll=False): for id in (20,22,2020,2022,2222): ke_dict_muon[id] = muons[muons.id == id].ke.values + ke_dict_data = {} + for id in (20,22,2020,2022,2222): + if id in (20,2020): + ke_dict_data[id] = data[data.id == id].ke.values + elif id in (22,2222): + ke_dict_data[id] = data[data.id == id].ke.values + elif id == 2022: + ke_dict_data[id] = np.array([data.loc[data.id == id,'energy1'].values, + data.loc[data.id == id,'energy2'].values]).T + def nll(x, grad=None): if any(x[i] < 0 for i in range(len(x))): return np.inf + data_hists = get_data_hists_fast(ke_dict_data,x,bins) + # Get the Monte Carlo histograms. We need to do this within the # likelihood function since several of the parameters like the energy # bias and resolution affect the histograms. @@ -245,17 +267,16 @@ def make_nll(data, muons, mc, bins, print_nll=False): nll = 0 for id in data_hists: oi = data_hists[id] - ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON + ei = mc_hists[id] + muon_hists[id]*x[6] + EPSILON N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) # Add the priors nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e']) nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u']) - nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu']) nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e']) nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u']) - nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) + nll -= norm.logpdf(x[5],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) if print_nll: # Print the result @@ -292,7 +313,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins): for id in (20,22,2020,2022,2222): mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]) # FIXME: does the orering of when we add the muons matter here? - mc_hists[id] += muon_hists[id]*x[7] + mc_hists[id] += muon_hists[id]*x[6] return mc_hists def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000): @@ -315,8 +336,19 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti bins: bins used to bin the mc histogram size: number of values to compute """ - data_hists = get_data_hists(data,bins) - muon_hists = get_data_hists(data_muon,bins) + ke_dict_muon = {} + for _id in (20,22,2020,2022,2222): + ke_dict_muon[_id] = data_muon[data_muon.id == _id].ke.values + + ke_dict_data = {} + for _id in (20,22,2020,2022,2222): + if _id in (20,2020): + ke_dict_data[_id] = data[data.id == _id].ke.values + elif _id in (22,2222): + ke_dict_data[_id] = data[data.id == _id].ke.values + elif _id == 2022: + ke_dict_data[_id] = np.array([data.loc[data.id == _id,'energy1'].values, + data.loc[data.id == _id,'energy2'].values]).T # Get the total number of "universes" simulated in the GENIE reweight tool if len(data_mc): @@ -327,6 +359,10 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] + + data_hists = get_data_hists_fast(ke_dict_data,x,bins) + muon_hists = get_mc_hists_fast(ke_dict_muon,x,bins,apply_norm=False) + if nuniverses > 0: universe = np.random.randint(nuniverses) else: @@ -379,11 +415,17 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False): """ nll = make_nll(data,muon,data_mc,bins,print_nll) - x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON]) + x0 = np.array([1.0,0.0,EPSILON,0.0,EPSILON,EPSILON,EPSILON]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) - low = np.array([EPSILON]*len(x0)) - high = np.array([10]*len(x0)) + low = np.array([EPSILON,-1e9,EPSILON,-1e9,EPSILON,EPSILON,EPSILON]) + high = np.array([1e9]*len(x0)) + # Fix the energy bias parameters for the minimization, since they will + # cause discontinuities in the likelihood function. + low[1] = 0.0 + low[3] = 0.0 + high[1] = 0.0 + high[3] = 0.0 opt.set_lower_bounds(low) opt.set_upper_bounds(high) opt.set_ftol_abs(FTOL_ABS) @@ -394,6 +436,12 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False): nll_xopt = nll(xopt) print("nll(xopt) = ", nll(xopt)) + # Now, we allow them to float + low[1] = -1e9 + low[3] = -1e9 + high[1] = 1e9 + high[3] = 1e9 + stepsizes = estimate_errors(nll,xopt,low,high) with printoptions(precision=3, suppress=True): @@ -719,16 +767,11 @@ if __name__ == '__main__': plt.gca().get_yaxis().set_visible(False) plt.subplot(3,3,6) plt.hist(samples[:,5],bins=100,histtype='step') - plt.xlabel("Electron + Muon Energy Scale") + plt.xlabel("Electron + Muon Energy Resolution") despine(ax=plt.gca(),left=True,trim=True) plt.gca().get_yaxis().set_visible(False) plt.subplot(3,3,7) plt.hist(samples[:,6],bins=100,histtype='step') - plt.xlabel("Electron + Muon Energy Resolution") - despine(ax=plt.gca(),left=True,trim=True) - plt.gca().get_yaxis().set_visible(False) - plt.subplot(3,3,8) - plt.hist(samples[:,7],bins=100,histtype='step') plt.xlabel("Muon Scale") despine(ax=plt.gca(),left=True,trim=True) plt.gca().get_yaxis().set_visible(False) @@ -746,8 +789,8 @@ if __name__ == '__main__': labels = ('Data','Monte Carlo','External Muons') fig = plt.figure() - hists = get_data_hists(data,bins) - hists_muon = get_data_hists(muon,bins,scale=xopt[7]) + hists = get_data_hists(data,xopt,bins) + hists_muon = get_data_hists(muon,xopt,bins,scale=xopt[6]) hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') @@ -773,8 +816,8 @@ if __name__ == '__main__': else: plt.suptitle("Without Neutron Follower") fig = plt.figure() - hists = get_data_hists(data_atm,bins) - hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7]) + hists = get_data_hists(data_atm,xopt,bins) + hists_muon = get_data_hists(muon_atm,xopt,bins,scale=xopt[6]) hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') |