diff options
Diffstat (limited to 'utils/chi2')
-rwxr-xr-x | utils/chi2 | 117 |
1 file changed, 80 insertions, 37 deletions
@@ -47,10 +47,10 @@ def printoptions(*args, **kwargs): # Uncertainty on the energy scale # FIXME: These are just placeholders! Should get real number from stopping # muons. -ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0} -ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 1.1, 'eu': 0.1} +ENERGY_SCALE_MEAN = {'e': 0.0, 'u': -0.066} +ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.011} ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0} -ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} +ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.014, 'eu': 0.1} # Absolute tolerance for the minimizer. # Since we're minimizing the negative log likelihood, we really only care about @@ -139,14 +139,11 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None): ke = ke_dict[id] if id in (20,2020): - ke = ke*x[1] scale = bincenters2*max(EPSILON,x[2]) elif id in (22,2222): - ke = ke*x[3] scale = bincenters2*max(EPSILON,x[4]) elif id == 2022: - ke = ke*x[5] - scale = bincenters2*max(EPSILON,x[6]) + scale = bincenters2*max(EPSILON,x[5]) if weights_dict: hist = np.histogram(ke,bins=bins2,weights=weights_dict[id])[0] @@ -159,15 +156,32 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None): mc_hists[id] *= x[0] return mc_hists -def get_data_hists(data,bins,scale=1.0): +def get_data_hists_fast(ke_dict,x,bins,scale=1.0): + data_hists = {} + for id in (20,22,2020,2022,2222): + if id in (20,2020): + data_hists[id] = np.histogram(ke_dict[id]*(1+x[1]),bins=bins)[0]*scale + elif id in (22,2222): + data_hists[id] = np.histogram(ke_dict[id]*(1+x[3]),bins=bins)[0]*scale + elif id == 2022: + data_hists[id] = np.histogram((ke_dict[id]*[1+x[1],1+x[3]]).sum(axis=-1),bins=bins)[0]*scale + return data_hists + +def get_data_hists(data,x,bins,scale=1.0): """ Returns the data histogrammed into `bins`. 
""" - data_hists = {} + ke_dict = {} for id in (20,22,2020,2022,2222): - df_id = data[data.id == id] - data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale - return data_hists + if id in (20,2020): + ke_dict[id] = data[data.id == id].ke.values + elif id in (22,2222): + ke_dict[id] = data[data.id == id].ke.values + elif id == 2022: + ke_dict[id] = np.array([data.loc[data.id == id,'energy1'].values, + data.loc[data.id == id,'energy2'].values]).T + + return get_data_hists_fast(ke_dict,x,bins,scale) # Likelihood Fit Parameters # 0 - Atmospheric Neutrino Flux Scale @@ -175,9 +189,8 @@ def get_data_hists(data,bins,scale=1.0): # 2 - Electron energy resolution # 3 - Muon energy bias # 4 - Muon energy resolution -# 5 - Electron + Muon energy bias -# 6 - Electron + Muon energy resolution -# 7 - External Muon scale +# 5 - Electron + Muon energy resolution +# 6 - External Muon scale FIT_PARS = [ 'Atmospheric Neutrino Flux Scale', @@ -185,13 +198,10 @@ FIT_PARS = [ 'Electron energy resolution', 'Muon energy bias', 'Muon energy resolution', - 'Electron + Muon energy bias', 'Electron + Muon energy resolution', 'External Muon scale'] def make_nll(data, muons, mc, bins, print_nll=False): - data_hists = get_data_hists(data,bins) - ke_dict = {} for id in (20,22,2020,2022,2222): ke_dict[id] = mc[mc.id == id].ke.values @@ -200,10 +210,22 @@ def make_nll(data, muons, mc, bins, print_nll=False): for id in (20,22,2020,2022,2222): ke_dict_muon[id] = muons[muons.id == id].ke.values + ke_dict_data = {} + for id in (20,22,2020,2022,2222): + if id in (20,2020): + ke_dict_data[id] = data[data.id == id].ke.values + elif id in (22,2222): + ke_dict_data[id] = data[data.id == id].ke.values + elif id == 2022: + ke_dict_data[id] = np.array([data.loc[data.id == id,'energy1'].values, + data.loc[data.id == id,'energy2'].values]).T + def nll(x, grad=None): if any(x[i] < 0 for i in range(len(x))): return np.inf + data_hists = get_data_hists_fast(ke_dict_data,x,bins) + # Get the Monte Carlo 
histograms. We need to do this within the # likelihood function since several of the parameters like the energy # bias and resolution affect the histograms. @@ -245,17 +267,16 @@ def make_nll(data, muons, mc, bins, print_nll=False): nll = 0 for id in data_hists: oi = data_hists[id] - ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON + ei = mc_hists[id] + muon_hists[id]*x[6] + EPSILON N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) # Add the priors nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e']) nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u']) - nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu']) nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e']) nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u']) - nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) + nll -= norm.logpdf(x[5],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) if print_nll: # Print the result @@ -292,7 +313,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins): for id in (20,22,2020,2022,2222): mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]) # FIXME: does the orering of when we add the muons matter here? 
- mc_hists[id] += muon_hists[id]*x[7] + mc_hists[id] += muon_hists[id]*x[6] return mc_hists def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000): @@ -315,8 +336,19 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti bins: bins used to bin the mc histogram size: number of values to compute """ - data_hists = get_data_hists(data,bins) - muon_hists = get_data_hists(data_muon,bins) + ke_dict_muon = {} + for _id in (20,22,2020,2022,2222): + ke_dict_muon[_id] = data_muon[data_muon.id == _id].ke.values + + ke_dict_data = {} + for _id in (20,22,2020,2022,2222): + if _id in (20,2020): + ke_dict_data[_id] = data[data.id == _id].ke.values + elif _id in (22,2222): + ke_dict_data[_id] = data[data.id == _id].ke.values + elif _id == 2022: + ke_dict_data[_id] = np.array([data.loc[data.id == _id,'energy1'].values, + data.loc[data.id == _id,'energy2'].values]).T # Get the total number of "universes" simulated in the GENIE reweight tool if len(data_mc): @@ -327,6 +359,10 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] + + data_hists = get_data_hists_fast(ke_dict_data,x,bins) + muon_hists = get_mc_hists_fast(ke_dict_muon,x,bins,apply_norm=False) + if nuniverses > 0: universe = np.random.randint(nuniverses) else: @@ -379,11 +415,17 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False): """ nll = make_nll(data,muon,data_mc,bins,print_nll) - x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON]) + x0 = np.array([1.0,0.0,EPSILON,0.0,EPSILON,EPSILON,EPSILON]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) - low = np.array([EPSILON]*len(x0)) - high = np.array([10]*len(x0)) + low = np.array([EPSILON,-1e9,EPSILON,-1e9,EPSILON,EPSILON,EPSILON]) + high = np.array([1e9]*len(x0)) + # Fix the energy bias parameters for the minimization, since they will + # cause 
discontinuities in the likelihood function. + low[1] = 0.0 + low[3] = 0.0 + high[1] = 0.0 + high[3] = 0.0 opt.set_lower_bounds(low) opt.set_upper_bounds(high) opt.set_ftol_abs(FTOL_ABS) @@ -394,6 +436,12 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False): nll_xopt = nll(xopt) print("nll(xopt) = ", nll(xopt)) + # Now, we allow them to float + low[1] = -1e9 + low[3] = -1e9 + high[1] = 1e9 + high[3] = 1e9 + stepsizes = estimate_errors(nll,xopt,low,high) with printoptions(precision=3, suppress=True): @@ -719,16 +767,11 @@ if __name__ == '__main__': plt.gca().get_yaxis().set_visible(False) plt.subplot(3,3,6) plt.hist(samples[:,5],bins=100,histtype='step') - plt.xlabel("Electron + Muon Energy Scale") + plt.xlabel("Electron + Muon Energy Resolution") despine(ax=plt.gca(),left=True,trim=True) plt.gca().get_yaxis().set_visible(False) plt.subplot(3,3,7) plt.hist(samples[:,6],bins=100,histtype='step') - plt.xlabel("Electron + Muon Energy Resolution") - despine(ax=plt.gca(),left=True,trim=True) - plt.gca().get_yaxis().set_visible(False) - plt.subplot(3,3,8) - plt.hist(samples[:,7],bins=100,histtype='step') plt.xlabel("Muon Scale") despine(ax=plt.gca(),left=True,trim=True) plt.gca().get_yaxis().set_visible(False) @@ -746,8 +789,8 @@ if __name__ == '__main__': labels = ('Data','Monte Carlo','External Muons') fig = plt.figure() - hists = get_data_hists(data,bins) - hists_muon = get_data_hists(muon,bins,scale=xopt[7]) + hists = get_data_hists(data,xopt,bins) + hists_muon = get_data_hists(muon,xopt,bins,scale=xopt[6]) hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') @@ -773,8 +816,8 @@ if __name__ == '__main__': else: plt.suptitle("Without Neutron Follower") fig = plt.figure() - hists = get_data_hists(data_atm,bins) - hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7]) + hists = get_data_hists(data_atm,xopt,bins) + hists_muon = get_data_hists(muon_atm,xopt,bins,scale=xopt[6]) 
hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') |