diff options
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/chi2 | 213 |
1 files changed, 110 insertions, 103 deletions
@@ -36,13 +36,15 @@ from sddm.stats import * # Uncertainty on the energy scale # FIXME: Should get real number from stopping muons -ENERGY_SCALE_MEAN = 1.0 -ENERGY_SCALE_UNCERTAINTY = 0.1 +ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0} +ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} +ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0} +ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} particle_id = {20: 'e', 22: r'\mu'} -def plot_hist2(df, norm=1.0, scale=1.0, color=None): - for id, df_id in sorted(df.groupby('id')): +def plot_hist2(hists, bins, color=None): + for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) elif id == 22: @@ -54,8 +56,8 @@ def plot_hist2(df, norm=1.0, scale=1.0, color=None): elif id == 2222: plt.subplot(2,3,6) - bins = np.logspace(np.log10(20),np.log10(10e3),21) - plt.hist(df_id.ke.values*scale, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color) + bincenters = (bins[1:] + bins[:-1])/2 + plt.hist(bincenters, bins=bins, histtype='step', weights=hists[id],color=color) plt.gca().set_xscale("log") plt.xlabel("Energy (MeV)") plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$') @@ -63,69 +65,85 @@ def plot_hist2(df, norm=1.0, scale=1.0, color=None): if len(df): plt.tight_layout() -def plot_hist(df, muons=False): - for id, df_id in sorted(df.groupby('id')): - if id == 20: - plt.subplot(3,4,1) - elif id == 22: - plt.subplot(3,4,2) - elif id == 2020: - plt.subplot(3,4,5) +def get_mc_hists(data,x,bins,apply_norm=False): + mc_hists = {} + + # FIXME: May need to increase number of bins here + bins2 = np.logspace(np.log10(20),np.log10(10e3),1000) + bincenters2 = (bins2[1:] + bins2[:-1])/2 + + for id in (20,22,2020,2022,2222): + ke = data[data.id == id].ke.values + + if id in (20,2020): + ke = ke*x[1] + scale = bincenters2*x[2] + elif id in (22,2222): + ke = ke*x[3] + scale = bincenters2*x[4] elif id == 2022: - plt.subplot(3,4,6) - elif id == 2222: - plt.subplot(3,4,7) - elif id == 202020: - plt.subplot(3,4,9) - elif id == 202022: - plt.subplot(3,4,10) - elif id == 202222: - plt.subplot(3,4,11) - elif id == 222222: - plt.subplot(3,4,12) - - if muons: - plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step') - plt.xlabel("log10(Energy (GeV))") - else: - plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') - plt.xlabel("Energy (MeV)") - plt.title(str(id)) + ke = ke*x[5] + scale = bincenters2*x[6] - if len(df): - plt.tight_layout() + hist = np.histogram(ke,bins=bins2)[0] + + cdf = norm.cdf(bins[:,np.newaxis],bincenters2,scale)*hist + mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1) + if apply_norm: + mc_hists[id] *= x[0] + return mc_hists + +def get_data_hists(data,bins): + data_hists = {} + for id in (20,22,2020,2022,2222): + df_id = data[data.id == id] + data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] + return data_hists + +# Likelihood Fit Parameters +# 0 - Atmospheric Neutrino Flux Scale +# 1 - Electron energy bias +# 2 - Electron energy resolution +# 3 - Muon energy bias +# 4 - Muon energy resolution +# 5 - Electron + Muon energy bias +# 6 - Electron + Muon energy resolution + +def make_nll(data, mc, bins): + data_hists = get_data_hists(data,bins) -def make_nll(data, mc_hists): def nll(x, grad=None): - if x[0] < 0 or x[1] < 0: + if any(x[i] < 0 for i in range(len(x))): return np.inf - data_hists = {} - for id in (20,22,2020,2022,2222): - df_id = data[data.id == id] - if len(df_id): - data_hists[id] = np.histogram(df_id.ke.values*x[1],bins=bins)[0] - else: - data_hists[id] = np.zeros(len(bins)-1,dtype=np.int) + mc_hists = get_mc_hists(mc,x,bins,apply_norm=True) nll = 0 for id in data_hists: - oi = data_hists[id].sum() + oi = data_hists[id] ei = mc_hists[id] + EPSILON - N = oi.sum() - n = ei.sum() + N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) - return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY) + nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e']) + nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u']) + nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu']) + nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e']) + nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u']) + nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) + print("nll = %.2f" % nll) + return nll return nll -def get_mc_hist(data_mc,data_hist,x,bins): - hist = np.histogram(data_mc,bins=bins)[0] - return get_mc_hist_posterior(hist,data_hist,norm=x[0]/100.0) +def get_mc_hists_posterior(data,data_hists,x,bins): + mc_hists = get_mc_hists(data,x,bins) + for id in (20,22,2020,2022,2222): + mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]) + return mc_hists def get_data_hist(data,x,bins): - return np.histogram(data*x[1],bins=bins)[0] + return np.histogram(data,bins=bins)[0] -def get_multinomial_prob(data, data_mc, x_samples, bins, percentile=99.0, size=10000): +def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, size=10000): """ Returns the p-value that the histogram of the data is drawn from the MC histogram. @@ -145,16 +163,17 @@ def get_multinomial_prob(data, data_mc, x_samples, bins, percentile=99.0, size=1 bins: bins used to bin the mc histogram size: number of values to compute """ + data_hists = get_data_hists(data,bins) + ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] - data_hist = get_data_hist(data,x,bins) - mc = get_mc_hist(data_mc,data_hist,x,bins) + mc = get_mc_hists_posterior(data_mc,data_hists,x,bins)[id] N = mc.sum() # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). mc = mc + 1e-10 p = mc/mc.sum() - chi2_data = nllr(data_hist,mc) + chi2_data = nllr(data_hists[id],mc) # To draw the multinomial samples we first draw the expected number of # events from a Poisson distribution and then loop over the counts and # unique values. The reason we do this is that you can't call @@ -254,59 +273,28 @@ if __name__ == '__main__': mc = prompt_mc bins = np.logspace(np.log10(20),np.log10(10e3),21) - data_hists = {} - mc_hists = {} - for id in (20,22,2020,2022,2222): - df_id = data[data.id == id] - if len(df_id): - data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] - else: - data_hists[id] = np.zeros(len(bins)-1,dtype=np.int) - for id in (20,22,2020,2022,2222): - df_id = mc[mc.id == id] - if len(df_id): - mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100 - else: - mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int) - - data_atm_hists = {} - mc_atm_hists = {} - for id in (20,22,2020,2022,2222): - df_id = atm[atm.id == id] - if len(df_id): - data_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] - else: - data_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int) + nll = make_nll(data,mc,bins) - for id in (20,22,2020,2022,2222): - df_id = atm_mc[atm_mc.id == id] - if len(df_id): - mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100 - else: - mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int) - - nll = make_nll(data,mc_hists) - - x0 = np.array([1.0,1.0]) + x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) - low = np.array([1e-10,1e-10]) - high = np.array([10,10]) + low = np.array([EPSILON]*len(x0)) + high = np.array([10]*len(x0)) opt.set_lower_bounds(low) opt.set_upper_bounds(high) opt.set_ftol_abs(1e-10) - opt.set_initial_step([0.01,0.01]) + opt.set_initial_step([0.01]*len(x0)) xopt = opt.optimize(x0) print("xopt = ", xopt) nll_xopt = nll(xopt) print("nll(xopt) = ", nll(xopt)) - pos = np.empty((10, len(x0)),dtype=np.double) + pos = np.empty((20, len(x0)),dtype=np.double) for i in range(pos.shape[0]): pos[i] = xopt + np.random.randn(len(x0))*0.1 - pos[i,:] = np.clip(pos[i,:],1e-10,10) + pos[i,:] = np.clip(pos[i,:],low,high) nwalkers, ndim = pos.shape @@ -324,21 +312,36 @@ if __name__ == '__main__': samples = sampler.chain.reshape((-1,len(x0))) plt.figure() - plt.subplot(2,2,1) + plt.subplot(3,3,1) plt.hist(samples[:,0],bins=100,histtype='step') plt.xlabel("Atmospheric Flux Scale") - plt.subplot(2,2,2) + plt.subplot(3,3,2) + plt.hist(samples[:,1],bins=100,histtype='step') + plt.xlabel("Electron Energy Scale") + plt.subplot(3,3,3) + plt.hist(samples[:,1],bins=100,histtype='step') + plt.xlabel("Electron Energy Resolution") + plt.subplot(3,3,4) + plt.hist(samples[:,1],bins=100,histtype='step') + plt.xlabel("Muon Energy Scale") + plt.subplot(3,3,5) + plt.hist(samples[:,1],bins=100,histtype='step') + plt.xlabel("Muon Energy Resolution") + plt.subplot(3,3,6) + plt.hist(samples[:,1],bins=100,histtype='step') + plt.xlabel("Electron + Muon Energy Scale") + plt.subplot(3,3,7) plt.hist(samples[:,1],bins=100,histtype='step') - plt.xlabel("Energy Scale") + plt.xlabel("Electron + Muon Energy Resolution") prob = {} for id in (20,22,2020,2022,2222): - prob[id] = get_multinomial_prob(data[data.id == id].ke.values,mc[mc.id == id].ke.values,samples,bins) + prob[id] = get_multinomial_prob(data,mc,id,samples,bins) print(id, prob[id]) prob_atm = {} for id in (20,22,2020,2022,2222): - prob_atm[id] = get_multinomial_prob(atm[atm.id == id].ke.values,atm_mc[atm_mc.id == id].ke.values,samples,bins) + prob_atm[id] = get_multinomial_prob(atm,atm_mc,id,samples,bins) print(id, prob_atm[id]) handles = [Line2D([0], [0], color='C0'), @@ -346,8 +349,10 @@ if __name__ == '__main__': labels = ('Data','Monte Carlo') fig = plt.figure() - plot_hist2(prompt,scale=xopt[1],color='C0') - plot_hist2(prompt_mc,norm=xopt[0]/100,color='C1') + hists = get_data_hists(prompt,bins) + hists_mc = get_mc_hists(prompt_mc,xopt,bins,apply_norm=True) + plot_hist2(hists,bins=bins,color='C0') + plot_hist2(hists_mc,bins=bins,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) @@ -369,8 +374,10 @@ if __name__ == '__main__': else: plt.suptitle("Without Neutron Follower") fig = plt.figure() - plot_hist2(atm,scale=xopt[1],color='C0') - plot_hist2(atm_mc,norm=xopt[0]/100,color='C1') + hists = get_data_hists(atm,bins) + hists_mc = get_mc_hists(atm_mc,xopt,bins,apply_norm=True) + plot_hist2(hists,bins=bins,color='C0') + plot_hist2(hists_mc,bins=bins,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) |