From 0d9c562d02950f03deac10d2377f8e5c644aff31 Mon Sep 17 00:00:00 2001 From: tlatorre Date: Wed, 9 Sep 2020 14:25:54 -0500 Subject: update michel script to fit for bias and resolution --- utils/plot-michels | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 127 insertions(+), 1 deletion(-) diff --git a/utils/plot-michels b/utils/plot-michels index 01e7028..1379231 100755 --- a/utils/plot-michels +++ b/utils/plot-michels @@ -27,9 +27,31 @@ import numpy as np from scipy.stats import iqr, poisson from scipy.stats import iqr, norm, beta from sddm.stats import * +import emcee +from sddm.dc import estimate_errors, EPSILON +import nlopt +import contextlib + +# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre +@contextlib.contextmanager +def printoptions(*args, **kwargs): + original = np.get_printoptions() + np.set_printoptions(*args, **kwargs) + try: + yield + finally: + np.set_printoptions(**original) particle_id = {20: 'e', 22: 'u'} +# Absolute tolerance for the minimizer. +# Since we're minimizing the negative log likelihood, we really only care about +# the value of the minimum to within ~0.05 (10% of the one sigma shift). +# However, I have noticed before that setting a higher tolerance can sometimes +# cause the fit to get stuck in a local minima, so we set it here to a very +# small value. +FTOL_ABS = 1e-10 + def print_particle_probs(data): n = [len(data[data.id == id]) for id in (20,22,2020,2022,2222)] @@ -42,6 +64,33 @@ def print_particle_probs(data): particle_id_str = ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) print("P(%s) = %.2f +/- %.2f" % (particle_id_str,mode[i]*100,std[i]*100)) +def make_nll(data, mc, bins, print_nll=False): + def nll(x, grad=None): + if any(x[i] < 0 for i in (0,2)): + return np.inf + + data_hist = np.histogram(data.ke.values*(1+x[1]),bins=bins)[0] + + # Get the Monte Carlo histograms. We need to do this within the + # likelihood function since we apply the energy resolution parameters + # to the Monte Carlo. + cdf = norm.cdf(bins[:,np.newaxis],mc.ke.values,mc.ke.values*x[2]) + mc_hist = np.sum(cdf[1:] - cdf[:-1],axis=-1)*x[0] + + # Calculate the negative log of the likelihood of observing the data + # given the fit parameters + oi = data_hist + ei = mc_hist + EPSILON + N = ei.sum() + nll = N + np.sum(gammaln(oi+1)) - np.sum(oi*np.log(ei)) + + if print_nll: + # Print the result + print("nll = %.2f" % nll) + + return nll + return nll + if __name__ == '__main__': import argparse import numpy as np @@ -57,6 +106,8 @@ if __name__ == '__main__': parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds") parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files") parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)") + parser.add_argument("--print-nll", action='store_true', default=False, help="print nll values") + parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain") args = parser.parse_args() setup_matplotlib(args.save) @@ -123,6 +174,82 @@ if __name__ == '__main__': michel_low_nhit = michel[michel.muon_gtid.isin(ev.gtid.values) & (michel.muon_nhit < 2500)] michel_low_nhit_mc = michel_mc[michel_mc.muon_gtid.isin(ev_mc.gtid.values) & (michel_mc.muon_nhit < 2500)] + bins = np.linspace(0,100,41) + + nll = make_nll(michel_low_nhit,michel_low_nhit_mc,bins,args.print_nll) + + x0 = np.array([len(michel_low_nhit)/len(michel_low_nhit_mc),0.0,EPSILON]) + opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) + opt.set_min_objective(nll) + low = np.array([EPSILON,-1,EPSILON]) + high = np.array([1e9,1.0,1e9]) + opt.set_lower_bounds(low) + opt.set_upper_bounds(high) + opt.set_ftol_abs(FTOL_ABS) + opt.set_initial_step([0.01]*len(x0)) + + xopt = opt.optimize(x0) + print("xopt = ", xopt) + nll_xopt = nll(xopt) + print("nll(xopt) = ", nll(xopt)) + + stepsizes = estimate_errors(nll,xopt,low,high) + + with printoptions(precision=3, suppress=True): + print("Errors: ", stepsizes) + + pos = np.empty((20, len(x0)),dtype=np.double) + for i in range(pos.shape[0]): + pos[i] = xopt + np.random.randn(len(x0))*stepsizes + pos[i,:] = np.clip(pos[i,:],low,high) + + nwalkers, ndim = pos.shape + + sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x)) + with np.errstate(invalid='ignore'): + sampler.run_mcmc(pos, args.steps) + + print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) + + try: + print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True)) + except Exception as e: + print(e) + + log_prob = sampler.get_log_prob(flat=True) + samples = sampler.get_chain(flat=True) + flat_samples = samples.reshape((-1,len(x0))) + + plt.figure() + plt.subplot(2,2,1) + plt.hist(flat_samples[:,0],bins=100,histtype='step') + plt.xlabel("Normalization") + despine(ax=plt.gca(),left=True,trim=True) + plt.gca().get_yaxis().set_visible(False) + plt.subplot(2,2,2) + plt.hist(flat_samples[:,1],bins=100,histtype='step') + plt.xlabel("Energy Bias") + despine(ax=plt.gca(),left=True,trim=True) + plt.gca().get_yaxis().set_visible(False) + plt.subplot(2,2,3) + plt.hist(flat_samples[:,2],bins=100,histtype='step') + plt.xlabel("Energy Resolution") + despine(ax=plt.gca(),left=True,trim=True) + plt.gca().get_yaxis().set_visible(False) + if args.save: + plt.savefig("michel_electron_fit_posterior.pdf") + plt.savefig("michel_electron_fit_posterior.eps") + else: + plt.suptitle("Fit Parameters") + + mode_bias = samples[log_prob.argmin()][1] + mean_bias, error_bias = np.mean(samples[:,1]), np.std(samples[:,1]) + mode_resolution = samples[log_prob.argmin()][2] + mean_resolution, error_resolution = np.mean(samples[:,2]), np.std(samples[:,2]) + + print("Energy bias = %.2g +/- %.2g" % (mode_bias,error_bias)) + print("Energy resolution = %.2g +/- %.2g" % (mode_resolution,error_resolution)) + print("Particle ID probability for Michel electrons:") print("Data") print_particle_probs(michel_low_nhit) @@ -140,7 +267,6 @@ if __name__ == '__main__': plt.suptitle("Michel Electrons") fig = plt.figure() - bins = np.linspace(0,100,41) plt.hist(michel_low_nhit.ke.values, bins=bins, histtype='step', color='C0', label="Data") plt.hist(michel_low_nhit_mc.ke.values, weights=np.tile(len(michel_low_nhit)/len(michel_low_nhit_mc),len(michel_low_nhit_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo") hist = np.histogram(michel_low_nhit.ke.values,bins=bins)[0] -- cgit