 utils/plot-michels | 128
 1 file changed, 127 insertions(+), 1 deletion(-)
diff --git a/utils/plot-michels b/utils/plot-michels
index 01e7028..1379231 100755
--- a/utils/plot-michels
+++ b/utils/plot-michels
@@ -27,9 +27,31 @@ import numpy as np
from scipy.stats import iqr, poisson
from scipy.stats import iqr, norm, beta
from sddm.stats import *
+import emcee
+from sddm.dc import estimate_errors, EPSILON
+import nlopt
+import contextlib
+
+# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre
+@contextlib.contextmanager
+def printoptions(*args, **kwargs):
+    original = np.get_printoptions()
+    np.set_printoptions(*args, **kwargs)
+    try:
+        yield
+    finally:
+        np.set_printoptions(**original)
particle_id = {20: 'e', 22: 'u'}
+# Absolute tolerance for the minimizer.
+# Since we're minimizing the negative log likelihood, we really only care about
+# the value of the minimum to within ~0.05 (10% of the one sigma shift).
+# However, I have noticed before that setting a higher tolerance can sometimes
+# cause the fit to get stuck in a local minimum, so we set it here to a very
+# small value.
+FTOL_ABS = 1e-10
+
def print_particle_probs(data):
    n = [len(data[data.id == id]) for id in (20,22,2020,2022,2222)]
@@ -42,6 +64,33 @@ def print_particle_probs(data):
        particle_id_str = ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)])
        print("P(%s) = %.2f +/- %.2f" % (particle_id_str,mode[i]*100,std[i]*100))
+def make_nll(data, mc, bins, print_nll=False):
+    def nll(x, grad=None):
+        if any(x[i] < 0 for i in (0,2)):
+            return np.inf
+
+        data_hist = np.histogram(data.ke.values*(1+x[1]),bins=bins)[0]
+
+        # Get the Monte Carlo histograms. We need to do this within the
+        # likelihood function since we apply the energy resolution parameters
+        # to the Monte Carlo.
+        cdf = norm.cdf(bins[:,np.newaxis],mc.ke.values,mc.ke.values*x[2])
+        mc_hist = np.sum(cdf[1:] - cdf[:-1],axis=-1)*x[0]
+
+        # Calculate the negative log of the likelihood of observing the data
+        # given the fit parameters
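+        #
+        # Assuming the counts in each bin are Poisson distributed with
+        # expectation e_i, the negative log likelihood of observing counts
+        # o_i is
+        #
+        #     -log(L) = sum_i [e_i - o_i*log(e_i) + log(o_i!)]
+        #             = N + sum_i log(o_i!) - sum_i o_i*log(e_i)
+        #
+        # where N = sum_i e_i and log(o_i!) = gammaln(o_i+1).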
+        oi = data_hist
+        ei = mc_hist + EPSILON
+        N = ei.sum()
+        nll = N + np.sum(gammaln(oi+1)) - np.sum(oi*np.log(ei))
+
+        if print_nll:
+            # Print the result
+            print("nll = %.2f" % nll)
+
+        return nll
+    return nll
+
if __name__ == '__main__':
    import argparse
    import numpy as np
@@ -57,6 +106,8 @@ if __name__ == '__main__':
parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
+ parser.add_argument("--print-nll", action='store_true', default=False, help="print nll values")
+ parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
args = parser.parse_args()
setup_matplotlib(args.save)
@@ -123,6 +174,82 @@ if __name__ == '__main__':
    michel_low_nhit = michel[michel.muon_gtid.isin(ev.gtid.values) & (michel.muon_nhit < 2500)]
    michel_low_nhit_mc = michel_mc[michel_mc.muon_gtid.isin(ev_mc.gtid.values) & (michel_mc.muon_nhit < 2500)]
+    bins = np.linspace(0,100,41)
+
+    nll = make_nll(michel_low_nhit,michel_low_nhit_mc,bins,args.print_nll)
+
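+    # Fit the Monte Carlo to the data by floating the normalization, energy
+    # bias, and energy resolution with NLopt's gradient-free subplex
+    # (LN_SBPLX) algorithm. The initial guess is the ratio of data to Monte
+    # Carlo events, no energy bias, and an (effectively zero) energy
+    # resolution of EPSILON.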
+    x0 = np.array([len(michel_low_nhit)/len(michel_low_nhit_mc),0.0,EPSILON])
+    opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
+    opt.set_min_objective(nll)
+    low = np.array([EPSILON,-1,EPSILON])
+    high = np.array([1e9,1.0,1e9])
+    opt.set_lower_bounds(low)
+    opt.set_upper_bounds(high)
+    opt.set_ftol_abs(FTOL_ABS)
+    opt.set_initial_step([0.01]*len(x0))
+
+    xopt = opt.optimize(x0)
+    print("xopt = ", xopt)
+    nll_xopt = nll(xopt)
+    print("nll(xopt) = ", nll_xopt)
+
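+    # Use estimate_errors() to get a rough estimate of the uncertainty on
+    # each fit parameter near the minimum. These are printed and then used as
+    # step sizes to initialize the MCMC walkers below.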
+    stepsizes = estimate_errors(nll,xopt,low,high)
+
+    with printoptions(precision=3, suppress=True):
+        print("Errors: ", stepsizes)
+
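+    # Initialize 20 walkers in a Gaussian ball around the best fit point,
+    # clipped to the allowed parameter range.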
+    pos = np.empty((20, len(x0)),dtype=np.double)
+    for i in range(pos.shape[0]):
+        pos[i] = xopt + np.random.randn(len(x0))*stepsizes
+        pos[i,:] = np.clip(pos[i,:],low,high)
+
+    nwalkers, ndim = pos.shape
+
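+    # Sample the posterior with emcee. The log probability is just the
+    # negative of the nll, i.e. flat priors on all three parameters apart
+    # from nll() returning infinity for negative normalization or resolution.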
+    sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x))
+    with np.errstate(invalid='ignore'):
+        sampler.run_mcmc(pos, args.steps)
+
+    print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
+
+    try:
+        print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True))
+    except Exception as e:
+        print(e)
+
+    log_prob = sampler.get_log_prob(flat=True)
+    samples = sampler.get_chain(flat=True)
+    flat_samples = samples.reshape((-1,len(x0)))
+
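+    # Plot the 1D marginal posterior distribution of each fit parameter.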
+    plt.figure()
+    plt.subplot(2,2,1)
+    plt.hist(flat_samples[:,0],bins=100,histtype='step')
+    plt.xlabel("Normalization")
+    despine(ax=plt.gca(),left=True,trim=True)
+    plt.gca().get_yaxis().set_visible(False)
+    plt.subplot(2,2,2)
+    plt.hist(flat_samples[:,1],bins=100,histtype='step')
+    plt.xlabel("Energy Bias")
+    despine(ax=plt.gca(),left=True,trim=True)
+    plt.gca().get_yaxis().set_visible(False)
+    plt.subplot(2,2,3)
+    plt.hist(flat_samples[:,2],bins=100,histtype='step')
+    plt.xlabel("Energy Resolution")
+    despine(ax=plt.gca(),left=True,trim=True)
+    plt.gca().get_yaxis().set_visible(False)
+    if args.save:
+        plt.savefig("michel_electron_fit_posterior.pdf")
+        plt.savefig("michel_electron_fit_posterior.eps")
+    else:
+        plt.suptitle("Fit Parameters")
+
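+    # Report the mode (the sample with the highest posterior probability) and
+    # the standard deviation of the posterior for the energy bias and
+    # resolution.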
+    mode_bias = samples[log_prob.argmax()][1]
+    mean_bias, error_bias = np.mean(samples[:,1]), np.std(samples[:,1])
+    mode_resolution = samples[log_prob.argmax()][2]
+    mean_resolution, error_resolution = np.mean(samples[:,2]), np.std(samples[:,2])
+
+    print("Energy bias = %.2g +/- %.2g" % (mode_bias,error_bias))
+    print("Energy resolution = %.2g +/- %.2g" % (mode_resolution,error_resolution))
+
print("Particle ID probability for Michel electrons:")
print("Data")
print_particle_probs(michel_low_nhit)
@@ -140,7 +267,6 @@ if __name__ == '__main__':
plt.suptitle("Michel Electrons")
fig = plt.figure()
- bins = np.linspace(0,100,41)
plt.hist(michel_low_nhit.ke.values, bins=bins, histtype='step', color='C0', label="Data")
plt.hist(michel_low_nhit_mc.ke.values, weights=np.tile(len(michel_low_nhit)/len(michel_low_nhit_mc),len(michel_low_nhit_mc.ke.values)), bins=bins, histtype='step', color='C1', label="Monte Carlo")
hist = np.histogram(michel_low_nhit.ke.values,bins=bins)[0]