major updates to the chi2 analysis

This commit fixes the chi2 analysis so that it is no longer biased. Previously, the chi2 analysis pull plots showed a consistent bias. At first, I thought this was due to the fact that the posterior wasn't gaussian, but even after switching to percentile plots based on the algorithm outlined in "Validating Bayesian Inference Algorithms with Simulation-Based Calibration", I was still seeing a bias. I finally tracked it down to the fact that I was applying the energy scale parameters to the data instead of the Monte Carlo. Therefore, in this commit I update the posterior to now apply the energy scale parameters to the Monte Carlo instead of the data. This has the slight disadvantage that the final histograms will be binned in the biased energy, but that's not really a big deal. In addition, this commit contains several other updates: - switch to plotting percentile plots based on the algorithm in "Validating Bayesian Inference Algorithms with Simulation-Based Calibration" instead of pull plots - apply both the energy scale and resolution at the individual particle level, i.e. there is no longer an energy resolution term for electron + muon fits - separate pull plots and coverage plots. Previously I was making both the p-value coverage plots and the pull plots at the same time. However, the pull plots shouldn't have anything to do with the GENIE weights whereas the p-value coverage plots should draw samples weighted by the GENIE weights. In addition, for the pull plots we draw new truth parameters on every iteration whereas for the p-value coverage plots we only draw them once. - switch to using KDEMove() for the MCMC since I think it samples multimodal distributions a lot better than the default emcee move. - I now correct for the reconstruction energy bias in plot-michel and plot-muons
author: tlatorre <tlatorre@uchicago.edu> 2020-10-05 14:36:40 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-10-05 14:36:40 -0500
commit: d48484c29d09a944de6f9251a3c659e76279464e (patch)
tree: c056eaa94e7db434365c285d9a96c0e3659f9d95 /utils/plot-michels
parent: 1cd79549ee8ceb0f5e64e06611d891098905c05a (diff)
download: sddm-d48484c29d09a944de6f9251a3c659e76279464e.tar.gz
sddm-d48484c29d09a944de6f9251a3c659e76279464e.tar.bz2
sddm-d48484c29d09a944de6f9251a3c659e76279464e.zip
1 files changed, 9 insertions, 13 deletions
diff --git a/utils/plot-michels b/utils/plot-michels
index b313c3e..9637e13 100755
--- a/utils/plot-michels
+++ b/utils/plot-michels
@@ -30,17 +30,7 @@ from sddm.stats import *
 import emcee
 from sddm.dc import estimate_errors, EPSILON
 import nlopt
-import contextlib
-
-# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre
-@contextlib.contextmanager
-def printoptions(*args, **kwargs):
-    original = np.get_printoptions()
-    np.set_printoptions(*args, **kwargs)
-    try:
-        yield
-    finally: 
-        np.set_printoptions(**original)
+from sddm import printoptions
 
 particle_id = {20: 'e', 22: 'u'}
 
@@ -100,6 +90,7 @@ if __name__ == '__main__':
     from sddm.plot_energy import *
     from sddm.plot import *
     from sddm import setup_matplotlib
+    from sddm.utils import correct_energy_bias
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
@@ -108,6 +99,7 @@ if __name__ == '__main__':
     parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
     parser.add_argument("--print-nll", action='store_true', default=False, help="print nll values")
     parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
+    parser.add_argument("--walkers", type=int, default=100, help="number of walkers")
     args = parser.parse_args()
 
     setup_matplotlib(args.save)
@@ -120,7 +112,9 @@ if __name__ == '__main__':
     for run, df in rhdr.groupby('run'):
         evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
     ev = pd.concat(evs)
+    ev = correct_energy_bias(ev)
     ev_mc = get_events(args.mc, merge_fits=True)
+    ev_mc = correct_energy_bias(ev_mc)
 
     # Drop events without fits
     ev = ev[~np.isnan(ev.fmin)]
@@ -195,17 +189,19 @@ if __name__ == '__main__':
 
     stepsizes = estimate_errors(nll,xopt,low,high)
 
+    stepsizes[1] = 0.1
+
     with printoptions(precision=3, suppress=True):
         print("Errors: ", stepsizes)
 
-    pos = np.empty((20, len(x0)),dtype=np.double)
+    pos = np.empty((args.walkers, len(x0)),dtype=np.double)
     for i in range(pos.shape[0]):
         pos[i] = xopt + np.random.randn(len(x0))*stepsizes
         pos[i,:] = np.clip(pos[i,:],low,high)
 
     nwalkers, ndim = pos.shape
 
-    sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x))
+    sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x), moves=emcee.moves.KDEMove())
     with np.errstate(invalid='ignore'):
         sampler.run_mcmc(pos, args.steps)
author	tlatorre <tlatorre@uchicago.edu>	2020-10-05 14:36:40 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2020-10-05 14:36:40 -0500
commit	d48484c29d09a944de6f9251a3c659e76279464e (patch)
tree	c056eaa94e7db434365c285d9a96c0e3659f9d95 /utils/plot-michels
parent	1cd79549ee8ceb0f5e64e06611d891098905c05a (diff)
download	sddm-d48484c29d09a944de6f9251a3c659e76279464e.tar.gz sddm-d48484c29d09a944de6f9251a3c659e76279464e.tar.bz2 sddm-d48484c29d09a944de6f9251a3c659e76279464e.zip