diff options
Diffstat (limited to 'utils/dc-closure-test')
-rwxr-xr-x | utils/dc-closure-test | 555 |
1 files changed, 555 insertions, 0 deletions
diff --git a/utils/dc-closure-test b/utils/dc-closure-test new file mode 100755 index 0000000..54d2629 --- /dev/null +++ b/utils/dc-closure-test @@ -0,0 +1,555 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr +import nlopt +from scipy.stats import poisson +import contextlib +import sys +from math import exp +import emcee +from scipy.optimize import brentq +from scipy.stats import truncnorm +from matplotlib.lines import Line2D +from sddm.plot import despine +from sddm.dc import * +from sddm.plot_energy import * + +try: + from emcee import moves +except ImportError: + print("emcee version 2.2.1 is required",file=sys.stderr) + sys.exit(1) + +# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre +@contextlib.contextmanager +def printoptions(*args, **kwargs): + original = np.get_printoptions() + np.set_printoptions(*args, **kwargs) + try: + yield + finally: + np.set_printoptions(**original) + +def radius_cut(ev): + ev['radius_cut'] = np.digitize((ev.r/PSUP_RADIUS)**3,(0.9,)) + return ev + +def udotr_cut(ev): + ev['udotr_cut'] = np.digitize(ev.udotr,(-0.5,)) + return ev + +def psi_cut(ev): + ev['psi_cut'] = np.digitize(ev.psi,(6.0,)) + return ev + +def cos_theta_cut(ev): + ev['cos_theta_cut'] = np.digitize(ev.cos_theta,(-0.5,)) + return ev + +def z_cut(ev): + ev['z_cut'] = np.digitize(ev.z,(0.0,)) + return ev + +# Constraint to enforce the fact that P(r,psi,z,udotr|muon) all add up to 1.0. +# In the likelihood function we set the last possibility for r and udotr equal +# to 1.0 minus the others. Therefore, we need to enforce the fact that the +# others must add up to less than 1. +muon_r_psi_z_udotr = Constraint(range(11,26)) + +# Constraint to enforce the fact that P(z,udotr|noise) all add up to 1.0. In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1. +noise_z_udotr = Constraint(range(28,31)) + +# Constraint to enforce the fact that P(r,z,udotr|neck) all add up to 1.0. In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1. +neck_r_z_udotr = Constraint(range(31,38)) + +# Constraint to enforce the fact that P(r,udotr|flasher) all add up to 1.0. In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1 +flasher_r_udotr = Constraint(range(39,42)) + +# Constraint to enforce the fact that P(r,udotr|breakdown) all add up to 1.0. +# In the likelihood function we set the last possibility for r and udotr equal +# to 1.0 minus the others. Therefore, we need to enforce the fact that the +# others must add up to less than 1. +breakdown_r_udotr = Constraint(range(44,47)) + +def make_nll(data, sacrifice, constraints): + def nll(x, grad=None, fill_value=1e9): + if grad is not None and grad.size > 0: + raise Exception("nll got passed grad!") + + nll = 0.0 + # Here we explicitly return a crazy high value if one of the + # constraints is violated. When using nlopt it should respect all the + # constraints, *but* later when we do the Metropolis Hastings algorithm + # we don't have any way to add the constraints explicitly. + for constraint in constraints: + if constraint(x) > 0: + nll += fill_value + 1e4*constraint(x)**2 + + if (x <= 0).any() or (x[6:] >= 1).any(): + nll += fill_value + 1e4*np.sum((x[x < 0])**2) + 1e4*np.sum((x[6:][x[6:] > 1]-1)**2) + + if nll: + return nll + + (mu_signal, mu_muon, mu_noise, mu_neck, mu_flasher, mu_breakdown, + contamination_muon, contamination_noise, contamination_neck, contamination_flasher, contamination_breakdown, + p_r_psi_z_udotr_muon_lolololo, # 11 + p_r_psi_z_udotr_muon_lololohi, + p_r_psi_z_udotr_muon_lolohilo, + p_r_psi_z_udotr_muon_lolohihi, + p_r_psi_z_udotr_muon_lohilolo, + p_r_psi_z_udotr_muon_lohilohi, + p_r_psi_z_udotr_muon_lohihilo, + p_r_psi_z_udotr_muon_lohihihi, + p_r_psi_z_udotr_muon_hilololo, + p_r_psi_z_udotr_muon_hilolohi, + p_r_psi_z_udotr_muon_hilohilo, + p_r_psi_z_udotr_muon_hilohihi, + p_r_psi_z_udotr_muon_hihilolo, + p_r_psi_z_udotr_muon_hihilohi, + p_r_psi_z_udotr_muon_hihihilo, + p_r_noise_lo, p_psi_noise_lo, # 26, 27 + p_z_udotr_noise_lolo, # 28 + p_z_udotr_noise_lohi, + p_z_udotr_noise_hilo, + p_r_z_udotr_neck_lololo, # 31 + p_r_z_udotr_neck_lolohi, + p_r_z_udotr_neck_lohilo, + p_r_z_udotr_neck_lohihi, + p_r_z_udotr_neck_hilolo, + p_r_z_udotr_neck_hilohi, + p_r_z_udotr_neck_hihilo, + p_psi_neck_lo, # 38 + p_r_udotr_flasher_lolo, p_r_udotr_flasher_lohi, p_r_udotr_flasher_hilo, # 39, ..., 41 + p_psi_flasher_lo, p_z_flasher_lo, + p_r_udotr_breakdown_lolo, p_r_udotr_breakdown_lohi, p_r_udotr_breakdown_hilo, # 44, ..., 46 + p_psi_breakdown_lo, p_z_breakdown_lo, + p_neck_given_muon) = x + + p_r_udotr_flasher_hihi = 1-p_r_udotr_flasher_lolo-p_r_udotr_flasher_lohi-p_r_udotr_flasher_hilo + p_r_udotr_breakdown_hihi = 1-p_r_udotr_breakdown_lolo-p_r_udotr_breakdown_lohi-p_r_udotr_breakdown_hilo + p_r_psi_z_udotr_muon_hihihihi = 1 - \ + p_r_psi_z_udotr_muon_lolololo - \ + p_r_psi_z_udotr_muon_lololohi - \ + p_r_psi_z_udotr_muon_lolohilo - \ + p_r_psi_z_udotr_muon_lolohihi - \ + p_r_psi_z_udotr_muon_lohilolo - \ + p_r_psi_z_udotr_muon_lohilohi - \ + p_r_psi_z_udotr_muon_lohihilo - \ + p_r_psi_z_udotr_muon_lohihihi - \ + p_r_psi_z_udotr_muon_hilololo - \ + p_r_psi_z_udotr_muon_hilolohi - \ + p_r_psi_z_udotr_muon_hilohilo - \ + p_r_psi_z_udotr_muon_hilohihi - \ + p_r_psi_z_udotr_muon_hihilolo - \ + p_r_psi_z_udotr_muon_hihilohi - \ + p_r_psi_z_udotr_muon_hihihilo + p_r_z_udotr_neck_hihihi = 1 - p_r_z_udotr_neck_lololo - p_r_z_udotr_neck_lolohi - p_r_z_udotr_neck_lohilo - p_r_z_udotr_neck_lohihi - p_r_z_udotr_neck_hilolo - p_r_z_udotr_neck_hilohi - p_r_z_udotr_neck_hihilo + p_z_udotr_noise_hihi = 1 - p_z_udotr_noise_lolo - p_z_udotr_noise_lohi - p_z_udotr_noise_hilo + + # Muon events + # first 6 parameters are the mean number of signal and bgs + p_muon = np.array([\ + [[[p_r_psi_z_udotr_muon_lolololo, p_r_psi_z_udotr_muon_lololohi], \ + [p_r_psi_z_udotr_muon_lolohilo, p_r_psi_z_udotr_muon_lolohihi]], \ + [[p_r_psi_z_udotr_muon_lohilolo, p_r_psi_z_udotr_muon_lohilohi], \ + [p_r_psi_z_udotr_muon_lohihilo, p_r_psi_z_udotr_muon_lohihihi]]], \ + [[[p_r_psi_z_udotr_muon_hilololo, p_r_psi_z_udotr_muon_hilolohi], \ + [p_r_psi_z_udotr_muon_hilohilo, p_r_psi_z_udotr_muon_hilohihi]], \ + [[p_r_psi_z_udotr_muon_hihilolo, p_r_psi_z_udotr_muon_hihilohi], \ + [p_r_psi_z_udotr_muon_hihihilo, p_r_psi_z_udotr_muon_hihihihi]]]]) + expected_muon = p_muon*contamination_muon*mu_muon + sacrifice['muon']*mu_signal + + nll -= fast_poisson_logpmf(data['muon'],expected_muon).sum() + + # Noise events + p_r_noise = np.array([p_r_noise_lo,1-p_r_noise_lo]) + p_psi_noise = np.array([p_psi_noise_lo,1-p_psi_noise_lo]) + p_z_udotr_noise = np.array([\ + [p_z_udotr_noise_lolo,p_z_udotr_noise_lohi], + [p_z_udotr_noise_hilo,p_z_udotr_noise_hihi]]) + p_noise = p_r_noise[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_noise[:,np.newaxis,np.newaxis]*p_z_udotr_noise + expected_noise = p_noise*contamination_noise*mu_noise + sacrifice['noise']*mu_signal + + nll -= fast_poisson_logpmf(data['noise'],expected_noise).sum() + + # Neck events + # FIXME: for now assume parameterized same as muon + p_r_z_udotr_neck = np.array([\ + [[p_r_z_udotr_neck_lololo, p_r_z_udotr_neck_lolohi], \ + [p_r_z_udotr_neck_lohilo, p_r_z_udotr_neck_lohihi]], \ + [[p_r_z_udotr_neck_hilolo, p_r_z_udotr_neck_hilohi], \ + [p_r_z_udotr_neck_hihilo, p_r_z_udotr_neck_hihihi]]]) + p_psi_neck = np.array([p_psi_neck_lo,1-p_psi_neck_lo]) + p_neck = p_r_z_udotr_neck[:,np.newaxis,:,:]*p_psi_neck[:,np.newaxis,np.newaxis] + expected_neck = p_neck*contamination_neck*mu_neck + sacrifice['neck']*mu_signal + # FIXME: pdf should be different for muon given neck + expected_neck += p_muon*p_neck_given_muon*mu_muon + + nll -= fast_poisson_logpmf(data['neck'],expected_neck).sum() + + # Flasher events + p_r_udotr_flasher = np.array([\ + [p_r_udotr_flasher_lolo,p_r_udotr_flasher_lohi], \ + [p_r_udotr_flasher_hilo,p_r_udotr_flasher_hihi]]) + p_psi_flasher = np.array([p_psi_flasher_lo,1-p_psi_flasher_lo]) + p_z_flasher = np.array([p_z_flasher_lo,1-p_z_flasher_lo]) + p_flasher = p_r_udotr_flasher[:,np.newaxis,np.newaxis,:]*p_psi_flasher[:,np.newaxis,np.newaxis]*p_z_flasher[:,np.newaxis] + expected_flasher = p_flasher*contamination_flasher*mu_flasher + sacrifice['flasher']*mu_signal + + nll -= fast_poisson_logpmf(data['flasher'],expected_flasher).sum() + + # Breakdown events + p_r_udotr_breakdown = np.array([\ + [p_r_udotr_breakdown_lolo,p_r_udotr_breakdown_lohi], \ + [p_r_udotr_breakdown_hilo,p_r_udotr_breakdown_hihi]]) + p_psi_breakdown = np.array([p_psi_breakdown_lo,1-p_psi_breakdown_lo]) + p_z_breakdown = np.array([p_z_breakdown_lo,1-p_z_breakdown_lo]) + p_breakdown = p_r_udotr_breakdown[:,np.newaxis,np.newaxis,:]*p_psi_breakdown[:,np.newaxis,np.newaxis]*p_z_breakdown[:,np.newaxis] + expected_breakdown = p_breakdown*contamination_breakdown*mu_breakdown + sacrifice['breakdown']*mu_signal + + nll -= fast_poisson_logpmf(data['breakdown'],expected_breakdown).sum() + + # Signal like events + expected_signal = np.zeros_like(expected_muon) + expected_signal += mu_signal*sacrifice['signal'] + expected_signal += p_muon*(1-contamination_muon)*mu_muon + expected_signal += p_neck*(1-contamination_neck)*mu_neck + expected_signal += p_noise*(1-contamination_noise)*mu_noise + expected_signal += p_flasher*(1-contamination_flasher)*mu_flasher + expected_signal += p_breakdown*(1-contamination_breakdown)*mu_breakdown + + nll -= fast_poisson_logpmf(data['signal'],expected_signal).sum() + + if not np.isfinite(nll): + print("x = ", x) + print("p_r_z_udotr_neck = ", p_r_z_udotr_neck) + print("expected_muon = ", expected_muon) + print("expected_noise = ", expected_noise) + print("expected_neck = ", expected_neck) + print("expected_flasher = ", expected_flasher) + print("expected_breakdown = ", expected_breakdown) + print("nll is not finite!") + sys.exit(0) + + return nll + return nll + +def fit(data, sacrifice, steps): + constraints = [flasher_r_udotr, breakdown_r_udotr,muon_r_psi_z_udotr,neck_r_z_udotr,noise_z_udotr] + nll = make_nll(data,sacrifice,constraints) + + x0 = [] + for bg in ['signal','muon','noise','neck','flasher','breakdown']: + x0.append(data[bg].sum()) + + # contamination + x0 += [0.99]*5 + + if data['muon'].sum() > 0: + # P(r,psi,z,udotr|muon) + x0 += [data['muon'][0,0,0,0].sum()/data['muon'].sum()] + x0 += [data['muon'][0,0,0,1].sum()/data['muon'].sum()] + x0 += [data['muon'][0,0,1,0].sum()/data['muon'].sum()] + x0 += [data['muon'][0,0,1,1].sum()/data['muon'].sum()] + x0 += [data['muon'][0,1,0,0].sum()/data['muon'].sum()] + x0 += [data['muon'][0,1,0,1].sum()/data['muon'].sum()] + x0 += [data['muon'][0,1,1,0].sum()/data['muon'].sum()] + x0 += [data['muon'][0,1,1,1].sum()/data['muon'].sum()] + x0 += [data['muon'][1,0,0,0].sum()/data['muon'].sum()] + x0 += [data['muon'][1,0,0,1].sum()/data['muon'].sum()] + x0 += [data['muon'][1,0,1,0].sum()/data['muon'].sum()] + x0 += [data['muon'][1,0,1,1].sum()/data['muon'].sum()] + x0 += [data['muon'][1,1,0,0].sum()/data['muon'].sum()] + x0 += [data['muon'][1,1,0,1].sum()/data['muon'].sum()] + x0 += [data['muon'][1,1,1,0].sum()/data['muon'].sum()] + else: + x0 += [0.1]*15 + + if data['noise'].sum() > 0: + # P(r|noise) + x0 += [data['noise'][0].sum()/data['noise'].sum()] + # P(psi|noise) + x0 += [data['noise'][:,0].sum()/data['noise'].sum()] + # P(z,udotr|noise) + x0 += [data['noise'][:,:,0,0].sum()/data['noise'].sum()] + x0 += [data['noise'][:,:,0,1].sum()/data['noise'].sum()] + x0 += [data['noise'][:,:,1,0].sum()/data['noise'].sum()] + else: + x0 += [0.1]*5 + + if data['neck'].sum() > 0: + # P(r,z,udotr|neck) + x0 += [data['neck'][0,:,0,0].sum()/data['neck'].sum()] + x0 += [data['neck'][0,:,0,1].sum()/data['neck'].sum()] + x0 += [data['neck'][0,:,1,0].sum()/data['neck'].sum()] + x0 += [data['neck'][0,:,1,1].sum()/data['neck'].sum()] + x0 += [data['neck'][1,:,0,0].sum()/data['neck'].sum()] + x0 += [data['neck'][1,:,0,1].sum()/data['neck'].sum()] + x0 += [data['neck'][1,:,1,0].sum()/data['neck'].sum()] + # P(psi|neck) + x0 += [data['neck'][:,0].sum()/data['neck'].sum()] + else: + x0 += [0.1]*8 + + if data['flasher'].sum() > 0: + # P(r,udotr|flasher) + x0 += [data['flasher'][0,:,:,0].sum()/data['flasher'].sum()] + x0 += [data['flasher'][0,:,:,1].sum()/data['flasher'].sum()] + x0 += [data['flasher'][1,:,:,0].sum()/data['flasher'].sum()] + # P(psi|flasher) + x0 += [data['flasher'][:,0].sum()/data['flasher'].sum()] + # P(z|flasher) + x0 += [data['flasher'][:,:,0].sum()/data['flasher'].sum()] + else: + x0 += [0.1]*5 + + if data['breakdown'].sum() > 0: + # P(r,udotr|breakdown) + x0 += [data['breakdown'][0,:,:,0].sum()/data['breakdown'].sum()] + x0 += [data['breakdown'][0,:,:,1].sum()/data['breakdown'].sum()] + x0 += [data['breakdown'][1,:,:,0].sum()/data['breakdown'].sum()] + # P(psi|breakdown) + x0 += [data['breakdown'][:,0].sum()/data['breakdown'].sum()] + # P(z|breakdown) + x0 += [data['breakdown'][:,:,0].sum()/data['breakdown'].sum()] + else: + x0 += [0.1]*5 + + # P(neck|muon) + x0 += [EPSILON] + + x0 = np.array(x0) + + # Use the COBYLA algorithm here because it is the only derivative free + # minimization routine which honors inequality constraints + # Edit: SBPLX seems to work better + opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) + opt.set_min_objective(nll) + # set lower bounds to 1e-10 to prevent nans if we predict something should + # be 0 but observe an event. + low = np.ones_like(x0)*EPSILON + high = np.array([1e9]*6 + [1-EPSILON]*(len(x0)-6)) + x0[x0 < low] = low[x0 < low] + x0[x0 > high] = high[x0 > high] + opt.set_lower_bounds(low) + opt.set_upper_bounds(high) + opt.set_ftol_abs(1e-10) + opt.set_initial_step([1]*6 + [0.01]*(len(x0)-6)) + #for constraint in constraints: + #opt.add_inequality_constraint(constraint,0) + + xopt = opt.optimize(x0) + nll_xopt = nll(xopt) + print("nll(xopt) = ", nll(xopt)) + + while True: + xopt = opt.optimize(xopt) + if not nll(xopt) < nll_xopt - 1e-10: + break + nll_xopt = nll(xopt) + print("nll(xopt) = ", nll(xopt)) + #print("n = ", opt.get_numevals()) + + stepsizes = estimate_errors(nll,xopt,constraints) + with printoptions(precision=3, suppress=True): + print("Errors: ", stepsizes) + + #samples = metropolis_hastings(nll,xopt,stepsizes,100000) + #print("nll(xopt) = %.2g" % nll(xopt)) + + pos = np.empty((10, len(x0)),dtype=np.double) + for i in range(pos.shape[0]): + pos[i] = xopt + np.random.randn(len(x0))*stepsizes + pos[i,:6] = np.clip(pos[i,:6],EPSILON,1e9) + pos[i,6:] = np.clip(pos[i,6:],EPSILON,1-EPSILON) + + for constraint in constraints: + if constraint(pos[i]) >= 0: + pos[i] = constraint.renormalize_no_fix(pos[i]) + + nwalkers, ndim = pos.shape + + proposal = get_proposal_func(stepsizes*0.1,low,high) + sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x, grad, fill_value: -nll(x,grad,fill_value), moves=emcee.moves.MHMove(proposal),args=[None,np.inf]) + with np.errstate(invalid='ignore'): + sampler.run_mcmc(pos, steps) + + samples = sampler.chain.reshape((-1,len(x0))) + + return samples + +if __name__ == '__main__': + import argparse + import numpy as np + import pandas as pd + import sys + import h5py + from sddm import setup_matplotlib + + parser = argparse.ArgumentParser("plot fit results") + parser.add_argument("filenames", nargs='+', help="input files") + parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain") + parser.add_argument("--save", action="store_true", default=False, help="save plots") + parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files") + parser.add_argument("-n", type=int, default=10, help="number of fits to run") + args = parser.parse_args() + + setup_matplotlib(args.save) + + import matplotlib.pyplot as plt + + ev = get_events(args.filenames,merge_fits=True) + ev_mc = get_events(args.mc, merge_fits=True) + + # figure out bins for high level variables + ev = radius_cut(ev) + ev = psi_cut(ev) + ev = cos_theta_cut(ev) + ev = z_cut(ev) + ev = udotr_cut(ev) + + ev['noise'] = ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0 + ev['neck'] = ((ev.dc & DC_NECK) != 0) & ~ev.noise + ev['flasher'] = ((ev.dc & DC_FLASHER) != 0) & ~(ev.noise | ev.neck) & (ev.nhit < 1000) + ev['breakdown'] = ((ev.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev.noise | ev.neck) & (ev.nhit >= 1000) + ev['muon'] = ((ev.dc & DC_MUON) != 0) & ~(ev.noise | ev.neck | ev.flasher | ev.breakdown) + ev['signal'] = ~(ev.noise | ev.neck | ev.flasher | ev.breakdown | ev.muon) + + ev_mc = ev_mc[ev_mc.prompt] + + # figure out bins for high level variables + ev_mc = radius_cut(ev_mc) + ev_mc = psi_cut(ev_mc) + ev_mc = cos_theta_cut(ev_mc) + ev_mc = z_cut(ev_mc) + ev_mc = udotr_cut(ev_mc) + + ev_mc['noise'] = ev_mc.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0 + ev_mc['neck'] = ((ev_mc.dc & DC_NECK) != 0) & ~ev_mc.noise + ev_mc['flasher'] = ((ev_mc.dc & DC_FLASHER) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit < 1000) + ev_mc['breakdown'] = ((ev_mc.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit >= 1000) + ev_mc['muon'] = ((ev_mc.dc & DC_MUON) != 0) & ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown) + ev_mc['signal'] = ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown | ev_mc.muon) + + contamination_pull = {} + + nbg = {} + for bg in ['signal','muon','noise','neck','flasher','breakdown']: + nbg[bg] = 100 + contamination_pull[bg] = [] + + for i in range(args.n): + data = {} + for bg in ['signal','muon','noise','neck','flasher','breakdown']: + data[bg] = np.zeros((2,2,2,2),dtype=int) + if bg == 'signal': + for bg2 in ['signal','muon','noise','neck','flasher','breakdown']: + if bg2 == 'signal': + for _, row in ev_mc[ev_mc[bg2]].sample(n=nbg[bg2],replace=True).iterrows(): + data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1 + else: + for _, row in ev[ev[bg2]].sample(n=nbg[bg2],replace=True).iterrows(): + data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1 + else: + for _, row in ev[ev[bg]].iterrows(): + data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1 + + # FIXME: Double check that what I'm calculating here matches with what I + # expect + sacrifice = {} + for bg in ['signal','muon','noise','neck','flasher','breakdown']: + sacrifice[bg] = np.zeros((2,2,2,2),dtype=float) + for _, row in ev_mc[ev_mc[bg]].iterrows(): + sacrifice[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1 + + sacrifice[bg] /= len(ev_mc) + + samples = fit(data, sacrifice, args.steps) + + (mu_signal, mu_muon, mu_noise, mu_neck, mu_flasher, mu_breakdown, + contamination_muon, contamination_noise, contamination_neck, contamination_flasher, contamination_breakdown, + p_r_psi_z_udotr_muon_lolololo, # 11 + p_r_psi_z_udotr_muon_lololohi, + p_r_psi_z_udotr_muon_lolohilo, + p_r_psi_z_udotr_muon_lolohihi, + p_r_psi_z_udotr_muon_lohilolo, + p_r_psi_z_udotr_muon_lohilohi, + p_r_psi_z_udotr_muon_lohihilo, + p_r_psi_z_udotr_muon_lohihihi, + p_r_psi_z_udotr_muon_hilololo, + p_r_psi_z_udotr_muon_hilolohi, + p_r_psi_z_udotr_muon_hilohilo, + p_r_psi_z_udotr_muon_hilohihi, + p_r_psi_z_udotr_muon_hihilolo, + p_r_psi_z_udotr_muon_hihilohi, + p_r_psi_z_udotr_muon_hihihilo, + p_r_noise_lo, p_psi_noise_lo, # 26, 27 + p_z_udotr_noise_lolo, # 28 + p_z_udotr_noise_lohi, + p_z_udotr_noise_hilo, + p_r_z_udotr_neck_lololo, # 31 + p_r_z_udotr_neck_lolohi, + p_r_z_udotr_neck_lohilo, + p_r_z_udotr_neck_lohihi, + p_r_z_udotr_neck_hilolo, + p_r_z_udotr_neck_hilohi, + p_r_z_udotr_neck_hihilo, + p_psi_neck_lo, # 38 + p_r_udotr_flasher_lolo, p_r_udotr_flasher_lohi, p_r_udotr_flasher_hilo, # 39, ..., 41 + p_psi_flasher_lo, p_z_flasher_lo, + p_r_udotr_breakdown_lolo, p_r_udotr_breakdown_lohi, p_r_udotr_breakdown_hilo, # 44, ..., 46 + p_psi_breakdown_lo, p_z_breakdown_lo, + p_neck_given_muon) = samples.T + + for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']): + if i == 0: + contamination = samples[:,i] + else: + contamination = samples[:,i]*(1-samples[:,5+i]) + mean = np.mean(contamination) + std = np.std(contamination) + contamination_pull[bg].append((mean - nbg[bg])/std) + + fig = plt.figure() + axes = [] + for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']): + axes.append(plt.subplot(3,2,i+1)) + plt.hist(contamination_pull[bg],bins=100,histtype='step') + plt.title(bg.capitalize()) + for ax in axes: + ax.set_xlim((-10,10)) + despine(ax=ax,left=True,trim=True) + ax.get_yaxis().set_visible(False) + plt.tight_layout() + + if args.save: + fig.savefig("contamination_pull_plot.pdf") + fig.savefig("contamination_pull_plot.eps") + else: + plt.show() |