Diffstat (limited to 'utils/dc')
-rwxr-xr-x | utils/dc | 1225
1 file changed, 1225 insertions, 0 deletions
diff --git a/utils/dc b/utils/dc
new file mode 100755
index 0000000..30c5123
--- /dev/null
+++ b/utils/dc
@@ -0,0 +1,1225 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import print_function, division
+import numpy as np
+import nlopt
+from scipy.stats import poisson
+import contextlib
+import sys
+from math import exp
+import emcee
+from scipy.optimize import brentq
+from scipy.stats import truncnorm
+from matplotlib.lines import Line2D
+
+try:
+    from emcee import moves
+except ImportError:
+    print("this script requires emcee.moves (emcee >= 3.0)",file=sys.stderr)
+    sys.exit(1)
+
+# On retina screens the default plots are way too small. By using Qt5 and
+# setting QT_AUTO_SCREEN_SCALE_FACTOR=1, Qt5 will scale everything using the
+# dpi in ~/.Xresources.
+import matplotlib
+matplotlib.use("Qt5Agg")
+
+# Default figure size. Currently set to my monitor width and height so that
+# things are properly formatted
+FIGSIZE = (13.78,7.48)
+
+matplotlib.rcParams['figure.figsize'] = FIGSIZE
+
+matplotlib.rc('font', size=22)
+
+font = {'family':'serif', 'serif': ['computer modern roman']}
+matplotlib.rc('font',**font)
+
+matplotlib.rc('text', usetex=True)
+
+# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
+def despine(fig=None, ax=None, top=True, right=True, left=False,
+            bottom=False, offset=None, trim=False):
+    """Remove the top and right spines from plot(s).
+
+    Parameters
+    ----------
+    fig : matplotlib figure, optional
+        Figure to despine all axes of, default uses current figure.
+    ax : matplotlib axes, optional
+        Specific axes object to despine.
+    top, right, left, bottom : boolean, optional
+        If True, remove that spine.
+    offset : int or dict, optional
+        Absolute distance, in points, spines should be moved away
+        from the axes (negative values move spines inward). A single value
+        applies to all spines; a dict can be used to set offset values per
+        side.
+    trim : bool, optional
+        If True, limit spines to the smallest and largest major tick
+        on each non-despined axis.
+ + Returns + ------- + None + + """ + # Get references to the axes we want + if fig is None and ax is None: + axes = plt.gcf().axes + elif fig is not None: + axes = fig.axes + elif ax is not None: + axes = [ax] + + for ax_i in axes: + for side in ["top", "right", "left", "bottom"]: + # Toggle the spine objects + is_visible = not locals()[side] + ax_i.spines[side].set_visible(is_visible) + if offset is not None and is_visible: + try: + val = offset.get(side, 0) + except AttributeError: + val = offset + _set_spine_position(ax_i.spines[side], ('outward', val)) + + # Potentially move the ticks + if left and not right: + maj_on = any( + t.tick1line.get_visible() + for t in ax_i.yaxis.majorTicks + ) + min_on = any( + t.tick1line.get_visible() + for t in ax_i.yaxis.minorTicks + ) + ax_i.yaxis.set_ticks_position("right") + for t in ax_i.yaxis.majorTicks: + t.tick2line.set_visible(maj_on) + for t in ax_i.yaxis.minorTicks: + t.tick2line.set_visible(min_on) + + if bottom and not top: + maj_on = any( + t.tick1line.get_visible() + for t in ax_i.xaxis.majorTicks + ) + min_on = any( + t.tick1line.get_visible() + for t in ax_i.xaxis.minorTicks + ) + ax_i.xaxis.set_ticks_position("top") + for t in ax_i.xaxis.majorTicks: + t.tick2line.set_visible(maj_on) + for t in ax_i.xaxis.minorTicks: + t.tick2line.set_visible(min_on) + + if trim: + # clip off the parts of the spines that extend past major ticks + xticks = ax_i.get_xticks() + if xticks.size: + firsttick = np.compress(xticks >= min(ax_i.get_xlim()), + xticks)[0] + lasttick = np.compress(xticks <= max(ax_i.get_xlim()), + xticks)[-1] + ax_i.spines['bottom'].set_bounds(firsttick, lasttick) + ax_i.spines['top'].set_bounds(firsttick, lasttick) + newticks = xticks.compress(xticks <= lasttick) + newticks = newticks.compress(newticks >= firsttick) + ax_i.set_xticks(newticks) + + yticks = ax_i.get_yticks() + if yticks.size: + firsttick = np.compress(yticks >= min(ax_i.get_ylim()), + yticks)[0] + lasttick = np.compress(yticks <= max(ax_i.get_ylim()), + yticks)[-1] + ax_i.spines['left'].set_bounds(firsttick, lasttick) + ax_i.spines['right'].set_bounds(firsttick, lasttick) + newticks = yticks.compress(yticks <= lasttick) + newticks = newticks.compress(newticks >= firsttick) + ax_i.set_yticks(newticks) + +# Lower bound for probabilities in fit. We set this to be nonzero to prevent +# nans in case an expected value is predicted to be zero. +EPSILON = 1e-10 + +def get_proposal_func(stepsizes, low, high): + """ + Returns a function which produces proposal steps for a Metropolis Hastings + MCMC. This function can then be passed to emcee.moves.MHMove(). + + Steps are proposed according to a truncated normal distribution with mean + at the current position and with standard deviations given by stepsizes. + The bounds for each step are given by the two arrays low and high. + """ + stepsizes = np.atleast_1d(stepsizes) + low = np.atleast_1d(low) + high = np.atleast_1d(high) + def proposal(x0, random): + """ + Returns a tuple of proposed positions and the log-ratio of the proposal + probabilities. + + x0 should be a (K,ndim) dimensional numpy array where K is the number + of walkers and ndim is the number of dimensions in the likelihood function. + """ + # calculate a and b needed for truncnorm + # Note: These are not just low and high b/c + # + # The standard form of this distribution is a standard normal + # truncated to the range [a, b] --- notice that a and b are defined + # over the domain of the standard normal. 
To convert clip values
+        # for a specific mean and standard deviation, use::
+        #
+        #     a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
+        a, b = (low - x0)/stepsizes, (high - x0)/stepsizes
+        x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random)
+        # Note: Should be able to do this:
+        #
+        #     log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1)
+        #
+        # but I think there is a bug in truncnorm.logpdf() which barfs when
+        # passed 2D arrays, so instead we just loop over the first axis.
+        log_p_x_given_x0 = np.empty(x0.shape[0])
+        for i in range(x0.shape[0]):
+            log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum()
+        a, b = (low - x)/stepsizes, (high - x)/stepsizes
+        # Note: Should be able to do this:
+        #
+        #     log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1)
+        #
+        # but I think there is a bug in truncnorm.logpdf() which barfs when
+        # passed 2D arrays, so instead we just loop over the first axis.
+        log_p_x0_given_x = np.empty(x0.shape[0])
+        for i in range(x0.shape[0]):
+            log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum()
+        return x, log_p_x0_given_x - log_p_x_given_x0
+    return proposal
+
+def estimate_errors(nll, xopt, constraints):
+    """
+    Return approximate 1 sigma errors for each parameter assuming all
+    parameters are uncorrelated.
+    """
+    nll_xopt = nll(xopt)
+
+    def root(x, xopt, i):
+        """
+        Function to bisect where the negative log likelihood rises 0.5 above
+        the minimum.
+        """
+        xtest = xopt.copy()
+        xtest[i] = x
+        for constraint in constraints:
+            if constraint(xtest) >= 0:
+                xtest = constraint.renormalize(xtest,i)
+        return nll(xtest) - (nll_xopt + 0.5)
+
+    errors = np.empty_like(xopt)
+
+    for i in range(len(xopt)):
+        if i < 6:
+            xlo = brentq(root,0,xopt[i],args=(xopt,i))
+            xhi = brentq(root,xopt[i],1e9,args=(xopt,i))
+        else:
+            xlo = brentq(root,0,xopt[i],args=(xopt,i))
+            xhi = brentq(root,xopt[i],1,args=(xopt,i))
+
+        errors[i] = max(xopt[i]-xlo,xhi-xopt[i])
+
+    return errors
+
+def metropolis_hastings(nll, x0, stepsizes=None, size=1000):
+    x = np.asarray(x0)
+
+    if stepsizes is None:
+        stepsizes = np.ones_like(x)
+    else:
+        stepsizes = np.asarray(stepsizes)
+
+    accept = 0
+
+    nll_old = nll(x)
+
+    samples = np.empty((size,len(x0)),dtype=np.double)
+
+    try:
+        for i in range(size):
+            if i % 100 == 0:
+                sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1)))
+                sys.stdout.flush()
+
+            xnew = x + np.random.randn(len(x))*stepsizes
+
+            nll_new = nll(xnew)
+
+            if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new):
+                x = xnew
+                nll_old = nll_new
+                accept += 1
+
+            samples[i] = x
+    except KeyboardInterrupt:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        print("ctrl-c caught. quitting...")
+        samples.resize((i,len(x)))
+    else:
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+
+    return samples
+
+# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre
+@contextlib.contextmanager
+def printoptions(*args, **kwargs):
+    original = np.get_printoptions()
+    np.set_printoptions(*args, **kwargs)
+    try:
+        yield
+    finally:
+        np.set_printoptions(**original)
+
+# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python
+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+SNOMAN_MASS = {
+    20: 0.511,
+    21: 0.511,
+    22: 105.658,
+    23: 105.658
+}
+
+AV_RADIUS = 600.0
+PSUP_RADIUS = 840.0
+
+# Data cleaning bitmasks.
+DC_MUON = 0x1
+DC_JUNK = 0x2
+DC_CRATE_ISOTROPY = 0x4
+DC_QVNHIT = 0x8
+DC_NECK = 0x10
+DC_FLASHER = 0x20
+DC_ESUM = 0x40
+DC_OWL = 0x80
+DC_OWL_TRIGGER = 0x100
+DC_FTS = 0x200
+DC_ITC = 0x400
+DC_BREAKDOWN = 0x800
+
+def plot_hist(df, title=None, muons=False):
+    for id, df_id in sorted(df.groupby('id')):
+        if id == 20:
+            plt.subplot(3,4,1)
+        elif id == 22:
+            plt.subplot(3,4,2)
+        elif id == 2020:
+            plt.subplot(3,4,5)
+        elif id == 2022:
+            plt.subplot(3,4,6)
+        elif id == 2222:
+            plt.subplot(3,4,7)
+        elif id == 202020:
+            plt.subplot(3,4,9)
+        elif id == 202022:
+            plt.subplot(3,4,10)
+        elif id == 202222:
+            plt.subplot(3,4,11)
+        elif id == 222222:
+            plt.subplot(3,4,12)
+
+        if muons:
+            plt.hist(df_id.ke.values, bins=np.linspace(20,1000e3,100), histtype='step')
+        else:
+            plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
+        plt.xlabel("Energy (MeV)")
+        plt.title(str(id))
+
+    if title:
+        plt.suptitle(title)
+
+    if len(df):
+        plt.tight_layout()
+
+def chunks(l, n):
+    """Yield successive n-sized chunks from l."""
+    for i in range(0, len(l), n):
+        yield l[i:i + n]
+
+def print_warning(msg):
+    print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr)
+
+def unwrap(p, delta, axis=-1):
+    """
+    A modified version of np.unwrap() useful for unwrapping the 50 MHz clock.
+    It unwraps discontinuities bigger than delta/2 by delta.
+
+    Example:
+
+    >>> a = np.arange(10) % 5
+    >>> a
+    array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
+    >>> unwrap(a,5)
+    array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])
+
+    In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0.
+    """
+    p = np.asarray(p)
+    nd = p.ndim
+    dd = np.diff(p, axis=axis)
+    slice1 = [slice(None, None)]*nd     # full slices
+    slice1[axis] = slice(1, None)
+    slice1 = tuple(slice1)
+    ddmod = np.mod(dd + delta/2, delta) - delta/2
+    np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0))
+    ph_correct = ddmod - dd
+    np.copyto(ph_correct, 0, where=abs(dd) < delta/2)
+    up = np.array(p, copy=True, dtype='d')
+    up[slice1] = p[slice1] + ph_correct.cumsum(axis)
+    return up
+
+def unwrap_50_mhz_clock(gtr):
+    """
+    Unwrap an array with 50 MHz clock times. These times should all be in
+    nanoseconds and come from the KEV_GTR variable in the EV bank.
+
+    Note: We assume here that the events are already ordered contiguously by
+    GTID, so you shouldn't pass an array with multiple runs!
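+
+    Example (hypothetical times in ns, straddling a single clock rollover):
+
+    >>> delta = 0x7ffffffffff*20.0
+    >>> gtr = np.array([delta - 40.0, delta - 20.0, 0.0, 20.0])
+    >>> unwrap_50_mhz_clock(gtr) - delta
+    array([-40., -20.,   0.,  20.])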
+ """ + return unwrap(gtr,0x7ffffffffff*20.0) + +def retrigger_cut(ev): + """ + Cuts all retrigger events. + """ + return ev[ev.dt > 500] + +def breakdown_follower_cut(ev): + """ + Cuts all events within 1 second of breakdown events. + """ + breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] + return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] + +def flasher_follower_cut(ev): + """ + Cuts all events within 200 microseconds of flasher events. + """ + flashers = ev[ev.dc & DC_FLASHER != 0] + return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def muon_follower_cut(ev): + """ + Cuts all events 200 microseconds after a muon. + """ + muons = ev[ev.dc & DC_MUON != 0] + return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def michel_cut(ev): + """ + Looks for Michel electrons after muons. + """ + prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + # require Michel events to pass more of the SNO data cleaning cuts + michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + + michel = michel[michel.nhit >= 100] + + # Accept events which had a muon more than 800 nanoseconds but less + # than 20 microseconds before them. The 800 nanoseconds cut comes from + # Richie's thesis. He also mentions that the In Time Channel Spread Cut + # is very effective at cutting electrons events caused by muons, so I + # should implement that. + # + # Note: We currently don't look across run boundaries. This should be a + # *very* small effect, and the logic to do so would be very complicated + # since I would have to deal with 50 MHz clock rollovers, etc. + # + # Do a simple python for loop here over the runs since we create + # temporary arrays with shape (michel.size,prompt_plus_muons.size) + # which could be too big for the full dataset. + # + # There might be some clever way to do this without the loop in Pandas, + # but I don't know how. + if prompt_plus_muons.size and michel.size: + return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ + (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] + else: + return pd.DataFrame(columns=ev.columns) + +def atmospheric_events(ev): + """ + Tags atmospheric events which have a neutron follower. 
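+
+    Specifically, a prompt event is tagged as atmospheric if it is followed
+    between 20 microseconds and 250 ms later by a neutron candidate: a
+    follower which passes a stricter set of data cleaning cuts, reconstructs
+    inside the AV, and has an RSP energy above 4 MeV.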
+ """ + prompt = ev[ev.prompt] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + ev['atm'] = np.zeros(len(ev),dtype=np.bool) + + if prompt.size and follower.size: + # neutron followers have to obey stricter set of data cleaning cuts + neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] + r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2) + neutron = neutron[r < AV_RADIUS] + neutron = neutron[neutron.rsp_energy > 4.0] + + # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) + ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ + (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) + + return ev + +def gtid_sort(ev, first_gtid): + """ + Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid + in a run, which should be passed as a dictionary. This column can then be + used to sort the events sequentially. + + This function should be passed to ev.groupby('run').apply(). We use this + idiom instead of just looping over the groupby results since groupby() + makes a copy of the dataframe, i.e. + + for run, ev_run in ev.groupby('run'): + ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + would produce a SettingWithCopyWarning, so instead we use: + + ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) + + which doesn't have this problem. + """ + # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python + run = ev.name + + if run not in first_gtid: + print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) + first_gtid[run] = ev.gtid.iloc[0] + + ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + return ev + +def prompt_event(ev): + ev['prompt'] = (ev.nhit >= 100) + ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) + return ev + +def radius_cut(ev): + ev['radius_cut'] = np.digitize((ev.r/PSUP_RADIUS)**3,(0.9,)) + return ev + +def udotr_cut(ev): + ev['udotr_cut'] = np.digitize(ev.udotr,(-0.5,)) + return ev + +def psi_cut(ev): + ev['psi_cut'] = np.digitize(ev.psi,(6.0,)) + return ev + +def cos_theta_cut(ev): + ev['cos_theta_cut'] = np.digitize(ev.cos_theta,(-0.5,)) + return ev + +def z_cut(ev): + ev['z_cut'] = np.digitize(ev.z,(0.0,)) + return ev + +class Constraint(object): + def __init__(self, range): + self.range = range + + def __call__(self, x, grad=None): + return np.sum(x[self.range]) - 1 + EPSILON + + def renormalize(self, x, fix): + if fix not in self.range: + return x + x = x.copy() + while self(x) >= 0: + for i in self.range: + if i == fix: + continue + x[i] -= self(x) + return x + + def renormalize_no_fix(self, x): + x = x.copy() + while self(x) >= 0: + for i in self.range: + if x[i] < 10*EPSILON: + continue + x[i] -= x[i]/2.0 + return x + +# Constraint to enforce the fact that P(psi,z,udotr|muon) all add up to 1.0. In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1. +muon_psi_z_udotr = Constraint(range(12,19)) + +# Constraint to enforce the fact that P(z,udotr|noise) all add up to 1.0. 
In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1. +noise_z_udotr = Constraint(range(21,24)) + +# Constraint to enforce the fact that P(r,z,udotr|neck) all add up to 1.0. In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1. +neck_r_z_udotr = Constraint(range(24,31)) + +# Constraint to enforce the fact that P(r,udotr|flasher) all add up to 1.0. In +# the likelihood function we set the last possibility for r and udotr equal to +# 1.0 minus the others. Therefore, we need to enforce the fact that the others +# must add up to less than 1 +flasher_r_udotr = Constraint(range(32,35)) + +# Constraint to enforce the fact that P(r,udotr|breakdown) all add up to 1.0. +# In the likelihood function we set the last possibility for r and udotr equal +# to 1.0 minus the others. Therefore, we need to enforce the fact that the +# others must add up to less than 1. +breakdown_r_udotr = Constraint(range(37,40)) + +def make_nll(data, sacrifice,constraints): + def nll(x, grad=None, fill_value=1e9): + if grad is not None and grad.size > 0: + raise Exception("nll got passed grad!") + + nll = 0.0 + # Here we explicitly return a crazy high value if one of the + # constraints is violated. When using nlopt it should respect all the + # constraints, *but* later when we do the Metropolis Hastings algorithm + # we don't have any way to add the constraints explicitly. + for constraint in constraints: + if constraint(x) > 0: + nll += fill_value + 1e4*constraint(x)**2 + + if (x <= 0).any() or (x[6:] >= 1).any(): + nll += fill_value + 1e4*np.sum((x[x < 0])**2) + 1e4*np.sum((x[6:][x[6:] > 1]-1)**2) + + if nll: + return nll + + mu_signal, mu_muon, mu_noise, mu_neck, mu_flasher, mu_breakdown, \ + contamination_muon, contamination_noise, contamination_neck, contamination_flasher, contamination_breakdown, \ + p_r_muon_lo, \ + p_psi_z_udotr_muon_lololo, \ + p_psi_z_udotr_muon_lolohi, \ + p_psi_z_udotr_muon_lohilo, \ + p_psi_z_udotr_muon_lohihi, \ + p_psi_z_udotr_muon_hilolo, \ + p_psi_z_udotr_muon_hilohi, \ + p_psi_z_udotr_muon_hihilo, \ + p_r_noise_lo, p_psi_noise_lo, \ + p_z_udotr_noise_lolo, \ + p_z_udotr_noise_lohi, \ + p_z_udotr_noise_hilo, \ + p_r_z_udotr_neck_lololo, \ + p_r_z_udotr_neck_lolohi, \ + p_r_z_udotr_neck_lohilo, \ + p_r_z_udotr_neck_lohihi, \ + p_r_z_udotr_neck_hilolo, \ + p_r_z_udotr_neck_hilohi, \ + p_r_z_udotr_neck_hihilo, \ + p_psi_neck_lo, \ + p_r_udotr_flasher_lolo, p_r_udotr_flasher_lohi, p_r_udotr_flasher_hilo, p_psi_flasher_lo, p_z_flasher_lo, \ + p_r_udotr_breakdown_lolo, p_r_udotr_breakdown_lohi, p_r_udotr_breakdown_hilo, p_psi_breakdown_lo, p_z_breakdown_lo, \ + p_neck_given_muon = x + + p_r_udotr_flasher_hihi = 1-p_r_udotr_flasher_lolo-p_r_udotr_flasher_lohi-p_r_udotr_flasher_hilo + p_r_udotr_breakdown_hihi = 1-p_r_udotr_breakdown_lolo-p_r_udotr_breakdown_lohi-p_r_udotr_breakdown_hilo + p_psi_z_udotr_muon_hihihi = 1 - p_psi_z_udotr_muon_lololo - p_psi_z_udotr_muon_lolohi - p_psi_z_udotr_muon_lohilo - p_psi_z_udotr_muon_lohihi - p_psi_z_udotr_muon_hilolo - p_psi_z_udotr_muon_hilohi - p_psi_z_udotr_muon_hihilo + p_r_z_udotr_neck_hihihi = 1 - p_r_z_udotr_neck_lololo - p_r_z_udotr_neck_lolohi - p_r_z_udotr_neck_lohilo - p_r_z_udotr_neck_lohihi - p_r_z_udotr_neck_hilolo - p_r_z_udotr_neck_hilohi - p_r_z_udotr_neck_hihilo + 
p_z_udotr_noise_hihi = 1 - p_z_udotr_noise_lolo - p_z_udotr_noise_lohi - p_z_udotr_noise_hilo + + # Muon events + # first 6 parameters are the mean number of signal and bgs + p_r_muon = np.array([p_r_muon_lo,1-p_r_muon_lo]) + p_psi_z_udotr_muon = np.array([\ + [[p_psi_z_udotr_muon_lololo, p_psi_z_udotr_muon_lolohi], \ + [p_psi_z_udotr_muon_lohilo, p_psi_z_udotr_muon_lohihi]], \ + [[p_psi_z_udotr_muon_hilolo, p_psi_z_udotr_muon_hilohi], \ + [p_psi_z_udotr_muon_hihilo, p_psi_z_udotr_muon_hihihi]]]) + p_muon = p_r_muon[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_z_udotr_muon + expected_muon = p_muon*contamination_muon*mu_muon + sacrifice['muon']*mu_signal + + nll -= poisson.logpmf(data['muon'],expected_muon).sum() + + # Noise events + p_r_noise = np.array([p_r_noise_lo,1-p_r_noise_lo]) + p_psi_noise = np.array([p_psi_noise_lo,1-p_psi_noise_lo]) + p_z_udotr_noise = np.array([\ + [p_z_udotr_noise_lolo,p_z_udotr_noise_lohi], + [p_z_udotr_noise_hilo,p_z_udotr_noise_hihi]]) + p_noise = p_r_noise[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_noise[:,np.newaxis,np.newaxis]*p_z_udotr_noise + expected_noise = p_noise*contamination_noise*mu_noise + sacrifice['noise']*mu_signal + + nll -= poisson.logpmf(data['noise'],expected_noise).sum() + + # Neck events + # FIXME: for now assume parameterized same as muon + p_r_z_udotr_neck = np.array([\ + [[p_r_z_udotr_neck_lololo, p_r_z_udotr_neck_lolohi], \ + [p_r_z_udotr_neck_lohilo, p_r_z_udotr_neck_lohihi]], \ + [[p_r_z_udotr_neck_hilolo, p_r_z_udotr_neck_hilohi], \ + [p_r_z_udotr_neck_hihilo, p_r_z_udotr_neck_hihihi]]]) + p_psi_neck = np.array([p_psi_neck_lo,1-p_psi_neck_lo]) + p_neck = p_r_z_udotr_neck[:,np.newaxis,:,:]*p_psi_neck[:,np.newaxis,np.newaxis] + expected_neck = p_neck*contamination_neck*mu_neck + sacrifice['neck']*mu_signal + # FIXME: pdf should be different for muon given neck + expected_neck += p_muon*p_neck_given_muon*mu_muon + + nll -= poisson.logpmf(data['neck'],expected_neck).sum() + + # Flasher events + p_r_udotr_flasher = np.array([\ + [p_r_udotr_flasher_lolo,p_r_udotr_flasher_lohi], \ + [p_r_udotr_flasher_hilo,p_r_udotr_flasher_hihi]]) + p_psi_flasher = np.array([p_psi_flasher_lo,1-p_psi_flasher_lo]) + p_z_flasher = np.array([p_z_flasher_lo,1-p_z_flasher_lo]) + p_flasher = p_r_udotr_flasher[:,np.newaxis,np.newaxis,:]*p_psi_flasher[:,np.newaxis,np.newaxis]*p_z_flasher[:,np.newaxis] + expected_flasher = p_flasher*contamination_flasher*mu_flasher + sacrifice['flasher']*mu_signal + + nll -= poisson.logpmf(data['flasher'],expected_flasher).sum() + + # Breakdown events + p_r_udotr_breakdown = np.array([\ + [p_r_udotr_breakdown_lolo,p_r_udotr_breakdown_lohi], \ + [p_r_udotr_breakdown_hilo,p_r_udotr_breakdown_hihi]]) + p_psi_breakdown = np.array([p_psi_breakdown_lo,1-p_psi_breakdown_lo]) + p_z_breakdown = np.array([p_z_breakdown_lo,1-p_z_breakdown_lo]) + p_breakdown = p_r_udotr_breakdown[:,np.newaxis,np.newaxis,:]*p_psi_breakdown[:,np.newaxis,np.newaxis]*p_z_breakdown[:,np.newaxis] + expected_breakdown = p_breakdown*contamination_breakdown*mu_breakdown + sacrifice['breakdown']*mu_signal + + nll -= poisson.logpmf(data['breakdown'],expected_breakdown).sum() + + # Signal like events + expected_signal = np.zeros_like(expected_muon) + expected_signal += mu_signal*sacrifice['signal'] + expected_signal += p_muon*(1-contamination_muon)*mu_muon + expected_signal += p_neck*(1-contamination_neck)*mu_neck + expected_signal += p_noise*(1-contamination_noise)*mu_noise + expected_signal += p_flasher*(1-contamination_flasher)*mu_flasher + expected_signal += 
p_breakdown*(1-contamination_breakdown)*mu_breakdown + + nll -= poisson.logpmf(data['signal'],expected_signal).sum() + + if not np.isfinite(nll): + print("x = ", x) + print("p_r_z_udotr_neck = ", p_r_z_udotr_neck) + print("expected_muon = ", expected_muon) + print("expected_noise = ", expected_noise) + print("expected_neck = ", expected_neck) + print("expected_flasher = ", expected_flasher) + print("expected_breakdown = ", expected_breakdown) + print("nll is not finite!") + sys.exit(0) + + return nll + return nll + +if __name__ == '__main__': + import argparse + import matplotlib.pyplot as plt + import numpy as np + import pandas as pd + import sys + import h5py + + parser = argparse.ArgumentParser("plot fit results") + parser.add_argument("filenames", nargs='+', help="input files") + parser.add_argument("--save", action="store_true", default=False, help="save plots") + args = parser.parse_args() + + for filename in args.filenames: + ev = pd.read_hdf(filename,"ev") + + ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True) + fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True) + rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True) + + first_gtid = rhdr.set_index('run').to_dict()['first_gtid'] + + # First, remove junk events since orphans won't have a 50 MHz clock and so + # could screw up the 50 MHz clock unwrapping + ev = ev[ev.dc & DC_JUNK == 0] + + # We need the events to be in time order here in order to calculate the + # delta t between events. It's not obvious exactly how to do this. You + # could sort by GTID, but that wraps around. Similarly we can't sort by the + # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort + # by the 10 MHz clock since it can be unreliable. + # + # Update: Phil proposed a clever way to get the events in order using the + # GTID: + # + # > The GTID rollover should be easy to handle because there should never + # > be two identical GTID's in a run. So if you order the events by GTID, + # > you can assume that events with GTID's that come logically before the + # > first GTID in the run must have occurred after the other events. + # + # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID + # in the event and sort on that. We get the first GTID from the RHDR bank. + ev['gtid_sort'] = ev['gtid'].copy() + + ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True) + + ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort') + + for run, ev_run in ev.groupby('run'): + # Warn about 50 MHz clock jumps since they could indicate that the + # events aren't in order. + dt = np.diff(ev_run.gtr) + if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)): + print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" 
% \
+                (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
+
+    # unwrap the 50 MHz clock within each run
+    ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
+
+    for run, ev_run in ev.groupby('run'):
+        # Warn about GTID jumps since we could be missing a potential flasher
+        # and/or breakdown, and we need all the events in order to do a
+        # retrigger cut
+        if np.count_nonzero(np.diff(ev_run.gtid) != 1):
+            print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
+
+    # calculate the time difference between each event and the previous event
+    # so we can tag retrigger events
+    ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
+
+    # This is a bit of a hack. It appears that many times the fit will
+    # actually do much better by including a very low energy electron or
+    # muon. I believe the reason for this is that of course my likelihood
+    # function is not perfect (for example, I don't include the correct
+    # angular distribution for Rayleigh scattered light), and so the fitter
+    # often wants to add a very low energy electron or muon to fix things.
+    #
+    # Ideally I would fix the likelihood function, but for now we just
+    # discard any fit results which have a very low energy electron or
+    # muon.
+    #
+    # FIXME: Test this since query() is new to pandas
+    fits = fits.query('not (n > 1 and ((id1 == 20 and energy1 < 20) or (id2 == 20 and energy2 < 20) or (id3 == 20 and energy3 < 20)))')
+    fits = fits.query('not (n > 1 and ((id1 == 22 and energy1 < 200) or (id2 == 22 and energy2 < 200) or (id3 == 22 and energy3 < 200)))')
+
+    # Calculate the approximate Ockham factor.
+    # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
+    #
+    # Note: This is a really approximate form by assuming that the shape of
+    # the likelihood space is equal to the average uncertainty in the
+    # different parameters.
+    fits['w'] = fits['n']*np.log(0.1*0.001) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
+    fits['w'] -= fits['n']*100
+
+    # Note: we index on the left hand side with loc to avoid a copy error
+    #
+    # See https://www.dataquest.io/blog/settingwithcopywarning/
+    fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
+    fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
+
+    fits['fmin'] = fits['fmin'] - fits['w']
+
+    fits['ke'] = fits['energy1']
+    fits['id'] = fits['id1']
+    fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
+    fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
+    fits['theta'] = fits['theta1']
+
+    print("number of events = %i" % len(ev))
+
+    # Now, select prompt events.
+    #
+    # We define a prompt event here as any event with an NHIT >= 100 and whose
+    # previous >= 100 nhit event was more than 250 ms ago
+    #
+    # Note: It's important we do this *before* applying the data cleaning cuts
+    # since otherwise we may have a prompt event identified only after the
+    # cuts.
+    #
+    # For example, suppose there was a breakdown and for whatever reason
+    # the *second* event after the breakdown didn't get tagged correctly. If we
+    # apply the data cleaning cuts first and then tag prompt events then this
+    # event will get tagged as a prompt event.
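+    # As a concrete (hypothetical) example of this definition: given three
+    # events with nhit >= 100 at gtr = 0, 100 ms and 600 ms, the first is
+    # prompt, the second is not (it comes only 100 ms after the first), and
+    # the third is prompt again (it comes 500 ms after the previous
+    # high-nhit event).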
+ ev['prompt'] = (ev.nhit >= 100) + ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True) + + print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt)) + + # flasher follower cut + ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True) + + # breakdown follower cut + ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True) + + # retrigger cut + ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True) + + ev = ev[ev.prompt] + + ev.set_index(['run','gtid']) + + ev = pd.merge(fits,ev,how='inner',on=['run','gtid']) + ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)] + ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0) + ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0) + ev['psi'] /= ev.nhit_cal + + ev['cos_theta'] = np.cos(ev_single_particle['theta1']) + ev['r'] = np.sqrt(ev.x**2 + ev.y**2 + ev.z**2) + ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \ + np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \ + np.cos(ev_single_particle.theta1)*ev_single_particle.z + ev['udotr'] /= ev.r + + # figure out bins for high level variables + ev = radius_cut(ev) + ev = psi_cut(ev) + ev = cos_theta_cut(ev) + ev = z_cut(ev) + ev = udotr_cut(ev) + + ev['noise'] = ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0 + ev['neck'] = ((ev.dc & DC_NECK) != 0) & ~ev.noise + ev['flasher'] = ((ev.dc & DC_FLASHER) != 0) & ~(ev.noise | ev.neck) & (ev.nhit < 1000) + ev['breakdown'] = ((ev.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev.noise | ev.neck) & (ev.nhit >= 1000) + ev['muon'] = ((ev.dc & DC_MUON) != 0) & ~(ev.noise | ev.neck | ev.flasher | ev.breakdown) + ev['signal'] = ~(ev.noise | ev.neck | ev.flasher | ev.breakdown | ev.muon) + + data = {} + for bg in ['signal','muon','noise','neck','flasher','breakdown']: + data[bg] = np.zeros((2,2,2,2),dtype=int) + for _, row in ev[ev[bg]].iterrows(): + data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1 + + # FIXME: Estimate for now, needs to come from MC + sacrifice = {bg: 0.0 for bg in ['muon','noise','neck','flasher','breakdown']} + sacrifice['signal'] = np.zeros((len(np.unique(ev.radius_cut)),len(np.unique(ev.psi_cut)),len(np.unique(ev.cos_theta_cut)),len(np.unique(ev.z_cut))),dtype=int) + p_r_signal = np.array([0.9,0.1]) + p_psi_signal = np.array([1.0,0.0]) + p_z_signal = np.array([0.5,0.5]) + p_udotr_signal = np.array([0.25,0.75]) + sacrifice['signal'] = p_r_signal[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_signal[:,np.newaxis,np.newaxis]*p_z_signal[:,np.newaxis]*p_udotr_signal + + constraints = [flasher_r_udotr, breakdown_r_udotr,muon_psi_z_udotr,neck_r_z_udotr,noise_z_udotr] + nll = make_nll(data,sacrifice,constraints) + + x0 = [] + for bg in ['signal','muon','noise','neck','flasher','breakdown']: + x0.append(data[bg].sum()) + + # contamination + x0 += [0.99]*5 + + if data['muon'].sum() > 0: + # P(r|muon) + x0 += [data['muon'][0].sum()/data['muon'].sum()] + # P(psi,z,udotr|muon) + x0 += [data['muon'][:,0,0,0].sum()/data['muon'].sum()] + x0 += [data['muon'][:,0,0,1].sum()/data['muon'].sum()] + x0 += [data['muon'][:,0,1,0].sum()/data['muon'].sum()] + x0 += [data['muon'][:,0,1,1].sum()/data['muon'].sum()] + x0 += [data['muon'][:,1,0,0].sum()/data['muon'].sum()] + x0 += 
[data['muon'][:,1,0,1].sum()/data['muon'].sum()]
+        x0 += [data['muon'][:,1,1,0].sum()/data['muon'].sum()]
+    else:
+        x0 += [0.1]*8
+
+    if data['noise'].sum() > 0:
+        # P(r|noise)
+        x0 += [data['noise'][0].sum()/data['noise'].sum()]
+        # P(psi|noise)
+        x0 += [data['noise'][:,0].sum()/data['noise'].sum()]
+        # P(z,udotr|noise)
+        x0 += [data['noise'][:,:,0,0].sum()/data['noise'].sum()]
+        x0 += [data['noise'][:,:,0,1].sum()/data['noise'].sum()]
+        x0 += [data['noise'][:,:,1,0].sum()/data['noise'].sum()]
+    else:
+        x0 += [0.1]*5
+
+    if data['neck'].sum() > 0:
+        # P(r,z,udotr|neck)
+        x0 += [data['neck'][0,:,0,0].sum()/data['neck'].sum()]
+        x0 += [data['neck'][0,:,0,1].sum()/data['neck'].sum()]
+        x0 += [data['neck'][0,:,1,0].sum()/data['neck'].sum()]
+        x0 += [data['neck'][0,:,1,1].sum()/data['neck'].sum()]
+        x0 += [data['neck'][1,:,0,0].sum()/data['neck'].sum()]
+        x0 += [data['neck'][1,:,0,1].sum()/data['neck'].sum()]
+        x0 += [data['neck'][1,:,1,0].sum()/data['neck'].sum()]
+        # P(psi|neck)
+        x0 += [data['neck'][:,0].sum()/data['neck'].sum()]
+    else:
+        x0 += [0.1]*8
+
+    if data['flasher'].sum() > 0:
+        # P(r,udotr|flasher)
+        x0 += [data['flasher'][0,:,:,0].sum()/data['flasher'].sum()]
+        x0 += [data['flasher'][0,:,:,1].sum()/data['flasher'].sum()]
+        x0 += [data['flasher'][1,:,:,0].sum()/data['flasher'].sum()]
+        # P(psi|flasher)
+        x0 += [data['flasher'][:,0].sum()/data['flasher'].sum()]
+        # P(z|flasher)
+        x0 += [data['flasher'][:,:,0].sum()/data['flasher'].sum()]
+    else:
+        x0 += [0.1]*5
+
+    if data['breakdown'].sum() > 0:
+        # P(r,udotr|breakdown)
+        x0 += [data['breakdown'][0,:,:,0].sum()/data['breakdown'].sum()]
+        x0 += [data['breakdown'][0,:,:,1].sum()/data['breakdown'].sum()]
+        x0 += [data['breakdown'][1,:,:,0].sum()/data['breakdown'].sum()]
+        # P(psi|breakdown)
+        x0 += [data['breakdown'][:,0].sum()/data['breakdown'].sum()]
+        # P(z|breakdown)
+        x0 += [data['breakdown'][:,:,0].sum()/data['breakdown'].sum()]
+    else:
+        x0 += [0.1]*5
+
+    # P(neck|muon)
+    x0 += [EPSILON]
+
+    x0 = np.array(x0)
+
+    # We originally used the COBYLA algorithm here because it is the only
+    # derivative free minimization routine in nlopt which honors inequality
+    # constraints, but SBPLX seems to work better.
+    opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
+    opt.set_min_objective(nll)
+    # set lower bounds to 1e-10 to prevent nans if we predict something should
+    # be 0 but observe an event.
+ low = np.ones_like(x0)*EPSILON + high = np.array([1e9]*6 + [1-EPSILON]*(len(x0)-6)) + x0[x0 < low] = low[x0 < low] + x0[x0 > high] = high[x0 > high] + opt.set_lower_bounds(low) + opt.set_upper_bounds(high) + opt.set_ftol_abs(1e-10) + opt.set_initial_step([1]*6 + [0.01]*(len(x0)-6)) + #for constraint in constraints: + #opt.add_inequality_constraint(constraint,0) + + xopt = opt.optimize(x0) + nll_xopt = nll(xopt) + print("nll(xopt) = ", nll(xopt)) + + while True: + xopt = opt.optimize(xopt) + if not nll(xopt) < nll_xopt - 1e-10: + break + nll_xopt = nll(xopt) + print("nll(xopt) = ", nll(xopt)) + #print("n = ", opt.get_numevals()) + + stepsizes = estimate_errors(nll,xopt,constraints) + with printoptions(precision=3, suppress=True): + print("Errors: ", stepsizes) + + #samples = metropolis_hastings(nll,xopt,stepsizes,100000) + #print("nll(xopt) = %.2g" % nll(xopt)) + + pos = np.empty((10, len(x0)),dtype=np.double) + for i in range(pos.shape[0]): + pos[i] = xopt + np.random.randn(len(x0))*stepsizes + pos[i,:6] = np.clip(pos[i,:6],EPSILON,1e9) + pos[i,6:] = np.clip(pos[i,6:],EPSILON,1-EPSILON) + + for constraint in constraints: + if constraint(pos[i]) >= 0: + pos[i] = constraint.renormalize_no_fix(pos[i]) + + nwalkers, ndim = pos.shape + + proposal = get_proposal_func(stepsizes*0.1,low,high) + sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x, grad, fill_value: -nll(x,grad,fill_value), moves=emcee.moves.MHMove(proposal),args=[None,np.inf]) + with np.errstate(invalid='ignore'): + sampler.run_mcmc(pos, 100000) + + print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) + + try: + print("autocorrelation time: ", sampler.get_autocorr_time()) + except Exception as e: + print(e) + + # Plot walker positions as a function of step number for the expected + # number of events + fig, axes = plt.subplots(6, num=1, sharex=True) + samples = sampler.get_chain() + labels = ["Signal","Muon","Noise","Neck","Flasher","Breakdown"] + for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']): + ax = axes[i] + ax.plot(samples[:,:,i], "k", alpha=0.3) + ax.set_xlim(0, len(samples)) + ax.set_ylabel(labels[i], rotation=0) + ax.yaxis.set_label_coords(-0.1, 0.5) + despine(ax=ax,trim=True) + plt.subplots_adjust(left=0.2) + fig.tight_layout() + + # Plot walker positions as a function of step number for the background cut + # efficiencies + fig, axes = plt.subplots(5, num=2, sharex=True) + samples = sampler.get_chain() + tag_labels = ['M','N','Ne','F','B'] + for i, bg in enumerate(['muon','noise','neck','flasher','breakdown']): + ax = axes[i] + ax.plot(samples[:,:,6+i], "k", alpha=0.3) + ax.set_xlim(0, len(samples)) + ax.set_ylabel(r"$P(\mathrm{%s}\mid\mathrm{%s})$" % (tag_labels[i],bg), rotation=0) + ax.yaxis.set_label_coords(-0.1, 0.5) + despine(ax=ax,trim=True) + plt.subplots_adjust(left=0.2) + fig.tight_layout() + + samples = sampler.chain.reshape((-1,len(x0))) + + plt.figure(3) + for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']): + ax = plt.subplot(3,2,i+1) + plt.hist(samples[:,i],bins=100,histtype='step') + plt.title(bg.capitalize()) + despine(ax=ax,left=True,trim=True) + ax.get_yaxis().set_visible(False) + plt.legend() + plt.tight_layout() + + plt.figure(4) + for i, bg in enumerate(['muon','noise','neck','flasher','breakdown']): + ax = plt.subplot(3,2,i+1) + plt.hist(samples[:,6+i],bins=100,histtype='step') + plt.title(bg.capitalize()) + despine(ax=ax,left=True,trim=True) + ax.get_yaxis().set_visible(False) + plt.legend() + 
plt.tight_layout() + + mu_signal, mu_muon, mu_noise, mu_neck, mu_flasher, mu_breakdown, \ + contamination_muon, contamination_noise, contamination_neck, contamination_flasher, contamination_breakdown, \ + p_r_muon_lo, \ + p_psi_z_udotr_muon_lololo, \ + p_psi_z_udotr_muon_lolohi, \ + p_psi_z_udotr_muon_lohilo, \ + p_psi_z_udotr_muon_lohihi, \ + p_psi_z_udotr_muon_hilolo, \ + p_psi_z_udotr_muon_hilohi, \ + p_psi_z_udotr_muon_hihilo, \ + p_r_noise_lo, p_psi_noise_lo, \ + p_z_udotr_noise_lolo, \ + p_z_udotr_noise_lohi, \ + p_z_udotr_noise_hilo, \ + p_r_z_udotr_neck_lololo, \ + p_r_z_udotr_neck_lolohi, \ + p_r_z_udotr_neck_lohilo, \ + p_r_z_udotr_neck_lohihi, \ + p_r_z_udotr_neck_hilolo, \ + p_r_z_udotr_neck_hilohi, \ + p_r_z_udotr_neck_hihilo, \ + p_psi_neck_lo, \ + p_r_udotr_flasher_lolo, p_r_udotr_flasher_lohi, p_r_udotr_flasher_hilo, p_psi_flasher_lo, p_z_flasher_lo, \ + p_r_udotr_breakdown_lolo, p_r_udotr_breakdown_lohi, p_r_udotr_breakdown_hilo, p_psi_breakdown_lo, p_z_breakdown_lo, \ + p_neck_given_muon = samples.T + + p_r = [sacrifice['signal'][0].sum(), p_r_muon_lo, p_r_noise_lo, \ + p_r_z_udotr_neck_lololo + p_r_z_udotr_neck_lolohi + p_r_z_udotr_neck_lohilo + p_r_z_udotr_neck_lohihi, \ + p_r_udotr_flasher_lolo + p_r_udotr_flasher_lohi, \ + p_r_udotr_breakdown_lolo + p_r_udotr_breakdown_lohi] + + p_psi = [sacrifice['signal'][:,0].sum(), \ + p_psi_z_udotr_muon_lololo + p_psi_z_udotr_muon_lolohi + p_psi_z_udotr_muon_lohilo + p_psi_z_udotr_muon_lohihi, + p_psi_noise_lo, \ + p_psi_neck_lo, \ + p_psi_flasher_lo, \ + p_psi_breakdown_lo] + + ylim_max = 0 + fig = plt.figure(5) + axes = [] + for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']): + axes.append(plt.subplot(3,2,i+1)) + if i == 0: + plt.hist(samples[:,i],bins=100,histtype='step',label="After DC cuts") + plt.hist(samples[:,i]*p_r[i],bins=100,histtype='step',linestyle=':',label="+ radius cut") + plt.hist(samples[:,i]*p_r[i]*p_psi[i],bins=100,histtype='step',linestyle='--',label=r"+ $\psi$ cut") + else: + plt.hist(samples[:,i]*(1-samples[:,5+i]),bins=100,histtype='step') + plt.hist(samples[:,i]*(1-samples[:,5+i])*p_r[i],bins=100,histtype='step',linestyle=':') + plt.hist(samples[:,i]*(1-samples[:,5+i])*p_r[i]*p_psi[i],bins=100,histtype='step',linestyle='--') + plt.title(bg.capitalize()) + xlim_max = max(ax.get_xlim()[1] for ax in axes) + for ax in axes: + ax.set_xlim((0,xlim_max)) + despine(ax=ax,left=True,trim=True) + ax.get_yaxis().set_visible(False) + # Create new legend handles but use the colors from the existing ones + handles, labels = axes[0].get_legend_handles_labels() + new_handles = [Line2D([], [], c=h.get_edgecolor()) for h in handles] + fig.legend(new_handles,labels,loc='upper right') + plt.legend() + plt.tight_layout() + + if args.save: + plt.figure(1) + plt.savefig("dc_walker_pos_num_events.pdf") + plt.savefig("dc_walker_pos_num_events.eps") + plt.figure(2) + plt.savefig("dc_walker_pos_cut_eff.pdf") + plt.savefig("dc_walker_pos_cut_eff.eps") + plt.figure(3) + plt.savefig("dc_num_events.pdf") + plt.savefig("dc_num_events.eps") + plt.figure(4) + plt.savefig("dc_cut_eff.pdf") + plt.savefig("dc_cut_eff.eps") + plt.figure(5) + plt.savefig("dc_num_events_after_cuts.pdf") + plt.savefig("dc_num_events_after_cuts.eps") + + plt.figure(3) + plt.suptitle("Expected number of events") + plt.figure(4) + plt.suptitle("Probability of correctly tagging background") + plt.figure(5) + plt.suptitle("Expected number of Backgrounds after cuts") + + plt.show() |
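+
+# Example invocation (hypothetical filenames; the inputs are HDF5 files
+# containing the "ev", "fits", and "rhdr" tables read above):
+#
+#     ./utils/dc run_0001.hdf5 run_0002.hdf5 --save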