author    tlatorre <tlatorre@uchicago.edu>  2020-05-12 11:34:47 -0500
committer tlatorre <tlatorre@uchicago.edu>  2020-05-12 11:34:47 -0500
commit    764bf1b496de0d3d3a22b988a0634ea68434bb26 (patch)
tree      83dde566645f6b7f1abef44ae9654bb62482a0ac
parent    c24438b1fa9d368f2b05d623c7a2cb0d27852cfc (diff)
add a script to do a closure test on the contamination analysis
-rwxr-xr-x  utils/dc                   217
-rwxr-xr-x  utils/dc-closure-test      555
-rwxr-xr-x  utils/plot-energy          120
-rwxr-xr-x  utils/sddm/dc.py             7
-rwxr-xr-x  utils/sddm/plot_energy.py  139
5 files changed, 741 insertions, 297 deletions
diff --git a/utils/dc b/utils/dc
index 0c14053..16591df 100755
--- a/utils/dc
+++ b/utils/dc
@@ -266,178 +266,20 @@ if __name__ == '__main__':
import pandas as pd
import sys
import h5py
+ from sddm import setup_matplotlib
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain")
parser.add_argument("--save", action="store_true", default=False, help="save plots")
+ parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
args = parser.parse_args()
- if args.save:
- # default \textwidth for a fullpage article in LaTeX is 16.50764 cm.
- # You can figure this out by compiling the following TeX document:
- #
- # \documentclass{article}
- # \usepackage{fullpage}
- # \usepackage{layouts}
- # \begin{document}
- # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
- # \end{document}
-
- width = 16.50764
- width /= 2.54 # cm -> inches
- # According to this page:
- # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
- # Tufte suggests an aspect ratio of 1.5 - 1.6.
- height = width/1.5
- FIGSIZE = (width,height)
-
- import matplotlib.pyplot as plt
-
- font = {'family':'serif', 'serif': ['computer modern roman']}
- plt.rc('font',**font)
-
- plt.rc('text', usetex=True)
- else:
- # on retina screens, the default plots are way too small
- # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
- # Qt5 will scale everything using the dpi in ~/.Xresources
- import matplotlib
- matplotlib.use("Qt5Agg")
-
- import matplotlib.pyplot as plt
-
- # Default figure size. Currently set to my monitor width and height so that
- # things are properly formatted
- FIGSIZE = (13.78,7.48)
-
- # Make the default font bigger
- plt.rc('font', size=22)
-
- for filename in args.filenames:
- ev = pd.read_hdf(filename,"ev")
-
- ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
- fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True)
- rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True)
-
- first_gtid = rhdr.set_index('run').to_dict()['first_gtid']
-
- # First, remove junk events since orphans won't have a 50 MHz clock and so
- # could screw up the 50 MHz clock unwrapping
- ev = ev[ev.dc & DC_JUNK == 0]
-
- # We need the events to be in time order here in order to calculate the
- # delta t between events. It's not obvious exactly how to do this. You
- # could sort by GTID, but that wraps around. Similarly we can't sort by the
- # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort
- # by the 10 MHz clock since it can be unreliable.
- #
- # Update: Phil proposed a clever way to get the events in order using the
- # GTID:
- #
- # > The GTID rollover should be easy to handle because there should never
- # > be two identical GTID's in a run. So if you order the events by GTID,
- # > you can assume that events with GTID's that come logically before the
- # > first GTID in the run must have occurred after the other events.
- #
- # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
- # in the run and sort on that. We get the first GTID from the RHDR bank.
- ev['gtid_sort'] = ev['gtid'].copy()
-
- ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
-
- ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
-
- for run, ev_run in ev.groupby('run'):
- # Warn about 50 MHz clock jumps since they could indicate that the
- # events aren't in order.
- dt = np.diff(ev_run.gtr)
- if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
- print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
- (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
-
- # unwrap the 50 MHz clock within each run
- ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
-
- for run, ev_run in ev.groupby('run'):
- # Warn about GTID jumps since we could be missing a potential flasher
- # and/or breakdown, and we need all the events in order to do a
- # retrigger cut
- if np.count_nonzero(np.diff(ev_run.gtid) != 1):
- print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
-
- # calculate the time difference between each event and the previous event
- # so we can tag retrigger events
- ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
-
- # Calculate the approximate Ockham factor.
- # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
- #
- # Note: This is a really approximate form by assuming that the shape of
- # the likelihood space is equal to the average uncertainty in the
- # different parameters.
- fits['w'] = fits['n']*np.log(0.05/10e3) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
- fits['w'] -= fits['n']*100
-
- # Note: we index on the left hand side with loc to avoid a copy error
- #
- # See https://www.dataquest.io/blog/settingwithcopywarning/
- fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
- fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
-
- fits['fmin'] = fits['fmin'] - fits['w']
-
- fits['ke'] = fits['energy1']
- fits['id'] = fits['id1']
- fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
- fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
- fits['theta'] = fits['theta1']
-
- print("number of events = %i" % len(ev))
-
- # Now, select prompt events.
- #
- # We define a prompt event here as any event with an NHIT > 100 and whose
- # previous > 100 nhit event was more than 250 ms ago
- #
- # Note: It's important we do this *before* applying the data cleaning cuts
- # since otherwise we may have a prompt event identified only after the
- # cuts.
- #
- # For example, suppose there was a breakdown and for whatever reason
- # the *second* event after the breakdown didn't get tagged correctly. If we
- # apply the data cleaning cuts first and then tag prompt events then this
- # event will get tagged as a prompt event.
- ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True)
-
- print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
-
- # flasher follower cut
- ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True)
-
- # breakdown follower cut
- ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True)
-
- # retrigger cut
- ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True)
-
- ev = ev[ev.prompt]
-
- ev.set_index(['run','gtid'])
-
- ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
- ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)]
- ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0)
- ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0)
- ev['psi'] /= ev.nhit_cal
-
- ev['cos_theta'] = np.cos(ev_single_particle['theta1'])
- ev['r'] = np.sqrt(ev.x**2 + ev.y**2 + ev.z**2)
- ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \
- np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \
- np.cos(ev_single_particle.theta1)*ev_single_particle.z
- ev['udotr'] /= ev.r
+ setup_matplotlib(args.save)
+
+ import matplotlib.pyplot as plt
+
+ ev = get_events(args.filenames,merge_fits=True)
# figure out bins for high level variables
ev = radius_cut(ev)
@@ -459,14 +301,33 @@ if __name__ == '__main__':
for _, row in ev[ev[bg]].iterrows():
data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
- # FIXME: Estimate for now, needs to come from MC
- sacrifice = {bg: 0.0 for bg in ['muon','noise','neck','flasher','breakdown']}
- sacrifice['signal'] = np.zeros((len(np.unique(ev.radius_cut)),len(np.unique(ev.psi_cut)),len(np.unique(ev.cos_theta_cut)),len(np.unique(ev.z_cut))),dtype=int)
- p_r_signal = np.array([0.9,0.1])
- p_psi_signal = np.array([1.0,0.0])
- p_z_signal = np.array([0.5,0.5])
- p_udotr_signal = np.array([0.25,0.75])
- sacrifice['signal'] = p_r_signal[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_signal[:,np.newaxis,np.newaxis]*p_z_signal[:,np.newaxis]*p_udotr_signal
+ ev_mc = get_events(args.mc, merge_fits=True)
+
+ ev_mc = ev_mc[ev_mc.prompt]
+
+ # figure out bins for high level variables
+ ev_mc = radius_cut(ev_mc)
+ ev_mc = psi_cut(ev_mc)
+ ev_mc = cos_theta_cut(ev_mc)
+ ev_mc = z_cut(ev_mc)
+ ev_mc = udotr_cut(ev_mc)
+
+ ev_mc['noise'] = ev_mc.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0
+ ev_mc['neck'] = ((ev_mc.dc & DC_NECK) != 0) & ~ev_mc.noise
+ ev_mc['flasher'] = ((ev_mc.dc & DC_FLASHER) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit < 1000)
+ ev_mc['breakdown'] = ((ev_mc.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit >= 1000)
+ ev_mc['muon'] = ((ev_mc.dc & DC_MUON) != 0) & ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown)
+ ev_mc['signal'] = ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown | ev_mc.muon)
+
+ # FIXME: Double check that what I'm calculating here matches with what I
+ # expect
+ sacrifice = {}
+ for bg in ['signal','muon','noise','neck','flasher','breakdown']:
+ sacrifice[bg] = np.zeros((2,2,2,2),dtype=float)
+ for _, row in ev_mc[ev_mc[bg]].iterrows():
+ sacrifice[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
+
+ sacrifice[bg] /= len(ev_mc)
constraints = [flasher_r_udotr, breakdown_r_udotr,muon_r_psi_z_udotr,neck_r_z_udotr,noise_z_udotr]
nll = make_nll(data,sacrifice,constraints)
@@ -616,7 +477,7 @@ if __name__ == '__main__':
# Plot walker positions as a function of step number for the expected
# number of events
- fig, axes = plt.subplots(6, figsize=FIGSIZE, num=1, sharex=True)
+ fig, axes = plt.subplots(6, num=1, sharex=True)
samples = sampler.get_chain()
labels = ["Signal","Muon","Noise","Neck","Flasher","Breakdown"]
for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
@@ -631,7 +492,7 @@ if __name__ == '__main__':
# Plot walker positions as a function of step number for the background cut
# efficiencies
- fig, axes = plt.subplots(5, figsize=FIGSIZE, num=2, sharex=True)
+ fig, axes = plt.subplots(5, num=2, sharex=True)
samples = sampler.get_chain()
tag_labels = ['M','N','Ne','F','B']
for i, bg in enumerate(['muon','noise','neck','flasher','breakdown']):
@@ -646,7 +507,7 @@ if __name__ == '__main__':
samples = sampler.chain.reshape((-1,len(x0)))
- plt.figure(3, figsize=FIGSIZE)
+ plt.figure(3)
for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
ax = plt.subplot(3,2,i+1)
plt.hist(samples[:,i],bins=100,histtype='step')
@@ -656,7 +517,7 @@ if __name__ == '__main__':
plt.legend()
plt.tight_layout()
- plt.figure(4, figsize=FIGSIZE)
+ plt.figure(4)
for i, bg in enumerate(['muon','noise','neck','flasher','breakdown']):
ax = plt.subplot(3,2,i+1)
plt.hist(samples[:,6+i],bins=100,histtype='step')
@@ -732,7 +593,7 @@ if __name__ == '__main__':
p_psi_breakdown_lo]
ylim_max = 0
- fig = plt.figure(5, figsize=FIGSIZE)
+ fig = plt.figure(5)
axes = []
for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
axes.append(plt.subplot(3,2,i+1))
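
Note: the per-row iterrows() loops above that fill the 2x2x2x2 `data` and
`sacrifice` arrays can be written as a single scatter-add. A minimal sketch
(assuming the *_cut columns hold the 0/1 integers produced by np.digitize;
`bin_events` is a hypothetical helper, not part of this commit):

    import numpy as np

    def bin_events(df, shape=(2, 2, 2, 2)):
        """Count events per (radius, psi, z, udotr) bin without a Python loop."""
        counts = np.zeros(shape, dtype=float)
        idx = (df.radius_cut.values, df.psi_cut.values,
               df.z_cut.values, df.udotr_cut.values)
        # np.add.at is an unbuffered scatter-add, so repeated bins accumulate
        np.add.at(counts, idx, 1)
        return counts

    # e.g. sacrifice[bg] = bin_events(ev_mc[ev_mc[bg]])/len(ev_mc)
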
diff --git a/utils/dc-closure-test b/utils/dc-closure-test
new file mode 100755
index 0000000..54d2629
--- /dev/null
+++ b/utils/dc-closure-test
@@ -0,0 +1,555 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr
+import nlopt
+from scipy.stats import poisson
+import contextlib
+import sys
+from math import exp
+import emcee
+from scipy.optimize import brentq
+from scipy.stats import truncnorm
+from matplotlib.lines import Line2D
+from sddm.plot import despine
+from sddm.dc import *
+from sddm.plot_energy import *
+
+try:
+ from emcee import moves
+except ImportError:
+ print("emcee version 2.2.1 is required",file=sys.stderr)
+ sys.exit(1)
+
+# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre
+@contextlib.contextmanager
+def printoptions(*args, **kwargs):
+ original = np.get_printoptions()
+ np.set_printoptions(*args, **kwargs)
+ try:
+ yield
+ finally:
+ np.set_printoptions(**original)
+
+def radius_cut(ev):
+ ev['radius_cut'] = np.digitize((ev.r/PSUP_RADIUS)**3,(0.9,))
+ return ev
+
+def udotr_cut(ev):
+ ev['udotr_cut'] = np.digitize(ev.udotr,(-0.5,))
+ return ev
+
+def psi_cut(ev):
+ ev['psi_cut'] = np.digitize(ev.psi,(6.0,))
+ return ev
+
+def cos_theta_cut(ev):
+ ev['cos_theta_cut'] = np.digitize(ev.cos_theta,(-0.5,))
+ return ev
+
+def z_cut(ev):
+ ev['z_cut'] = np.digitize(ev.z,(0.0,))
+ return ev
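
Each cut above uses np.digitize with a single bin edge, so the new column is
0 below the threshold and 1 at or above it. For example:

    import numpy as np

    z = np.array([-1.5, 0.0, 2.3])
    np.digitize(z, (0.0,))  # -> array([0, 1, 1]): values >= 0.0 land in bin 1
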
+
+# Constraint to enforce the fact that P(r,psi,z,udotr|muon) all add up to 1.0.
+# In the likelihood function we set the last possibility for r and udotr equal
+# to 1.0 minus the others. Therefore, we need to enforce the fact that the
+# others must add up to less than 1.
+muon_r_psi_z_udotr = Constraint(range(11,26))
+
+# Constraint to enforce the fact that P(z,udotr|noise) all add up to 1.0. In
+# the likelihood function we set the last possibility for r and udotr equal to
+# 1.0 minus the others. Therefore, we need to enforce the fact that the others
+# must add up to less than 1.
+noise_z_udotr = Constraint(range(28,31))
+
+# Constraint to enforce the fact that P(r,z,udotr|neck) all add up to 1.0. In
+# the likelihood function we set the last possibility for r and udotr equal to
+# 1.0 minus the others. Therefore, we need to enforce the fact that the others
+# must add up to less than 1.
+neck_r_z_udotr = Constraint(range(31,38))
+
+# Constraint to enforce the fact that P(r,udotr|flasher) all add up to 1.0. In
+# the likelihood function we set the last possibility for r and udotr equal to
+# 1.0 minus the others. Therefore, we need to enforce the fact that the others
+# must add up to less than 1
+flasher_r_udotr = Constraint(range(39,42))
+
+# Constraint to enforce the fact that P(r,udotr|breakdown) all add up to 1.0.
+# In the likelihood function we set the last possibility for r and udotr equal
+# to 1.0 minus the others. Therefore, we need to enforce the fact that the
+# others must add up to less than 1.
+breakdown_r_udotr = Constraint(range(44,47))
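
Constraint comes from sddm.dc and is not shown in this diff; conceptually each
instance returns a positive value when the named subset of parameters sums to
more than 1, roughly like this hypothetical sketch (the real class also
provides renormalize_no_fix and other helpers):

    class ConstraintSketch(object):
        """Stand-in illustrating the assumed sddm.dc.Constraint semantics."""
        def __init__(self, indices):
            self.indices = list(indices)

        def __call__(self, x, grad=None):
            # nlopt's inequality convention: constraint(x) <= 0 when satisfied
            return sum(x[i] for i in self.indices) - 1
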
+
+def make_nll(data, sacrifice, constraints):
+ def nll(x, grad=None, fill_value=1e9):
+ if grad is not None and grad.size > 0:
+ raise Exception("nll got passed grad!")
+
+ nll = 0.0
+ # Here we explicitly return a crazy high value if one of the
+ # constraints is violated. When using nlopt it should respect all the
+ # constraints, *but* later when we do the Metropolis-Hastings algorithm
+ # we don't have any way to add the constraints explicitly.
+ for constraint in constraints:
+ if constraint(x) > 0:
+ nll += fill_value + 1e4*constraint(x)**2
+
+ if (x <= 0).any() or (x[6:] >= 1).any():
+ nll += fill_value + 1e4*np.sum((x[x < 0])**2) + 1e4*np.sum((x[6:][x[6:] > 1]-1)**2)
+
+ if nll:
+ return nll
+
+ (mu_signal, mu_muon, mu_noise, mu_neck, mu_flasher, mu_breakdown,
+ contamination_muon, contamination_noise, contamination_neck, contamination_flasher, contamination_breakdown,
+ p_r_psi_z_udotr_muon_lolololo, # 11
+ p_r_psi_z_udotr_muon_lololohi,
+ p_r_psi_z_udotr_muon_lolohilo,
+ p_r_psi_z_udotr_muon_lolohihi,
+ p_r_psi_z_udotr_muon_lohilolo,
+ p_r_psi_z_udotr_muon_lohilohi,
+ p_r_psi_z_udotr_muon_lohihilo,
+ p_r_psi_z_udotr_muon_lohihihi,
+ p_r_psi_z_udotr_muon_hilololo,
+ p_r_psi_z_udotr_muon_hilolohi,
+ p_r_psi_z_udotr_muon_hilohilo,
+ p_r_psi_z_udotr_muon_hilohihi,
+ p_r_psi_z_udotr_muon_hihilolo,
+ p_r_psi_z_udotr_muon_hihilohi,
+ p_r_psi_z_udotr_muon_hihihilo,
+ p_r_noise_lo, p_psi_noise_lo, # 26, 27
+ p_z_udotr_noise_lolo, # 28
+ p_z_udotr_noise_lohi,
+ p_z_udotr_noise_hilo,
+ p_r_z_udotr_neck_lololo, # 31
+ p_r_z_udotr_neck_lolohi,
+ p_r_z_udotr_neck_lohilo,
+ p_r_z_udotr_neck_lohihi,
+ p_r_z_udotr_neck_hilolo,
+ p_r_z_udotr_neck_hilohi,
+ p_r_z_udotr_neck_hihilo,
+ p_psi_neck_lo, # 38
+ p_r_udotr_flasher_lolo, p_r_udotr_flasher_lohi, p_r_udotr_flasher_hilo, # 39, ..., 41
+ p_psi_flasher_lo, p_z_flasher_lo,
+ p_r_udotr_breakdown_lolo, p_r_udotr_breakdown_lohi, p_r_udotr_breakdown_hilo, # 44, ..., 46
+ p_psi_breakdown_lo, p_z_breakdown_lo,
+ p_neck_given_muon) = x
+
+ p_r_udotr_flasher_hihi = 1-p_r_udotr_flasher_lolo-p_r_udotr_flasher_lohi-p_r_udotr_flasher_hilo
+ p_r_udotr_breakdown_hihi = 1-p_r_udotr_breakdown_lolo-p_r_udotr_breakdown_lohi-p_r_udotr_breakdown_hilo
+ p_r_psi_z_udotr_muon_hihihihi = 1 - \
+ p_r_psi_z_udotr_muon_lolololo - \
+ p_r_psi_z_udotr_muon_lololohi - \
+ p_r_psi_z_udotr_muon_lolohilo - \
+ p_r_psi_z_udotr_muon_lolohihi - \
+ p_r_psi_z_udotr_muon_lohilolo - \
+ p_r_psi_z_udotr_muon_lohilohi - \
+ p_r_psi_z_udotr_muon_lohihilo - \
+ p_r_psi_z_udotr_muon_lohihihi - \
+ p_r_psi_z_udotr_muon_hilololo - \
+ p_r_psi_z_udotr_muon_hilolohi - \
+ p_r_psi_z_udotr_muon_hilohilo - \
+ p_r_psi_z_udotr_muon_hilohihi - \
+ p_r_psi_z_udotr_muon_hihilolo - \
+ p_r_psi_z_udotr_muon_hihilohi - \
+ p_r_psi_z_udotr_muon_hihihilo
+ p_r_z_udotr_neck_hihihi = 1 - p_r_z_udotr_neck_lololo - p_r_z_udotr_neck_lolohi - p_r_z_udotr_neck_lohilo - p_r_z_udotr_neck_lohihi - p_r_z_udotr_neck_hilolo - p_r_z_udotr_neck_hilohi - p_r_z_udotr_neck_hihilo
+ p_z_udotr_noise_hihi = 1 - p_z_udotr_noise_lolo - p_z_udotr_noise_lohi - p_z_udotr_noise_hilo
+
+ # Muon events
+ # first 6 parameters are the mean number of signal and bgs
+ p_muon = np.array([\
+ [[[p_r_psi_z_udotr_muon_lolololo, p_r_psi_z_udotr_muon_lololohi], \
+ [p_r_psi_z_udotr_muon_lolohilo, p_r_psi_z_udotr_muon_lolohihi]], \
+ [[p_r_psi_z_udotr_muon_lohilolo, p_r_psi_z_udotr_muon_lohilohi], \
+ [p_r_psi_z_udotr_muon_lohihilo, p_r_psi_z_udotr_muon_lohihihi]]], \
+ [[[p_r_psi_z_udotr_muon_hilololo, p_r_psi_z_udotr_muon_hilolohi], \
+ [p_r_psi_z_udotr_muon_hilohilo, p_r_psi_z_udotr_muon_hilohihi]], \
+ [[p_r_psi_z_udotr_muon_hihilolo, p_r_psi_z_udotr_muon_hihilohi], \
+ [p_r_psi_z_udotr_muon_hihihilo, p_r_psi_z_udotr_muon_hihihihi]]]])
+ expected_muon = p_muon*contamination_muon*mu_muon + sacrifice['muon']*mu_signal
+
+ nll -= fast_poisson_logpmf(data['muon'],expected_muon).sum()
+
+ # Noise events
+ p_r_noise = np.array([p_r_noise_lo,1-p_r_noise_lo])
+ p_psi_noise = np.array([p_psi_noise_lo,1-p_psi_noise_lo])
+ p_z_udotr_noise = np.array([\
+ [p_z_udotr_noise_lolo,p_z_udotr_noise_lohi],
+ [p_z_udotr_noise_hilo,p_z_udotr_noise_hihi]])
+ p_noise = p_r_noise[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_noise[:,np.newaxis,np.newaxis]*p_z_udotr_noise
+ expected_noise = p_noise*contamination_noise*mu_noise + sacrifice['noise']*mu_signal
+
+ nll -= fast_poisson_logpmf(data['noise'],expected_noise).sum()
+
+ # Neck events
+ # FIXME: for now assume parameterized same as muon
+ p_r_z_udotr_neck = np.array([\
+ [[p_r_z_udotr_neck_lololo, p_r_z_udotr_neck_lolohi], \
+ [p_r_z_udotr_neck_lohilo, p_r_z_udotr_neck_lohihi]], \
+ [[p_r_z_udotr_neck_hilolo, p_r_z_udotr_neck_hilohi], \
+ [p_r_z_udotr_neck_hihilo, p_r_z_udotr_neck_hihihi]]])
+ p_psi_neck = np.array([p_psi_neck_lo,1-p_psi_neck_lo])
+ p_neck = p_r_z_udotr_neck[:,np.newaxis,:,:]*p_psi_neck[:,np.newaxis,np.newaxis]
+ expected_neck = p_neck*contamination_neck*mu_neck + sacrifice['neck']*mu_signal
+ # FIXME: pdf should be different for muon given neck
+ expected_neck += p_muon*p_neck_given_muon*mu_muon
+
+ nll -= fast_poisson_logpmf(data['neck'],expected_neck).sum()
+
+ # Flasher events
+ p_r_udotr_flasher = np.array([\
+ [p_r_udotr_flasher_lolo,p_r_udotr_flasher_lohi], \
+ [p_r_udotr_flasher_hilo,p_r_udotr_flasher_hihi]])
+ p_psi_flasher = np.array([p_psi_flasher_lo,1-p_psi_flasher_lo])
+ p_z_flasher = np.array([p_z_flasher_lo,1-p_z_flasher_lo])
+ p_flasher = p_r_udotr_flasher[:,np.newaxis,np.newaxis,:]*p_psi_flasher[:,np.newaxis,np.newaxis]*p_z_flasher[:,np.newaxis]
+ expected_flasher = p_flasher*contamination_flasher*mu_flasher + sacrifice['flasher']*mu_signal
+
+ nll -= fast_poisson_logpmf(data['flasher'],expected_flasher).sum()
+
+ # Breakdown events
+ p_r_udotr_breakdown = np.array([\
+ [p_r_udotr_breakdown_lolo,p_r_udotr_breakdown_lohi], \
+ [p_r_udotr_breakdown_hilo,p_r_udotr_breakdown_hihi]])
+ p_psi_breakdown = np.array([p_psi_breakdown_lo,1-p_psi_breakdown_lo])
+ p_z_breakdown = np.array([p_z_breakdown_lo,1-p_z_breakdown_lo])
+ p_breakdown = p_r_udotr_breakdown[:,np.newaxis,np.newaxis,:]*p_psi_breakdown[:,np.newaxis,np.newaxis]*p_z_breakdown[:,np.newaxis]
+ expected_breakdown = p_breakdown*contamination_breakdown*mu_breakdown + sacrifice['breakdown']*mu_signal
+
+ nll -= fast_poisson_logpmf(data['breakdown'],expected_breakdown).sum()
+
+ # Signal like events
+ expected_signal = np.zeros_like(expected_muon)
+ expected_signal += mu_signal*sacrifice['signal']
+ expected_signal += p_muon*(1-contamination_muon)*mu_muon
+ expected_signal += p_neck*(1-contamination_neck)*mu_neck
+ expected_signal += p_noise*(1-contamination_noise)*mu_noise
+ expected_signal += p_flasher*(1-contamination_flasher)*mu_flasher
+ expected_signal += p_breakdown*(1-contamination_breakdown)*mu_breakdown
+
+ nll -= fast_poisson_logpmf(data['signal'],expected_signal).sum()
+
+ if not np.isfinite(nll):
+ print("x = ", x)
+ print("p_r_z_udotr_neck = ", p_r_z_udotr_neck)
+ print("expected_muon = ", expected_muon)
+ print("expected_noise = ", expected_noise)
+ print("expected_neck = ", expected_neck)
+ print("expected_flasher = ", expected_flasher)
+ print("expected_breakdown = ", expected_breakdown)
+ print("nll is not finite!")
+ sys.exit(0)
+
+ return nll
+ return nll
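
The fill_value branch above is the usual soft-bound trick for samplers that
can't enforce hard constraints: points outside the allowed region get a large
but finite value that grows quadratically with the violation, so the walkers
are pushed back toward the valid region instead of producing NaNs. A toy
illustration of the same idea:

    def toy_nll(x, fill_value=1e9):
        # quadratic penalty outside [0, 1] keeps the surface finite and sloped
        if x < 0 or x > 1:
            return fill_value + 1e4*min(abs(x), abs(x - 1))**2
        return (x - 0.5)**2

    # toy_nll(-0.1) ~ 1e9 + 100 and toy_nll(2.0) ~ 1e9 + 1e4: both finite,
    # and larger violations cost more, so walkers drift back into bounds
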
+
+def fit(data, sacrifice, steps):
+ constraints = [flasher_r_udotr, breakdown_r_udotr,muon_r_psi_z_udotr,neck_r_z_udotr,noise_z_udotr]
+ nll = make_nll(data,sacrifice,constraints)
+
+ x0 = []
+ for bg in ['signal','muon','noise','neck','flasher','breakdown']:
+ x0.append(data[bg].sum())
+
+ # contamination
+ x0 += [0.99]*5
+
+ if data['muon'].sum() > 0:
+ # P(r,psi,z,udotr|muon)
+ x0 += [data['muon'][0,0,0,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,0,0,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,0,1,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,0,1,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,1,0,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,1,0,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,1,1,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][0,1,1,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,0,0,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,0,0,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,0,1,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,0,1,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,1,0,0].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,1,0,1].sum()/data['muon'].sum()]
+ x0 += [data['muon'][1,1,1,0].sum()/data['muon'].sum()]
+ else:
+ x0 += [0.1]*15
+
+ if data['noise'].sum() > 0:
+ # P(r|noise)
+ x0 += [data['noise'][0].sum()/data['noise'].sum()]
+ # P(psi|noise)
+ x0 += [data['noise'][:,0].sum()/data['noise'].sum()]
+ # P(z,udotr|noise)
+ x0 += [data['noise'][:,:,0,0].sum()/data['noise'].sum()]
+ x0 += [data['noise'][:,:,0,1].sum()/data['noise'].sum()]
+ x0 += [data['noise'][:,:,1,0].sum()/data['noise'].sum()]
+ else:
+ x0 += [0.1]*5
+
+ if data['neck'].sum() > 0:
+ # P(r,z,udotr|neck)
+ x0 += [data['neck'][0,:,0,0].sum()/data['neck'].sum()]
+ x0 += [data['neck'][0,:,0,1].sum()/data['neck'].sum()]
+ x0 += [data['neck'][0,:,1,0].sum()/data['neck'].sum()]
+ x0 += [data['neck'][0,:,1,1].sum()/data['neck'].sum()]
+ x0 += [data['neck'][1,:,0,0].sum()/data['neck'].sum()]
+ x0 += [data['neck'][1,:,0,1].sum()/data['neck'].sum()]
+ x0 += [data['neck'][1,:,1,0].sum()/data['neck'].sum()]
+ # P(psi|neck)
+ x0 += [data['neck'][:,0].sum()/data['neck'].sum()]
+ else:
+ x0 += [0.1]*8
+
+ if data['flasher'].sum() > 0:
+ # P(r,udotr|flasher)
+ x0 += [data['flasher'][0,:,:,0].sum()/data['flasher'].sum()]
+ x0 += [data['flasher'][0,:,:,1].sum()/data['flasher'].sum()]
+ x0 += [data['flasher'][1,:,:,0].sum()/data['flasher'].sum()]
+ # P(psi|flasher)
+ x0 += [data['flasher'][:,0].sum()/data['flasher'].sum()]
+ # P(z|flasher)
+ x0 += [data['flasher'][:,:,0].sum()/data['flasher'].sum()]
+ else:
+ x0 += [0.1]*5
+
+ if data['breakdown'].sum() > 0:
+ # P(r,udotr|breakdown)
+ x0 += [data['breakdown'][0,:,:,0].sum()/data['breakdown'].sum()]
+ x0 += [data['breakdown'][0,:,:,1].sum()/data['breakdown'].sum()]
+ x0 += [data['breakdown'][1,:,:,0].sum()/data['breakdown'].sum()]
+ # P(psi|breakdown)
+ x0 += [data['breakdown'][:,0].sum()/data['breakdown'].sum()]
+ # P(z|breakdown)
+ x0 += [data['breakdown'][:,:,0].sum()/data['breakdown'].sum()]
+ else:
+ x0 += [0.1]*5
+
+ # P(neck|muon)
+ x0 += [EPSILON]
+
+ x0 = np.array(x0)
+
+ # We originally used the COBYLA algorithm here because it is the only
+ # derivative-free minimization routine in nlopt which honors inequality
+ # constraints, but SBPLX seems to work better.
+ opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
+ opt.set_min_objective(nll)
+ # set lower bounds to EPSILON to prevent nans if we predict something should
+ # be 0 but observe an event.
+ low = np.ones_like(x0)*EPSILON
+ high = np.array([1e9]*6 + [1-EPSILON]*(len(x0)-6))
+ x0[x0 < low] = low[x0 < low]
+ x0[x0 > high] = high[x0 > high]
+ opt.set_lower_bounds(low)
+ opt.set_upper_bounds(high)
+ opt.set_ftol_abs(1e-10)
+ opt.set_initial_step([1]*6 + [0.01]*(len(x0)-6))
+ #for constraint in constraints:
+ #opt.add_inequality_constraint(constraint,0)
+
+ xopt = opt.optimize(x0)
+ nll_xopt = nll(xopt)
+ print("nll(xopt) = ", nll(xopt))
+
+ while True:
+ xopt = opt.optimize(xopt)
+ if not nll(xopt) < nll_xopt - 1e-10:
+ break
+ nll_xopt = nll(xopt)
+ print("nll(xopt) = ", nll(xopt))
+ #print("n = ", opt.get_numevals())
+
+ stepsizes = estimate_errors(nll,xopt,constraints)
+ with printoptions(precision=3, suppress=True):
+ print("Errors: ", stepsizes)
+
+ #samples = metropolis_hastings(nll,xopt,stepsizes,100000)
+ #print("nll(xopt) = %.2g" % nll(xopt))
+
+ pos = np.empty((10, len(x0)),dtype=np.double)
+ for i in range(pos.shape[0]):
+ pos[i] = xopt + np.random.randn(len(x0))*stepsizes
+ pos[i,:6] = np.clip(pos[i,:6],EPSILON,1e9)
+ pos[i,6:] = np.clip(pos[i,6:],EPSILON,1-EPSILON)
+
+ for constraint in constraints:
+ if constraint(pos[i]) >= 0:
+ pos[i] = constraint.renormalize_no_fix(pos[i])
+
+ nwalkers, ndim = pos.shape
+
+ proposal = get_proposal_func(stepsizes*0.1,low,high)
+ sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x, grad, fill_value: -nll(x,grad,fill_value), moves=emcee.moves.MHMove(proposal),args=[None,np.inf])
+ with np.errstate(invalid='ignore'):
+ sampler.run_mcmc(pos, steps)
+
+ samples = sampler.chain.reshape((-1,len(x0)))
+
+ return samples
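
fit() seeds the emcee ensemble by jittering the nlopt optimum and then samples
with a custom Metropolis-Hastings proposal. The bare-bones version of the same
pattern, minus the constraints and the custom move (all names here are made up
for illustration):

    import numpy as np
    import emcee

    def log_prob(x):
        return -0.5*np.sum(x**2)  # standard normal target

    ndim, nwalkers = 3, 10
    xopt = np.zeros(ndim)                              # pretend optimizer output
    pos = xopt + 0.1*np.random.randn(nwalkers, ndim)   # jitter around the optimum
    sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob)
    sampler.run_mcmc(pos, 1000)
    samples = sampler.chain.reshape((-1, ndim))

This sketch uses emcee's default stretch move; the script instead wraps its own
proposal in emcee.moves.MHMove so the step sizes estimated from the likelihood
curvature are reused.
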
+
+if __name__ == '__main__':
+ import argparse
+ import numpy as np
+ import pandas as pd
+ import sys
+ import h5py
+ from sddm import setup_matplotlib
+
+ parser = argparse.ArgumentParser("run a closure test on the contamination analysis")
+ parser.add_argument("filenames", nargs='+', help="input files")
+ parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain")
+ parser.add_argument("--save", action="store_true", default=False, help="save plots")
+ parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
+ parser.add_argument("-n", type=int, default=10, help="number of fits to run")
+ args = parser.parse_args()
+
+ setup_matplotlib(args.save)
+
+ import matplotlib.pyplot as plt
+
+ ev = get_events(args.filenames,merge_fits=True)
+ ev_mc = get_events(args.mc, merge_fits=True)
+
+ # figure out bins for high level variables
+ ev = radius_cut(ev)
+ ev = psi_cut(ev)
+ ev = cos_theta_cut(ev)
+ ev = z_cut(ev)
+ ev = udotr_cut(ev)
+
+ ev['noise'] = ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0
+ ev['neck'] = ((ev.dc & DC_NECK) != 0) & ~ev.noise
+ ev['flasher'] = ((ev.dc & DC_FLASHER) != 0) & ~(ev.noise | ev.neck) & (ev.nhit < 1000)
+ ev['breakdown'] = ((ev.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev.noise | ev.neck) & (ev.nhit >= 1000)
+ ev['muon'] = ((ev.dc & DC_MUON) != 0) & ~(ev.noise | ev.neck | ev.flasher | ev.breakdown)
+ ev['signal'] = ~(ev.noise | ev.neck | ev.flasher | ev.breakdown | ev.muon)
+
+ ev_mc = ev_mc[ev_mc.prompt]
+
+ # figure out bins for high level variables
+ ev_mc = radius_cut(ev_mc)
+ ev_mc = psi_cut(ev_mc)
+ ev_mc = cos_theta_cut(ev_mc)
+ ev_mc = z_cut(ev_mc)
+ ev_mc = udotr_cut(ev_mc)
+
+ ev_mc['noise'] = ev_mc.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0
+ ev_mc['neck'] = ((ev_mc.dc & DC_NECK) != 0) & ~ev_mc.noise
+ ev_mc['flasher'] = ((ev_mc.dc & DC_FLASHER) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit < 1000)
+ ev_mc['breakdown'] = ((ev_mc.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit >= 1000)
+ ev_mc['muon'] = ((ev_mc.dc & DC_MUON) != 0) & ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown)
+ ev_mc['signal'] = ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown | ev_mc.muon)
+
+ contamination_pull = {}
+
+ nbg = {}
+ for bg in ['signal','muon','noise','neck','flasher','breakdown']:
+ nbg[bg] = 100
+ contamination_pull[bg] = []
+
+ for i in range(args.n):
+ data = {}
+ for bg in ['signal','muon','noise','neck','flasher','breakdown']:
+ data[bg] = np.zeros((2,2,2,2),dtype=int)
+ if bg == 'signal':
+ for bg2 in ['signal','muon','noise','neck','flasher','breakdown']:
+ if bg2 == 'signal':
+ for _, row in ev_mc[ev_mc[bg2]].sample(n=nbg[bg2],replace=True).iterrows():
+ data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
+ else:
+ for _, row in ev[ev[bg2]].sample(n=nbg[bg2],replace=True).iterrows():
+ data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
+ else:
+ for _, row in ev[ev[bg]].iterrows():
+ data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
+
+ # FIXME: Double check that what I'm calculating here matches with what I
+ # expect
+ sacrifice = {}
+ for bg in ['signal','muon','noise','neck','flasher','breakdown']:
+ sacrifice[bg] = np.zeros((2,2,2,2),dtype=float)
+ for _, row in ev_mc[ev_mc[bg]].iterrows():
+ sacrifice[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
+
+ sacrifice[bg] /= len(ev_mc)
+
+ samples = fit(data, sacrifice, args.steps)
+
+ (mu_signal, mu_muon, mu_noise, mu_neck, mu_flasher, mu_breakdown,
+ contamination_muon, contamination_noise, contamination_neck, contamination_flasher, contamination_breakdown,
+ p_r_psi_z_udotr_muon_lolololo, # 11
+ p_r_psi_z_udotr_muon_lololohi,
+ p_r_psi_z_udotr_muon_lolohilo,
+ p_r_psi_z_udotr_muon_lolohihi,
+ p_r_psi_z_udotr_muon_lohilolo,
+ p_r_psi_z_udotr_muon_lohilohi,
+ p_r_psi_z_udotr_muon_lohihilo,
+ p_r_psi_z_udotr_muon_lohihihi,
+ p_r_psi_z_udotr_muon_hilololo,
+ p_r_psi_z_udotr_muon_hilolohi,
+ p_r_psi_z_udotr_muon_hilohilo,
+ p_r_psi_z_udotr_muon_hilohihi,
+ p_r_psi_z_udotr_muon_hihilolo,
+ p_r_psi_z_udotr_muon_hihilohi,
+ p_r_psi_z_udotr_muon_hihihilo,
+ p_r_noise_lo, p_psi_noise_lo, # 26, 27
+ p_z_udotr_noise_lolo, # 28
+ p_z_udotr_noise_lohi,
+ p_z_udotr_noise_hilo,
+ p_r_z_udotr_neck_lololo, # 31
+ p_r_z_udotr_neck_lolohi,
+ p_r_z_udotr_neck_lohilo,
+ p_r_z_udotr_neck_lohihi,
+ p_r_z_udotr_neck_hilolo,
+ p_r_z_udotr_neck_hilohi,
+ p_r_z_udotr_neck_hihilo,
+ p_psi_neck_lo, # 38
+ p_r_udotr_flasher_lolo, p_r_udotr_flasher_lohi, p_r_udotr_flasher_hilo, # 39, ..., 41
+ p_psi_flasher_lo, p_z_flasher_lo,
+ p_r_udotr_breakdown_lolo, p_r_udotr_breakdown_lohi, p_r_udotr_breakdown_hilo, # 44, ..., 46
+ p_psi_breakdown_lo, p_z_breakdown_lo,
+ p_neck_given_muon) = samples.T
+
+ for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
+ if i == 0:
+ contamination = samples[:,i]
+ else:
+ contamination = samples[:,i]*(1-samples[:,5+i])
+ mean = np.mean(contamination)
+ std = np.std(contamination)
+ contamination_pull[bg].append((mean - nbg[bg])/std)
+
+ fig = plt.figure()
+ axes = []
+ for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
+ axes.append(plt.subplot(3,2,i+1))
+ plt.hist(contamination_pull[bg],bins=100,histtype='step')
+ plt.title(bg.capitalize())
+ for ax in axes:
+ ax.set_xlim((-10,10))
+ despine(ax=ax,left=True,trim=True)
+ ax.get_yaxis().set_visible(False)
+ plt.tight_layout()
+
+ if args.save:
+ fig.savefig("contamination_pull_plot.pdf")
+ fig.savefig("contamination_pull_plot.eps")
+ else:
+ plt.show()
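
If the fit is unbiased and its reported uncertainties are accurate, each pull
(mean - truth)/std computed above should follow a standard normal, so the
closure test passes when the histograms are centered on 0 with unit width. A
quick numerical check (hypothetical helper, not part of this commit):

    import numpy as np

    def check_pulls(pulls):
        """Pulls from an unbiased fit should have mean ~ 0 and std ~ 1."""
        pulls = np.asarray(pulls)
        return np.mean(pulls), np.std(pulls)

    # e.g. mean, std = check_pulls(contamination_pull['muon'])
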
diff --git a/utils/plot-energy b/utils/plot-energy
index 4cd116b..6997f3b 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -117,125 +117,7 @@ if __name__ == '__main__':
import matplotlib.pyplot as plt
- ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
- fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True)
- rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True)
-
- first_gtid = rhdr.set_index('run').to_dict()['first_gtid']
-
- # First, remove junk events since orphans won't have a 50 MHz clock and so
- # could screw up the 50 MHz clock unwrapping
- ev = ev[ev.dc & DC_JUNK == 0]
-
- # We need the events to be in time order here in order to calculate the
- # delta t between events. It's not obvious exactly how to do this. You
- # could sort by GTID, but that wraps around. Similarly we can't sort by the
- # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort
- # by the 10 MHz clock since it can be unreliable.
- #
- # Update: Phil proposed a clever way to get the events in order using the
- # GTID:
- #
- # > The GTID rollover should be easy to handle because there should never
- # > be two identical GTID's in a run. So if you order the events by GTID,
- # > you can assume that events with GTID's that come logically before the
- # > first GTID in the run must have occurred after the other events.
- #
- # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
- # in the run and sort on that. We get the first GTID from the RHDR bank.
- ev['gtid_sort'] = ev['gtid'].copy()
-
- ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
-
- ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
-
- for run, ev_run in ev.groupby('run'):
- # Warn about 50 MHz clock jumps since they could indicate that the
- # events aren't in order.
- dt = np.diff(ev_run.gtr)
- if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
- print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
- (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
-
- # unwrap the 50 MHz clock within each run
- ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
-
- for run, ev_run in ev.groupby('run'):
- # Warn about GTID jumps since we could be missing a potential flasher
- # and/or breakdown, and we need all the events in order to do a
- # retrigger cut
- if np.count_nonzero(np.diff(ev_run.gtid) != 1):
- print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
-
- # calculate the time difference between each event and the previous event
- # so we can tag retrigger events
- ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
-
- # Calculate the approximate Ockham factor.
- # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
- #
- # Note: This is a really approximate form by assuming that the shape of
- # the likelihood space is equal to the average uncertainty in the
- # different parameters.
- fits['w'] = fits['n']*np.log(0.05/10e3) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
-
- # Apply a fudge factor to the Ockham factor of 100 for each extra particle
- # FIXME: I chose 100 a while ago but didn't really investigate what the
- # optimal value was or exactly why it was needed. Should do this.
- fits['w'] -= fits['n']*100
-
- # Note: we index on the left hand side with loc to avoid a copy error
- #
- # See https://www.dataquest.io/blog/settingwithcopywarning/
- fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
- fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
-
- fits['fmin'] = fits['fmin'] - fits['w']
-
- # See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge
- # for how to properly divide the psi column by nhit_cal which is in the ev
- # dataframe before we actually merge
- fits['psi'] /= fits.reset_index().merge(ev,on=['run','gtid']).set_index('index')['nhit_cal']
- fits.loc[fits['n'] == 1,'ke'] = fits['energy1']
- fits.loc[fits['n'] == 2,'ke'] = fits['energy1'] + fits['energy2']
- fits.loc[fits['n'] == 3,'ke'] = fits['energy1'] + fits['energy2'] + fits['energy3']
- fits['id'] = fits['id1']
- fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
- fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
- fits['theta'] = fits['theta1']
- fits['r'] = np.sqrt(fits.x**2 + fits.y**2 + fits.z**2)
- fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3
-
- ev['ftp_r'] = np.sqrt(ev.ftp_x**2 + ev.ftp_y**2 + ev.ftp_z**2)
- ev['ftp_r_psup'] = (ev['ftp_r']/PSUP_RADIUS)**3
-
- print("number of events = %i" % len(ev))
-
- # Now, select prompt events.
- #
- # We define a prompt event here as any event with an NHIT > 100 and whose
- # previous > 100 nhit event was more than 250 ms ago
- #
- # Note: It's important we do this *before* applying the data cleaning cuts
- # since otherwise we may have a prompt event identified only after the
- # cuts.
- #
- # For example, suppose there was a breakdown and for whatever reason
- # the *second* event after the breakdown didn't get tagged correctly. If we
- # apply the data cleaning cuts first and then tag prompt events then this
- # event will get tagged as a prompt event.
- ev = ev.groupby('run',group_keys=False).apply(prompt_event)
-
- print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
-
- # flasher follower cut
- ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
-
- # breakdown follower cut
- ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
-
- # retrigger cut
- ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
+ ev, fits = get_events(args.filenames)
if args.dc:
ev = ev[ev.prompt]
diff --git a/utils/sddm/dc.py b/utils/sddm/dc.py
index b5cf336..9081a21 100755
--- a/utils/sddm/dc.py
+++ b/utils/sddm/dc.py
@@ -359,3 +359,10 @@ class Constraint(object):
continue
x[i] -= x[i]/2.0
return x
+
+def fast_poisson_logpmf(k,mu):
+ """
+ A fast version of poisson.logpmf(k,mu) which doesn't include the constant
+ term of -log(k!) since for likelihood functions it is not necessary.
+ """
+ return -mu + k*np.log(mu)
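
Dropping the -log(k!) term is safe for fitting because it doesn't depend on
the rate mu, so this differs from scipy's logpmf only by a constant per bin. A
quick sanity check:

    import numpy as np
    from scipy.stats import poisson
    from scipy.special import gammaln

    k, mu = 3, 2.5
    fast = -mu + k*np.log(mu)
    exact = poisson.logpmf(k, mu)
    assert np.isclose(fast - exact, gammaln(k + 1))  # they differ by log(k!)
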
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 0c6dcf6..f496b0b 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -347,3 +347,142 @@ def michel_spectrum(T):
y[mask] = GF**2*MUON_MASS**5*x[mask]**2*(6-4*x[mask]+FINE_STRUCTURE_CONSTANT*f(x[mask])/np.pi)/(192*np.pi**3)
y *= 2*MUON_MASS
return y
+
+def get_events(filenames, merge_fits=False):
+ ev = pd.concat([pd.read_hdf(filename, "ev") for filename in filenames],ignore_index=True)
+ fits = pd.concat([pd.read_hdf(filename, "fits") for filename in filenames],ignore_index=True)
+ rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True)
+
+ first_gtid = rhdr.set_index('run').to_dict()['first_gtid']
+
+ # First, remove junk events since orphans won't have a 50 MHz clock and so
+ # could screw up the 50 MHz clock unwrapping
+ ev = ev[ev.dc & DC_JUNK == 0]
+
+ # We need the events to be in time order here in order to calculate the
+ # delta t between events. It's not obvious exactly how to do this. You
+ # could sort by GTID, but that wraps around. Similarly we can't sort by the
+ # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort
+ # by the 10 MHz clock since it can be unreliable.
+ #
+ # Update: Phil proposed a clever way to get the events in order using the
+ # GTID:
+ #
+ # > The GTID rollover should be easy to handle because there should never
+ # > be two identical GTID's in a run. So if you order the events by GTID,
+ # > you can assume that events with GTID's that come logically before the
+ # > first GTID in the run must have occurred after the other events.
+ #
+ # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
+ # in the run and sort on that. We get the first GTID from the RHDR bank.
+ ev['gtid_sort'] = ev['gtid'].copy()
+
+ ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
+
+ ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
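
gtid_sort is defined elsewhere in this module and is not part of this diff;
per the comment above it presumably does something like the following sketch,
where 0x1000000 is the 24 bit GTID rollover:

    def gtid_sort_sketch(ev, first_gtid):
        """Add the rollover period to GTIDs before the run's first GTID."""
        run = ev.run.iloc[0]
        ev.loc[ev.gtid < first_gtid[run], 'gtid_sort'] += 0x1000000
        return ev
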
+
+ for run, ev_run in ev.groupby('run'):
+ # Warn about 50 MHz clock jumps since they could indicate that the
+ # events aren't in order.
+ dt = np.diff(ev_run.gtr)
+ if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
+ print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
+ (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
+
+ # unwrap the 50 MHz clock within each run
+ ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
+
+ for run, ev_run in ev.groupby('run'):
+ # Warn about GTID jumps since we could be missing a potential flasher
+ # and/or breakdown, and we need all the events in order to do a
+ # retrigger cut
+ if np.count_nonzero(np.diff(ev_run.gtid) != 1):
+ print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
+
+ # calculate the time difference between each event and the previous event
+ # so we can tag retrigger events
+ ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
+
+ # Calculate the approximate Ockham factor.
+ # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
+ #
+ # Note: This is a really approximate form by assuming that the shape of
+ # the likelihood space is equal to the average uncertainty in the
+ # different parameters.
+ fits['w'] = fits['n']*np.log(0.05/10e3) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
+
+ # Apply a fudge factor to the Ockham factor of 100 for each extra particle
+ # FIXME: I chose 100 a while ago but didn't really investigate what the
+ # optimal value was or exactly why it was needed. Should do this.
+ fits['w'] -= fits['n']*100
+
+ # Note: we index on the left hand side with loc to avoid a copy error
+ #
+ # See https://www.dataquest.io/blog/settingwithcopywarning/
+ fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
+ fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
+
+ fits['fmin'] = fits['fmin'] - fits['w']
+
+ # See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge
+ # for how to properly divide the psi column by nhit_cal which is in the ev
+ # dataframe before we actually merge
+ fits['psi'] /= fits.reset_index().merge(ev,on=['run','gtid']).set_index('index')['nhit_cal']
+ fits.loc[fits['n'] == 1,'ke'] = fits['energy1']
+ fits.loc[fits['n'] == 2,'ke'] = fits['energy1'] + fits['energy2']
+ fits.loc[fits['n'] == 3,'ke'] = fits['energy1'] + fits['energy2'] + fits['energy3']
+ fits['id'] = fits['id1']
+ fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
+ fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
+ fits['theta'] = fits['theta1']
+ fits['r'] = np.sqrt(fits.x**2 + fits.y**2 + fits.z**2)
+ fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3
+
+ ev['ftp_r'] = np.sqrt(ev.ftp_x**2 + ev.ftp_y**2 + ev.ftp_z**2)
+ ev['ftp_r_psup'] = (ev['ftp_r']/PSUP_RADIUS)**3
+
+ # Now, select prompt events.
+ #
+ # We define a prompt event here as any event with an NHIT > 100 and whose
+ # previous > 100 nhit event was more than 250 ms ago
+ #
+ # Note: It's important we do this *before* applying the data cleaning cuts
+ # since otherwise we may have a prompt event identified only after the
+ # cuts.
+ #
+ # For example, suppose there was a breakdown and for whatever reason
+ # the *second* event after the breakdown didn't get tagged correctly. If we
+ # apply the data cleaning cuts first and then tag prompt events then this
+ # event will get tagged as a prompt event.
+ ev = ev.groupby('run',group_keys=False).apply(prompt_event)
+
+ print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
+
+ # flasher follower cut
+ ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
+
+ # breakdown follower cut
+ ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
+
+ # retrigger cut
+ ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
+
+ if merge_fits:
+ ev.set_index(['run','gtid'])
+
+ ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
+ ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)]
+ ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0)
+ ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0)
+ ev['psi'] /= ev.nhit_cal
+
+ ev['cos_theta'] = np.cos(ev_single_particle['theta1'])
+ ev['r'] = np.sqrt(ev.x**2 + ev.y**2 + ev.z**2)
+ ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \
+ np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \
+ np.cos(ev_single_particle.theta1)*ev_single_particle.z
+ ev['udotr'] /= ev.r
+
+ return ev
+
+ return ev, fits
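
For reference, the udotr computed in the merge_fits branch is the cosine of
the angle between the fitted direction u = (sin(theta)cos(phi),
sin(theta)sin(phi), cos(theta)) and the event position vector. The same
quantity for a single event, with hypothetical scalar inputs:

    import numpy as np

    def udotr(theta, phi, x, y, z):
        """cos(angle) between the direction (theta, phi) and the position."""
        u = np.array([np.sin(theta)*np.cos(phi),
                      np.sin(theta)*np.sin(phi),
                      np.cos(theta)])
        r = np.array([x, y, z])
        return np.dot(u, r)/np.linalg.norm(r)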