about summary refs log tree commit diff
path: root/utils/dc
diff options
context:
space:
mode:
Diffstat (limited to 'utils/dc')
-rwxr-xr-xutils/dc217
1 file changed, 39 insertions, 178 deletions
diff --git a/utils/dc b/utils/dc
index 0c14053..16591df 100755
--- a/utils/dc
+++ b/utils/dc
@@ -266,178 +266,20 @@ if __name__ == '__main__':
import pandas as pd
import sys
import h5py
+ from sddm import setup_matplotlib
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain")
parser.add_argument("--save", action="store_true", default=False, help="save plots")
+ parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
args = parser.parse_args()
- if args.save:
- # default \textwidth for a fullpage article in Latex is 16.50764 cm.
- # You can figure this out by compiling the following TeX document:
- #
- # \documentclass{article}
- # \usepackage{fullpage}
- # \usepackage{layouts}
- # \begin{document}
- # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
- # \end{document}
-
- width = 16.50764
- width /= 2.54 # cm -> inches
- # According to this page:
- # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
- # Tufte suggests an aspect ratio of 1.5 - 1.6.
- height = width/1.5
- FIGSIZE = (width,height)
-
- import matplotlib.pyplot as plt
-
- font = {'family':'serif', 'serif': ['computer modern roman']}
- plt.rc('font',**font)
-
- plt.rc('text', usetex=True)
- else:
- # on retina screens, the default plots are way too small
- # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
- # Qt5 will scale everything using the dpi in ~/.Xresources
- import matplotlib
- matplotlib.use("Qt5Agg")
-
- import matplotlib.pyplot as plt
-
- # Default figure size. Currently set to my monitor width and height so that
- # things are properly formatted
- FIGSIZE = (13.78,7.48)
-
- # Make the defalt font bigger
- plt.rc('font', size=22)
-
- for filename in args.filenames:
- ev = pd.read_hdf(filename,"ev")
-
- ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
- fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True)
- rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True)
-
- first_gtid = rhdr.set_index('run').to_dict()['first_gtid']
-
- # First, remove junk events since orphans won't have a 50 MHz clock and so
- # could screw up the 50 MHz clock unwrapping
- ev = ev[ev.dc & DC_JUNK == 0]
-
- # We need the events to be in time order here in order to calculate the
- # delta t between events. It's not obvious exactly how to do this. You
- # could sort by GTID, but that wraps around. Similarly we can't sort by the
- # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort
- # by the 10 MHz clock since it can be unreliable.
- #
- # Update: Phil proposed a clever way to get the events in order using the
- # GTID:
- #
- # > The GTID rollover should be easy to handle because there should never
- # > be two identical GTID's in a run. So if you order the events by GTID,
- # > you can assume that events with GTID's that come logically before the
- # > first GTID in the run must have occurred after the other events.
- #
- # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
- # in the event and sort on that. We get the first GTID from the RHDR bank.
- ev['gtid_sort'] = ev['gtid'].copy()
-
- ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
-
- ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
-
- for run, ev_run in ev.groupby('run'):
- # Warn about 50 MHz clock jumps since they could indicate that the
- # events aren't in order.
- dt = np.diff(ev_run.gtr)
- if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
- print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
- (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
-
- # unwrap the 50 MHz clock within each run
- ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
-
- for run, ev_run in ev.groupby('run'):
- # Warn about GTID jumps since we could be missing a potential flasher
- # and/or breakdown, and we need all the events in order to do a
- # retrigger cut
- if np.count_nonzero(np.diff(ev_run.gtid) != 1):
- print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
-
- # calculate the time difference between each event and the previous event
- # so we can tag retrigger events
- ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
-
- # Calculate the approximate Ockham factor.
- # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
- #
- # Note: This is a really approximate form by assuming that the shape of
- # the likelihood space is equal to the average uncertainty in the
- # different parameters.
- fits['w'] = fits['n']*np.log(0.05/10e3) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
- fits['w'] -= fits['n']*100
-
- # Note: we index on the left hand site with loc to avoid a copy error
- #
- # See https://www.dataquest.io/blog/settingwithcopywarning/
- fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
- fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
-
- fits['fmin'] = fits['fmin'] - fits['w']
-
- fits['ke'] = fits['energy1']
- fits['id'] = fits['id1']
- fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
- fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
- fits['theta'] = fits['theta1']
-
- print("number of events = %i" % len(ev))
-
- # Now, select prompt events.
- #
- # We define a prompt event here as any event with an NHIT > 100 and whose
- # previous > 100 nhit event was more than 250 ms ago
- #
- # Note: It's important we do this *before* applying the data cleaning cuts
- # since otherwise we may have a prompt event identified only after the
- # cuts.
- #
- # For example, suppose there was a breakdown and for whatever reason
- # the *second* event after the breakdown didn't get tagged correctly. If we
- # apply the data cleaning cuts first and then tag prompt events then this
- # event will get tagged as a prompt event.
- ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True)
-
- print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
-
- # flasher follower cut
- ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True)
-
- # breakdown follower cut
- ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True)
-
- # retrigger cut
- ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True)
-
- ev = ev[ev.prompt]
-
- ev.set_index(['run','gtid'])
-
- ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
- ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)]
- ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0)
- ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0)
- ev['psi'] /= ev.nhit_cal
-
- ev['cos_theta'] = np.cos(ev_single_particle['theta1'])
- ev['r'] = np.sqrt(ev.x**2 + ev.y**2 + ev.z**2)
- ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \
- np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \
- np.cos(ev_single_particle.theta1)*ev_single_particle.z
- ev['udotr'] /= ev.r
+ setup_matplotlib(args.save)
+
+ import matplotlib.pyplot as plt
+
+ ev = get_events(args.filenames,merge_fits=True)
# figure out bins for high level variables
ev = radius_cut(ev)
@@ -459,14 +301,33 @@ if __name__ == '__main__':
for _, row in ev[ev[bg]].iterrows():
data[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
- # FIXME: Estimate for now, needs to come from MC
- sacrifice = {bg: 0.0 for bg in ['muon','noise','neck','flasher','breakdown']}
- sacrifice['signal'] = np.zeros((len(np.unique(ev.radius_cut)),len(np.unique(ev.psi_cut)),len(np.unique(ev.cos_theta_cut)),len(np.unique(ev.z_cut))),dtype=int)
- p_r_signal = np.array([0.9,0.1])
- p_psi_signal = np.array([1.0,0.0])
- p_z_signal = np.array([0.5,0.5])
- p_udotr_signal = np.array([0.25,0.75])
- sacrifice['signal'] = p_r_signal[:,np.newaxis,np.newaxis,np.newaxis]*p_psi_signal[:,np.newaxis,np.newaxis]*p_z_signal[:,np.newaxis]*p_udotr_signal
+ ev_mc = get_events(args.mc, merge_fits=True)
+
+ ev_mc = ev_mc[ev_mc.prompt]
+
+ # figure out bins for high level variables
+ ev_mc = radius_cut(ev_mc)
+ ev_mc = psi_cut(ev_mc)
+ ev_mc = cos_theta_cut(ev_mc)
+ ev_mc = z_cut(ev_mc)
+ ev_mc = udotr_cut(ev_mc)
+
+ ev_mc['noise'] = ev_mc.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0
+ ev_mc['neck'] = ((ev_mc.dc & DC_NECK) != 0) & ~ev_mc.noise
+ ev_mc['flasher'] = ((ev_mc.dc & DC_FLASHER) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit < 1000)
+ ev_mc['breakdown'] = ((ev_mc.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev_mc.noise | ev_mc.neck) & (ev_mc.nhit >= 1000)
+ ev_mc['muon'] = ((ev_mc.dc & DC_MUON) != 0) & ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown)
+ ev_mc['signal'] = ~(ev_mc.noise | ev_mc.neck | ev_mc.flasher | ev_mc.breakdown | ev_mc.muon)
+
+ # FIXME: Double check that what I'm calculating here matches with what I
+ # expect
+ sacrifice = {}
+ for bg in ['signal','muon','noise','neck','flasher','breakdown']:
+ sacrifice[bg] = np.zeros((2,2,2,2),dtype=float)
+ for _, row in ev_mc[ev_mc[bg]].iterrows():
+ sacrifice[bg][row.radius_cut][row.psi_cut][row.z_cut][row.udotr_cut] += 1
+
+ sacrifice[bg] /= len(ev_mc)
constraints = [flasher_r_udotr, breakdown_r_udotr,muon_r_psi_z_udotr,neck_r_z_udotr,noise_z_udotr]
nll = make_nll(data,sacrifice,constraints)
@@ -616,7 +477,7 @@ if __name__ == '__main__':
# Plot walker positions as a function of step number for the expected
# number of events
- fig, axes = plt.subplots(6, figsize=FIGSIZE, num=1, sharex=True)
+ fig, axes = plt.subplots(6, num=1, sharex=True)
samples = sampler.get_chain()
labels = ["Signal","Muon","Noise","Neck","Flasher","Breakdown"]
for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
@@ -631,7 +492,7 @@ if __name__ == '__main__':
# Plot walker positions as a function of step number for the background cut
# efficiencies
- fig, axes = plt.subplots(5, figsize=FIGSIZE, num=2, sharex=True)
+ fig, axes = plt.subplots(5, num=2, sharex=True)
samples = sampler.get_chain()
tag_labels = ['M','N','Ne','F','B']
for i, bg in enumerate(['muon','noise','neck','flasher','breakdown']):
@@ -646,7 +507,7 @@ if __name__ == '__main__':
samples = sampler.chain.reshape((-1,len(x0)))
- plt.figure(3, figsize=FIGSIZE)
+ plt.figure(3)
for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
ax = plt.subplot(3,2,i+1)
plt.hist(samples[:,i],bins=100,histtype='step')
@@ -656,7 +517,7 @@ if __name__ == '__main__':
plt.legend()
plt.tight_layout()
- plt.figure(4, figsize=FIGSIZE)
+ plt.figure(4)
for i, bg in enumerate(['muon','noise','neck','flasher','breakdown']):
ax = plt.subplot(3,2,i+1)
plt.hist(samples[:,6+i],bins=100,histtype='step')
@@ -732,7 +593,7 @@ if __name__ == '__main__':
p_psi_breakdown_lo]
ylim_max = 0
- fig = plt.figure(5, figsize=FIGSIZE)
+ fig = plt.figure(5)
axes = []
for i, bg in enumerate(['signal','muon','noise','neck','flasher','breakdown']):
axes.append(plt.subplot(3,2,i+1))