1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
|
#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function, division
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml.loader import SafeLoader as Loader
import numpy as np
from scipy.stats import iqr
from matplotlib.lines import Line2D
# On retina screens the default plots are way too small. By using the Qt5
# backend and setting QT_AUTO_SCREEN_SCALE_FACTOR=1, Qt5 will scale everything
# using the dpi set in ~/.Xresources.
import matplotlib
matplotlib.use("Qt5Agg")
# Rest masses keyed by the two-digit particle id that appears in the fit
# results. They are added to the fitted kinetic energies, which are plotted
# in MeV, so the units here are presumably MeV as well.
SNOMAN_MASS = {
    20: 0.511,    # electron
    21: 0.511,    # same mass as id 20 -- presumably the positron, TODO confirm
    22: 105.658,  # muon
    23: 105.658,  # same mass as id 22 -- presumably the antimuon, TODO confirm
}

# Radius used for the fiducial volume cuts. The radius cut in the script
# body describes it as "6 meters", so the units are presumably cm.
AV_RADIUS = 600.0

# Data cleaning bitmasks, one bit per cut.
DC_MUON = 1 << 0
DC_JUNK = 1 << 1
DC_CRATE_ISOTROPY = 1 << 2
DC_QVNHIT = 1 << 3
DC_NECK = 1 << 4
DC_FLASHER = 1 << 5
DC_ESUM = 1 << 6
DC_OWL = 1 << 7
DC_OWL_TRIGGER = 1 << 8
DC_FTS = 1 << 9
def plot_hist(x, label=None):
    """Draw a step histogram of `x` on the current matplotlib axes.

    The bin width is chosen with the Freedman-Diaconis rule,
    h = 2*IQR(x)/len(x)**(1/3), see
    https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule

    Args:
        x: sequence or array of values to histogram.
        label: optional legend label, passed through to `plt.hist`.

    Returns:
        None. Nothing is drawn when `x` is empty.
    """
    # Imported locally so the function also works when this file is imported
    # as a module; the script body only imports pyplot under the
    # `__name__ == '__main__'` guard.
    import matplotlib.pyplot as plt

    x = np.asarray(x)
    if x.size == 0:
        # np.min/np.max raise on empty input; there is nothing to plot.
        return

    lo, hi = np.min(x), np.max(x)

    # 1.0/3 keeps this a float division even without `from __future__ import
    # division`.
    h = 2*iqr(x)/len(x)**(1.0/3)

    if np.isfinite(h) and h > 0:
        n = max(int((hi - lo)/h), 10)
    else:
        # A zero (or NaN) interquartile range would make the division below
        # blow up, so fall back to the minimum number of bin edges.
        n = 10

    if hi > lo:
        bins = np.linspace(lo, hi, n)
    else:
        # All values are identical: let matplotlib/numpy pick a sensible
        # range instead of handing it zero-width bins.
        bins = n

    plt.hist(x, bins=bins, histtype='step', label=label)
def chunks(l, n):
    """Lazily split `l` into consecutive slices of length `n`.

    The final slice is shorter when len(l) is not a multiple of `n`.
    """
    offsets = range(0, len(l), n)
    for begin in offsets:
        end = begin + n
        yield l[begin:end]
# Script entry point: read the YAML fit output files given on the command
# line, apply the muon, data cleaning, prompt and neutron follower cuts, pick
# the best fit for each event and histogram the fitted energy for each
# particle id combination.
if __name__ == '__main__':
    import argparse
    import sys

    import matplotlib.pyplot as plt
    import pandas as pd

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    args = parser.parse_args()

    events = []
    fit_results = []

    for filename in args.filenames:
        print(filename)

        with open(filename) as f:
            # NOTE(security): when libyaml is available Loader is
            # yaml.CLoader, which is not a safe loader. Only run this on
            # trusted fit output files.
            data = yaml.load(f.read(),Loader=Loader)

        for event in data['data']:
            for ev in event['ev']:
                events.append((
                    ev['run'],
                    ev['gtr'],
                    ev['nhit'],
                    ev['gtid'],
                    ev['dc'],
                    ev['ftp']['x'] if 'ftp' in ev else np.nan,
                    ev['ftp']['y'] if 'ftp' in ev else np.nan,
                    ev['ftp']['z'] if 'ftp' in ev else np.nan,
                    ev['rsp']['energy'] if 'rsp' in ev and ev['rsp']['energy'] > 0 else np.nan,
                ))

                if 'fit' not in ev:
                    continue

                # Only integer keys are particle id combinations. items() is
                # used instead of iteritems() so this runs on Python 2 and 3.
                for fit_id, fit_result in ev['fit'].items():
                    if not isinstance(fit_id, int):
                        continue

                    # FIXME: Should I just store the particle ids in the YAML
                    # output as a list of particle ids instead of a single
                    # integer?
                    #
                    # The combined id is split into two-digit particle ids,
                    # e.g. 2022 -> [20, 22]. This has to be a real list since
                    # len() is taken below (map() is lazy on Python 3).
                    ids = [int(s) for s in chunks(str(fit_id),2)]

                    energy = 0.0
                    skip = False
                    for pid, ke in zip(ids,np.atleast_1d(fit_result['energy'])):
                        energy += ke + SNOMAN_MASS[pid]

                        # This is a bit of a hack. It appears that many times
                        # the fit will actually do much better by including a
                        # very low energy electron or muon. I believe the
                        # reason for this is that of course my likelihood
                        # function is not perfect (for example, I don't include
                        # the correct angular distribution for Rayleigh
                        # scattered light), and so the fitter often wants to
                        # add a very low energy electron or muon to fix things.
                        #
                        # Ideally I would fix the likelihood function, but for
                        # now we just discard any fit results which have a very
                        # low energy electron or muon.
                        if len(ids) > 1 and pid == 20 and ke < 20.0:
                            skip = True

                        if len(ids) > 1 and pid == 22 and ke < 200.0:
                            skip = True

                    if skip:
                        continue

                    # Calculate the approximate Ockham factor.
                    # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
                    #
                    # Note: This is a really approximate form by assuming that
                    # the shape of the likelihood space is equal to the average
                    # uncertainty in the different parameters.
                    w = len(ids)*np.log(0.1*0.001) + np.sum(np.log(fit_result['energy'])) + len(ids)*np.log(1e-4/(4*np.pi))

                    fit_results.append((
                        ev['run'],
                        ev['gtid'],
                        fit_id,
                        fit_result['posx'],
                        fit_result['posy'],
                        fit_result['posz'],
                        fit_result['t0'],
                        energy,
                        fit_result['fmin'] - w,
                        fit_result['psi']/ev['nhit']))

    # create a dataframe
    # note: we have to first create a numpy structured array since there is no
    # way to pass a list of data types to the DataFrame constructor. See
    # https://github.com/pandas-dev/pandas/issues/4464
    #
    # The builtin int is used instead of np.int, which was only an alias for
    # it and has been removed from recent NumPy releases.
    array = np.array(fit_results,
                     dtype=[('run',int),         # run number
                            ('gtid',int),        # gtid
                            ('id',int),          # particle id
                            ('x', np.double),    # x
                            ('y',np.double),     # y
                            ('z',np.double),     # z
                            ('t0',np.double),    # t0
                            ('ke',np.double),    # summed kinetic energy + mass of all particles
                            ('fmin',np.double),  # negative log likelihood minus the Ockham factor
                            ('psi',np.double)]   # goodness of fit parameter (psi/nhit)
                    )
    df = pd.DataFrame.from_records(array)

    array = np.array(events,
                     dtype=[('run',int),                # run number
                            ('gtr',np.double),          # 50 MHz clock in ns
                            ('nhit',int),               # number of PMTs hit
                            ('gtid',int),               # gtid
                            ('dc',int),                 # data cleaning word
                            ('ftpx',np.double),         # x from the 'ftp' entry (NaN if missing)
                            ('ftpy',np.double),         # y from the 'ftp' entry (NaN if missing)
                            ('ftpz',np.double),         # z from the 'ftp' entry (NaN if missing)
                            ('rsp_energy',np.double)]   # energy from the 'rsp' entry (NaN if missing or <= 0)
                    )
    df_ev = pd.DataFrame.from_records(array)

    # remove events 200 microseconds after a muon
    muons = df_ev[(df_ev.dc & DC_MUON) != 0]

    print("number of events = %i" % len(df_ev))

    print("number of muons = %i" % len(muons))

    df_ev = df_ev[(df_ev.dc & DC_MUON) == 0]

    print("number of events after muon cut = %i" % len(df_ev))

    if muons.size:
        # FIXME: need to deal with 50 MHz clock rollover
        #
        # Rows are muons, columns are events, so reducing over axis 0 flags
        # every event which follows any muon.
        ev_time = df_ev.gtr.values
        muon_time = muons.gtr.values[:,np.newaxis]
        follower = np.any((ev_time > muon_time) & (ev_time <= (muon_time + 200e3)),axis=0)
        df_ev = df_ev[~follower]

    print("number of events after muon follower cut = %i" % len(df_ev))

    # perform prompt event data cleaning
    df_ev = df_ev[df_ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK) == 0]

    print("number of events after data cleaning = %i" % len(df_ev))

    # select prompt events
    # FIXME: how to deal with two prompt events one after another
    prompt = df_ev[df_ev.nhit >= 100]

    print("number of events after prompt nhit cut = %i" % len(prompt))

    if prompt.size:
        # FIXME: need to deal with 50 MHz clock rollover
        # neutron followers have to obey stricter set of data cleaning cuts
        neutron = df_ev[df_ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == DC_FTS]
        neutron = neutron[~np.isnan(neutron.ftpx) & ~np.isnan(neutron.rsp_energy)]
        r = np.sqrt(neutron.ftpx**2 + neutron.ftpy**2 + neutron.ftpz**2)
        neutron = neutron[r < AV_RADIUS]
        neutron = neutron[neutron.rsp_energy > 4.0]

        # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
        #
        # Rows are prompt events, columns are neutron candidates, so reducing
        # over axis 1 flags every prompt event which has a follower.
        neutron_time = neutron.gtr.values
        prompt_time = prompt.gtr.values[:,np.newaxis]
        has_follower = np.any((neutron_time > prompt_time + 20e3) & (neutron_time < prompt_time + 250e6),axis=1)
        df_ev = prompt[~has_follower]
    else:
        df_ev = prompt

    print("number of events after neutron follower cut = %i" % len(df_ev))

    df = pd.merge(df,df_ev,how='inner',on=['run','gtid'])

    # get rid of events which don't have a fit
    nan = np.isnan(df.fmin.values)
    df = df[~nan]

    if np.count_nonzero(nan):
        print("skipping %i events because they are missing fit information!" % np.count_nonzero(nan),file=sys.stderr)

    # get the best fit
    df = df.sort_values('fmin').groupby(['run','gtid']).first()

    # require r < 6 meters
    df = df[np.sqrt(df.x.values**2 + df.y.values**2 + df.z.values**2) < AV_RADIUS]

    print("number of events after radius cut = %i" % len(df))

    # Note: Need to design and apply a psi based cut here, and apply the muon
    # and neutron follower cuts.

    # Position of each particle id combination in the 3x4 grid of subplots.
    subplot_index = {
        20: 1,
        22: 2,
        2020: 5,
        2022: 6,
        2222: 7,
        202020: 9,
        202022: 10,
        202222: 11,
        222222: 12,
    }

    # groupby() already yields the groups sorted by key.
    for fit_id, df_id in df.groupby('id'):
        if fit_id not in subplot_index:
            # Without a subplot of its own the histogram would be drawn on
            # top of whichever subplot was selected last.
            print("skipping unknown particle id combination %i" % fit_id,file=sys.stderr)
            continue

        plt.subplot(3,4,subplot_index[fit_id])
        plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
        plt.xlabel("Energy (MeV)")
        plt.title(str(fit_id))

    plt.tight_layout()
    plt.show()
|