utils/plot-energy


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243

#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import print_function, division
import yaml
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml.loader import SafeLoader as Loader
import numpy as np
from scipy.stats import iqr
from matplotlib.lines import Line2D

# on retina screens, the default plots are way too small
# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
# Qt5 will scale everything using the dpi in ~/.Xresources
import matplotlib
matplotlib.use("Qt5Agg")

# Rest masses added to the fitted kinetic energies, keyed by two-digit
# particle id. The plotting code treats id 20 as an electron and id 22 as a
# muon and labels the resulting energy axis in MeV.
# NOTE(review): ids 21 and 23 are presumably the corresponding antiparticles
# (same masses) -- confirm against the SNOMAN particle id table.
ELECTRON_MASS = 0.511
MUON_MASS = 105.658

SNOMAN_MASS = {
    20: ELECTRON_MASS,
    21: ELECTRON_MASS,
    22: MUON_MASS,
    23: MUON_MASS,
}

# Radius used for the fiducial cut on the fitted position ("r < 6 meters" in
# the selection below, so this is in centimeters).
AV_RADIUS = 600.0

# Data cleaning bitmasks: each flag is a single bit of the per-event `dc`
# word, listed here by bit position.
DC_MUON           = 1 << 0
DC_JUNK           = 1 << 1
DC_CRATE_ISOTROPY = 1 << 2
DC_QVNHIT         = 1 << 3
DC_NECK           = 1 << 4
DC_FLASHER        = 1 << 5
DC_ESUM           = 1 << 6
DC_OWL            = 1 << 7
DC_OWL_TRIGGER    = 1 << 8
DC_FTS            = 1 << 9

def plot_hist(x, label=None):
    """Draw a step histogram of `x` on the current matplotlib axes.

    The bin width is chosen with the Freedman-Diaconis rule, see
    https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule, and at
    least 10 bin edges are always used.

    Parameters:
        x: non-empty 1D sequence of numbers to histogram.
        label: optional legend label passed through to `plt.hist`.

    Raises:
        ValueError: if `x` is empty (raised by `np.min`).
    """
    # Imported here because the module only binds `plt` inside the
    # `__main__` guard; this keeps the function usable on its own.
    import matplotlib.pyplot as plt

    lo, hi = np.min(x), np.max(x)

    # Freedman-Diaconis bin width. The explicit 1.0/3 keeps this a true
    # division regardless of the `division` future import.
    h = 2*iqr(x)/len(x)**(1.0/3)

    if np.isfinite(h) and h > 0:
        n = max(int((hi - lo)/h), 10)
    else:
        # A zero IQR (more than half of the samples identical) gives h == 0,
        # which would otherwise turn into int(inf) or int(nan) and raise.
        # Fall back to the minimum number of bin edges instead.
        n = 10

    bins = np.linspace(lo, hi, n)
    plt.hist(x, bins=bins, histtype='step', label=label)

def chunks(l, n):
    """Generate consecutive slices of `l`, each holding at most `n` items.

    Every slice has exactly `n` items except possibly the last one, which
    holds whatever is left over. `l` must support `len()` and slicing.
    """
    total = len(l)
    for start in range(0, total, n):
        stop = start + n
        yield l[start:stop]

if __name__ == '__main__':
    # Command line entry point: read one or more YAML fit result files, apply
    # the data cleaning / event selection cuts below, keep the best fit for
    # each event and plot the fitted energy for each particle id hypothesis.
    import argparse
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import sys

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    args = parser.parse_args()

    # Position of each particle id hypothesis in the 3x4 grid of subplots.
    # The combined id is the concatenation of the two-digit ids of the
    # individual particles, e.g. 2022 is the 20 + 22 hypothesis.
    subplot_index = {
        20: 1,
        22: 2,
        2020: 5,
        2022: 6,
        2222: 7,
        202020: 9,
        202022: 10,
        202222: 11,
        222222: 12,
    }

    fit_results = []

    for filename in args.filenames:
        print(filename)
        with open(filename) as f:
            # NOTE(review): when the C extension is available `Loader` is
            # yaml.CLoader, which is a full loader and can construct
            # arbitrary Python objects. Only run this on trusted fit output,
            # or switch the import to CSafeLoader.
            data = yaml.load(f.read(),Loader=Loader)

        for event in data['data']:
            for ev in event['ev']:
                if 'fit' not in ev:
                    # No fit for this event: keep a placeholder row so that
                    # the event still takes part in the muon follower cut and
                    # is counted in the "missing fit" warning below.
                    fit_results.append((
                        ev['run'],
                        ev['gtr'],
                        ev['nhit'],
                        ev['gtid'],
                        ev['dc'],
                        0,
                        np.nan,
                        np.nan,
                        np.nan,
                        np.nan,
                        np.nan,
                        np.nan,
                        np.nan))
                    continue

                # .items() instead of .iteritems() so this runs on both
                # Python 2 and Python 3.
                for fit_id, fit_result in ev['fit'].items():
                    # only the integer keys are particle id hypotheses
                    if not isinstance(fit_id, int):
                        continue

                    # FIXME: Should I just store the particle ids in the YAML
                    # output as a list of particle ids instead of a single
                    # integer?
                    #
                    # Built as a real list since len(ids) is needed below and
                    # map() returns an iterator on Python 3.
                    ids = [int(pid) for pid in chunks(str(fit_id),2)]
                    energy = 0.0
                    skip = False
                    for pid, ke in zip(ids,np.atleast_1d(fit_result['energy'])):
                        # total energy = kinetic energy + rest mass
                        energy += ke + SNOMAN_MASS[pid]

                        # This is a bit of a hack. It appears that many times
                        # the fit will actually do much better by including a
                        # very low energy electron or muon. I believe the
                        # reason for this is that of course my likelihood
                        # function is not perfect (for example, I don't include
                        # the correct angular distribution for Rayleigh
                        # scattered light), and so the fitter often wants to
                        # add a very low energy electron or muon to fix things.
                        #
                        # Ideally I would fix the likelihood function, but for
                        # now we just discard any fit results which have a very
                        # low energy electron or muon.
                        if len(ids) > 1 and pid == 20 and ke < 20.0:
                            skip = True

                        if len(ids) > 1 and pid == 22 and ke < 200.0:
                            skip = True

                    if skip:
                        continue

                    # Calculate the approximate Ockham factor.
                    # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
                    #
                    # Note: This is a really approximate form by assuming that
                    # the shape of the likelihood space is equal to the average
                    # uncertainty in the different parameters.
                    w = len(ids)*np.log(0.1*0.001) + np.sum(np.log(fit_result['energy'])) + len(ids)*np.log(1e-4/(4*np.pi))

                    fit_results.append((
                        ev['run'],
                        ev['gtr'],
                        ev['nhit'],
                        ev['gtid'],
                        ev['dc'],
                        fit_id,
                        fit_result['posx'],
                        fit_result['posy'],
                        fit_result['posz'],
                        fit_result['t0'],
                        energy,
                        fit_result['fmin'] - w,
                        fit_result['psi']/ev['nhit']))

    # create a dataframe
    # note: we have to first create a numpy structured array since there is no
    # way to pass a list of data types to the DataFrame constructor. See
    # https://github.com/pandas-dev/pandas/issues/4464
    #
    # The builtin int is used for the integer columns: np.int was only ever an
    # alias for it and has been removed from recent NumPy releases.
    array = np.array(fit_results,
                     dtype=[('run',int),         # run number
                            ('gtr',np.double),   # 50 MHz clock in ns
                            ('nhit',int),        # number of PMTs hit
                            ('gtid',int),        # gtid
                            ('dc',int),          # data cleaning word
                            ('id',int),          # particle id
                            ('x', np.double),    # x
                            ('y',np.double),     # y
                            ('z',np.double),     # z
                            ('t0',np.double),    # t0
                            ('ke',np.double),    # summed energy (kinetic + rest mass)
                            ('fmin',np.double),  # negative log likelihood minus Ockham factor
                            ('psi',np.double)]   # goodness of fit parameter (psi/nhit)
                   )
    df = pd.DataFrame.from_records(array)

    # remove events 200 microseconds after a muon
    muons = df[(df.dc & DC_MUON) != 0]

    print(len(df))
    print("nmuons = %i" % len(muons))

    df = df[(df.dc & DC_MUON) == 0]

    if muons.size:
        # FIXME: need to deal with 50 MHz clock rollover
        #
        # Compare every event time against every muon time (one row per muon)
        # and drop the events which fall in the 200e3 ns window after any of
        # them.
        event_time = df.gtr.values
        muon_time = muons.gtr.values[:,np.newaxis]
        follower = np.any((event_time > muon_time) & (event_time <= (muon_time + 200e3)),axis=0)
        df = df[~follower]

    print(len(df))

    # perform prompt event data cleaning
    # (& binds tighter than == in Python; the parentheses are for the reader)
    df = df[(df.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK)) == 0]

    print(len(df))

    # apply prompt event selection
    df = df[df.nhit >= 100]

    print(len(df))

    # get rid of events which don't have a fit
    nan = np.isnan(df.fmin.values)
    df = df[~nan]

    if np.count_nonzero(nan):
        print("skipping %i events because they are missing fit information!" % np.count_nonzero(nan),file=sys.stderr)

    # get the best fit, i.e. the row with the smallest fmin for each event
    df = df.sort_values('fmin').groupby(['run','gtid']).first()

    # require r < 6 meters
    df = df[np.sqrt(df.x.values**2 + df.y.values**2 + df.z.values**2) < AV_RADIUS]

    # Note: Need to design and apply a psi based cut here, and apply the muon
    # and neutron follower cuts.

    # groupby iterates over the groups in sorted key order by default
    for fit_id, df_id in df.groupby('id'):
        if fit_id not in subplot_index:
            # Without a panel of its own the histogram would be drawn on top
            # of whichever subplot happens to be current.
            print("skipping particle id %i because it has no subplot assigned!" % fit_id,file=sys.stderr)
            continue

        plt.subplot(3,4,subplot_index[fit_id])
        plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
        plt.xlabel("Energy (MeV)")
        plt.title(str(fit_id))

    plt.tight_layout()
    plt.show()