utils/plot-energy


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import print_function, division
import yaml
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml.loader import SafeLoader as Loader
import numpy as np
from scipy.stats import iqr
from matplotlib.lines import Line2D

# on retina screens, the default plots are way too small
# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
# Qt5 will scale everything using the dpi in ~/.Xresources
import matplotlib
matplotlib.use("Qt5Agg")

SNOMAN_MASS = {
    20: 0.511,
    21: 0.511,
    22: 105.658,
    23: 105.658
}

AV_RADIUS = 600.0

def plot_hist(x, label=None):
    # determine the bin width using the Freedman Diaconis rule
    # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
    h = 2*iqr(x)/len(x)**(1/3)
    n = max(int((np.max(x)-np.min(x))/h),10)
    bins = np.linspace(np.min(x),np.max(x),n)
    plt.hist(x, bins=bins, histtype='step', label=label)

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

if __name__ == '__main__':
    import argparse
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    args = parser.parse_args()

    fit_results = []

    for filename in args.filenames:
        print(filename)
        with open(filename) as f:
            data = yaml.load(f.read(),Loader=Loader)

        for i, event in enumerate(data['data']):
            for ev in event['ev']:
                if 'fit' not in ev:
                    continue
                for id, fit_result in [x for x in ev['fit'].iteritems() if isinstance(x[0],int)]:
                    # FIXME: Should I just store the particle ids in the YAML
                    # output as a list of particle ids instead of a single
                    # integer?
                    ids = map(int,chunks(str(id),2))
                    energy = 0.0
                    skip = False
                    for i, ke in zip(ids,np.atleast_1d(fit_result['energy'])):
                        energy += ke + SNOMAN_MASS[i]

                        # This is a bit of a hack. It appears that many times
                        # the fit will actually do much better by including a
                        # very low energy electron or muon. I believe the
                        # reason for this is that of course my likelihood
                        # function is not perfect (for example, I don't include
                        # the correct angular distribution for Rayleigh
                        # scattered light), and so the fitter often wants to
                        # add a very low energy electron or muon to fix things.
                        #
                        # Ideally I would fix the likelihood function, but for
                        # now we just discard any fit results which have a very
                        # low energy electron or muon.
                        if len(ids) > 1 and i == 20 and ke < 20.0:
                            skip = True

                        if len(ids) > 1 and i == 22 and ke < 200.0:
                            skip = True

                    if skip:
                        continue

                    # Calculate the approximate Ockham factor.
                    # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
                    #
                    # Note: This is a really approximate form by assuming that
                    # the shape of the likelihood space is equal to the average
                    # uncertainty in the different parameters.
                    w = len(ids)*np.log(0.1*0.001) + np.sum(np.log(fit_result['energy'])) + len(ids)*np.log(1e-4/(4*np.pi))

                    fit_results.append((
                        ev['run'],
                        ev['gtid'],
                        id,
                        fit_result['posx'],
                        fit_result['posy'],
                        fit_result['posz'],
                        fit_result['t0'],
                        energy,
                        fit_result['fmin'] - w,
                        fit_result['psi']/ev['nhit']))

    # create a dataframe
    # note: we have to first create a numpy structured array since there is no
    # way to pass a list of data types to the DataFrame constructor. See
    # https://github.com/pandas-dev/pandas/issues/4464
    array = np.array(fit_results,
                     dtype=[('run',np.int),      # run number
                            ('gtid',np.int),     # gtid
                            ('id',np.int),       # particle id
                            ('x', np.double),    # x
                            ('y',np.double),     # y
                            ('z',np.double),     # z
                            ('t0',np.double),    # t0
                            ('ke',np.double),    # kinetic energy
                            ('fmin',np.double),  # negative log likelihood
                            ('psi',np.double)]   # goodness of fit parameter
                   )
    df = pd.DataFrame.from_records(array)

    # get the best fit
    df = df.sort_values('fmin').groupby(['run','gtid']).first()

    # require r < 6 meters
    df = df[np.sqrt(df.x.values**2 + df.y.values**2 + df.z.values**2) < AV_RADIUS]

    # Note: Need to design and apply a psi based cut here, and apply the muon
    # and neutron follower cuts.

    for id, df_id in sorted(df.groupby('id')):
        if id == 20:
            plt.subplot(3,4,1)
        elif id == 22:
            plt.subplot(3,4,2)
        elif id == 2020:
            plt.subplot(3,4,5)
        elif id == 2022:
            plt.subplot(3,4,6)
        elif id == 2222:
            plt.subplot(3,4,7)
        elif id == 202020:
            plt.subplot(3,4,9)
        elif id == 202022:
            plt.subplot(3,4,10)
        elif id == 202222:
            plt.subplot(3,4,11)
        elif id == 222222:
            plt.subplot(3,4,12)

        plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
        plt.xlabel("Energy (MeV)")
        plt.title(str(id))

    plt.tight_layout()
    plt.show()