#!/usr/bin/env python # Copyright (c) 2019, Anthony Latorre # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . from __future__ import print_function, division import yaml try: from yaml import CLoader as Loader except ImportError: from yaml.loader import SafeLoader as Loader import numpy as np from scipy.stats import iqr from matplotlib.lines import Line2D # on retina screens, the default plots are way too small # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 # Qt5 will scale everything using the dpi in ~/.Xresources import matplotlib matplotlib.use("Qt5Agg") SNOMAN_MASS = { 20: 0.511, 21: 0.511, 22: 105.658, 23: 105.658 } AV_RADIUS = 600.0 def plot_hist(x, label=None): # determine the bin width using the Freedman Diaconis rule # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule h = 2*iqr(x)/len(x)**(1/3) n = max(int((np.max(x)-np.min(x))/h),10) bins = np.linspace(np.min(x),np.max(x),n) plt.hist(x, bins=bins, histtype='step', label=label) def chunks(l, n): """Yield successive n-sized chunks from l.""" for i in range(0, len(l), n): yield l[i:i + n] if __name__ == '__main__': import argparse import matplotlib.pyplot as plt import numpy as np import pandas as pd parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") args = parser.parse_args() fit_results = [] for filename in args.filenames: print(filename) with open(filename) as f: data = yaml.load(f.read(),Loader=Loader) for i, event in enumerate(data['data']): for ev in event['ev']: if 'fit' not in ev: continue for id, fit_result in [x for x in ev['fit'].iteritems() if isinstance(x[0],int)]: # FIXME: Should I just store the particle ids in the YAML # output as a list of particle ids instead of a single # integer? ids = map(int,chunks(str(id),2)) energy = 0.0 skip = False for i, ke in zip(ids,np.atleast_1d(fit_result['energy'])): energy += ke + SNOMAN_MASS[i] # This is a bit of a hack. It appears that many times # the fit will actually do much better by including a # very low energy electron or muon. I believe the # reason for this is that of course my likelihood # function is not perfect (for example, I don't include # the correct angular distribution for Rayleigh # scattered light), and so the fitter often wants to # add a very low energy electron or muon to fix things. # # Ideally I would fix the likelihood function, but for # now we just discard any fit results which have a very # low energy electron or muon. if len(ids) > 1 and i == 20 and ke < 20.0: skip = True if len(ids) > 1 and i == 22 and ke < 200.0: skip = True if skip: continue # Calculate the approximate Ockham factor. # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes # # Note: This is a really approximate form by assuming that # the shape of the likelihood space is equal to the average # uncertainty in the different parameters. w = len(ids)*np.log(0.1*0.001) + np.sum(np.log(fit_result['energy'])) + len(ids)*np.log(1e-4/(4*np.pi)) fit_results.append(( ev['run'], ev['gtid'], id, fit_result['posx'], fit_result['posy'], fit_result['posz'], fit_result['t0'], energy, fit_result['fmin'] - w, fit_result['psi']/ev['nhit'])) # create a dataframe # note: we have to first create a numpy structured array since there is no # way to pass a list of data types to the DataFrame constructor. See # https://github.com/pandas-dev/pandas/issues/4464 array = np.array(fit_results, dtype=[('run',np.int), # run number ('gtid',np.int), # gtid ('id',np.int), # particle id ('x', np.double), # x ('y',np.double), # y ('z',np.double), # z ('t0',np.double), # t0 ('ke',np.double), # kinetic energy ('fmin',np.double), # negative log likelihood ('psi',np.double)] # goodness of fit parameter ) df = pd.DataFrame.from_records(array) # get the best fit df = df.sort_values('fmin').groupby(['run','gtid']).first() # require r < 6 meters df = df[np.sqrt(df.x.values**2 + df.y.values**2 + df.z.values**2) < AV_RADIUS] # Note: Need to design and apply a psi based cut here, and apply the muon # and neutron follower cuts. for id, df_id in sorted(df.groupby('id')): if id == 20: plt.subplot(3,4,1) elif id == 22: plt.subplot(3,4,2) elif id == 2020: plt.subplot(3,4,5) elif id == 2022: plt.subplot(3,4,6) elif id == 2222: plt.subplot(3,4,7) elif id == 202020: plt.subplot(3,4,9) elif id == 202022: plt.subplot(3,4,10) elif id == 202222: plt.subplot(3,4,11) elif id == 222222: plt.subplot(3,4,12) plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') plt.xlabel("Energy (MeV)") plt.title(str(id)) plt.tight_layout() plt.show()