1 files changed, 143 insertions, 14 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 66691e0..6044adb 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -15,15 +15,12 @@
 # this program. If not, see <https://www.gnu.org/licenses/>.
 
 from __future__ import print_function, division
-import yaml
-try:
-    from yaml import CLoader as Loader
-except ImportError:
-    from yaml.loader import SafeLoader as Loader
 import numpy as np
-from scipy.stats import iqr
+from scipy.stats import iqr, poisson
 from matplotlib.lines import Line2D
 
+PSUP_RADIUS = 840.0
+
 # from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python
 class bcolors:
     HEADER = '\033[95m'
@@ -41,6 +38,11 @@ class bcolors:
 import matplotlib
 matplotlib.use("Qt5Agg")
 
+font = {'family':'serif', 'serif': ['computer modern roman']}
+matplotlib.rc('font',**font)
+
+matplotlib.rc('text', usetex=True)
+
 SNOMAN_MASS = {
     20: 0.511,
     21: 0.511,
@@ -64,7 +66,7 @@ DC_FTS            = 0x200
 DC_ITC            = 0x400
 DC_BREAKDOWN      = 0x800
 
-def plot_hist(df, title=None):
+def plot_hist(df, title=None, muons=False):
     for id, df_id in sorted(df.groupby('id')):
         if id == 20:
             plt.subplot(3,4,1)
@@ -85,8 +87,12 @@ def plot_hist(df, title=None):
         elif id == 222222:
             plt.subplot(3,4,12)
 
-        plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
-        plt.xlabel("Energy (MeV)")
+        if muons:
+            plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
+            plt.xlabel("log10(Energy (GeV))")
+        else:
+            plt.hist(df_id.ke.values, bins=np.linspace(20,1000e3,100), histtype='step')
+            plt.xlabel("Energy (MeV)")
         plt.title(str(id))
 
     if title:
@@ -259,12 +265,71 @@ def gtid_sort(ev, first_gtid):
 
     if run not in first_gtid:
         print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
-        first_gtid[run] = ev.gtid[0]
+        first_gtid[run] = ev.gtid.iloc[0]
 
     ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
 
     return ev
 
+def prompt_event(ev):
+    ev['prompt'] = (ev.nhit >= 100)
+    ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
+    return ev
+
+def plot_corner_plot(ev, title, save=None):
+    variables = ['r_psup','psi','z','udotr']
+    labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$']
+    limits = [(0,1),(0,10),(-840,840),(-1,1)]
+    cuts = [0.9,6,0,-0.5]
+
+    ev = ev.dropna(subset=variables)
+
+    f = plt.figure(figsize=(8,8))
+    for i in range(len(variables)):
+        for j in range(len(variables)):
+            if j > i:
+                continue
+            ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1)
+            if i == j:
+                plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step')
+                plt.gca().set_xlim(limits[i])
+            else:
+                p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/len(ev)
+                p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/len(ev)
+                p_lolo = p_i_lo*p_j_lo
+                p_lohi = p_i_lo*(1-p_j_lo)
+                p_hilo = (1-p_i_lo)*p_j_lo
+                p_hihi = (1-p_i_lo)*(1-p_j_lo)
+                n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j]))
+                n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j]))
+                n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] < cuts[j]))
+                n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j]))
+                n = len(ev)
+                observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi])
+                expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi])
+                psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum()
+                psi /= np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum())
+                plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5)
+                plt.gca().set_xlim(limits[j])
+                plt.gca().set_ylim(limits[i])
+                plt.title(r"$\psi = %.1f$" % psi)
+            if i == len(variables) - 1:
+                plt.xlabel(labels[j])
+            else:
+                plt.setp(ax.get_xticklabels(),visible=False)
+            if j == 0:
+                plt.ylabel(labels[i])
+            else:
+                plt.setp(ax.get_yticklabels(),visible=False)
+            plt.axvline(cuts[j],color='k',ls='--',alpha=0.5)
+            if i != j:
+                plt.axhline(cuts[i],color='k',ls='--',alpha=0.5)
+
+    if save:
+        plt.savefig(save)
+
+    plt.suptitle(title)
+
 if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
@@ -275,6 +340,7 @@ if __name__ == '__main__':
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
+    parser.add_argument("--dc", action='store_true', default=False, help="plot corner plots for backgrounds")
     args = parser.parse_args()
 
     ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
@@ -354,6 +420,11 @@ if __name__ == '__main__':
     # different parameters.
     fits['w'] = fits['n']*np.log(0.1*0.001) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
 
+    # Apply a fudge factor to the Ockham factor of 100 for each extra particle
+    # FIXME: I chose 100 a while ago but didn't really investigate what the
+    # optimal value was or exactly why it was needed. Should do this.
+    fits['w'] -= fits['n']*100
+
     # Note: we index on the left hand site with loc to avoid a copy error
     #
     # See https://www.dataquest.io/blog/settingwithcopywarning/
@@ -362,12 +433,19 @@ if __name__ == '__main__':
 
     fits['fmin'] = fits['fmin'] - fits['w']
 
-    fits['psi'] /= fits.merge(ev,on=['run','gtid'])['nhit']
-    fits['ke'] = fits['energy1']
+    # See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge
+    # for how to properly divide the psi column by nhit_cal which is in the ev
+    # dataframe before we actually merge
+    fits['psi'] /= fits.reset_index().merge(ev,on=['run','gtid']).set_index('index')['nhit_cal']
+    fits.loc[fits['n'] == 1,'ke'] = fits['energy1']
+    fits.loc[fits['n'] == 2,'ke'] = fits['energy1'] + fits['energy2']
+    fits.loc[fits['n'] == 3,'ke'] = fits['energy1'] + fits['energy2'] + fits['energy3']
     fits['id'] = fits['id1']
     fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
     fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
     fits['theta'] = fits['theta1']
+    fits['r'] = np.sqrt(fits.x**2 + fits.y**2 + fits.z**2)
+    fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3
 
     print("number of events = %i" % len(ev))
 
@@ -384,8 +462,7 @@ if __name__ == '__main__':
     # the *second* event after the breakdown didn't get tagged correctly. If we
     # apply the data cleaning cuts first and then tag prompt events then this
     # event will get tagged as a prompt event.
-    ev['prompt'] = (ev.nhit >= 100)
-    ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
+    ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True)
 
     print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
 
@@ -398,6 +475,58 @@ if __name__ == '__main__':
     # retrigger cut
     ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True)
 
+    ev = ev[ev.prompt]
+
+    if args.dc:
+        ev.set_index(['run','gtid'])
+
+        ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
+        ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)]
+        ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0)
+        ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0)
+
+        ev['cos_theta'] = np.cos(ev['theta1'])
+        ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \
+                      np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \
+                      np.cos(ev_single_particle.theta1)*ev_single_particle.z
+        ev['udotr'] /= ev.r
+
+        flashers = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN) == DC_FLASHER]
+        muon = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN | DC_MUON) == DC_MUON]
+        neck = ev[(ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_NECK)) == DC_NECK]
+        noise = ev[(ev.dc & (DC_ITC | DC_QVNHIT | DC_JUNK | DC_CRATE_ISOTROPY)) != 0]
+        breakdown = ev[ev.nhit >= 1000]
+        breakdown = breakdown[breakdown.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_NECK | DC_ITC) == 0]
+        breakdown = breakdown[breakdown.dc & (DC_FLASHER | DC_BREAKDOWN) != 0]
+        signal = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN | DC_MUON) == 0]
+
+        with pd.option_context('display.max_rows', None, 'display.max_columns', None):
+            print("Noise events")
+            print(noise[['psi','x','y','z','id1','id2']])
+            print("Muons")
+            print(muon[['psi','r','id1','id2','id3','energy1','energy2','energy3']])
+            print("Neck")
+            print(neck[neck.psi < 6][['psi','r','id1','cos_theta']])
+            print("Flashers")
+            print(flashers[flashers.udotr > 0])
+            print("Signal")
+            print(signal)
+
+        # save as PDF b/c EPS doesn't support alpha values
+        plot_corner_plot(breakdown,"Breakdowns",save="breakdown_corner_plot.pdf")
+        plot_corner_plot(muon,"Muons",save='muon_corner_plot.pdf')
+        plot_corner_plot(flashers,"Flashers",save="flashers_corner_plot.pdf")
+        plot_corner_plot(neck,"Neck",save="neck_corner_plot.pdf")
+        plot_corner_plot(noise,"Noise",save="noise_corner_plot.pdf")
+        plot_corner_plot(signal,"Signal",save="signal_corner_plot.pdf")
+
+        plt.figure()
+        plot_hist(flashers,"Flashers")
+        plt.figure()
+        plot_hist(muon,"Muons",muons=True)
+        plt.show()
+        sys.exit(0)
+
     # First, do basic data cleaning which is done for all events.
     ev = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN) == 0]