3 files changed, 315 insertions, 1 deletions
diff --git a/utils/chi2 b/utils/chi2
new file mode 100755
index 0000000..8c9e0e3
--- /dev/null
+++ b/utils/chi2
@@ -0,0 +1,309 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+Script to plot final fit results along with sidebands for the dark matter
+analysis. To run it just run:
+
+    $ ./plot-energy [list of fit results]
+
+Currently it will plot energy distributions for external muons, michel
+electrons, atmospheric events with neutron followers, and prompt signal like
+events. Each of these plots will have a different subplot for the particle ID
+of the best fit, i.e. single electron, single muon, double electron, electron +
+muon, or double muon.
+"""
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr, poisson
+from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
+from scipy.special import spence
+from itertools import izip_longest
+
+particle_id = {20: 'e', 22: r'\mu'}
+
+def plot_hist2(df, muons=False, norm=1.0, color=None):
+    for id, df_id in sorted(df.groupby('id')):
+        if id == 20:
+            plt.subplot(2,3,1)
+        elif id == 22:
+            plt.subplot(2,3,2)
+        elif id == 2020:
+            plt.subplot(2,3,4)
+        elif id == 2022:
+            plt.subplot(2,3,5)
+        elif id == 2222:
+            plt.subplot(2,3,6)
+
+        if muons:
+            plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
+            plt.xlabel("log10(Energy (GeV))")
+        else:
+            bins = np.logspace(np.log10(20),np.log10(10e3),21)
+            plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color)
+            plt.gca().set_xscale("log")
+            plt.xlabel("Energy (MeV)")
+        plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
+
+    if len(df):
+        plt.tight_layout()
+
+def plot_hist(df, muons=False):
+    for id, df_id in sorted(df.groupby('id')):
+        if id == 20:
+            plt.subplot(3,4,1)
+        elif id == 22:
+            plt.subplot(3,4,2)
+        elif id == 2020:
+            plt.subplot(3,4,5)
+        elif id == 2022:
+            plt.subplot(3,4,6)
+        elif id == 2222:
+            plt.subplot(3,4,7)
+        elif id == 202020:
+            plt.subplot(3,4,9)
+        elif id == 202022:
+            plt.subplot(3,4,10)
+        elif id == 202222:
+            plt.subplot(3,4,11)
+        elif id == 222222:
+            plt.subplot(3,4,12)
+
+        if muons:
+            plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
+            plt.xlabel("log10(Energy (GeV))")
+        else:
+            plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
+            plt.xlabel("Energy (MeV)")
+        plt.title(str(id))
+
+    if len(df):
+        plt.tight_layout()
+
+def make_nll(data_hists, mc_hists):
+    def nll(x, grad=None):
+        nll = 0
+        for id in data_hists:
+            N = data_hists[id].sum()
+            nll -= poisson.logpmf(N,mc_hists[id].sum()*x[0])
+            if N > 0:
+                p = mc_hists[id]/mc_hists[id].sum()
+                # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+                p += 1e-10
+                p /= p.sum()
+                nll -= multinomial.logpmf(data_hists[id],N,p)
+        return nll
+    return nll
+
+def chi2(samples,expected):
+    return np.sum((samples-expected)**2/expected,axis=-1)
+
+def get_multinomial_prob(data, mc, size=1000):
+    N = mc.sum()
+    # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+    mc = mc + 1e-10
+    p = mc/mc.sum()
+    chi2_data = chi2(data,mc)
+    chi2_samples = []
+    for i in range(size):
+        n = np.random.poisson(N)
+        samples = multinomial.rvs(n,p)
+        chi2_samples.append(chi2(samples,mc))
+    chi2_samples = np.array(chi2_samples)
+    return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)
+
+if __name__ == '__main__':
+    import argparse
+    import numpy as np
+    import pandas as pd
+    import sys
+    import h5py
+    from sddm.plot_energy import *
+    from sddm.plot import *
+    from sddm import setup_matplotlib
+    import nlopt
+
+    parser = argparse.ArgumentParser("plot fit results")
+    parser.add_argument("filenames", nargs='+', help="input files")
+    parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
+    parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
+    parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
+    args = parser.parse_args()
+
+    setup_matplotlib(args.save)
+
+    import matplotlib.pyplot as plt
+
+    ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh)
+    ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
+
+    ev = ev.reset_index()
+    ev_mc = ev_mc.reset_index()
+
+    # First, do basic data cleaning which is done for all events.
+    ev = ev[ev.signal]
+    ev_mc = ev_mc[ev_mc.signal]
+
+    # 00-orphan cut
+    ev = ev[(ev.gtid & 0xff) != 0]
+    ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0]
+
+    print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))
+
+    # remove events 200 microseconds after a muon
+    ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
+
+    # Get rid of muon events in our main event sample
+    ev = ev[(ev.dc & DC_MUON) == 0]
+
+    ev = ev[~np.isnan(ev.fmin)]
+    ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
+
+    prompt = ev[ev.prompt & ~ev.atm & ~ev.muon]
+    atm = ev[ev.atm]
+
+    prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon]
+    atm_mc = ev_mc[ev_mc.atm]
+
+    # require (r/r_psup)^3 < 0.9
+    prompt = prompt[prompt.r_psup < 0.9]
+    atm = atm[atm.r_psup < 0.9]
+
+    prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9]
+    atm_mc = atm_mc[atm_mc.r_psup < 0.9]
+
+    # require psi < 6
+    prompt = prompt[prompt.psi < 6]
+    atm = atm[atm.psi < 6]
+
+    prompt_mc = prompt_mc[prompt_mc.psi < 6]
+    atm_mc = atm_mc[atm_mc.psi < 6]
+
+    print("number of events after psi cut = %i" % len(prompt))
+
+    data = prompt
+    mc = prompt_mc
+
+    bins = np.logspace(np.log10(20),np.log10(10e3),21)
+    data_hists = {}
+    mc_hists = {}
+    for id in (20,22,2020,2022,2222):
+        df_id = data[data.id == id]
+        if len(df_id):
+            data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+        else:
+            data_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+    for id in (20,22,2020,2022,2222):
+        df_id = mc[mc.id == id]
+        if len(df_id):
+            mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+        else:
+            mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+    data_atm_hists = {}
+    mc_atm_hists = {}
+    for id in (20,22,2020,2022,2222):
+        df_id = atm[atm.id == id]
+        if len(df_id):
+            data_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+        else:
+            data_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+    for id in (20,22,2020,2022,2222):
+        df_id = atm_mc[atm_mc.id == id]
+        if len(df_id):
+            mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+        else:
+            mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+    nll = make_nll(data_hists,mc_hists)
+
+    x0 = np.array([1.0])
+    opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
+    opt.set_min_objective(nll)
+    low = np.array([1e-10])
+    high = np.array([10])
+    opt.set_lower_bounds(low)
+    opt.set_upper_bounds(high)
+    opt.set_ftol_abs(1e-10)
+    opt.set_initial_step([0.01])
+
+    xopt = opt.optimize(x0)
+    nll_xopt = nll(xopt)
+    print("nll(xopt) = ", nll(xopt))
+
+    prob = {}
+    for id in (20,22,2020,2022,2222):
+        prob[id] = get_multinomial_prob(data_hists[id],mc_hists[id]*xopt[0])
+        print(id, prob[id])
+
+    prob_atm = {}
+    for id in (20,22,2020,2022,2222):
+        prob_atm[id] = get_multinomial_prob(data_atm_hists[id],mc_atm_hists[id]*xopt[0])
+        print(id, prob_atm[id])
+
+    handles = [Line2D([0], [0], color='C0'),
+               Line2D([0], [0], color='C1')]
+    labels = ('Data','Monte Carlo')
+
+    fig = plt.figure()
+    plot_hist2(prompt,color='C0')
+    plot_hist2(prompt_mc,norm=xopt[0],color='C1')
+    for id in (20,22,2020,2022,2222):
+        if id == 20:
+            plt.subplot(2,3,1)
+        elif id == 22:
+            plt.subplot(2,3,2)
+        elif id == 2020:
+            plt.subplot(2,3,4)
+        elif id == 2022:
+            plt.subplot(2,3,5)
+        elif id == 2222:
+            plt.subplot(2,3,6)
+        plt.text(0.95,0.95,"p = %.2f" % prob[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
+    fig.legend(handles,labels,loc='upper right')
+
+    despine(fig,trim=True)
+    if args.save:
+        plt.savefig("chi2_prompt.pdf")
+        plt.savefig("chi2_prompt.eps")
+    else:
+        plt.suptitle("Without Neutron Follower")
+    fig = plt.figure()
+    plot_hist2(atm,color='C0')
+    plot_hist2(atm_mc,norm=xopt[0],color='C1')
+    for id in (20,22,2020,2022,2222):
+        if id == 20:
+            plt.subplot(2,3,1)
+        elif id == 22:
+            plt.subplot(2,3,2)
+        elif id == 2020:
+            plt.subplot(2,3,4)
+        elif id == 2022:
+            plt.subplot(2,3,5)
+        elif id == 2222:
+            plt.subplot(2,3,6)
+        plt.text(0.95,0.95,"p = %.2f" % prob_atm[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
+    fig.legend(handles,labels,loc='upper right')
+
+    despine(fig,trim=True)
+    if args.save:
+        plt.savefig("chi2_atm.pdf")
+        plt.savefig("chi2_atm.eps")
+    else:
+        plt.suptitle("With Neutron Follower")
+    plt.show()
diff --git a/utils/plot-energy b/utils/plot-energy
index 4568bcc..ef34f97 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -295,6 +295,8 @@ if __name__ == '__main__':
         stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx)
         stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx']
 
+        print(stopping_muons[['r','r_psup','dx','T_dx','energy1','r_michel','x','y','z','x_michel','y_michel','z_michel','ftp_r_michel']])
+
         fig = plt.figure()
         plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step')
         despine(fig,trim=True)
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 1a55a85..4d166e1 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -414,11 +414,14 @@ def michel_spectrum(T):
     y *= 2*MUON_MASS
     return y
 
-def get_events(filenames, merge_fits=False):
+def get_events(filenames, merge_fits=False, nhit_thresh=None):
     ev = pd.concat([read_hdf(filename, "ev") for filename in filenames],ignore_index=True)
     fits = pd.concat([read_hdf(filename, "fits") for filename in filenames],ignore_index=True)
     rhdr = pd.concat([read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True)
 
+    if nhit_thresh is not None:
+        ev = ev[ev.nhit_cal >= nhit_thresh]
+
     if len(ev) == 0:
         if merge_fits:
             return ev