aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xutils/chi2309
-rwxr-xr-xutils/plot-energy2
-rwxr-xr-xutils/sddm/plot_energy.py5
3 files changed, 315 insertions, 1 deletions
diff --git a/utils/chi2 b/utils/chi2
new file mode 100755
index 0000000..8c9e0e3
--- /dev/null
+++ b/utils/chi2
@@ -0,0 +1,309 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+Script to plot final fit results along with sidebands for the dark matter
+analysis. To run it just run:
+
+ $ ./plot-energy [list of fit results]
+
+Currently it will plot energy distributions for external muons, michel
+electrons, atmospheric events with neutron followers, and prompt signal like
+events. Each of these plots will have a different subplot for the particle ID
+of the best fit, i.e. single electron, single muon, double electron, electron +
+muon, or double muon.
+"""
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr, poisson
+from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
+from scipy.special import spence
+from itertools import izip_longest
+
+particle_id = {20: 'e', 22: r'\mu'}
+
+def plot_hist2(df, muons=False, norm=1.0, color=None):
+ for id, df_id in sorted(df.groupby('id')):
+ if id == 20:
+ plt.subplot(2,3,1)
+ elif id == 22:
+ plt.subplot(2,3,2)
+ elif id == 2020:
+ plt.subplot(2,3,4)
+ elif id == 2022:
+ plt.subplot(2,3,5)
+ elif id == 2222:
+ plt.subplot(2,3,6)
+
+ if muons:
+ plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
+ plt.xlabel("log10(Energy (GeV))")
+ else:
+ bins = np.logspace(np.log10(20),np.log10(10e3),21)
+ plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color)
+ plt.gca().set_xscale("log")
+ plt.xlabel("Energy (MeV)")
+ plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
+
+ if len(df):
+ plt.tight_layout()
+
+def plot_hist(df, muons=False):
+ for id, df_id in sorted(df.groupby('id')):
+ if id == 20:
+ plt.subplot(3,4,1)
+ elif id == 22:
+ plt.subplot(3,4,2)
+ elif id == 2020:
+ plt.subplot(3,4,5)
+ elif id == 2022:
+ plt.subplot(3,4,6)
+ elif id == 2222:
+ plt.subplot(3,4,7)
+ elif id == 202020:
+ plt.subplot(3,4,9)
+ elif id == 202022:
+ plt.subplot(3,4,10)
+ elif id == 202222:
+ plt.subplot(3,4,11)
+ elif id == 222222:
+ plt.subplot(3,4,12)
+
+ if muons:
+ plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
+ plt.xlabel("log10(Energy (GeV))")
+ else:
+ plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
+ plt.xlabel("Energy (MeV)")
+ plt.title(str(id))
+
+ if len(df):
+ plt.tight_layout()
+
+def make_nll(data_hists, mc_hists):
+ def nll(x, grad=None):
+ nll = 0
+ for id in data_hists:
+ N = data_hists[id].sum()
+ nll -= poisson.logpmf(N,mc_hists[id].sum()*x[0])
+ if N > 0:
+ p = mc_hists[id]/mc_hists[id].sum()
+ # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+ p += 1e-10
+ p /= p.sum()
+ nll -= multinomial.logpmf(data_hists[id],N,p)
+ return nll
+ return nll
+
+def chi2(samples,expected):
+ return np.sum((samples-expected)**2/expected,axis=-1)
+
+def get_multinomial_prob(data, mc, size=1000):
+ N = mc.sum()
+ # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+ mc = mc + 1e-10
+ p = mc/mc.sum()
+ chi2_data = chi2(data,mc)
+ chi2_samples = []
+ for i in range(size):
+ n = np.random.poisson(N)
+ samples = multinomial.rvs(n,p)
+ chi2_samples.append(chi2(samples,mc))
+ chi2_samples = np.array(chi2_samples)
+ return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)
+
+if __name__ == '__main__':
+ import argparse
+ import numpy as np
+ import pandas as pd
+ import sys
+ import h5py
+ from sddm.plot_energy import *
+ from sddm.plot import *
+ from sddm import setup_matplotlib
+ import nlopt
+
+ parser = argparse.ArgumentParser("plot fit results")
+ parser.add_argument("filenames", nargs='+', help="input files")
+ parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
+ parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
+ parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
+ args = parser.parse_args()
+
+ setup_matplotlib(args.save)
+
+ import matplotlib.pyplot as plt
+
+ ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh)
+ ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
+
+ ev = ev.reset_index()
+ ev_mc = ev_mc.reset_index()
+
+ # First, do basic data cleaning which is done for all events.
+ ev = ev[ev.signal]
+ ev_mc = ev_mc[ev_mc.signal]
+
+ # 00-orphan cut
+ ev = ev[(ev.gtid & 0xff) != 0]
+ ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0]
+
+ print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))
+
+ # remove events 200 microseconds after a muon
+ ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
+
+ # Get rid of muon events in our main event sample
+ ev = ev[(ev.dc & DC_MUON) == 0]
+
+ ev = ev[~np.isnan(ev.fmin)]
+ ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
+
+ prompt = ev[ev.prompt & ~ev.atm & ~ev.muon]
+ atm = ev[ev.atm]
+
+ prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon]
+ atm_mc = ev_mc[ev_mc.atm]
+
+ # require (r/r_psup)^3 < 0.9
+ prompt = prompt[prompt.r_psup < 0.9]
+ atm = atm[atm.r_psup < 0.9]
+
+ prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9]
+ atm_mc = atm_mc[atm_mc.r_psup < 0.9]
+
+ # require psi < 6
+ prompt = prompt[prompt.psi < 6]
+ atm = atm[atm.psi < 6]
+
+ prompt_mc = prompt_mc[prompt_mc.psi < 6]
+ atm_mc = atm_mc[atm_mc.psi < 6]
+
+ print("number of events after psi cut = %i" % len(prompt))
+
+ data = prompt
+ mc = prompt_mc
+
+ bins = np.logspace(np.log10(20),np.log10(10e3),21)
+ data_hists = {}
+ mc_hists = {}
+ for id in (20,22,2020,2022,2222):
+ df_id = data[data.id == id]
+ if len(df_id):
+ data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ else:
+ data_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+ for id in (20,22,2020,2022,2222):
+ df_id = mc[mc.id == id]
+ if len(df_id):
+ mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ else:
+ mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+ data_atm_hists = {}
+ mc_atm_hists = {}
+ for id in (20,22,2020,2022,2222):
+ df_id = atm[atm.id == id]
+ if len(df_id):
+ data_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ else:
+ data_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+ for id in (20,22,2020,2022,2222):
+ df_id = atm_mc[atm_mc.id == id]
+ if len(df_id):
+ mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ else:
+ mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
+ nll = make_nll(data_hists,mc_hists)
+
+ x0 = np.array([1.0])
+ opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
+ opt.set_min_objective(nll)
+ low = np.array([1e-10])
+ high = np.array([10])
+ opt.set_lower_bounds(low)
+ opt.set_upper_bounds(high)
+ opt.set_ftol_abs(1e-10)
+ opt.set_initial_step([0.01])
+
+ xopt = opt.optimize(x0)
+ nll_xopt = nll(xopt)
+ print("nll(xopt) = ", nll(xopt))
+
+ prob = {}
+ for id in (20,22,2020,2022,2222):
+ prob[id] = get_multinomial_prob(data_hists[id],mc_hists[id]*xopt[0])
+ print(id, prob[id])
+
+ prob_atm = {}
+ for id in (20,22,2020,2022,2222):
+ prob_atm[id] = get_multinomial_prob(data_atm_hists[id],mc_atm_hists[id]*xopt[0])
+ print(id, prob_atm[id])
+
+ handles = [Line2D([0], [0], color='C0'),
+ Line2D([0], [0], color='C1')]
+ labels = ('Data','Monte Carlo')
+
+ fig = plt.figure()
+ plot_hist2(prompt,color='C0')
+ plot_hist2(prompt_mc,norm=xopt[0],color='C1')
+ for id in (20,22,2020,2022,2222):
+ if id == 20:
+ plt.subplot(2,3,1)
+ elif id == 22:
+ plt.subplot(2,3,2)
+ elif id == 2020:
+ plt.subplot(2,3,4)
+ elif id == 2022:
+ plt.subplot(2,3,5)
+ elif id == 2222:
+ plt.subplot(2,3,6)
+ plt.text(0.95,0.95,"p = %.2f" % prob[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
+ fig.legend(handles,labels,loc='upper right')
+
+ despine(fig,trim=True)
+ if args.save:
+ plt.savefig("chi2_prompt.pdf")
+ plt.savefig("chi2_prompt.eps")
+ else:
+ plt.suptitle("Without Neutron Follower")
+ fig = plt.figure()
+ plot_hist2(atm,color='C0')
+ plot_hist2(atm_mc,norm=xopt[0],color='C1')
+ for id in (20,22,2020,2022,2222):
+ if id == 20:
+ plt.subplot(2,3,1)
+ elif id == 22:
+ plt.subplot(2,3,2)
+ elif id == 2020:
+ plt.subplot(2,3,4)
+ elif id == 2022:
+ plt.subplot(2,3,5)
+ elif id == 2222:
+ plt.subplot(2,3,6)
+ plt.text(0.95,0.95,"p = %.2f" % prob_atm[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes)
+ fig.legend(handles,labels,loc='upper right')
+
+ despine(fig,trim=True)
+ if args.save:
+ plt.savefig("chi2_atm.pdf")
+ plt.savefig("chi2_atm.eps")
+ else:
+ plt.suptitle("With Neutron Follower")
+ plt.show()
diff --git a/utils/plot-energy b/utils/plot-energy
index 4568bcc..ef34f97 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -295,6 +295,8 @@ if __name__ == '__main__':
stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx)
stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx']
+ print(stopping_muons[['r','r_psup','dx','T_dx','energy1','r_michel','x','y','z','x_michel','y_michel','z_michel','ftp_r_michel']])
+
fig = plt.figure()
plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step')
despine(fig,trim=True)
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 1a55a85..4d166e1 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -414,11 +414,14 @@ def michel_spectrum(T):
y *= 2*MUON_MASS
return y
-def get_events(filenames, merge_fits=False):
+def get_events(filenames, merge_fits=False, nhit_thresh=None):
ev = pd.concat([read_hdf(filename, "ev") for filename in filenames],ignore_index=True)
fits = pd.concat([read_hdf(filename, "fits") for filename in filenames],ignore_index=True)
rhdr = pd.concat([read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True)
+ if nhit_thresh is not None:
+ ev = ev[ev.nhit_cal >= nhit_thresh]
+
if len(ev) == 0:
if merge_fits:
return ev