diff options
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/chi2 | 309 | ||||
-rwxr-xr-x | utils/plot-energy | 2 | ||||
-rwxr-xr-x | utils/sddm/plot_energy.py | 5 |
3 files changed, 315 insertions, 1 deletions
diff --git a/utils/chi2 b/utils/chi2 new file mode 100755 index 0000000..8c9e0e3 --- /dev/null +++ b/utils/chi2 @@ -0,0 +1,309 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. +""" +Script to plot final fit results along with sidebands for the dark matter +analysis. To run it just run: + + $ ./plot-energy [list of fit results] + +Currently it will plot energy distributions for external muons, michel +electrons, atmospheric events with neutron followers, and prompt signal like +events. Each of these plots will have a different subplot for the particle ID +of the best fit, i.e. single electron, single muon, double electron, electron + +muon, or double muon. +""" +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr, poisson +from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta +from scipy.special import spence +from itertools import izip_longest + +particle_id = {20: 'e', 22: r'\mu'} + +def plot_hist2(df, muons=False, norm=1.0, color=None): + for id, df_id in sorted(df.groupby('id')): + if id == 20: + plt.subplot(2,3,1) + elif id == 22: + plt.subplot(2,3,2) + elif id == 2020: + plt.subplot(2,3,4) + elif id == 2022: + plt.subplot(2,3,5) + elif id == 2222: + plt.subplot(2,3,6) + + if muons: + plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step') + plt.xlabel("log10(Energy (GeV))") + else: + bins = np.logspace(np.log10(20),np.log10(10e3),21) + plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color) + plt.gca().set_xscale("log") + plt.xlabel("Energy (MeV)") + plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$') + + if len(df): + plt.tight_layout() + +def plot_hist(df, muons=False): + for id, df_id in sorted(df.groupby('id')): + if id == 20: + plt.subplot(3,4,1) + elif id == 22: + plt.subplot(3,4,2) + elif id == 2020: + plt.subplot(3,4,5) + elif id == 2022: + plt.subplot(3,4,6) + elif id == 2222: + plt.subplot(3,4,7) + elif id == 202020: + plt.subplot(3,4,9) + elif id == 202022: + plt.subplot(3,4,10) + elif id == 202222: + plt.subplot(3,4,11) + elif id == 222222: + plt.subplot(3,4,12) + + if muons: + plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step') + plt.xlabel("log10(Energy (GeV))") + else: + plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') + plt.xlabel("Energy (MeV)") + plt.title(str(id)) + + if len(df): + plt.tight_layout() + +def make_nll(data_hists, mc_hists): + def nll(x, grad=None): + nll = 0 + for id in data_hists: + N = data_hists[id].sum() + nll -= poisson.logpmf(N,mc_hists[id].sum()*x[0]) + if N > 0: + p = mc_hists[id]/mc_hists[id].sum() + # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). + p += 1e-10 + p /= p.sum() + nll -= multinomial.logpmf(data_hists[id],N,p) + return nll + return nll + +def chi2(samples,expected): + return np.sum((samples-expected)**2/expected,axis=-1) + +def get_multinomial_prob(data, mc, size=1000): + N = mc.sum() + # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). + mc = mc + 1e-10 + p = mc/mc.sum() + chi2_data = chi2(data,mc) + chi2_samples = [] + for i in range(size): + n = np.random.poisson(N) + samples = multinomial.rvs(n,p) + chi2_samples.append(chi2(samples,mc)) + chi2_samples = np.array(chi2_samples) + return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples) + +if __name__ == '__main__': + import argparse + import numpy as np + import pandas as pd + import sys + import h5py + from sddm.plot_energy import * + from sddm.plot import * + from sddm import setup_matplotlib + import nlopt + + parser = argparse.ArgumentParser("plot fit results") + parser.add_argument("filenames", nargs='+', help="input files") + parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds") + parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files") + parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)") + args = parser.parse_args() + + setup_matplotlib(args.save) + + import matplotlib.pyplot as plt + + ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh) + ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh) + + ev = ev.reset_index() + ev_mc = ev_mc.reset_index() + + # First, do basic data cleaning which is done for all events. + ev = ev[ev.signal] + ev_mc = ev_mc[ev_mc.signal] + + # 00-orphan cut + ev = ev[(ev.gtid & 0xff) != 0] + ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0] + + print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt)) + + # remove events 200 microseconds after a muon + ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut) + + # Get rid of muon events in our main event sample + ev = ev[(ev.dc & DC_MUON) == 0] + + ev = ev[~np.isnan(ev.fmin)] + ev_mc = ev_mc[~np.isnan(ev_mc.fmin)] + + prompt = ev[ev.prompt & ~ev.atm & ~ev.muon] + atm = ev[ev.atm] + + prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon] + atm_mc = ev_mc[ev_mc.atm] + + # require (r/r_psup)^3 < 0.9 + prompt = prompt[prompt.r_psup < 0.9] + atm = atm[atm.r_psup < 0.9] + + prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9] + atm_mc = atm_mc[atm_mc.r_psup < 0.9] + + # require psi < 6 + prompt = prompt[prompt.psi < 6] + atm = atm[atm.psi < 6] + + prompt_mc = prompt_mc[prompt_mc.psi < 6] + atm_mc = atm_mc[atm_mc.psi < 6] + + print("number of events after psi cut = %i" % len(prompt)) + + data = prompt + mc = prompt_mc + + bins = np.logspace(np.log10(20),np.log10(10e3),21) + data_hists = {} + mc_hists = {} + for id in (20,22,2020,2022,2222): + df_id = data[data.id == id] + if len(df_id): + data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] + else: + data_hists[id] = np.zeros(len(bins)-1,dtype=np.int) + + for id in (20,22,2020,2022,2222): + df_id = mc[mc.id == id] + if len(df_id): + mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] + else: + mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int) + + data_atm_hists = {} + mc_atm_hists = {} + for id in (20,22,2020,2022,2222): + df_id = atm[atm.id == id] + if len(df_id): + data_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] + else: + data_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int) + + for id in (20,22,2020,2022,2222): + df_id = atm_mc[atm_mc.id == id] + if len(df_id): + mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] + else: + mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int) + + nll = make_nll(data_hists,mc_hists) + + x0 = np.array([1.0]) + opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) + opt.set_min_objective(nll) + low = np.array([1e-10]) + high = np.array([10]) + opt.set_lower_bounds(low) + opt.set_upper_bounds(high) + opt.set_ftol_abs(1e-10) + opt.set_initial_step([0.01]) + + xopt = opt.optimize(x0) + nll_xopt = nll(xopt) + print("nll(xopt) = ", nll(xopt)) + + prob = {} + for id in (20,22,2020,2022,2222): + prob[id] = get_multinomial_prob(data_hists[id],mc_hists[id]*xopt[0]) + print(id, prob[id]) + + prob_atm = {} + for id in (20,22,2020,2022,2222): + prob_atm[id] = get_multinomial_prob(data_atm_hists[id],mc_atm_hists[id]*xopt[0]) + print(id, prob_atm[id]) + + handles = [Line2D([0], [0], color='C0'), + Line2D([0], [0], color='C1')] + labels = ('Data','Monte Carlo') + + fig = plt.figure() + plot_hist2(prompt,color='C0') + plot_hist2(prompt_mc,norm=xopt[0],color='C1') + for id in (20,22,2020,2022,2222): + if id == 20: + plt.subplot(2,3,1) + elif id == 22: + plt.subplot(2,3,2) + elif id == 2020: + plt.subplot(2,3,4) + elif id == 2022: + plt.subplot(2,3,5) + elif id == 2222: + plt.subplot(2,3,6) + plt.text(0.95,0.95,"p = %.2f" % prob[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) + fig.legend(handles,labels,loc='upper right') + + despine(fig,trim=True) + if args.save: + plt.savefig("chi2_prompt.pdf") + plt.savefig("chi2_prompt.eps") + else: + plt.suptitle("Without Neutron Follower") + fig = plt.figure() + plot_hist2(atm,color='C0') + plot_hist2(atm_mc,norm=xopt[0],color='C1') + for id in (20,22,2020,2022,2222): + if id == 20: + plt.subplot(2,3,1) + elif id == 22: + plt.subplot(2,3,2) + elif id == 2020: + plt.subplot(2,3,4) + elif id == 2022: + plt.subplot(2,3,5) + elif id == 2222: + plt.subplot(2,3,6) + plt.text(0.95,0.95,"p = %.2f" % prob_atm[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) + fig.legend(handles,labels,loc='upper right') + + despine(fig,trim=True) + if args.save: + plt.savefig("chi2_atm.pdf") + plt.savefig("chi2_atm.eps") + else: + plt.suptitle("With Neutron Follower") + plt.show() diff --git a/utils/plot-energy b/utils/plot-energy index 4568bcc..ef34f97 100755 --- a/utils/plot-energy +++ b/utils/plot-energy @@ -295,6 +295,8 @@ if __name__ == '__main__': stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx) stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx'] + print(stopping_muons[['r','r_psup','dx','T_dx','energy1','r_michel','x','y','z','x_michel','y_michel','z_michel','ftp_r_michel']]) + fig = plt.figure() plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step') despine(fig,trim=True) diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py index 1a55a85..4d166e1 100755 --- a/utils/sddm/plot_energy.py +++ b/utils/sddm/plot_energy.py @@ -414,11 +414,14 @@ def michel_spectrum(T): y *= 2*MUON_MASS return y -def get_events(filenames, merge_fits=False): +def get_events(filenames, merge_fits=False, nhit_thresh=None): ev = pd.concat([read_hdf(filename, "ev") for filename in filenames],ignore_index=True) fits = pd.concat([read_hdf(filename, "fits") for filename in filenames],ignore_index=True) rhdr = pd.concat([read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True) + if nhit_thresh is not None: + ev = ev[ev.nhit_cal >= nhit_thresh] + if len(ev) == 0: if merge_fits: return ev |