#!/usr/bin/env python # Copyright (c) 2019, Anthony Latorre # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . """ Script to plot final fit results along with sidebands for the dark matter analysis. To run it just run: $ ./plot-energy [list of fit results] Currently it will plot energy distributions for external muons, michel electrons, atmospheric events with neutron followers, and prompt signal like events. Each of these plots will have a different subplot for the particle ID of the best fit, i.e. single electron, single muon, double electron, electron + muon, or double muon. """ from __future__ import print_function, division import numpy as np from scipy.stats import iqr, poisson from matplotlib.lines import Line2D from scipy.stats import iqr, norm, beta from scipy.special import spence from itertools import izip_longest from sddm.stats import * # Uncertainty on the energy scale # FIXME: Should get real number from stopping muons ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0} ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0} ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} particle_id = {20: 'e', 22: r'\mu'} def plot_hist2(hists, bins, color=None): for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) elif id == 22: plt.subplot(2,3,2) elif id == 2020: plt.subplot(2,3,4) elif id == 2022: plt.subplot(2,3,5) elif id == 2222: plt.subplot(2,3,6) bincenters = (bins[1:] + bins[:-1])/2 plt.hist(bincenters, bins=bins, histtype='step', weights=hists[id],color=color) plt.gca().set_xscale("log") plt.xlabel("Energy (MeV)") plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$') if len(df): plt.tight_layout() def get_mc_hists(data,x,bins,apply_norm=False): mc_hists = {} # FIXME: May need to increase number of bins here bins2 = np.logspace(np.log10(20),np.log10(10e3),1000) bincenters2 = (bins2[1:] + bins2[:-1])/2 for id in (20,22,2020,2022,2222): ke = data[data.id == id].ke.values if id in (20,2020): ke = ke*x[1] scale = bincenters2*x[2] elif id in (22,2222): ke = ke*x[3] scale = bincenters2*x[4] elif id == 2022: ke = ke*x[5] scale = bincenters2*x[6] hist = np.histogram(ke,bins=bins2)[0] cdf = norm.cdf(bins[:,np.newaxis],bincenters2,scale)*hist mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1) if apply_norm: mc_hists[id] *= x[0] return mc_hists def get_data_hists(data,bins): data_hists = {} for id in (20,22,2020,2022,2222): df_id = data[data.id == id] data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] return data_hists # Likelihood Fit Parameters # 0 - Atmospheric Neutrino Flux Scale # 1 - Electron energy bias # 2 - Electron energy resolution # 3 - Muon energy bias # 4 - Muon energy resolution # 5 - Electron + Muon energy bias # 6 - Electron + Muon energy resolution def make_nll(data, mc, bins): data_hists = get_data_hists(data,bins) def nll(x, grad=None): if any(x[i] < 0 for i in range(len(x))): return np.inf mc_hists = get_mc_hists(mc,x,bins,apply_norm=True) nll = 0 for id in data_hists: oi = data_hists[id] ei = mc_hists[id] + EPSILON N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e']) nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u']) nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu']) nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e']) nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u']) nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) print("nll = %.2f" % nll) return nll return nll def get_mc_hists_posterior(data,data_hists,x,bins): mc_hists = get_mc_hists(data,x,bins) for id in (20,22,2020,2022,2222): mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]) return mc_hists def get_data_hist(data,x,bins): return np.histogram(data,bins=bins)[0] def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, size=10000): """ Returns the p-value that the histogram of the data is drawn from the MC histogram. The p-value is calculated by first sampling the posterior of the fit parameters `size` times. For each iteration we calculate a p-value. We then return the `percentile` percentile of all the p-values. This approach is similar to both the supremum and posterior predictive methods of calculating a p-value. For more information on different methods of calculating p-values see https://cds.cern.ch/record/1099967/files/p23.pdf. Arguments: data: 1D array of KE values data_mc: 1D array of MC KE values x_samples: MCMC samples of the floated parameters in the fit bins: bins used to bin the mc histogram size: number of values to compute """ data_hists = get_data_hists(data,bins) ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] mc = get_mc_hists_posterior(data_mc,data_hists,x,bins)[id] N = mc.sum() # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). mc = mc + 1e-10 p = mc/mc.sum() chi2_data = nllr(data_hists[id],mc) # To draw the multinomial samples we first draw the expected number of # events from a Poisson distribution and then loop over the counts and # unique values. The reason we do this is that you can't call # multinomial.rvs with a multidimensional `n` array, and looping over every # single entry takes forever ns = np.random.poisson(N,size=1000) samples = [] for n, count in zip(*np.unique(ns,return_counts=True)): samples.append(multinomial.rvs(n,p,size=count)) samples = np.concatenate(samples) # Calculate the negative log likelihood ratio for the data simulated under # the null hypothesis chi2_samples = nllr(samples,mc) ps.append(np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)) return np.percentile(ps,percentile) if __name__ == '__main__': import argparse import numpy as np import pandas as pd import sys import h5py from sddm.plot_energy import * from sddm.plot import * from sddm import setup_matplotlib import nlopt import emcee parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds") parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files") parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)") parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain") args = parser.parse_args() setup_matplotlib(args.save) import matplotlib.pyplot as plt # Loop over runs to prevent using too much memory evs = [] rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True) for run, df in rhdr.groupby('run'): evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh)) ev = pd.concat(evs) ev_mc = get_events(args.mc, merge_fits=True) ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh) ev = ev.reset_index() ev_mc = ev_mc.reset_index() # First, do basic data cleaning which is done for all events. ev = ev[ev.signal] ev_mc = ev_mc[ev_mc.signal] # 00-orphan cut ev = ev[(ev.gtid & 0xff) != 0] ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0] print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt)) # remove events 200 microseconds after a muon ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut) # Get rid of muon events in our main event sample ev = ev[(ev.dc & DC_MUON) == 0] ev = ev[~np.isnan(ev.fmin)] ev_mc = ev_mc[~np.isnan(ev_mc.fmin)] prompt = ev[ev.prompt & ~ev.atm & ~ev.muon] atm = ev[ev.atm] prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon] atm_mc = ev_mc[ev_mc.atm] # require (r/r_psup)^3 < 0.9 prompt = prompt[prompt.r_psup < 0.9] atm = atm[atm.r_psup < 0.9] prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9] atm_mc = atm_mc[atm_mc.r_psup < 0.9] # require psi < 6 prompt = prompt[prompt.psi < 6] atm = atm[atm.psi < 6] prompt_mc = prompt_mc[prompt_mc.psi < 6] atm_mc = atm_mc[atm_mc.psi < 6] print("number of events after psi cut = %i" % len(prompt)) data = prompt mc = prompt_mc bins = np.logspace(np.log10(20),np.log10(10e3),21) nll = make_nll(data,mc,bins) x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) low = np.array([EPSILON]*len(x0)) high = np.array([10]*len(x0)) opt.set_lower_bounds(low) opt.set_upper_bounds(high) opt.set_ftol_abs(1e-10) opt.set_initial_step([0.01]*len(x0)) xopt = opt.optimize(x0) print("xopt = ", xopt) nll_xopt = nll(xopt) print("nll(xopt) = ", nll(xopt)) pos = np.empty((20, len(x0)),dtype=np.double) for i in range(pos.shape[0]): pos[i] = xopt + np.random.randn(len(x0))*0.1 pos[i,:] = np.clip(pos[i,:],low,high) nwalkers, ndim = pos.shape sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x)) with np.errstate(invalid='ignore'): sampler.run_mcmc(pos, args.steps) print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) try: print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True)) except Exception as e: print(e) samples = sampler.chain.reshape((-1,len(x0))) plt.figure() plt.subplot(3,3,1) plt.hist(samples[:,0],bins=100,histtype='step') plt.xlabel("Atmospheric Flux Scale") plt.subplot(3,3,2) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Electron Energy Scale") plt.subplot(3,3,3) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Electron Energy Resolution") plt.subplot(3,3,4) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Muon Energy Scale") plt.subplot(3,3,5) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Muon Energy Resolution") plt.subplot(3,3,6) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Electron + Muon Energy Scale") plt.subplot(3,3,7) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Electron + Muon Energy Resolution") prob = {} for id in (20,22,2020,2022,2222): prob[id] = get_multinomial_prob(data,mc,id,samples,bins) print(id, prob[id]) prob_atm = {} for id in (20,22,2020,2022,2222): prob_atm[id] = get_multinomial_prob(atm,atm_mc,id,samples,bins) print(id, prob_atm[id]) handles = [Line2D([0], [0], color='C0'), Line2D([0], [0], color='C1')] labels = ('Data','Monte Carlo') fig = plt.figure() hists = get_data_hists(prompt,bins) hists_mc = get_mc_hists(prompt_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) elif id == 22: plt.subplot(2,3,2) elif id == 2020: plt.subplot(2,3,4) elif id == 2022: plt.subplot(2,3,5) elif id == 2222: plt.subplot(2,3,6) plt.text(0.95,0.95,"p = %.2f" % prob[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) fig.legend(handles,labels,loc='upper right') despine(fig,trim=True) if args.save: plt.savefig("chi2_prompt.pdf") plt.savefig("chi2_prompt.eps") else: plt.suptitle("Without Neutron Follower") fig = plt.figure() hists = get_data_hists(atm,bins) hists_mc = get_mc_hists(atm_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) elif id == 22: plt.subplot(2,3,2) elif id == 2020: plt.subplot(2,3,4) elif id == 2022: plt.subplot(2,3,5) elif id == 2222: plt.subplot(2,3,6) plt.text(0.95,0.95,"p = %.2f" % prob_atm[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) fig.legend(handles,labels,loc='upper right') despine(fig,trim=True) if args.save: plt.savefig("chi2_atm.pdf") plt.savefig("chi2_atm.eps") else: plt.suptitle("With Neutron Follower") plt.show()