#!/usr/bin/env python # Copyright (c) 2019, Anthony Latorre # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . """ Script to plot final fit results along with sidebands for the dark matter analysis. To run it just run: $ ./plot-energy [list of fit results] Currently it will plot energy distributions for external muons, michel electrons, atmospheric events with neutron followers, and prompt signal like events. Each of these plots will have a different subplot for the particle ID of the best fit, i.e. single electron, single muon, double electron, electron + muon, or double muon. """ from __future__ import print_function, division import numpy as np from scipy.stats import iqr, poisson from matplotlib.lines import Line2D from scipy.stats import iqr, norm, beta from scipy.special import spence from itertools import izip_longest from sddm.stats import * # Uncertainty on the energy scale # FIXME: Should get real number from stopping muons ENERGY_SCALE_MEAN = 1.0 ENERGY_SCALE_UNCERTAINTY = 0.1 particle_id = {20: 'e', 22: r'\mu'} def plot_hist2(df, norm=1.0, scale=1.0, color=None): for id, df_id in sorted(df.groupby('id')): if id == 20: plt.subplot(2,3,1) elif id == 22: plt.subplot(2,3,2) elif id == 2020: plt.subplot(2,3,4) elif id == 2022: plt.subplot(2,3,5) elif id == 2222: plt.subplot(2,3,6) bins = np.logspace(np.log10(20),np.log10(10e3),21) plt.hist(df_id.ke.values*scale, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color) plt.gca().set_xscale("log") plt.xlabel("Energy (MeV)") plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$') if len(df): plt.tight_layout() def plot_hist(df, muons=False): for id, df_id in sorted(df.groupby('id')): if id == 20: plt.subplot(3,4,1) elif id == 22: plt.subplot(3,4,2) elif id == 2020: plt.subplot(3,4,5) elif id == 2022: plt.subplot(3,4,6) elif id == 2222: plt.subplot(3,4,7) elif id == 202020: plt.subplot(3,4,9) elif id == 202022: plt.subplot(3,4,10) elif id == 202222: plt.subplot(3,4,11) elif id == 222222: plt.subplot(3,4,12) if muons: plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step') plt.xlabel("log10(Energy (GeV))") else: plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') plt.xlabel("Energy (MeV)") plt.title(str(id)) if len(df): plt.tight_layout() def make_nll(data, mc_hists): def nll(x, grad=None): if x[0] < 0 or x[1] < 0: return np.inf data_hists = {} for id in (20,22,2020,2022,2222): df_id = data[data.id == id] if len(df_id): data_hists[id] = np.histogram(df_id.ke.values*x[1],bins=bins)[0] else: data_hists[id] = np.zeros(len(bins)-1,dtype=np.int) nll = 0 for id in data_hists: N = data_hists[id].sum() nll -= poisson.logpmf(N,mc_hists[id].sum()*x[0]) if N > 0: p = mc_hists[id]/mc_hists[id].sum() # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). p += 1e-10 p /= p.sum() nll -= multinomial.logpmf(data_hists[id],N,p) return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY) return nll def get_mc_hist(data,x,bins): hist = np.histogram(data,bins=bins)[0] return sample_mc_hist(hist,norm=x[0]/100.0) def get_data_hist(data,x,bins): return np.histogram(data*x[1],bins=bins)[0] def get_multinomial_prob(data, data_mc, x_samples, bins, size=10000): """ Returns the p-value that the histogram of the data is drawn from the MC histogram. Arguments: data: 1D array of KE values data_mc: 1D array of MC KE values x_samples: MCMC samples of the floated parameters in the fit bins: bins used to bin the mc histogram size: number of values to compute """ chi2_data = [] chi2_samples = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] mc = get_mc_hist(data_mc,x,bins) N = mc.sum() # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). mc = mc + 1e-10 p = mc/mc.sum() data_hist = get_data_hist(data,x,bins) chi2_data.append(chi2(data_hist,mc)) n = np.random.poisson(N) samples = multinomial.rvs(n,p) chi2_samples.append(chi2(samples,mc)) chi2_samples = np.array(chi2_samples) chi2_data = np.array(chi2_data) return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples) if __name__ == '__main__': import argparse import numpy as np import pandas as pd import sys import h5py from sddm.plot_energy import * from sddm.plot import * from sddm import setup_matplotlib import nlopt import emcee parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds") parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files") parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)") parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain") args = parser.parse_args() setup_matplotlib(args.save) import matplotlib.pyplot as plt # Loop over runs to prevent using too much memory evs = [] rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True) for run, df in rhdr.groupby('run'): evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh)) ev = pd.concat(evs) ev_mc = get_events(args.mc, merge_fits=True) ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh) ev = ev.reset_index() ev_mc = ev_mc.reset_index() # First, do basic data cleaning which is done for all events. ev = ev[ev.signal] ev_mc = ev_mc[ev_mc.signal] # 00-orphan cut ev = ev[(ev.gtid & 0xff) != 0] ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0] print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt)) # remove events 200 microseconds after a muon ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut) # Get rid of muon events in our main event sample ev = ev[(ev.dc & DC_MUON) == 0] ev = ev[~np.isnan(ev.fmin)] ev_mc = ev_mc[~np.isnan(ev_mc.fmin)] prompt = ev[ev.prompt & ~ev.atm & ~ev.muon] atm = ev[ev.atm] prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon] atm_mc = ev_mc[ev_mc.atm] # require (r/r_psup)^3 < 0.9 prompt = prompt[prompt.r_psup < 0.9] atm = atm[atm.r_psup < 0.9] prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9] atm_mc = atm_mc[atm_mc.r_psup < 0.9] # require psi < 6 prompt = prompt[prompt.psi < 6] atm = atm[atm.psi < 6] prompt_mc = prompt_mc[prompt_mc.psi < 6] atm_mc = atm_mc[atm_mc.psi < 6] print("number of events after psi cut = %i" % len(prompt)) data = prompt mc = prompt_mc bins = np.logspace(np.log10(20),np.log10(10e3),21) data_hists = {} mc_hists = {} for id in (20,22,2020,2022,2222): df_id = data[data.id == id] if len(df_id): data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] else: data_hists[id] = np.zeros(len(bins)-1,dtype=np.int) for id in (20,22,2020,2022,2222): df_id = mc[mc.id == id] if len(df_id): mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100 else: mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int) data_atm_hists = {} mc_atm_hists = {} for id in (20,22,2020,2022,2222): df_id = atm[atm.id == id] if len(df_id): data_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] else: data_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int) for id in (20,22,2020,2022,2222): df_id = atm_mc[atm_mc.id == id] if len(df_id): mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100 else: mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int) nll = make_nll(data,mc_hists) x0 = np.array([1.0,1.0]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) low = np.array([1e-10,1e-10]) high = np.array([10,10]) opt.set_lower_bounds(low) opt.set_upper_bounds(high) opt.set_ftol_abs(1e-10) opt.set_initial_step([0.01,0.01]) xopt = opt.optimize(x0) print("xopt = ", xopt) nll_xopt = nll(xopt) print("nll(xopt) = ", nll(xopt)) pos = np.empty((10, len(x0)),dtype=np.double) for i in range(pos.shape[0]): pos[i] = xopt + np.random.randn(len(x0))*0.1 pos[i,:] = np.clip(pos[i,:],1e-10,10) nwalkers, ndim = pos.shape sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x)) with np.errstate(invalid='ignore'): sampler.run_mcmc(pos, args.steps) print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) try: print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True)) except Exception as e: print(e) samples = sampler.chain.reshape((-1,len(x0))) plt.figure() plt.subplot(2,2,1) plt.hist(samples[:,0],bins=100,histtype='step') plt.xlabel("Atmospheric Flux Scale") plt.subplot(2,2,2) plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Energy Scale") prob = {} for id in (20,22,2020,2022,2222): prob[id] = get_multinomial_prob(data[data.id == id].ke.values,mc[mc.id == id].ke.values,samples,bins) print(id, prob[id]) prob_atm = {} for id in (20,22,2020,2022,2222): prob_atm[id] = get_multinomial_prob(atm[atm.id == id].ke.values,atm_mc[atm_mc.id == id].ke.values,samples,bins) print(id, prob_atm[id]) handles = [Line2D([0], [0], color='C0'), Line2D([0], [0], color='C1')] labels = ('Data','Monte Carlo') fig = plt.figure() plot_hist2(prompt,scale=xopt[1],color='C0') plot_hist2(prompt_mc,norm=xopt[0]/100,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) elif id == 22: plt.subplot(2,3,2) elif id == 2020: plt.subplot(2,3,4) elif id == 2022: plt.subplot(2,3,5) elif id == 2222: plt.subplot(2,3,6) plt.text(0.95,0.95,"p = %.2f" % prob[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) fig.legend(handles,labels,loc='upper right') despine(fig,trim=True) if args.save: plt.savefig("chi2_prompt.pdf") plt.savefig("chi2_prompt.eps") else: plt.suptitle("Without Neutron Follower") fig = plt.figure() plot_hist2(atm,scale=xopt[1],color='C0') plot_hist2(atm_mc,norm=xopt[0]/100,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) elif id == 22: plt.subplot(2,3,2) elif id == 2020: plt.subplot(2,3,4) elif id == 2022: plt.subplot(2,3,5) elif id == 2222: plt.subplot(2,3,6) plt.text(0.95,0.95,"p = %.2f" % prob_atm[id],horizontalalignment='right',verticalalignment='top',transform=plt.gca().transAxes) fig.legend(handles,labels,loc='upper right') despine(fig,trim=True) if args.save: plt.savefig("chi2_atm.pdf") plt.savefig("chi2_atm.eps") else: plt.suptitle("With Neutron Follower") plt.show()