#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script to do the final dark matter search analysis. To run it just run:

    $ ./dm-search [list of data fit results] --mc [list of atmospheric MC files] --muon-mc [list of muon MC files] --steps [steps]

After running you will get a plot showing the limits for back-to-back dark
matter at a range of energies.
"""
from __future__ import print_function, division
import numpy as np
from scipy.stats import iqr, poisson, norm, beta, percentileofscore
from matplotlib.lines import Line2D
from scipy.special import spence
from itertools import izip_longest, islice
from sddm.stats import *
from sddm.dc import estimate_errors, EPSILON, truncnorm_scaled
import emcee
from sddm import printoptions
from sddm.utils import fast_cdf, correct_energy_bias
from scipy.integrate import quad
from sddm.dm import *
from sddm import SNOMAN_MASS
import nlopt

# Likelihood Fit Parameters
# 0 - Atmospheric Neutrino Flux Scale
# 1 - Electron energy bias
# 2 - Electron energy resolution
# 3 - Muon energy bias
# 4 - Muon energy resolution
# 5 - External Muon scale
# 6 - Dark Matter Scale

# Number of events to use in the dark matter Monte Carlo histogram when
# fitting. Ideally this would be as big as possible, but the bigger it is,
# the more time the fit takes.
DM_SAMPLES = 10000

DM_MASSES = {2020: np.logspace(np.log10(22),np.log10(10e3 - 500),101),
             2222: np.logspace(np.log10(318),np.log10(10e3 - 500),101)}

DISCOVERY_P_VALUE = 0.05

# Energy resolution as a fraction of energy for dark matter signal
DM_ENERGY_RESOLUTION = 0.1

FIT_PARS = [
    'Atmospheric Neutrino Flux Scale',
    'Electron energy bias',
    'Electron energy resolution',
    'Muon energy bias',
    'Muon energy resolution',
    'External Muon scale',
    'Dark Matter Scale']

# Uncertainty on the energy scale
#
# - the muon energy scale and resolution terms come directly from measurements
#   on stopping muons, so those are known well.
# - for electrons, we only have Michel electrons at the low end of our energy
#   range, and therefore we don't really have any good way of constraining the
#   energy scale or resolution. However, if we assume that the ~7% energy bias
#   in the muons is from the single PE distribution (it seems likely to me
#   that that is a major part of the bias), then the energy scale should be
#   roughly the same. Since the Michel electron distributions are consistent,
#   we leave the mean value at 0, but to be conservative, we set the error to
#   10%.
# - The energy resolution for muons was pretty much spot on, and so we expect
#   the same from electrons. In addition, the Michel spectrum is consistent,
#   so at that energy level we don't see anything which leads us to expect a
#   major difference. To be conservative, and because I don't think it really
#   affects the analysis at all, I'll leave the uncertainty here at 10%
#   anyway.
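# The prior arrays below are ordered according to FIT_PARS above: the central
# values go in PRIORS and the Gaussian widths in PRIOR_UNCERTAINTIES. The
# np.inf entry for the dark matter scale is a placeholder: the likelihood
# only applies Gaussian priors to the first six parameters, and the dark
# matter scale is instead sampled uniformly between its bounds (see
# sample_priors() and make_nll() below).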
PRIORS = [
    1.0,    # Atmospheric Neutrino Scale
    0.015,  # Electron energy scale
    0.0,    # Electron energy resolution
    0.053,  # Muon energy scale
    0.0,    # Muon energy resolution
    0.0,    # Muon scale
    0.0,    # Dark Matter Scale
]

PRIOR_UNCERTAINTIES = [
    0.2,    # Atmospheric Neutrino Scale
    0.03,   # Electron energy scale
    0.05,   # Electron energy resolution
    0.01,   # Muon energy scale
    0.013,  # Muon energy resolution
    10.0,   # Muon scale
    np.inf, # Dark Matter Scale
]

# Lower bounds for the fit parameters
PRIORS_LOW = [
    EPSILON,
    -10,
    EPSILON,
    -10,
    EPSILON,
    0,
    0,
]

# Upper bounds for the fit parameters
PRIORS_HIGH = [
    10,
    10,
    10,
    10,
    10,
    1e9,
    1000,
]

particle_id = {20: 'e', 22: r'\mu'}

def plot_hist2(hists, bins, color=None):
    for id in (20,22,2020,2022,2222):
        if id == 20:
            plt.subplot(2,3,1)
        elif id == 22:
            plt.subplot(2,3,2)
        elif id == 2020:
            plt.subplot(2,3,4)
        elif id == 2022:
            plt.subplot(2,3,5)
        elif id == 2222:
            plt.subplot(2,3,6)

        bincenters = (bins[1:] + bins[:-1])/2
        plt.hist(bincenters, bins=bins, histtype='step', weights=hists[id], color=color)
        plt.gca().set_xscale("log")
        plt.xlabel("Energy (MeV)")
        plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')

    if len(hists):
        plt.tight_layout()

def get_mc_hists(data,x,bins,scale=1.0,reweight=False):
    """
    Returns the expected Monte Carlo histograms for the atmospheric neutrino
    background.

    Args:
        - data: pandas dataframe of the Monte Carlo events
        - x: fit parameters
        - bins: histogram bins
        - scale: multiply histograms by an overall scale factor

    This function does two basic things:

        1. apply the energy bias and resolution corrections
        2. histogram the results

    Returns a dictionary mapping particle id combo -> histogram.
    """
    df_dict = {}
    for id in (20,22,2020,2022,2222):
        df_dict[id] = data[data.id == id]

    return get_mc_hists_fast(df_dict,x,bins,scale,reweight)

def get_mc_hists_fast(df_dict,x,bins,scale=1.0,reweight=False):
    """
    Same as get_mc_hists() but the first argument is a dictionary mapping
    particle id -> dataframe. This is much faster than selecting the events
    from the dataframe every time.
    """
    mc_hists = {}

    for id in (20,22,2020,2022,2222):
        df = df_dict[id]

        if id == 20:
            ke = df.energy1.values*(1+x[1])
            resolution = df.energy1.values*max(EPSILON,x[2])
        elif id == 2020:
            ke = df.energy1.values*(1+x[1]) + df.energy2.values*(1+x[1])
            resolution = np.sqrt((df.energy1.values*max(EPSILON,x[2]))**2 + (df.energy2.values*max(EPSILON,x[2]))**2)
        elif id == 22:
            ke = df.energy1.values*(1+x[3])
            resolution = df.energy1.values*max(EPSILON,x[4])
        elif id == 2222:
            ke = df.energy1.values*(1+x[3]) + df.energy2.values*(1+x[3])
            resolution = np.sqrt((df.energy1.values*max(EPSILON,x[4]))**2 + (df.energy2.values*max(EPSILON,x[4]))**2)
        elif id == 2022:
            ke = df.energy1.values*(1+x[1]) + df.energy2.values*(1+x[3])
            resolution = np.sqrt((df.energy1.values*max(EPSILON,x[2]))**2 + (df.energy2.values*max(EPSILON,x[4]))**2)

        if reweight:
            cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)*df.weight.values
        else:
            cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)

        mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1)
        mc_hists[id] *= scale

    return mc_hists

def get_data_hists(data,bins,scale=1.0):
    """
    Returns the data histogrammed into `bins`.
""" data_hists = {} for id in (20,22,2020,2022,2222): data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale return data_hists def make_nll(dm_particle_id, dm_mass, dm_energy, data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False): df_dict = {} for id in (20,22,2020,2022,2222): df_dict[id] = mc[mc.id == id] df_dict_muon = {} for id in (20,22,2020,2022,2222): df_dict_muon[id] = muons[muons.id == id] data_hists = get_data_hists(data,bins) dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy) df_dict_dm = {} for id in (20,22,2020,2022,2222): df_dict_dm[id] = dm_sample[dm_sample.id == id] def nll(x, grad=None): if (x < PRIORS_LOW).any() or (x > PRIORS_HIGH).any(): return np.inf # Get the Monte Carlo histograms. We need to do this within the # likelihood function since we apply the energy resolution parameters # to the Monte Carlo. mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor) muon_hists = get_mc_hists_fast(df_dict_muon,x,bins,scale=1/muon_scale_factor) dm_hists = get_mc_hists_fast(df_dict_dm,x,bins,scale=1/len(dm_sample)) # Calculate the negative log of the likelihood of observing the data # given the fit parameters nll = 0 for id in data_hists: oi = data_hists[id] ei = mc_hists[id]*x[0] + muon_hists[id]*x[5] + dm_hists[id]*x[6] + EPSILON N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) # Add the priors nll -= norm.logpdf(x[:6],PRIORS[:6],PRIOR_UNCERTAINTIES[:6]).sum() if print_nll: # Print the result print("nll = %.2f" % nll) return nll return nll def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10): """ Run the fit and return the minimum along with samples from running an MCMC starting near the minimum. Args: - data: pandas dataframe representing the data to fit - muon: pandas dataframe representing the expected background from external muons - data_mc: pandas dataframe representing the expected background from atmospheric neutrino events - bins: an array of bins to use for the fit - steps: the number of MCMC steps to run Returns a tuple (xopt, samples) where samples is an array of shape (steps, number of parameters). """ nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll) pos = np.empty((walkers, len(PRIORS)),dtype=np.double) for i in range(pos.shape[0]): pos[i] = sample_priors() nwalkers, ndim = pos.shape # We use the KDEMove here because I think it should sample the likelihood # better. Because we have energy scale parameters and we are doing a binned # likelihood, the likelihood is discontinuous. There can also be several # local minima. The author of emcee recommends using the KDEMove with a lot # of workers to try and properly sample a multimodal distribution. In # addition, I've found that the autocorrelation time for the KDEMove is # much better than the other moves. sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x), moves=emcee.moves.KDEMove()) with np.errstate(invalid='ignore'): sampler.run_mcmc(pos, steps) print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) try: print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True)) except Exception as e: print(e) samples = sampler.get_chain(flat=True,thin=thin) # Now, we use nlopt to find the best set of parameters. We start at the # best starting point from the MCMC and then run the SBPLX routine. 
    x0 = sampler.get_chain(flat=True)[sampler.get_log_prob(flat=True).argmax()]

    opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
    opt.set_min_objective(nll)
    low = np.array(PRIORS_LOW)
    high = np.array(PRIORS_HIGH)
    opt.set_lower_bounds(low)
    opt.set_upper_bounds(high)
    opt.set_ftol_abs(1e-10)
    opt.set_initial_step([0.01]*len(x0))

    xopt = opt.optimize(x0)

    return xopt, samples

def sample_priors():
    """
    Returns a random sample of the fit parameters from the priors. For the
    first 6 parameters we use a truncated normal distribution, and for the
    last parameter we use a uniform distribution.
    """
    return np.concatenate((truncnorm_scaled(PRIORS_LOW[:6],PRIORS_HIGH[:6],PRIORS[:6],PRIOR_UNCERTAINTIES[:6]),[np.random.uniform(PRIORS_LOW[6],PRIORS_HIGH[6])]))

def get_dm_sample(n,dm_particle_id,dm_mass,dm_energy):
    """
    Returns a dataframe containing events from a dark matter particle.

    Args:
        - n: int
            number of events
        - dm_particle_id: int
            the particle id of the DM particle (2020 or 2222)
        - dm_mass: float
            the mass of the DM particle
        - dm_energy: float
            the total energy of the DM particle

    Note: for now the kinetic energies are smeared with a fixed 5%
    resolution (see the FIXME below).
    """
    id1 = dm_particle_id//100
    id2 = dm_particle_id % 100
    m1 = SNOMAN_MASS[id1]
    m2 = SNOMAN_MASS[id2]
    data = np.empty(n,dtype=[('energy1',np.double),('energy2',np.double),('ke',np.double),('id1',np.int),('id2',np.int),('id',np.int)])
    for i, (v1, v2) in enumerate(islice(gen_decay(dm_mass,dm_energy,m1,m2),n)):
        pos = rand_ball(PSUP_RADIUS)
        E1 = v1[0]
        E2 = v2[0]
        p1 = np.linalg.norm(v1[1:])
        p2 = np.linalg.norm(v2[1:])
        T1 = E1 - m1
        T2 = E2 - m2
        # FIXME: Get electron and muon resolution
        T1 += norm.rvs(scale=T1*0.05)
        T2 += norm.rvs(scale=T2*0.05)
        data[i] = T1, T2, T1 + T2, id1, id2, dm_particle_id
    return pd.DataFrame(data)

def integrate_mc_hist(mc, muons, id, x, a, b, atmo_scale_factor, muon_scale_factor, bins):
    """
    Integrate the Monte Carlo expected histograms from a to b.

    Args:
        - mc: DataFrame
            Pandas dataframe of atmospheric Monte Carlo events.
        - muons: DataFrame
            Pandas dataframe of external muon Monte Carlo events.
        - id: int
            The particle ID histogram to integrate.
        - x: array
            The fit parameters.
        - a, b: float
            Bounds of the integration.
        - atmo_scale_factor, muon_scale_factor: float
            Scale factors for the fit parameters.
        - bins: array
            The histogram bins.
    """
    mc_hists = get_mc_hists(mc,x,bins,scale=x[0]/atmo_scale_factor)
    muon_hists = get_mc_hists(muons,x,bins,scale=x[5]/muon_scale_factor)

    def total_hist(x):
        if x < bins[0] or x > bins[-1]:
            return 0
        i = np.digitize(x,bins)
        if i == len(bins):
            i = len(bins) - 1
        return (mc_hists[id][i-1] + muon_hists[id][i-1])/(bins[i]-bins[i-1])

    # Yes, this is a stupid simple integration that I don't need quad for,
    # but I don't want to have to code all the corner cases including when a
    # and b are in the same bin, etc.
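    # Note: the `points` argument tells quad where the integrand may be
    # discontinuous (the bin edges lying between a and b), which helps the
    # adaptive integration handle the piecewise-constant histogram.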
    return quad(total_hist,a,b,points=bins[(bins >= a) & (bins <= b)])[0]

def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin):
    limits = {}
    best_fit = {}
    discovery_array = {}
    for dm_particle_id in (2020,2222):
        limits[dm_particle_id] = []
        best_fit[dm_particle_id] = []
        discovery_array[dm_particle_id] = []
        for dm_mass in dm_masses[dm_particle_id]:
            id1 = dm_particle_id//100
            id2 = dm_particle_id % 100
            m1 = SNOMAN_MASS[id1]
            m2 = SNOMAN_MASS[id2]
            dm_energy = dm_mass
            xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin)

            limit = np.percentile(samples[:,6],90)
            limits[dm_particle_id].append(limit)

            # Here, to determine if there is a discovery we make an
            # approximate calculation of the number of events which would be
            # significant.
            #
            # We expect the likelihood to be approximately that of a Poisson
            # distribution with n background events and we are searching for
            # a signal s. n is constrained by the rest of the histograms, so
            # we can treat it as being approximately fixed. In this case, the
            # likelihood looks approximately like:
            #
            #     P(s) = e^(-(s+n))(s+n)**i/i!
            #
            # where i is the actual number of events. Under the null
            # hypothesis (i.e. no dark matter), we expect i to be Poisson
            # distributed with mean n. Therefore s should have the same
            # distribution but offset by n. Therefore, to determine the
            # threshold, we simply look for the threshold we expect in n and
            # then subtract n.
            dm_kinetic_energy = dm_energy - m1 - m2

            a = dm_kinetic_energy - 2*dm_kinetic_energy*DM_ENERGY_RESOLUTION
            b = dm_kinetic_energy + 2*dm_kinetic_energy*DM_ENERGY_RESOLUTION

            n = integrate_mc_hist(data_mc, muon, dm_particle_id, xopt, a, b, atmo_scale_factor, muon_scale_factor, bins)

            # Set our discovery threshold to the p-value we want divided by
            # the number of bins. The idea here is that the number of bins is
            # approximately equal to the number of trials so we need to
            # increase our discovery threshold to account for the look
            # elsewhere effect.
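            # For example (illustrative numbers only): with the default 20
            # energy bins the per-bin threshold is 0.05/20 = 0.0025, and if
            # the integrated background expectation were n = 10 events, then
            # poisson.ppf(1 - 0.0025, 10) = 20, so the discovery threshold on
            # the dark matter scale would be 20 + 1 - 10 = 11 events.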
            threshold = DISCOVERY_P_VALUE/(len(bins)-1)
            discovery = poisson.ppf(1-threshold,n) + 1 - n
            discovery_array[dm_particle_id].append(discovery)

            best_fit[dm_particle_id].append(xopt[6])

    return limits, best_fit, discovery_array

if __name__ == '__main__':
    import argparse
    import numpy as np
    import pandas as pd
    import sys
    import h5py
    from sddm.plot_energy import *
    from sddm.plot import *
    from sddm import setup_matplotlib
    import nlopt

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
    parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
    parser.add_argument("--muon-mc", nargs='+', required=True, help="muon MC files")
    parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
    parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
    parser.add_argument("--pull", type=int, default=0, help="plot pull plots")
    parser.add_argument("--weights", nargs='+', required=True, help="GENIE reweight HDF5 files")
    parser.add_argument("--print-nll", action='store_true', default=False, help="print nll values")
    parser.add_argument("--walkers", type=int, default=100, help="number of walkers")
    parser.add_argument("--thin", type=int, default=10, help="number of steps to thin")
    parser.add_argument("--test", type=int, default=0, help="run tests to check discovery threshold")
    args = parser.parse_args()

    setup_matplotlib(args.save)

    import matplotlib.pyplot as plt

    # Loop over runs to prevent using too much memory
    evs = []
    rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
    for run, df in rhdr.groupby('run'):
        evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
    ev = pd.concat(evs)
    ev = correct_energy_bias(ev)

    # Note: We loop over the MC filenames here instead of just passing the
    # whole list to get_events() because I had to rerun some of the MC events
    # using SNOMAN, and so most of the runs actually have two different files
    # and otherwise the GTIDs will clash
    ev_mcs = []
    for filename in args.mc:
        ev_mcs.append(get_events([filename], merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True))
    ev_mc = pd.concat(ev_mcs)
    muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True)
    weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)

    ev_mc = correct_energy_bias(ev_mc)
    muon_mc = correct_energy_bias(muon_mc)

    # Set all prompt events in the MC to be muons
    muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True

    ev = ev.reset_index()
    ev_mc = ev_mc.reset_index()
    muon_mc = muon_mc.reset_index()

    # 00-orphan cut
    ev = ev[(ev.gtid & 0xff) != 0]
    ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0]
    muon_mc = muon_mc[(muon_mc.gtid & 0xff) != 0]

    # remove events 200 microseconds after a muon
    ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)

    # Get rid of events which don't have a successful fit
    ev = ev[~np.isnan(ev.fmin)]
    ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
    muon_mc = muon_mc[~np.isnan(muon_mc.fmin)]

    # require (r/r_psup)^3 < 0.9
    ev = ev[ev.r_psup < 0.9]
    ev_mc = ev_mc[ev_mc.r_psup < 0.9]
    muon_mc = muon_mc[muon_mc.r_psup < 0.9]

    # require psi < 6
    ev = ev[ev.psi < 6]
    ev_mc = ev_mc[ev_mc.psi < 6]
    muon_mc = muon_mc[muon_mc.psi < 6]

    data = ev[ev.signal & ev.prompt & ~ev.atm]
    data_atm = ev[ev.signal & ev.prompt & ev.atm]

    # Right now we use the muon Monte Carlo in the fit. If you want to use
    # the actual data, you can comment the next two lines and then uncomment
    # the two after that.
    muon = muon_mc[muon_mc.muon & muon_mc.prompt & ~muon_mc.atm]
    muon_atm = muon_mc[muon_mc.muon & muon_mc.prompt & muon_mc.atm]
    #muon = ev[ev.muon & ev.prompt & ~ev.atm]
    #muon_atm = ev[ev.muon & ev.prompt & ev.atm]

    if (~rhdr.run.isin(ev_mc.run)).any():
        print_warning("Error! The following runs have no Monte Carlo: %s" % \
            rhdr.run[~rhdr.run.isin(ev_mc.run)].values)
        sys.exit(1)

    ev_mc = ev_mc[ev_mc.run.isin(rhdr.run)]

    data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
    data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]

    bins = np.logspace(np.log10(20),np.log10(10e3),21)

    atmo_scale_factor = 100.0
    muon_scale_factor = len(muon) + len(muon_atm)

    if args.pull:
        pull = [[] for i in range(len(FIT_PARS))]

        # Set the random seed so we get reproducible results here
        np.random.seed(0)

        for i in range(args.pull):
            xtrue = sample_priors()

            # Calculate expected number of events
            N = len(data_mc)*xtrue[0]/atmo_scale_factor
            N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
            N_muon = len(muon)*xtrue[5]/muon_scale_factor
            N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
            N_dm = xtrue[6]

            # Calculate observed number of events
            n = np.random.poisson(N)
            n_atm = np.random.poisson(N_atm)
            n_muon = np.random.poisson(N_muon)
            n_muon_atm = np.random.poisson(N_muon_atm)
            n_dm = np.random.poisson(N_dm)

            dm_particle_id = np.random.choice([2020,2222])
            dm_mass = np.random.uniform(20,10e3)
            dm_energy = dm_mass

            # Sample data from Monte Carlo
            data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True), get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))

            # Smear the energies by the additional energy resolution
            data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
            data.loc[data.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id1 == 22))*xtrue[4])
            data.loc[data.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id2 == 20))*xtrue[2])
            data.loc[data.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id2 == 22))*xtrue[4])
            data['ke'] = data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energy3'].fillna(0)

            data_atm.loc[data_atm.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id1 == 20))*xtrue[2])
            data_atm.loc[data_atm.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id1 == 22))*xtrue[4])
            data_atm.loc[data_atm.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id2 == 20))*xtrue[2])
            data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
            data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)

            xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)

            for i in range(len(FIT_PARS)):
                # The "pull plots" we make here are actually produced via a
                # procedure called "Simulation Based Calibration".
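                #
                # The idea is that if the fit is unbiased and the posterior
                # is well calibrated, the percentile of the true parameter
                # value within the posterior samples should be uniformly
                # distributed between 0 and 100, so the histograms plotted
                # below should be approximately flat.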
                #
                # See https://arxiv.org/abs/1804.06788.
                pull[i].append(percentileofscore(samples[:,i],xtrue[i]))

        fig = plt.figure()
        axes = []
        for i, name in enumerate(FIT_PARS):
            axes.append(plt.subplot(4,2,i+1))
            n, bins, patches = plt.hist(pull[i],bins=np.linspace(0,100,11),histtype='step')
            expected = len(pull[i])/(len(bins)-1)
            plt.axhline(expected,color='k',ls='--',alpha=0.25)
            plt.axhspan(poisson.ppf(0.005,expected), poisson.ppf(0.995,expected), facecolor='0.5', alpha=0.25)
            plt.title(name)
        for ax in axes:
            despine(ax=ax,left=True,trim=True)
            ax.get_yaxis().set_visible(False)
        plt.tight_layout()

        if args.save:
            fig.savefig("dm_search_pull_plot.pdf")
            fig.savefig("dm_search_pull_plot.eps")
        else:
            plt.show()

        sys.exit(0)

    if args.test:
        # Set the random seed so we get reproducible results here
        np.random.seed(0)

        discoveries = 0

        for i in range(args.test):
            xtrue = sample_priors()

            # Calculate expected number of events
            N = len(data_mc)*xtrue[0]/atmo_scale_factor
            N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
            N_muon = len(muon)*xtrue[5]/muon_scale_factor
            N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
            N_dm = xtrue[6]

            # Calculate observed number of events
            n = np.random.poisson(N)
            n_atm = np.random.poisson(N_atm)
            n_muon = np.random.poisson(N_muon)
            n_muon_atm = np.random.poisson(N_muon_atm)
            n_dm = np.random.poisson(N_dm)

            dm_particle_id = np.random.choice([2020,2222])
            dm_mass = np.random.uniform(20,10e3)
            dm_energy = dm_mass

            # Sample data from Monte Carlo
            data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True), get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))

            # Smear the energies by the additional energy resolution
            data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
            data.loc[data.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id1 == 22))*xtrue[4])
            data.loc[data.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id2 == 20))*xtrue[2])
            data.loc[data.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id2 == 22))*xtrue[4])
            data['ke'] = data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energy3'].fillna(0)

            data_atm.loc[data_atm.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id1 == 20))*xtrue[2])
            data_atm.loc[data_atm.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id1 == 22))*xtrue[4])
            data_atm.loc[data_atm.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id2 == 20))*xtrue[2])
            data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
            data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)

            limits, best_fit, discovery_array = get_limits(DM_MASSES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)

            for id in (2020,2222):
                if (np.array(best_fit[id]) > np.array(discovery_array[id])).any():
                    discoveries += 1

        print("expected %.2f discoveries" % DISCOVERY_P_VALUE)
        print("actually got %i/%i = %.2f discoveries" % (discoveries,args.test,discoveries/args.test))

        sys.exit(0)

    limits, best_fit, discovery_array = get_limits(DM_MASSES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)

    fig = plt.figure()
    for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
        plt.plot(DM_MASSES[dm_particle_id],limits[dm_particle_id],color=color,label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
    plt.gca().set_xscale("log")
    despine(fig,trim=True)
    plt.xlabel("Energy (MeV)")
    plt.ylabel("Event Rate Limit (events)")
    plt.legend()
    plt.tight_layout()

    if args.save:
        plt.savefig("dm_search_limit.pdf")
        plt.savefig("dm_search_limit.eps")
    else:
        plt.suptitle("Dark Matter Limits")

    fig = plt.figure()
    for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
        plt.plot(DM_MASSES[dm_particle_id],best_fit[dm_particle_id],color=color,label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
        plt.plot(DM_MASSES[dm_particle_id],discovery_array[dm_particle_id],color=color,ls='--')
    plt.gca().set_xscale("log")
    despine(fig,trim=True)
    plt.xlabel("Energy (MeV)")
    plt.ylabel("Events")
    plt.legend()
    plt.tight_layout()

    if args.save:
        plt.savefig("dm_best_fit_with_discovery_threshold.pdf")
        plt.savefig("dm_best_fit_with_discovery_threshold.eps")
    else:
        plt.suptitle("Best Fit Dark Matter")
        plt.show()