#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script to do final dark matter search analysis. To run it just run:

    $ ./dm-search [list of data fit results] --mc [list of atmospheric MC files] \
          --muon-mc [list of muon MC files] --steps [steps]

After running you will get a plot showing the limits for back to back dark
matter at a range of energies.
"""
from __future__ import print_function, division
import sys
import numpy as np
import pandas as pd
from scipy.stats import iqr, norm, beta, percentileofscore, poisson
from matplotlib.lines import Line2D
from scipy.special import spence
from sddm.stats import *
from sddm.dc import estimate_errors, EPSILON, truncnorm_scaled
import emcee
from sddm import printoptions
from sddm.utils import fast_cdf, correct_energy_bias
from scipy.integrate import quad
from sddm.dm import *
from sddm import SNOMAN_MASS, AV_RADIUS
import nlopt
from itertools import chain

# Likelihood Fit Parameters
# 0 - Atmospheric Neutrino Flux Scale
# 1 - Electron energy bias
# 2 - Electron energy resolution
# 3 - Muon energy bias
# 4 - Muon energy resolution
# 5 - External Muon scale
# 6 - Dark Matter Scale

# Number of events to use in the dark matter Monte Carlo histogram when
# fitting. Ideally this would be as big as possible, but the bigger it is, the
# more time the fit takes.
DM_SAMPLES = 10000

DM_MASSES = {2020: np.logspace(np.log10(22),np.log10(1e3),101),
             2222: np.logspace(np.log10(318),np.log10(1e3),101)}

DISCOVERY_P_VALUE = 0.05

FIT_PARS = [
    'Atmospheric Neutrino Flux Scale',
    'Electron energy bias',
    'Electron energy resolution',
    'Muon energy bias',
    'Muon energy resolution',
    'External Muon scale',
    'Dark Matter Scale']

# Uncertainty on the energy scale
#
# - the muon energy scale and resolution terms come directly from measurements
#   on stopping muons, so those are known well.
# - for electrons, we only have Michel electrons at the low end of our energy
#   range, and therefore we don't really have any good way of constraining the
#   energy scale or resolution. However, if we assume that the ~7% energy bias
#   in the muons is from the single PE distribution (it seems likely to me
#   that that is a major part of the bias), then the energy scale should be
#   roughly the same. Since the Michel electron distributions are consistent,
#   we leave the mean value at 0, but to be conservative, we set the error to
#   10%.
# - The energy resolution for muons was pretty much spot on, and so we expect
#   the same from electrons. In addition, the Michel spectrum is consistent,
#   so at that energy level we don't see anything which leads us to expect a
#   major difference. To be conservative, and because I don't think it really
#   affects the analysis at all, I'll leave the uncertainty here at 10%
#   anyways.
PRIORS = [
    1.0,   # Atmospheric Neutrino Scale
    0.015, # Electron energy scale
    0.0,   # Electron energy resolution
    0.053, # Muon energy scale
    0.0,   # Muon energy resolution
    0.0,   # Muon scale
    0.0,   # Dark Matter Scale
]

PRIOR_UNCERTAINTIES = [
    0.2,   # Atmospheric Neutrino Scale
    0.03,  # Electron energy scale
    0.05,  # Electron energy resolution
    0.01,  # Muon energy scale
    0.013, # Muon energy resolution
    10.0,  # Muon scale
    np.inf,# Dark Matter Scale
]

# Lower bounds for the fit parameters
PRIORS_LOW = [
    EPSILON,
    -10,
    EPSILON,
    -10,
    EPSILON,
    0,
    0,
]

# Upper bounds for the fit parameters
PRIORS_HIGH = [
    10,
    10,
    10,
    10,
    10,
    1e9,
    1000,
]

particle_id = {20: 'e', 22: r'\mu'}

def plot_hist2(hists, bins, color=None):
    for id in (20,22,2020,2022,2222):
        if id == 20:
            plt.subplot(2,3,1)
        elif id == 22:
            plt.subplot(2,3,2)
        elif id == 2020:
            plt.subplot(2,3,4)
        elif id == 2022:
            plt.subplot(2,3,5)
        elif id == 2222:
            plt.subplot(2,3,6)

        bincenters = (bins[id][1:] + bins[id][:-1])/2
        plt.hist(bincenters, bins=bins[id], histtype='step', weights=hists[id], color=color)
        plt.gca().set_xscale("log")
        major = np.array([10,100,1000,10000])
        minor = np.unique(list(chain(*list(range(i,i*10,i) for i in major[:-1]))))
        minor = np.setdiff1d(minor,major)
        major = major[major <= bins[id][-1]]
        minor = minor[minor <= bins[id][-1]]
        plt.gca().set_xticks(major)
        plt.gca().set_xticks(minor,minor=True)
        plt.gca().set_xlim(10,10000)
        plt.xlabel("Energy (MeV)")
        plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
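
# sample_priors() is used by the --pull and --test code below, but its
# definition falls outside this excerpt. The following is a minimal sketch of
# what it has to do given the priors above; the name and the
# rejection-sampling approach are illustrative only (the actual implementation
# presumably uses truncnorm_scaled imported above):
def sample_priors_sketch():
    while True:
        # Draw each parameter from its Gaussian prior
        x = np.asarray(PRIORS) + np.random.randn(len(PRIORS))*np.asarray(PRIOR_UNCERTAINTIES)
        # The dark matter scale has an infinite prior uncertainty, i.e. a flat
        # prior, so draw it uniformly within its bounds instead
        x[6] = np.random.uniform(PRIORS_LOW[6], PRIORS_HIGH[6])
        # Reject any draw that falls outside the fit bounds
        if np.all((x >= PRIORS_LOW) & (x <= PRIORS_HIGH)):
            return x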
#!/usr/bin/env python
"""
Script to plot the probabilities for atmospheric neutrino oscillations. To run
it:

    $ ./plot-atmospheric-oscillations nue_osc_prob.txt
"""
from __future__ import print_function, division
import numpy as np

if __name__ == '__main__':
    import argparse
    from os.path import split, splitext
    from sddm import setup_matplotlib

    parser = argparse.ArgumentParser("script to plot atmospheric oscillations")
    parser.add_argument("filenames", nargs='+', help="oscillation probability filenames")
    parser.add_argument("--save", action="store_true", default=False, help="save plots")
    args = parser.parse_args()

    setup_matplotlib(args.save)

    import matplotlib.pyplot as plt

    for filename in args.filenames:
        head, tail = split(filename)
        root, ext = splitext(tail)

        e, z, pnue, pnum, pnut = np.genfromtxt(filename).T

        shape0 = len(np.unique(e))
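        # Sanity check (not in the original script): the reshapes below only
        # make sense if the file really is a regular grid in energy and zenith
        assert len(e) % shape0 == 0, "input is not a regular energy/zenith grid"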

        ee = e.reshape((shape0,-1))
        zz = z.reshape((shape0,-1))
        pnue = pnue.reshape((shape0,-1))
        pnum = pnum.reshape((shape0,-1))
        pnut = pnut.reshape((shape0,-1))

        levels = np.linspace(0,1,101)
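        # One filled contour map of the oscillation probability per final
        # flavor below. The title is only drawn in interactive mode, matching
        # the save/show convention used by the other scripts in this analysis.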

        for prob, suffix, label in ((pnue, "nue", r"$\nu_e$"),
                                    (pnum, "num", r"$\nu_\mu$"),
                                    (pnut, "nut", r"$\nu_\tau$")):
            plt.figure()
            plt.contourf(ee,zz,prob,levels=levels)
            plt.gca().set_xscale('log')
            plt.xlabel("Energy (GeV)")
            plt.ylabel("Cos(Zenith)")
            plt.colorbar()
            plt.tight_layout()
            if args.save:
                plt.savefig("%s_%s.pdf" % (root, suffix))
                plt.savefig("%s_%s.eps" % (root, suffix))
            else:
                plt.title("Probability to oscillate to %s" % label)

    plt.show()
for run, df in rhdr.groupby('run'):
    if args.run_list and run not in run_list:
        continue
    evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
ev = pd.concat(evs).reset_index()

livetime = 0.0
livetime_pulse_gt = 0.0
for _ev in evs:
    if not np.isnan(_ev.attrs['time_10_mhz']):
        livetime += _ev.attrs['time_10_mhz']
    else:
        livetime += _ev.attrs['time_pulse_gt']
    livetime_pulse_gt += _ev.attrs['time_pulse_gt']

print("livetime = %.2f" % livetime)
print("livetime (pulse gt) = %.2f" % livetime_pulse_gt)

if args.run_info:
    livetime_run_info = 0.0
    run_info = np.genfromtxt(args.run_info,usecols=range(4),dtype=(int,int,np.double,np.double))
    for run in set(ev.run.values):
        for i in range(run_info.shape[0]):
            if run_info[i][0] == run:
                livetime_run_info += run_info[i][3]
    print("livetime (run info) = %.2f" % livetime_run_info)

ev = correct_energy_bias(ev)

# Note: We loop over the MC filenames here instead of just passing the whole
# list to get_events() because I had to rerun some of the MC events using
# SNOMAN and so most of the runs actually have two different files and
# otherwise the GTIDs will clash
ev_mcs = []
for filename in args.mc:
    ev_mcs.append(get_events([filename], merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True))
ev_mc = pd.concat([ev_mc for ev_mc in ev_mcs if len(ev_mc) > 0]).reset_index()

muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True)
weights = pd.concat([pd.read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)

# Add the "flux_weight" column to the ev_mc data since I stupidly simulated
# the muon neutrino flux for the tau neutrino flux in GENIE. Doh!
mcpl = load_mcpl_files(args.mcpl)
ev_mc = renormalize_data(ev_mc.reset_index(),mcpl)

# Merge weights with the MCPL dataframe to get the unique id column in the
# weights dataframe since that is what we use to merge with the Monte Carlo.
weights = pd.merge(weights,mcpl[['run','evn','unique_id']],on=['run','evn'],how='left')

# There are a handful of weights which turn out to be slightly negative for
# some reason.
# For example:
#
#    run  evn  universe    weight
#  10970   25       597 -0.000055
#  11389   87       729 -0.021397
#  11701  204         2 -0.000268
#  11919  120        82 -0.002245
#  11976  163        48 -0.000306
#  11976  163       710 -0.000022
#  12131   76       175 -0.000513
#  12207   70       255 -0.002925
#  12207   70       282 -0.014856
#  12207   70       368 -0.030593
#  12207   70       453 -0.019011
#  12207   70       520 -0.020748
#  12207   70       834 -0.028754
#  12207   70       942 -0.020309
#  12233  230       567 -0.000143
#  12618  168       235 -0.000020
#  13428  128        42 -0.083639
#  14264   23       995 -0.017637
#  15034   69       624 -0.000143
#  15752  154       957 -0.006827
weights = weights[weights.weight > 0]

ev_mc = correct_energy_bias(ev_mc)
muon_mc = correct_energy_bias(muon_mc)

# Set all prompt events in the MC to be muons
muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True

ev = ev.reset_index()
ev_mc = ev_mc.reset_index()
muon_mc = muon_mc.reset_index()

# 00-orphan cut
ev = ev[(ev.gtid & 0xff) != 0]
ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0]
muon_mc = muon_mc[(muon_mc.gtid & 0xff) != 0]

# remove events 200 microseconds after a muon
ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)

# Get rid of events which don't have a successful fit
ev = ev[~np.isnan(ev.fmin)]
ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
muon_mc = muon_mc[~np.isnan(muon_mc.fmin)]

# require (r < av radius)
ev = ev[ev.r < AV_RADIUS]
ev_mc = ev_mc[ev_mc.r < AV_RADIUS]
muon_mc = muon_mc[muon_mc.r < AV_RADIUS]

fiducial_volume = (4/3)*np.pi*(AV_RADIUS)**3

# require psi < 6
ev = ev[ev.psi < 6]
ev_mc = ev_mc[ev_mc.psi < 6]
muon_mc = muon_mc[muon_mc.psi < 6]

data = ev[ev.signal & ev.prompt & ~ev.atm]
data_atm = ev[ev.signal & ev.prompt & ev.atm]

# Right now we use the muon Monte Carlo in the fit. If you want to use the
# actual data, you can comment the next two lines and then uncomment the two
# after that.
muon = muon_mc[muon_mc.muon & muon_mc.prompt & ~muon_mc.atm]
muon_atm = muon_mc[muon_mc.muon & muon_mc.prompt & muon_mc.atm]
#muon = ev[ev.muon & ev.prompt & ~ev.atm]
#muon_atm = ev[ev.muon & ev.prompt & ev.atm]

if (~ev.run.isin(ev_mc.run)).any():
    print_warning("Error! The following runs have no Monte Carlo: %s" % \
        np.unique(ev.run[~ev.run.isin(ev_mc.run)].values))
    sys.exit(1)

if not args.pull and not args.test:
    ev_mc = ev_mc[ev_mc.run.isin(ev.run)]

data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]

bins = {20:np.logspace(np.log10(20),np.log10(10e3),21),
        22:np.logspace(np.log10(20),np.log10(10e3),21)[:-5],
        2020:np.logspace(np.log10(20),np.log10(10e3),21),
        2022:np.logspace(np.log10(20),np.log10(10e3),21)[:-5],
        2222:np.logspace(np.log10(20),np.log10(10e3),21)[:-5]}

atmo_scale_factor = 100.0
muon_scale_factor = len(muon) + len(muon_atm)

if args.pull:
    pull = [[] for i in range(len(FIT_PARS))]

    # Set the random seed so we get reproducible results here
    np.random.seed(0)

    for i in range(args.pull):
        xtrue = sample_priors()

        # Calculate expected number of events
        N = len(data_mc)*xtrue[0]/atmo_scale_factor
        N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
        N_muon = len(muon)*xtrue[5]/muon_scale_factor
        N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
        N_dm = xtrue[6]

        # Calculate observed number of events
        n = np.random.poisson(N)
        n_atm = np.random.poisson(N_atm)
        n_muon = np.random.poisson(N_muon)
        n_muon_atm = np.random.poisson(N_muon_atm)
        n_dm = np.random.poisson(N_dm)

        dm_particle_id = np.random.choice([2020,2222])
        dm_mass = np.random.uniform(20,10e3)
        dm_energy = dm_mass

        # Sample data from Monte Carlo
        data = pd.concat((data_mc.sample(n=n,weights='flux_weight',replace=True),
                          muon.sample(n=n_muon,replace=True)))
        data_atm = pd.concat((data_atm_mc.sample(n=n_atm,weights='flux_weight',replace=True),
                              muon_atm.sample(n=n_muon_atm,replace=True)))

        # Smear the energies by the additional energy resolution
        data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
        data.loc[data.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id1 == 22))*xtrue[4])
        data.loc[data.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id2 == 20))*xtrue[2])
        data.loc[data.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id2 == 22))*xtrue[4])
        data['ke'] = data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energy3'].fillna(0)

        data_atm.loc[data_atm.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id1 == 20))*xtrue[2])
        data_atm.loc[data_atm.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id1 == 22))*xtrue[4])
        data_atm.loc[data_atm.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id2 == 20))*xtrue[2])
        data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
        data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)

        xopt, universe, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin,refit=False)

        for i in range(len(FIT_PARS)):
            # The "pull plots" we make here are actually produced via a
            # procedure called "Simulation Based Calibration".
            #
            # See https://arxiv.org/abs/1804.06788.
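            #
            # The key property being checked: if the fit is well calibrated,
            # the rank (percentile) of each true parameter value among its
            # posterior samples is uniformly distributed, so the histograms
            # drawn below should be flat up to Poisson fluctuations (the
            # shaded band).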
            pull[i].append(percentileofscore(samples[:,i],xtrue[i]))

    fig = plt.figure()
    axes = []
    for i, name in enumerate(FIT_PARS):
        axes.append(plt.subplot(4,2,i+1))
        # note: don't call the bin edges "bins" here since that would shadow
        # the binning dictionary defined above
        n, hist_bins, patches = plt.hist(pull[i],bins=np.linspace(0,100,11),histtype='step')
        expected = len(pull[i])/(len(hist_bins)-1)
        plt.axhline(expected,color='k',ls='--',alpha=0.25)
        plt.axhspan(poisson.ppf(0.005,expected),
                    poisson.ppf(0.995,expected),
                    facecolor='0.5',
                    alpha=0.25)
        plt.title(name)
    for ax in axes:
        despine(ax=ax,left=True,trim=True)
        ax.get_yaxis().set_visible(False)
    plt.tight_layout()

    if args.save:
        fig.savefig("dm_search_pull_plot.pdf")
        fig.savefig("dm_search_pull_plot.eps")
    else:
        plt.show()

    sys.exit(0)

if args.test:
    # Set the random seed so we get reproducible results here
    np.random.seed(0)

    data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == 0],how='left',on=['run','unique_id'])
    data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == 0],how='left',on=['run','unique_id'])

    data_mc_with_weights.weight *= data_mc_with_weights.flux_weight
    data_atm_mc_with_weights.weight *= data_atm_mc_with_weights.flux_weight

    discoveries = 0

    for i in range(args.test):
        xtrue = sample_priors()

        # Calculate expected number of events
        N = len(data_mc)*xtrue[0]/atmo_scale_factor
        N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
        N_muon = len(muon)*xtrue[5]/muon_scale_factor
        N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor

        # Calculate observed number of events
        n = np.random.poisson(N)
        n_atm = np.random.poisson(N_atm)
        n_muon = np.random.poisson(N_muon)
        n_muon_atm = np.random.poisson(N_muon_atm)

        # Sample data from Monte Carlo
        data = pd.concat((data_mc_with_weights.sample(n=n,replace=True,weights='weight'),
                          muon.sample(n=n_muon,replace=True)))
        data_atm = pd.concat((data_atm_mc_with_weights.sample(n=n_atm,replace=True,weights='weight'),
                              muon_atm.sample(n=n_muon_atm,replace=True)))

        # Smear the energies by the additional energy resolution
        data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
        data.loc[data.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id1 == 22))*xtrue[4])
        data.loc[data.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id2 == 20))*xtrue[2])
        data.loc[data.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id2 == 22))*xtrue[4])
        data['ke'] = data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energ3'].fillna(0) if False else data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energy3'].fillna(0)

        data_atm.loc[data_atm.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id1 == 20))*xtrue[2])
        data_atm.loc[data_atm.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id1 == 22))*xtrue[4])
        data_atm.loc[data_atm.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id2 == 20))*xtrue[2])
        data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
        data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)

        limits, best_fit, discovery_array = get_limits(DM_MASSES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)

        for id in (2020,2222):
            if (best_fit[id] > discovery_array[id]).any():
                discoveries += 1

    print("expected discovery fraction = %.2f" % DISCOVERY_P_VALUE)
    print("actual discovery fraction = %i/%i = %.2f" % (discoveries,args.test,discoveries/args.test))

    sys.exit(0)
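
# Finally, run the limit-setting procedure on the real data. The conversion
# below turns the limit on the number of events into an event rate limit in
# events/m^3/year, assuming fiducial_volume is in cm^3 (AV_RADIUS is in cm)
# and livetime is in seconds.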
limits, best_fit, discovery_array = get_limits(DM_MASSES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)

fig = plt.figure()
for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
    plt.plot(DM_MASSES[dm_particle_id],
             np.array(limits[dm_particle_id])*100**3*3600*24*365/fiducial_volume/livetime,
             color=color,
             label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
plt.gca().set_xscale("log")
despine(fig,trim=True)
plt.xlabel("Energy (MeV)")
plt.ylabel(r"Event Rate Limit (events/$\mathrm{m}^3$/year)")
plt.legend()
plt.tight_layout()

if args.save:
    plt.savefig("dm_search_limit.pdf")
    plt.savefig("dm_search_limit.eps")
else:
    plt.suptitle("Dark Matter Limits")

fig = plt.figure()
for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
    plt.plot(DM_MASSES[dm_particle_id],best_fit[dm_particle_id],color=color,
             label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
    plt.plot(DM_MASSES[dm_particle_id],discovery_array[dm_particle_id],color=color,ls='--')
plt.gca().set_xscale("log")
despine(fig,trim=True)
plt.xlabel("Energy (MeV)")
plt.ylabel("Best Fit Dark Matter Events")
plt.legend()
plt.tight_layout()

if args.save:
    plt.savefig("dm_best_fit_with_discovery_threshold.pdf")
    plt.savefig("dm_best_fit_with_discovery_threshold.eps")
else:
    plt.suptitle("Best Fit Dark Matter")

plt.show()
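
# A toy illustration (not the analysis code, which lives in get_limits) of how
# a discovery threshold like discovery_array could be derived from
# DISCOVERY_P_VALUE: take the smallest count that background-only
# pseudo-experiments would exceed with probability below DISCOVERY_P_VALUE.
def toy_discovery_threshold(background_expectation, p_value=DISCOVERY_P_VALUE):
    # poisson.isf(p, mu) returns the smallest k with P(N > k | mu) <= p, so
    # observing more than k events is "surprising" at the p level
    return poisson.isf(p_value, background_expectation)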