aboutsummaryrefslogtreecommitdiff
path: root/utils/plot-energy
blob: 2fb2aa12ba9490310f5561b38813182a8dd8b783 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script to recreate MCPL files for events which never got simulated in SNOMAN.
"""
from __future__ import print_function, division
import numpy as np
import pandas as pd
from sddm import read_hdf
import gzip

def read_mcpl(filename):
    data = np.genfromtxt(filename,skip_header=1)
    rv = {}
    n = 1
    i = 1
    with gzip.open(filename,"r") as f:
        lines = f.readlines()
        nevents = int(lines[0].split()[0])
        while True:
            nparticles = int(data[i-1,0])
            rv[n] = lines[i:i+nparticles]
            n += 1
            i += nparticles

            if i - 1 >=
#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script to plot final fit results along with sidebands for the dark matter
analysis. To run it just run:

    $ ./plot-energy [list of fit results]

Currently it will plot energy distributions for external muons, michel
electrons, atmospheric events with neutron followers, and prompt signal like
events. Each of these plots will have a different subplot for the particle ID
of the best fit, i.e. single electron, single muon, double electron, electron +
muon, or double muon.
"""
from __future__ import print_function, division
import numpy as np
from scipy.stats import iqr, poisson
from matplotlib.lines import Line2D
from scipy.stats import iqr, norm, beta
from scipy.special import spence
from itertools import chain

particle_id = {20: 'e', 22: r'\mu'}

def plot_hist2(df, muons=False):
    for id, df_id in sorted(df.groupby('id')):
        if id == 20:
            plt.subplot(2,3,1)
        elif id == 22:
            plt.subplot(2,3,2)
        elif id == 2020:
            plt.subplot(2,3,4)
        elif id == 2022:
            plt.subplot(2,3,5)
        elif id == 2222:
            plt.subplot(2,3,6)

        if muons:
            plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
            plt.xlabel("log10(Energy (GeV))")
        else:
            bins = np.logspace(np.log10(20),np.log10(10e3),21)
            plt.hist(df_id.ke.values, bins=bins, histtype='step')
            plt.gca().set_xscale("log")
            plt.xlabel("Energy (MeV)")
            major = np.array([10,100,1000,10000])
            minor = np.unique(list(chain(*list(range(i,i*10,i) for i in major[:-1]))))
            minor = np.setdiff1d(minor,major)
            plt.gca().set_xticks(major)
            plt.gca().set_xticks(minor,minor=True)
        plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')

    if len(df):
        plt.tight_layout()

def plot_hist(df, muons=False):
    for id, df_id in sorted(df.groupby('id')):
        if id == 20:
            plt.subplot(3,4,1)
        elif id == 22:
            plt.subplot(3,4,2)
        elif id == 2020:
            plt.subplot(3,4,5)
        elif id == 2022:
            plt.subplot(3,4,6)
        elif id == 2222:
            plt.subplot(3,4,7)
        elif id == 202020:
            plt.subplot(3,4,9)
        elif id == 202022:
            plt.subplot(3,4,10)
        elif id == 202222:
            plt.subplot(3,4,11)
        elif id == 222222:
            plt.subplot(3,4,12)

        if muons:
            plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
            plt.xlabel("log10(Energy (GeV))")
        else:
            plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
            plt.xlabel("Energy (MeV)")
        plt.title(str(id))

    if len(df):
        plt.tight_layout()

if __name__ == '__main__':
    import argparse
    import numpy as np
    import pandas as pd
    import sys
    import h5py
    from sddm.plot_energy import *
    from sddm.plot import *
    from sddm import setup_matplotlib

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
    args = parser.parse_args()

    setup_matplotlib(args.save)

    import matplotlib.pyplot as plt

    ev, fits = get_events(args.filenames)

    # First, do basic data cleaning which is done for all events.
    ev = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN) == 0]

    # 00-orphan cut
    ev = ev[(ev.gtid & 0xff) != 0]

    print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))

    # Now, we select events tagged by the muon tag which should tag only
    # external muons. We keep the sample of muons since it's needed later to
    # identify Michel electrons and to apply the muon follower cut
    muons = ev[(ev.dc & DC_MUON) != 0]

    print("number of muons = %i" % len(muons))

    # Try to identify Michel electrons. Currently, the event selection is based
    # on Richie's thesis. Here, we do the following:
    #
    # 1. Apply more data cleaning cuts to potential Michel electrons
    # 2. Nhit >= 100
    # 3. It must be > 800 ns and less than 20 microseconds from a prompt event
    #    or a muon
    michel = ev[ev.michel]

    print("number of michel events = %i" % len(michel))

    print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))

    # remove events 200 microseconds after a muon
    ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)

    # Get rid of muon events in our main event sample
    ev = ev[(ev.dc & DC_MUON) == 0]

    prompt = ev[ev.prompt & ~ev.atm]

    atm = ev[ev.atm]

    print("number of events after muon cut = %i" % len(prompt))

    # Check to see if there are any events with missing fit information
    atm_ra = atm[['run','gtid']].to_records(index=False)
    muons_ra = muons[['run','gtid']].to_records(index=False)
    prompt_ra = prompt[['run','gtid']].to_records(index=False)
    michel_ra = michel[['run','gtid']].to_records(index=False)
    fits_ra = fits[['run','gtid']].to_records(index=False)

    if len(atm_ra) and np.count_nonzero(~np.isin(atm_ra,fits_ra)):
        print_warning("skipping %i atmospheric events because they are missing fit information!" % np.count_nonzero(~np.isin(atm_ra,fits_ra)))

    if len(muons_ra) and np.count_nonzero(~np.isin(muons_ra,fits_ra)):
        print_warning("skipping %i muon events because they are missing fit information!" % np.count_nonzero(~np.isin(muons_ra,fits_ra)))

    if len(prompt_ra) and np.count_nonzero(~np.isin(prompt_ra,fits_ra)):
        print_warning("skipping %i signal events because they are missing fit information!" % np.count_nonzero(~np.isin(prompt_ra,fits_ra)))

    if len(michel_ra) and np.count_nonzero(~np.isin(michel_ra,fits_ra)):
        print_warning("skipping %i Michel events because they are missing fit information!" % np.count_nonzero(~np.isin(michel_ra,fits_ra)))

    # Now, we merge the event info with the fitter info.
    #
    # Note: This means that the dataframe now contains multiple rows for each
    # event, one for each fit hypothesis.
    atm = pd.merge(fits,atm,how='inner',on=['run','gtid'])
    muons = pd.merge(fits,muons,how='inner',on=['run','gtid'])
    michel = pd.merge(fits,michel,how='inner',on=['run','gtid'])
    prompt = pd.merge(fits,prompt,how='inner',on=['run','gtid'])

    # get rid of events which don't have a fit
    nan = np.isnan(prompt.fmin.values)

    if np.count_nonzero(nan):
        print_warning("skipping %i signal events because the negative log likelihood is nan!" % len(prompt[nan].groupby(['run','gtid'])))

    prompt = prompt[~nan]

    nan_atm = np.isnan(atm.fmin.values)

    if np.count_nonzero(nan_atm):
        print_warning("skipping %i atmospheric events because the negative log likelihood is nan!" % len(atm[nan_atm].groupby(['run','gtid'])))

    atm = atm[~nan_atm]

    nan_muon = np.isnan(muons.fmin.values)

    if np.count_nonzero(nan_muon):
        print_warning("skipping %i muons because the negative log likelihood is nan!" % len(muons[nan_muon].groupby(['run','gtid'])))

    muons = muons[~nan_muon]

    nan_michel = np.isnan(michel.fmin.values)

    if np.count_nonzero(nan_michel):
        print_warning("skipping %i michel electron events because the negative log likelihood is nan!" % len(michel[nan_michel].groupby(['run','gtid'])))

    michel = michel[~nan_michel]

    # get the best fit
    prompt = prompt.sort_values('fmin').groupby(['run','gtid']).nth(0)
    atm = atm.sort_values('fmin').groupby(['run','gtid']).nth(0)
    michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).nth(0)
    muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).nth(0)
    muons = muons[muons.id == 22].sort_values('fmin').groupby(['run','gtid'],as_index=False).nth(0).reset_index(level=0,drop=True)

    # require (r/r_psup)^3 < 0.9
    prompt = prompt[prompt.r_psup < 0.9]
    atm = atm[atm.r_psup < 0.9]

    print("number of events after radius cut = %i" % len(prompt))

    # require psi < 6
    prompt = prompt[prompt.psi < 6]
    atm = atm[atm.psi < 6]

    print("number of events after psi cut = %i" % len(prompt))

    fig = plt.figure()
    plot_hist2(prompt)
    despine(fig,trim=True)
    if args.save:
        plt.savefig("prompt.pdf")
        plt.savefig("prompt.eps")
    else:
        plt.suptitle("Without Neutron Follower")
    fig = plt.figure()
    plot_hist2(atm)
    despine(fig,trim=True)
    if args.save:
        plt.savefig("atm.pdf")
        plt.savefig("atm.eps")
    else:
        plt.suptitle("With Neutron Follower")
    fig = plt.figure()
    plot_hist2(michel_best_fit)
    despine(fig,trim=True)
    if args.save:
        plt.savefig("michel_electrons.pdf")
        plt.savefig("michel_electrons.eps")
    else:
        plt.suptitle("Michel Electrons")
    fig = plt.figure()
    plot_hist2(muon_best_fit,muons=True)
    despine(fig,trim=True)
    if len(muon_best_fit):
        plt.tight_layout()
    if args.save:
        plt.savefig("external_muons.pdf")
        plt.savefig("external_muons.eps")
    else:
        plt.suptitle("External Muons")

    # Plot the energy and angular distribution for external muons
    fig = plt.figure()
    plt.subplot(2,1,1)
    plt.hist(muons.ke.values, bins=np.logspace(3,7,100), histtype='step')
    plt.xlabel("Energy (MeV)")
    plt.gca().set_xscale("log")
    plt.subplot(2,1,2)
    plt.hist(np.cos(muons.theta.values), bins=np.linspace(-1,1,100), histtype='step')
    despine(fig,trim=True)
    plt.xlabel(r"$\cos(\theta)$")
    plt.tight_layout()
    if args.save:
        plt.savefig("muon_energy_cos_theta.pdf")
        plt.savefig("muon_energy_cos_theta.eps")
    else:
        plt.suptitle("Muons")

    # For the Michel energy plot, we only look at the single particle electron
    # fit
    michel = michel[michel.id == 20].sort_values('fmin').groupby(['run','gtid'],as_index=False).nth(0).reset_index(level=0,drop=True)

    stopping_muons = pd.merge(muons,michel,left_on=['run','gtid'],right_on=['run','muon_gtid'],suffixes=('','_michel'))

    if len(stopping_muons):
        # project muon to PSUP
        stopping_muons['dx'] = stopping_muons.apply(get_dx,axis=1)
        # energy based on distance travelled
        stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx)
        stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx']

        fig = plt.figure()
        plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step')
        despine(fig,trim=True)
        plt.xlabel("Fractional energy difference (\%)")
        plt.title("Fractional energy difference for Stopping Muons")
        plt.tight_layout()
        if args.save:
            plt.savefig("stopping_muon_fractional_energy_difference.pdf")
            plt.savefig("stopping_muon_fractional_energy_difference.eps")
        else:
            plt.title("Stopping Muon Fractional Energy Difference")

        # 100 bins between 50 MeV and 10 GeV
        bins = np.arange(50,10000,1000)

        pd_bins = pd.cut(stopping_muons['energy1'],bins)

        T = (bins[1:] + bins[:-1])/2

        dT = stopping_muons.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])

        fig = plt.figure()
        plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T)
        despine(fig,trim=True)
        plt.xlabel("Kinetic Energy (MeV)")
        plt.ylabel(r"Energy bias (\%)")
        plt.tight_layout()
        if args.save:
            plt.savefig("stopping_muon_energy_bias.pdf")
            plt.savefig("stopping_muon_energy_bias.eps")
        else:
            plt.title("Stopping Muon Energy Bias")

        fig = plt.figure()
        plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T)
        despine(fig,trim=True)
        plt.xlabel("Kinetic Energy (MeV)")
        plt.ylabel(r"Energy resolution (\%)")
        plt.tight_layout()
        if args.save:
            plt.savefig("stopping_muon_energy_resolution.pdf")
            plt.savefig("stopping_muon_energy_resolution.eps")
        else:
            plt.title("Stopping Muon Energy Resolution")

    fig = plt.figure()
    bins=np.linspace(0,100,100)
    plt.hist(michel.ke.values, bins=bins, histtype='step', label="Dark Matter Fitter")
    if michel.size:
        plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP")
    x = np.linspace(0,100,1000)
    y = michel_spectrum(x)
    y /= np.trapz(y,x=x)
    N = len(michel)
    plt.plot(x, N*y*(bins[1]-bins[0]), ls='--', color='k', label="Michel Spectrum")
    despine(fig,trim=True)
    plt.xlabel("Energy (MeV)")
    plt.tight_layout()
    plt.legend()
    if args.save:
        plt.savefig("michel_electrons_ke.pdf")
        plt.savefig("michel_electrons_ke.eps")
    else:
        plt.title("Michel Electrons")
    plt.show()