utils/plot-orphans


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script to make a plot of the number of high nhit orphans for each run. To
run it:

    $ ./plot-orphans [list of orphan data files]
"""
from __future__ import print_function, division
import numpy as np

if __name__ == '__main__':
    # Command-line entry point: count the high nhit orphans in each run,
    # optionally write out the list of runs with few orphans, and histogram the
    # per-run counts.
    import argparse
    import pandas as pd
    from sddm import setup_matplotlib, read_hdf
    from sddm.plot import despine

    # Runs with at least this many high nhit orphans are left out of the
    # --output run list; the same value is marked on the histogram.
    MAX_ORPHANS_PER_RUN = 100

    # The text is passed as `description`: the first positional argument of
    # ArgumentParser is `prog`, which would replace the program name in the
    # usage message.
    parser = argparse.ArgumentParser(description="plot the number of high nhit orphans per run")
    parser.add_argument("filenames", nargs='+', help="input files")
    parser.add_argument("--save", action='store_true', default=False, help="save the plot to orphans.pdf and orphans.eps instead of showing it")
    parser.add_argument("--nhit-thresh", type=int, default=100, help="nhit threshold for orphans")
    parser.add_argument("-o", "--output", default=None, help="output filename for the list of runs with fewer than %i high nhit orphans" % MAX_ORPHANS_PER_RUN)
    args = parser.parse_args()

    # Called before pyplot is imported, as in the original ordering.
    setup_matplotlib(args.save)

    import matplotlib.pyplot as plt

    # Maps run number -> number of orphans with nhit >= args.nhit_thresh.
    orphans_per_run = {}

    # Loop over runs to prevent using too much memory
    rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
    for run, df in rhdr.groupby('run'):
        # NOTE(review): the run headers above are read with sddm's read_hdf
        # while the events are read with pandas' read_hdf -- confirm that this
        # difference is intentional.
        ev = pd.concat([pd.read_hdf(filename,"ev") for filename in df.filename.values])
        # Keep every event except those whose gtid, gtr and trg_type are all
        # zero.
        orphans = ev[~((ev.gtid == 0) & (ev.gtr == 0) & (ev.trg_type == 0))]
        orphans_per_run[run] = len(orphans[orphans.nhit >= args.nhit_thresh])

    if args.output:
        # dict.items() works on both Python 2 and 3 (iteritems() is Python 2
        # only).
        good_runs = sorted(run for run, n_orphans in orphans_per_run.items() if n_orphans < MAX_ORPHANS_PER_RUN)
        np.savetxt(args.output,good_runs,fmt='%i')

    fig = plt.figure()
    # On Python 3 dict.values() is a view which does not convert to a numeric
    # array, so materialize it as a list before histogramming.
    plt.hist(list(orphans_per_run.values()),bins=np.linspace(0,1000,101),histtype='step')
    plt.axvline(x=MAX_ORPHANS_PER_RUN,ls='--',color='k')
    # The selection above uses >=, so the label says so too.
    plt.xlabel(r"Number of orphans with Nhit $\geq$ %i" % args.nhit_thresh)
    plt.gca().set_yscale("log")
    despine(fig,trim=True)
    plt.tight_layout()
    if args.save:
        plt.savefig("orphans.pdf")
        plt.savefig("orphans.eps")
    else:
        plt.title(r"Number of High Nhit Orphans Per Run")
        plt.show()