utils/cat-grid-jobs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
"""
Script to combine the fit results from jobs submitted to the grid.

This script first runs zdab-cat on the zdab file to get the data cleaning words
and SNOMAN fitter results for every event in the file. It then adds any fit
results from the other files listed on the command line and prints the results
as YAML to stdout.

Example:

    $ cat-grid-jobs --dir ~/sddm/src/ ~/mc_atm_nu_no_osc_genie_010000_0.mcds ~/grid_job_results/*.txt > output.txt

"""

from __future__ import print_function, division
import yaml
try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml.loader import SafeLoader as Loader

def collect_fit_results(data, fit_results):
    """
    Add the fit results from one grid job output to `fit_results`.

    `data` is the parsed YAML content of a single fit results file (or None if
    the file was empty) and `fit_results` is a dictionary mapping
    (run, gtid) -> fit results which is updated in place. If the same
    (run, gtid) shows up more than once, the last result seen wins.
    """
    if data is None:
        return

    # A file with a header but an empty 'data' entry has nothing to add.
    for event in data.get('data') or []:
        if event['ev'] is None:
            continue

        # if the ev branch is filled in, it means the event was fit
        for ev in event['ev']:
            # add the git SHA1 hash to the fit results since technically it
            # could be different than the version in zdab-cat
            ev['fit']['git_sha1'] = data['git_sha1']
            ev['fit']['git_dirty'] = data['git_dirty']
            fit_results[(ev['run'],ev['gtid'])] = ev['fit']


def attach_fit_results(data, fit_results):
    """
    Attach fit results to the events in one document printed by zdab-cat.

    Every event in data['ev'] whose (run, gtid) is a key of `fit_results` gets
    the matching entry stored under its 'fit' key. Documents which are not
    dictionaries or which have no events are left untouched.

    Returns a tuple (number of events, number of events with a fit result).
    """
    if not isinstance(data, dict):
        return 0, 0

    total_events = 0
    events_with_fit = 0

    for ev in data.get('ev') or []:
        key = (ev['run'], ev['gtid'])

        if key in fit_results:
            ev['fit'] = fit_results[key]
            events_with_fit += 1

        total_events += 1

    return total_events, events_with_fit


def main():
    """
    Parse the command line, merge the fit results from the grid jobs into the
    zdab-cat output, and print the merged documents as YAML to stdout.
    """
    import argparse
    import subprocess
    from os.path import join
    import os
    import sys

    # Note: the text has to be passed as `description`. The first positional
    # argument of ArgumentParser is `prog`, i.e. the program name shown in the
    # usage line.
    parser = argparse.ArgumentParser(description="concatenate fit results from grid jobs into a single file")
    parser.add_argument("--dir", type=str, help="fitter directory", required=True)
    parser.add_argument("zdab", help="zdab input file")
    parser.add_argument("filenames", nargs='+', help="input files")
    args = parser.parse_args()

    fit_results = {}

    # First we create a dictionary mapping (run, gtid) -> fit results.
    #
    # NOTE(review): Loader is yaml.CLoader whenever the C extension is
    # available, and that is not a safe loader. Only run this script on
    # trusted files.
    for filename in args.filenames:
        with open(filename) as f:
            data = yaml.load(f,Loader=Loader)

        collect_fit_results(data, fit_results)

    # Next we get the full event list along with the data cleaning word, FTP
    # position, FTK, and RSP energy from the original zdab and then add the fit
    # results.
    #
    # Note: We send stderr to /dev/null since there can be a lot of warnings
    # about PMT types and fit results
    with open(os.devnull, 'w') as f:
        popen = subprocess.Popen([join(args.dir,"zdab-cat"),args.zdab],stdout=subprocess.PIPE,stderr=f)

    total_events = 0
    events_with_fit = 0

    for data in yaml.load_all(popen.stdout,Loader=Loader):
        if data is None:
            # empty document, nothing to merge or print
            continue

        n_events, n_with_fit = attach_fit_results(data, fit_results)
        total_events += n_events
        events_with_fit += n_with_fit

        # Print every document as soon as it has been merged. Printing once
        # after the loop would only write out the last document. The explicit
        # "---" marker keeps the documents separate in the output stream.
        yaml.dump(data,sys.stdout,default_flow_style=False,explicit_start=True)

    popen.stdout.close()
    returncode = popen.wait()

    if returncode != 0:
        print("warning: zdab-cat exited with status %i, the output may be incomplete" % returncode,file=sys.stderr)

    # Print out number of fit results that were added. Hopefully, this will
    # make it easy to catch an error if, for example, this gets run with a
    # mismatching zdab and fit results
    print("added %i/%i fit results to a total of %i events" % (events_with_fit, len(fit_results), total_events),file=sys.stderr)


if __name__ == '__main__':
    main()