1 files changed, 62 insertions, 7 deletions
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index 84623b8..f71205d 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -13,6 +13,19 @@
 #
 # You should have received a copy of the GNU General Public License along with
 # this program. If not, see <https://www.gnu.org/licenses/>.
+"""
+Script to combine the fit results from jobs submitted to the grid.
+
+This script reads the fit results from the files listed on the command line and
+runs zdab-cat on the zdab file to get the data cleaning words and SNOMAN fitter
+results for every event in the file. It then attaches each fit result to the
+event with the same (run, gtid) and prints everything as YAML to stdout.
+
+Example:
+
+    $ cat-grid-jobs --dir ~/sddm/src/ ~/mc_atm_nu_no_osc_genie_010000_0.mcds ~/grid_job_results/*.txt > output.txt
+
+"""
 
 from __future__ import print_function, division
 import yaml
@@ -25,25 +38,67 @@ if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
     import numpy as np
+    from subprocess import check_output
+    from os.path import join
+    import os
+    import sys
 
     parser = argparse.ArgumentParser("concatenate fit results from grid jobs into a single file")
+    parser.add_argument("--dir", type=str, help="fitter directory", required=True)
+    parser.add_argument("zdab", help="zdab input file")
     parser.add_argument("filenames", nargs='+', help="input files")
     args = parser.parse_args()
 
-    cat = []
+    fit_results = {}
 
+    # First we create a dictionary mapping (run, gtid) -> fit results.
     for filename in args.filenames:
         with open(filename) as f:
-            data = yaml.load(f.read(),Loader=Loader)
+            data = yaml.load(f,Loader=Loader)
 
         if data is None:
             continue
 
         for event in data['data']:
-            if event['ev'] is not None:
-                # if the ev branch is filled in, it means the event was fit
-                cat.append(event)
+            if event['ev'] is None:
+                continue
+
+            # if the ev branch is filled in, it means the event was fit
+            for ev in event['ev']:
+                # add the git SHA1 hash and dirty flag to the fit results since
+                # technically they could differ from the version in zdab-cat
+                ev['fit']['git_sha1'] = data['git_sha1']
+                ev['fit']['git_dirty'] = data['git_dirty']
+                fit_results[(ev['run'],ev['gtid'])] = ev['fit']
+
+    # Next we get the full event list along with the data cleaning word, FTP
+    # position, FTK, and RSP energy from the original zdab and then add the fit
+    # results.
+    #
+    # Note: We send stderr to /dev/null since there can be a lot of warnings
+    # about PMT types and fit results
+    with open(os.devnull, 'w') as f:
+        output = check_output([join(args.dir,"zdab-cat"),args.zdab],stderr=f)
+
+    data = yaml.load(output,Loader=Loader)
+
+    total_events = 0
+    events_with_fit = 0
+
+    for i, event in enumerate(data['data']):
+        for ev in event['ev']:
+            run = ev['run']
+            gtid = ev['gtid']
+
+            if (run,gtid) in fit_results:
+                ev['fit'] = fit_results[(run,gtid)]
+                events_with_fit += 1
+
+            total_events += 1
 
-    cat = sorted(cat,key=lambda event: event['ev'][0]['gtid'])
+    # Print out the number of fit results that were added. Hopefully, this will
+    # make it easy to catch an error if, for example, this gets run with a
+    # zdab and fit results that don't match.
+    print("added %i/%i fit results to a total of %i events" % (events_with_fit, len(fit_results), total_events),file=sys.stderr)
 
-    print(yaml.dump({'data':cat},default_flow_style=False))
+    print(yaml.dump(data,default_flow_style=False))