update cat-grid-jobs to not use hdf5 resize

This commit updates cat-grid-jobs to collect the fits from each fit results file and add them all at once at the end, instead of continuously resizing the fits dataset. I noticed that several fit results files would occasionally have a large block of fits set to all zeros. I have no idea how this happened, but I suspect it might have been caused by a bug triggered by resizing the dataset so many times.
author: tlatorre <tlatorre@uchicago.edu> 2020-06-22 09:03:51 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-06-22 09:03:51 -0500
commit: d64a97a832d4866d286ec48ba0f8f15ac471d046 (patch)
tree: f2b66cb9c2a6ad2fca4b5c2387dae5f238aea583 /utils/cat-grid-jobs
parent: da3448e12c7fdfb00e6d5f2a76cf4ec3ceb2ae7f (diff)
download: sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.gz
sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.bz2
sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.zip
1 files changed, 13 insertions, 9 deletions
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index 759d273..27a4024 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -128,6 +128,8 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None):
             if 'reprocessed' in filename:
                 fout.attrs['reprocessed'] = 1
 
+            fits = []
+
             total_events = fout['ev'].shape[0]
             for fit_result_filename in fit_results:
                 fit_result_head, fit_result_tail = split(fit_result_filename)
@@ -140,18 +142,20 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None):
                     if 'git_sha1' not in f.attrs:
                         log.warn("No git sha1 found for %s. Skipping..." % fit_result_tail)
                         continue
+
                     # Check to see if the git sha1 match
                     if fout.attrs['git_sha1'] != f.attrs['git_sha1']:
                         log.debug("git_sha1 is %s for current version but %s for %s" % (fout.attrs['git_sha1'],f.attrs['git_sha1'],fit_result_tail))
-                    # get fits which match up with the events
-                    valid_fits = f['fits'][np.isin(f['fits'][:][['run','gtid']],fout['ev'][:][['run','gtid']])]
-                    # Add the fit results
-                    fout['fits'].resize((fout['fits'].shape[0]+valid_fits.shape[0],))
-                    fout['fits'][-valid_fits.shape[0]:] = valid_fits
-                    events_with_fit += len(np.unique(valid_fits[['run','gtid']]))
-                    total_fits += len(np.unique(f['fits']['run','gtid']))
-
-        log.notice("%s (%s): added %i/%i fit results to a total of %i events" % (tail, uuid, events_with_fit, total_fits, total_events))
+
+                    fits.append(f['fits'][:])
+
+                    events_with_fit += len(np.unique(fits[-1][['run','gtid']]))
+                    total_fits += fits[-1].shape[0]
+
+            del fout['fits']
+            fout.create_dataset('fits',data=np.concatenate(fits))
+
+        log.notice("%s (%s): added %i fit results from %i events to a total of %i events" % (tail, uuid, total_fits, events_with_fit, total_events))
 
 if __name__ == '__main__':
     import argparse
author	tlatorre <tlatorre@uchicago.edu>	2020-06-22 09:03:51 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2020-06-22 09:03:51 -0500
commit	d64a97a832d4866d286ec48ba0f8f15ac471d046 (patch)
tree	f2b66cb9c2a6ad2fca4b5c2387dae5f238aea583 /utils/cat-grid-jobs
parent	da3448e12c7fdfb00e6d5f2a76cf4ec3ceb2ae7f (diff)
download	sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.gz sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.bz2 sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.zip