about summary refs log tree commit diff
path: root/utils/cat-grid-jobs
diff options
context:
space:
mode:
author: tlatorre <tlatorre@uchicago.edu> 2020-06-22 09:03:51 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-06-22 09:03:51 -0500
commit: d64a97a832d4866d286ec48ba0f8f15ac471d046 (patch)
tree: f2b66cb9c2a6ad2fca4b5c2387dae5f238aea583 /utils/cat-grid-jobs
parent: da3448e12c7fdfb00e6d5f2a76cf4ec3ceb2ae7f (diff)
download: sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.gz
sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.bz2
sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.zip
update cat-grid-jobs to not use hdf5 resize
This commit updates cat-grid-jobs to add all the fits at once at the end instead of continuously resizing the fits dataset. The reason is that I noticed that several fit result files would occasionally have a large block of the fits set to all zeros. I have no idea how this happened, but I suspect it might have been a bug caused by resizing the dataset so many times.
Diffstat (limited to 'utils/cat-grid-jobs')
-rwxr-xr-x utils/cat-grid-jobs 22
1 files changed, 13 insertions, 9 deletions
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index 759d273..27a4024 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -128,6 +128,8 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None):
if 'reprocessed' in filename:
fout.attrs['reprocessed'] = 1
+ fits = []
+
total_events = fout['ev'].shape[0]
for fit_result_filename in fit_results:
fit_result_head, fit_result_tail = split(fit_result_filename)
@@ -140,18 +142,20 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None):
if 'git_sha1' not in f.attrs:
log.warn("No git sha1 found for %s. Skipping..." % fit_result_tail)
continue
+
# Check to see if the git sha1 match
if fout.attrs['git_sha1'] != f.attrs['git_sha1']:
log.debug("git_sha1 is %s for current version but %s for %s" % (fout.attrs['git_sha1'],f.attrs['git_sha1'],fit_result_tail))
- # get fits which match up with the events
- valid_fits = f['fits'][np.isin(f['fits'][:][['run','gtid']],fout['ev'][:][['run','gtid']])]
- # Add the fit results
- fout['fits'].resize((fout['fits'].shape[0]+valid_fits.shape[0],))
- fout['fits'][-valid_fits.shape[0]:] = valid_fits
- events_with_fit += len(np.unique(valid_fits[['run','gtid']]))
- total_fits += len(np.unique(f['fits']['run','gtid']))
-
- log.notice("%s (%s): added %i/%i fit results to a total of %i events" % (tail, uuid, events_with_fit, total_fits, total_events))
+
+ fits.append(f['fits'][:])
+
+ events_with_fit += len(np.unique(fits[-1][['run','gtid']]))
+ total_fits += fits[-1].shape[0]
+
+ del fout['fits']
+ fout.create_dataset('fits',data=np.concatenate(fits))
+
+ log.notice("%s (%s): added %i fit results from %i events to a total of %i events" % (tail, uuid, total_fits, events_with_fit, total_events))
if __name__ == '__main__':
import argparse