diff options
author | tlatorre <tlatorre@uchicago.edu> | 2020-06-22 09:03:51 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2020-06-22 09:03:51 -0500 |
commit | d64a97a832d4866d286ec48ba0f8f15ac471d046 (patch) | |
tree | f2b66cb9c2a6ad2fca4b5c2387dae5f238aea583 /utils/cat-grid-jobs | |
parent | da3448e12c7fdfb00e6d5f2a76cf4ec3ceb2ae7f (diff) | |
download | sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.gz sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.tar.bz2 sddm-d64a97a832d4866d286ec48ba0f8f15ac471d046.zip |
update cat-grid-jobs to not use hdf5 resize
This commit updates cat-grid-jobs to just add all the fits at once at the end
instead of continuously resizing the fits dataset. The reason for this is that
I noticed that several fit result files would occasionally have a large block
of the fits set to all zeros. I have no idea how this happened, but I
suspect it might have been a bug with resizing the dataset so many times.
Diffstat (limited to 'utils/cat-grid-jobs')
-rwxr-xr-x | utils/cat-grid-jobs | 22 |
1 files changed, 13 insertions, 9 deletions
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs index 759d273..27a4024 100755 --- a/utils/cat-grid-jobs +++ b/utils/cat-grid-jobs @@ -128,6 +128,8 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None): if 'reprocessed' in filename: fout.attrs['reprocessed'] = 1 + fits = [] + total_events = fout['ev'].shape[0] for fit_result_filename in fit_results: fit_result_head, fit_result_tail = split(fit_result_filename) @@ -140,18 +142,20 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None): if 'git_sha1' not in f.attrs: log.warn("No git sha1 found for %s. Skipping..." % fit_result_tail) continue + # Check to see if the git sha1 match if fout.attrs['git_sha1'] != f.attrs['git_sha1']: log.debug("git_sha1 is %s for current version but %s for %s" % (fout.attrs['git_sha1'],f.attrs['git_sha1'],fit_result_tail)) - # get fits which match up with the events - valid_fits = f['fits'][np.isin(f['fits'][:][['run','gtid']],fout['ev'][:][['run','gtid']])] - # Add the fit results - fout['fits'].resize((fout['fits'].shape[0]+valid_fits.shape[0],)) - fout['fits'][-valid_fits.shape[0]:] = valid_fits - events_with_fit += len(np.unique(valid_fits[['run','gtid']])) - total_fits += len(np.unique(f['fits']['run','gtid'])) - - log.notice("%s (%s): added %i/%i fit results to a total of %i events" % (tail, uuid, events_with_fit, total_fits, total_events)) + + fits.append(f['fits'][:]) + + events_with_fit += len(np.unique(fits[-1][['run','gtid']])) + total_fits += fits[-1].shape[0] + + del fout['fits'] + fout.create_dataset('fits',data=np.concatenate(fits)) + + log.notice("%s (%s): added %i fit results from %i events to a total of %i events" % (tail, uuid, total_fits, events_with_fit, total_events)) if __name__ == '__main__': import argparse |