From d64a97a832d4866d286ec48ba0f8f15ac471d046 Mon Sep 17 00:00:00 2001 From: tlatorre Date: Mon, 22 Jun 2020 09:03:51 -0500 Subject: update cat-grid-jobs to not use hdf5 resize This commit updates cat-grid-jobs to add all the fits at once at the end instead of repeatedly resizing the fits dataset. The reason for this is that I noticed that several fit results files would occasionally have a large block of the fits set to all zeros. I have no idea how this happened, but I suspect it might have been a bug triggered by resizing the dataset so many times. --- utils/cat-grid-jobs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'utils/cat-grid-jobs') diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs index 759d273..27a4024 100755 --- a/utils/cat-grid-jobs +++ b/utils/cat-grid-jobs @@ -128,6 +128,8 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None): if 'reprocessed' in filename: fout.attrs['reprocessed'] = 1 + fits = [] + total_events = fout['ev'].shape[0] for fit_result_filename in fit_results: fit_result_head, fit_result_tail = split(fit_result_filename) @@ -140,18 +142,20 @@ def cat_grid_jobs(conn, output_dir, zdab_dir=None): if 'git_sha1' not in f.attrs: log.warn("No git sha1 found for %s. Skipping..."
% fit_result_tail) continue + # Check to see if the git sha1 match if fout.attrs['git_sha1'] != f.attrs['git_sha1']: log.debug("git_sha1 is %s for current version but %s for %s" % (fout.attrs['git_sha1'],f.attrs['git_sha1'],fit_result_tail)) - # get fits which match up with the events - valid_fits = f['fits'][np.isin(f['fits'][:][['run','gtid']],fout['ev'][:][['run','gtid']])] - # Add the fit results - fout['fits'].resize((fout['fits'].shape[0]+valid_fits.shape[0],)) - fout['fits'][-valid_fits.shape[0]:] = valid_fits - events_with_fit += len(np.unique(valid_fits[['run','gtid']])) - total_fits += len(np.unique(f['fits']['run','gtid'])) - - log.notice("%s (%s): added %i/%i fit results to a total of %i events" % (tail, uuid, events_with_fit, total_fits, total_events)) + + fits.append(f['fits'][:]) + + events_with_fit += len(np.unique(fits[-1][['run','gtid']])) + total_fits += fits[-1].shape[0] + + del fout['fits'] + fout.create_dataset('fits',data=np.concatenate(fits)) + + log.notice("%s (%s): added %i fit results from %i events to a total of %i events" % (tail, uuid, total_fits, events_with_fit, total_events)) if __name__ == '__main__': import argparse -- cgit