author     tlatorre <tlatorre@uchicago.edu>    2020-06-15 01:55:28 -0500
committer  tlatorre <tlatorre@uchicago.edu>    2020-06-15 01:55:28 -0500
commit     6d2445aa552cd923df18f753a00d83fc37b356e6 (patch)
tree       aaecea4abb158256cc07cb687e6f57425273caf8 /utils
parent     9d2e3acc7e3b557956ed4159231e57df9ca9c3ff (diff)
update cat-grid-jobs to use reprocessed files if they're available
Diffstat (limited to 'utils')
-rwxr-xr-x  utils/cat-grid-jobs  42
1 file changed, 34 insertions, 8 deletions
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index 0755774..2c6cdf8 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -40,7 +40,7 @@ from sddm.logger import Logger
 
 log = Logger()
 
-def cat_grid_jobs(conn, output_dir):
+def cat_grid_jobs(conn, output_dir, zdab_dir=None):
     zdab_cat = which("zdab-cat")
 
     if zdab_cat is None:
@@ -63,18 +63,31 @@ def cat_grid_jobs(conn, output_dir):
         fit_results = ['%s.hdf5' % splitext(fit_result_filename)[0] for fit_result_filename in fit_results]
 
         if len(fit_results) == 0:
-            log.debug("No fit results found for %s (%s)" % (tail, uuid))
+            log.verbose("No fit results found for %s (%s)" % (tail, uuid))
             continue
 
         output = join(output_dir,"%s_%s_fit_results.hdf5" % (root,uuid))
 
+        if 'reduced' in root:
+            directories = [head]
+            if zdab_dir is not None:
+                directories += [zdab_dir]
+            for directory in directories:
+                for extension in [ext, '.zdab', '.zdab.gz']:
+                    # Use the reprocessed version of the file if possible
+                    reprocessed_filename = join(directory,root.replace('reduced','reprocessed')) + extension
+
+                    if os.path.exists(reprocessed_filename):
+                        log.verbose("Found reprocessed file '%s'. Using that instead of '%s'" % (reprocessed_filename,tail))
+                        filename = reprocessed_filename
+
         if os.path.exists(output):
             total_fits = 0
 
             for fit_result_filename in fit_results:
                 fit_result_head, fit_result_tail = split(fit_result_filename)
                 if not os.path.exists(fit_result_filename):
-                    log.warn("File '%s' does not exist!" % filename)
+                    log.warn("File '%s' does not exist!" % fit_result_filename)
                     continue
 
                 with h5py.File(fit_result_filename,'r') as f:
@@ -84,10 +97,15 @@ def cat_grid_jobs(conn, output_dir):
                     total_fits += f['fits'].shape[0]
 
             with h5py.File(output,'r') as fout:
-                if 'fits' in fout:
-                    if fout['fits'].shape[0] >= total_fits:
-                        log.debug("skipping %s because there are already %i fit results" % (tail,total_fits))
-                        continue
+                if 'reprocessed' in filename and 'reprocessed' not in fout.attrs:
+                    pass
+                elif 'fits' in fout and fout['fits'].shape[0] >= total_fits:
+                    log.verbose("skipping %s because there are already %i fit results" % (tail,total_fits))
+                    continue
+
+        if not os.path.exists(filename):
+            log.warn("File '%s' does not exist!" % filename)
+            continue
 
         # First we get the full event list along with the data cleaning word, FTP
         # position, FTK, and RSP energy from the original zdab and then add the fit
@@ -104,6 +122,12 @@ def cat_grid_jobs(conn, output_dir):
         total_fits = 0
 
         with h5py.File(output,"a") as fout:
+            # Mark the file as being reprocessed so we know in the future if we
+            # already used the reprocessed version instead of the reduced
+            # version
+            if 'reprocessed' in filename:
+                fout.attrs['reprocessed'] = True
+
             total_events = fout['ev'].shape[0]
             for fit_result_filename in fit_results:
                 fit_result_head, fit_result_tail = split(fit_result_filename)
@@ -142,6 +166,8 @@ if __name__ == '__main__':
                         help="filename for log file")
     parser.add_argument('--output-dir', default=None,
                         help="output directory for fit results")
+    parser.add_argument('--zdab-dir', default=None,
+                        help="extra directory to search for zdab files")
     args = parser.parse_args()
 
     log.set_verbosity(args.loglevel)
@@ -163,5 +189,5 @@ if __name__ == '__main__':
     conn = sqlite3.connect(args.db)
 
-    cat_grid_jobs(conn, args.output_dir)
+    cat_grid_jobs(conn, args.output_dir, args.zdab_dir)
 
     conn.close()
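
For context, the core of the change is a preference order for input files: when the grid job was run on a "reduced" zdab, the script now first looks for a "reprocessed" file with the same base name, both next to the original file and in an optional extra directory passed via --zdab-dir, and records in the output HDF5 file whether the reprocessed version was used. The snippet below is a minimal, standalone sketch of that behavior, not code from cat-grid-jobs; the helper names find_reprocessed and mark_reprocessed are illustrative only.

import os
from os.path import join, split, splitext

import h5py


def find_reprocessed(filename, zdab_dir=None):
    """Illustrative helper: return the 'reprocessed' counterpart of a
    'reduced' zdab file if one exists, otherwise the original filename."""
    head, tail = split(filename)
    root, ext = splitext(tail)

    if 'reduced' not in root:
        return filename

    # Search next to the original file first, then in the extra directory
    directories = [head]
    if zdab_dir is not None:
        directories.append(zdab_dir)

    for directory in directories:
        for extension in (ext, '.zdab', '.zdab.gz'):
            candidate = join(directory, root.replace('reduced', 'reprocessed')) + extension
            if os.path.exists(candidate):
                # Prefer the reprocessed version of the file
                return candidate

    return filename


def mark_reprocessed(output, filename):
    """Illustrative helper: flag the output HDF5 file when it was built from
    the reprocessed zdab, so a later run can tell which input was used."""
    with h5py.File(output, 'a') as fout:
        if 'reprocessed' in filename:
            fout.attrs['reprocessed'] = True

The attribute is what drives the new skip logic in the patch: an existing output that was built from the reduced file is no longer treated as complete once a reprocessed file becomes available, so it is regenerated instead of skipped.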