author     tlatorre <tlatorre@uchicago.edu>    2020-06-15 01:55:28 -0500
committer  tlatorre <tlatorre@uchicago.edu>    2020-06-15 01:55:28 -0500
commit     6d2445aa552cd923df18f753a00d83fc37b356e6 (patch)
tree       aaecea4abb158256cc07cb687e6f57425273caf8 /utils
parent     9d2e3acc7e3b557956ed4159231e57df9ca9c3ff (diff)
update cat-grid-jobs to use reprocessed files if they're available
Diffstat (limited to 'utils')
-rwxr-xr-x  utils/cat-grid-jobs  42
1 file changed, 34 insertions, 8 deletions
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index 0755774..2c6cdf8 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -40,7 +40,7 @@ from sddm.logger import Logger
 
 log = Logger()
 
-def cat_grid_jobs(conn, output_dir):
+def cat_grid_jobs(conn, output_dir, zdab_dir=None):
     zdab_cat = which("zdab-cat")
 
     if zdab_cat is None:
@@ -63,18 +63,31 @@ def cat_grid_jobs(conn, output_dir):
         fit_results = ['%s.hdf5' % splitext(fit_result_filename)[0] for fit_result_filename in fit_results]
 
         if len(fit_results) == 0:
-            log.debug("No fit results found for %s (%s)" % (tail, uuid))
+            log.verbose("No fit results found for %s (%s)" % (tail, uuid))
             continue
 
         output = join(output_dir,"%s_%s_fit_results.hdf5" % (root,uuid))
 
+        if 'reduced' in root:
+            directories = [head]
+            if zdab_dir is not None:
+                directories += [zdab_dir]
+            for directory in directories:
+                for extension in [ext, '.zdab', '.zdab.gz']:
+                    # Use the reprocessed version of the file if possible
+                    reprocessed_filename = join(directory,root.replace('reduced','reprocessed')) + extension
+
+                    if os.path.exists(reprocessed_filename):
+                        log.verbose("Found reprocessed file '%s'. Using that instead of '%s'" % (reprocessed_filename,tail))
+                        filename = reprocessed_filename
+
         if os.path.exists(output):
             total_fits = 0
 
             for fit_result_filename in fit_results:
                 fit_result_head, fit_result_tail = split(fit_result_filename)
                 if not os.path.exists(fit_result_filename):
-                    log.warn("File '%s' does not exist!" % filename)
+                    log.warn("File '%s' does not exist!" % fit_result_filename)
                     continue
 
                 with h5py.File(fit_result_filename,'r') as f:
@@ -84,10 +97,15 @@ def cat_grid_jobs(conn, output_dir):
                     total_fits += f['fits'].shape[0]
 
             with h5py.File(output,'r') as fout:
-                if 'fits' in fout:
-                    if fout['fits'].shape[0] >= total_fits:
-                        log.debug("skipping %s because there are already %i fit results" % (tail,total_fits))
-                        continue
+                if 'reprocessed' in filename and 'reprocessed' not in fout.attrs:
+                    pass
+                elif 'fits' in fout and fout['fits'].shape[0] >= total_fits:
+                    log.verbose("skipping %s because there are already %i fit results" % (tail,total_fits))
+                    continue
+
+        if not os.path.exists(filename):
+            log.warn("File '%s' does not exist!" % filename)
+            continue
 
         # First we get the full event list along with the data cleaning word, FTP
         # position, FTK, and RSP energy from the original zdab and then add the fit
@@ -104,6 +122,12 @@ def cat_grid_jobs(conn, output_dir):
         total_fits = 0
 
         with h5py.File(output,"a") as fout:
+            # Mark the file as being reprocessed so we know in the future if we
+            # already used the reprocessed version instead of the reduced
+            # version
+            if 'reprocessed' in filename:
+                fout.attrs['reprocessed'] = True
+
             total_events = fout['ev'].shape[0]
             for fit_result_filename in fit_results:
                 fit_result_head, fit_result_tail = split(fit_result_filename)
@@ -142,6 +166,8 @@ if __name__ == '__main__':
                         help="filename for log file")
     parser.add_argument('--output-dir', default=None,
                         help="output directory for fit results")
+    parser.add_argument('--zdab-dir', default=None,
+                        help="extra directory to search for zdab files")
     args = parser.parse_args()
 
     log.set_verbosity(args.loglevel)
@@ -163,5 +189,5 @@ if __name__ == '__main__':
     conn = sqlite3.connect(args.db)
 
-    cat_grid_jobs(conn, args.output_dir)
+    cat_grid_jobs(conn, args.output_dir, args.zdab_dir)
 
     conn.close()
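
For context, the core of the change is a preference order for input files: when the grid job was run on a "reduced" zdab, the script now first looks for a "reprocessed" file with the same base name, both next to the original file and in an optional extra directory passed via --zdab-dir, and records in the output HDF5 file whether the reprocessed version was used. The snippet below is a minimal, standalone sketch of that behavior, not code from cat-grid-jobs; the helper names find_reprocessed and mark_reprocessed are illustrative only.

import os
from os.path import join, split, splitext

import h5py


def find_reprocessed(filename, zdab_dir=None):
    """Illustrative helper: return the 'reprocessed' counterpart of a
    'reduced' zdab file if one exists, otherwise the original filename."""
    head, tail = split(filename)
    root, ext = splitext(tail)

    if 'reduced' not in root:
        return filename

    # Search next to the original file first, then in the extra directory
    directories = [head]
    if zdab_dir is not None:
        directories.append(zdab_dir)

    for directory in directories:
        for extension in (ext, '.zdab', '.zdab.gz'):
            candidate = join(directory, root.replace('reduced', 'reprocessed')) + extension
            if os.path.exists(candidate):
                # Prefer the reprocessed version of the file
                return candidate

    return filename


def mark_reprocessed(output, filename):
    """Illustrative helper: flag the output HDF5 file when it was built from
    the reprocessed zdab, so a later run can tell which input was used."""
    with h5py.File(output, 'a') as fout:
        if 'reprocessed' in filename:
            fout.attrs['reprocessed'] = True

The attribute is what drives the new skip logic in the patch: an existing output that was built from the reduced file is no longer treated as complete once a reprocessed file becomes available, so it is regenerated instead of skipped.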