author    tlatorre <tlatorre@uchicago.edu>    2020-01-20 15:19:26 -0600
committer tlatorre <tlatorre@uchicago.edu>    2020-01-20 15:19:26 -0600
commit    98f2d7bae97cdcc98e76d52737e59d05199a6b8d (patch)
tree      a3810a254f1c33172ad23eb8f21e81a38298e696
parent    ddb687f13bc90650d7161ec7c95d64962eec1270 (diff)
update submit-grid-jobs so it does not add new jobs for runs that are already in the database
Also add a -r/--reprocess command line option to force reprocessing of runs that are already in the database.
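The core of the change is a simple guard: skip an input file when its run number is already recorded in the database, unless reprocessing was explicitly requested. A minimal sketch of that decision (the helper name is hypothetical; the script below inlines the check rather than factoring it out):

    def should_skip(run, known_runs, reprocess):
        """Skip when `run` is already in the database and -r/--reprocess
        was not given."""
        return not reprocess and run in known_runs

    # run 10000 is already known, so it is skipped unless reprocessing is forced
    assert should_skip(10000, {10000, 10001}, reprocess=False)
    assert not should_skip(10000, {10000, 10001}, reprocess=True)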
-rwxr-xr-x  utils/submit-grid-jobs | 12
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/utils/submit-grid-jobs b/utils/submit-grid-jobs
index 2e9feb7..4e3bd5b 100755
--- a/utils/submit-grid-jobs
+++ b/utils/submit-grid-jobs
@@ -586,6 +586,7 @@ if __name__ == '__main__':
parser.add_argument('--max-retries', type=int, default=2, help="maximum number of times to try and resubmit a grid job")
parser.add_argument('--auto', action='store_true', default=False, help="automatically loop over database entries and submit grid jobs")
parser.add_argument('--max-jobs', type=int, default=100, help="maximum number of jobs in the grid queue at any time")
+ parser.add_argument('-r','--reprocess', action='store_true', default=False, help="force reprocessing of runs which are already in the database")
args = parser.parse_args()
log.set_verbosity(args.loglevel)
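The hunk above wires the new option into the existing argparse parser using the standard boolean-switch pattern. A self-contained sketch of just that pattern, with the surrounding options omitted:

    import argparse

    parser = argparse.ArgumentParser()
    # store_true makes --reprocess a flag: absent -> False, present -> True
    parser.add_argument('-r', '--reprocess', action='store_true', default=False,
                        help="force reprocessing of runs which are already in the database")

    args = parser.parse_args(['-r'])  # simulate passing -r on the command line
    print(args.reprocess)             # True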
@@ -619,6 +620,10 @@ if __name__ == '__main__':
conn.commit()
+ results = c.execute('SELECT DISTINCT run FROM state')
+
+ unique_runs = [row[0] for row in results.fetchall()]
+
if 'SDDM_DATA' not in os.environ:
log.warn("Please set the SDDM_DATA environment variable to point to the fitter source code location", file=sys.stderr)
sys.exit(1)
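The added lines build the list of runs already present in the state table, right after the commit that updates it. A self-contained sketch of the same query pattern (the in-memory database and the two-column schema are assumptions for illustration; the real script already holds an open cursor c):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    c = conn.cursor()
    # hypothetical minimal schema; the real 'state' table has more columns
    c.execute("CREATE TABLE state (run INTEGER, filename TEXT)")
    c.executemany("INSERT INTO state VALUES (?, ?)",
                  [(10000, 'a.zdab'), (10000, 'b.zdab'), (10001, 'c.zdab')])
    conn.commit()

    results = c.execute('SELECT DISTINCT run FROM state')
    unique_runs = [row[0] for row in results.fetchall()]
    print(unique_runs)  # [10000, 10001] (order not guaranteed)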
@@ -674,7 +679,12 @@ if __name__ == '__main__':
else:
check_call([zdab_cat,filename,"-o",output.name],stderr=f)
- with h5py.File(output.name) as f:
+ with h5py.File(output.name,'r') as f:
+ if len(f['ev']) and not args.reprocess and int(f['ev'][0]['run']) in unique_runs:
+ head, tail = split(filename)
+ log.notice("Skipping %s because run %i is already in the database" % (tail,int(f['ev'][0]['run'])))
+ continue
+
for ev in f['ev']:
if ev['nhit'] >= args.min_nhit:
for i in range(1,args.max_particles+1):
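Two things happen in the last hunk: the h5py.File call gains an explicit 'r' mode (read-only, rather than relying on h5py's version-dependent default), and the first event's run number is checked against unique_runs before any events are queued. A hedged sketch of that check in isolation (the file layout, an 'ev' dataset with a 'run' field, is taken from the diff; the function name is hypothetical):

    import h5py

    def run_already_processed(path, unique_runs, reprocess=False):
        """Return True if the file's first event belongs to a run that is
        already in the database and -r/--reprocess was not given."""
        with h5py.File(path, 'r') as f:
            # len(f['ev']) guards against empty files with no events
            if len(f['ev']) and not reprocess:
                return int(f['ev'][0]['run']) in unique_runs
        return False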