aboutsummaryrefslogtreecommitdiff
path: root/utils/submit-grid-jobs
diff options
context:
space:
mode:
author tlatorre <tlatorre@uchicago.edu> 2020-05-18 11:18:27 -0500
committer tlatorre <tlatorre@uchicago.edu> 2020-05-19 11:31:57 -0500
commit c91d11cd1c712639c9639ba6878cdd18d008a2f6 (patch)
tree 5cbe34f4d4eaa16b7ad03c0315bfc624e5ebf373 /utils/submit-grid-jobs
parent 2a448a9e9fbf5c15be72d8be065ed8f43b62c3e7 (diff)
download sddm-c91d11cd1c712639c9639ba6878cdd18d008a2f6.tar.gz
sddm-c91d11cd1c712639c9639ba6878cdd18d008a2f6.tar.bz2
sddm-c91d11cd1c712639c9639ba6878cdd18d008a2f6.zip
always resubmit jobs in the RETRY state
Diffstat (limited to 'utils/submit-grid-jobs')
-rwxr-xr-x utils/submit-grid-jobs 6
1 files changed, 5 insertions, 1 deletions
diff --git a/utils/submit-grid-jobs b/utils/submit-grid-jobs
index 1796e99..29f7d9c 100755
--- a/utils/submit-grid-jobs
+++ b/utils/submit-grid-jobs
@@ -400,7 +400,11 @@ def main(conn, dqxx_dir, max_retries, max_jobs):
else:
# Don't know what to do here for Removed or Submission_err
log.warn("Job %i is in the state %i. Don't know what to do." % (id, job_status))
- elif state == 'RETRY' and nretry < max_retries:
+ elif state == 'RETRY':
+ if njobs >= max_jobs:
+ log.verbose("Skipping job %i because there are already %i jobs in the queue" % (id,njobs))
+ continue
+
log.notice("Resubmitting job %i from RETRY state" % id)
if submit_job(submit_file):
log.warn("Failed to resubmit job %i. Setting it to FAILED state." % id)