path: root/utils/mv-grid-files
#!/usr/bin/env python
"""
Script to move grid output files which have been successfully fit. For example:

    $ cd fit_results
    $ mv-grid-files --new-dir fit_results_to_move

will take all the files associated with a successful grid job (submit file,
hdf5 output, condor output, condor error, and condor log) and move them with
the same directory structure to fit_results_to_move.

The idea is that we first move these files locally on the grid login node and
then set up a cron job:

    0 0 * * * rsync -avzP --remove-source-files username@osgconnect.net:fit_results_to_move/ ~/fit_results/

to copy all the data back and delete it from the grid login node. Files for
jobs which didn't fit successfully are left in place, since we will probably
want to resubmit them at some point.
"""
import os
from sddm.logger import Logger

log = Logger()

def mv(src,dst):
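    """
    Move src to dst using os.renames(), which creates any missing intermediate
    directories for dst and then removes any directories left empty along the
    path to src. If src doesn't exist, it is skipped with a debug message.
    """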
    log.notice("mv %s %s" % (src,dst))
    if os.path.exists(src):
        os.renames(src,dst)
    else:
        log.debug("skipping %s because it doesn't exist" % src)

if __name__ == '__main__':
    import argparse
    import sqlite3
    import glob
    from os.path import join, split
    from sddm import splitext

    parser = argparse.ArgumentParser(description="move grid output files which have been successfully fit")
    parser.add_argument("--db", type=str, help="database file", default=None)
    parser.add_argument('--loglevel',
                        help="logging level (debug, verbose, notice, warning)",
                        default='notice')
    parser.add_argument('--logfile', default=None,
                        help="filename for log file")
    parser.add_argument('--new-dir', default=None,
                        help="directory to move successful fits to")
    args = parser.parse_args()

    log.set_verbosity(args.loglevel)

    if args.logfile:
        log.set_logfile(args.logfile)

    home = os.path.expanduser("~")

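    # By default, both the state database and the destination directory live
    # in the home directory on the grid login node.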
    if args.db is None:
        args.db = join(home,'state.db')

    if args.new_dir is None:
        args.new_dir = join(home,"fit_results_to_move")

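    # Open the SQLite database which tracks the status of each grid job.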
    conn = sqlite3.connect(args.db)

    c = conn.cursor()

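    # Select every job which has been marked as successfully fit, oldest first.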
    results = c.execute("SELECT filename, uuid, gtid, particle_id, state FROM state WHERE state = 'SUCCESS' ORDER BY timestamp ASC")

    for filename, uuid, gtid, particle_id, state in results.fetchall():
        head, tail = split(filename)
        root, ext = splitext(tail)

        # all output files are prefixed with FILENAME_GTID_PARTICLE_ID_UUID
        prefix = "%s_%08i_%i_%s" % (root,gtid,particle_id,uuid)

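        # each job writes its output files to a directory named FILENAME_UUID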
        new_dir = "%s_%s" % (root,uuid)

        if state == 'SUCCESS':
            # If it successfully fit, then we move all the associated files to
            # a new directory. From there, they can be copied back
            for output_file in glob.glob("%s/%s.*" % (new_dir,prefix)):
                mv(output_file,join(args.new_dir,output_file))

    conn.close()