benchmark.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

#!/usr/bin/env python
import numpy as np
from pycuda import gpuarray as ga
import time
from uncertainties import ufloat
import sys

from chroma.gpu import GPU, to_float3
from chroma.camera import get_rays
from chroma.event import Photons
from chroma.sample import uniform_sphere

def progress(seq):
    """
    Print a ten-tick progress bar on stdout while iterating over `seq`.

    An empty bar is drawn first; ticks are then added in proportion to the
    number of items reached, so that the bar holds exactly ten ticks by the
    time the last item is yielded, whatever the length of `seq`. Sequences
    shorter than ten items are supported (several ticks are drawn per item).

    Args:
        - seq, sequence
            Any iterable that also supports `len()`.

    Yields:
        The items of `seq`, unchanged and in order.
    """
    n = len(seq)
    nticks = 10
    write = sys.stdout.write

    # Draw the empty bar and return to the start of the line so the ticks
    # overwrite it. Each tick is written as ' .', so ten ticks followed by
    # the closing ' ]' cover the 21 blanks and the original ']' exactly.
    write('[' + ' '*(2*nticks + 1) + ']\r[')
    sys.stdout.flush()

    drawn = 0
    for i, item in enumerate(seq):
        # Ticks due once item `i` is reached: ceil(nticks*(i+1)/n), done in
        # integer arithmetic. Never dividing by n//10 avoids the division by
        # zero for n < 10 and the overfull bar when n is not a multiple of 10.
        due = min(nticks, (nticks*(i + 1) + n - 1)//n)
        if due > drawn:
            write(' .'*(due - drawn))
            sys.stdout.flush()
            drawn = due
        yield item

    write(' ]\n')
    sys.stdout.flush()

def ray_trace(gpu, number=1000):
    """
    Benchmark the ray tracing kernel on the geometry loaded onto `gpu`.

    An 800x600 set of rays is generated with `get_rays` and the same rays
    are traced `number` times, timing each kernel call (including the
    context synchronization that follows it).

    .. note::
        The rays are thrown from a camera sitting *outside* of the geometry.

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - number, int
            The number of kernel calls to make. The first call is not
            included in the average, so this should be at least 2.

    Returns:
        The number of ray intersections per second, as a ufloat built from
        the mean and standard deviation of the retained call times.
    """
    lower, upper = gpu.geometry.mesh.get_bounds()
    diagonal = np.linalg.norm(upper-lower)
    # Camera sits one bounding-box diagonal out along y, at mid-height in z.
    camera = [0, diagonal, (lower[2]+upper[2])/2]

    size = (800, 600)
    width, height = size

    origins, directions = get_rays(camera, size, 0.035, focal_length=0.018)

    origins_gpu = ga.to_gpu(to_float3(origins))
    directions_gpu = ga.to_gpu(to_float3(directions))
    pixels_gpu = ga.zeros(width*height, dtype=np.int32)

    timings = []
    for _ in progress(range(number)):
        start = time.time()
        gpu.kernels.ray_trace(
            np.int32(pixels_gpu.size),
            origins_gpu,
            directions_gpu,
            pixels_gpu,
            block=(gpu.nthreads_per_block, 1, 1),
            grid=(pixels_gpu.size//gpu.nthreads_per_block+1, 1))
        gpu.context.synchronize()
        timings.append(time.time() - start)

    # first kernel call incurs some driver overhead, so leave it out
    steady = timings[1:]

    return pixels_gpu.size/ufloat((np.mean(steady), np.std(steady)))

def load_photons(gpu, number=10, nphotons=500000):
    """
    Benchmark how fast photons can be loaded onto `gpu`.

    A single set of `nphotons` photons is built once and then loaded
    `number` times, timing each load (including the context
    synchronization that follows it).

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - number, int
            The number of loads to make. The first load is not included in
            the average, so this should be at least 2.
        - nphotons, int
            The number of photons to load per trial

    Returns:
        The number of photons loaded per second, as a ufloat built from the
        mean and standard deviation of the retained load times.
    """
    gpu.setup_propagate()

    positions = np.zeros((nphotons, 3))
    directions = uniform_sphere(nphotons)
    wavelengths = np.random.uniform(400, 800, size=nphotons)
    photons = Photons(positions, directions, wavelengths)

    timings = []
    for _ in progress(range(number)):
        start = time.time()
        gpu.load_photons(photons)
        gpu.context.synchronize()
        timings.append(time.time() - start)

    # first call incurs some driver overhead, so leave it out
    steady = timings[1:]

    return nphotons/ufloat((np.mean(steady), np.std(steady)))

def propagate(gpu, number=10, nphotons=500000):
    """
    Benchmark photon propagation through the geometry loaded onto `gpu`.

    For each trial a fresh set of `nphotons` photons is generated and
    loaded onto the GPU; only the propagation itself (and the context
    synchronization that follows it) is timed.

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - number, int
            The number of propagation calls to make. The first call is not
            included in the average, so this should be at least 2.
        - nphotons, int
            The number of photons to propagate per kernel call.

    Returns:
        The number of photons propagated per second, as a ufloat built from
        the mean and standard deviation of the retained call times.
    """
    gpu.setup_propagate()

    timings = []
    for _ in progress(range(number)):
        # Generating and loading the photons is deliberately left outside
        # the timed region.
        positions = np.zeros((nphotons, 3))
        directions = uniform_sphere(nphotons)
        wavelengths = np.random.uniform(400, 800, size=nphotons)
        gpu.load_photons(Photons(positions, directions, wavelengths))

        start = time.time()
        gpu.propagate()
        gpu.context.synchronize()
        timings.append(time.time() - start)

    # first kernel call incurs some driver overhead, so leave it out
    steady = timings[1:]

    return nphotons/ufloat((np.mean(steady), np.std(steady)))

if __name__ == '__main__':
    # Script entry point: build the LBNE 200 kton detector, load it onto
    # the GPU and report the three benchmark figures in turn.
    from chroma.detectors import build_lbne_200kton, build_minilbne

    lbne = build_lbne_200kton()
    lbne.build(bits=11)

    gpu = GPU()
    gpu.load_geometry(lbne, print_usage=False)

    # (report template, benchmark) pairs, run and printed in this order.
    benchmarks = (
        ('%s track steps/s', ray_trace),
        ('%s loaded photons/s', load_photons),
        ('%s propagated photons/s', propagate),
    )
    for template, benchmark in benchmarks:
        # A single parenthesized argument prints identically whether
        # `print` is a statement or a function.
        print(template % benchmark(gpu))