benchmark.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192

#!/usr/bin/env python
import numpy as np
from pycuda import gpuarray as ga
import time
from uncertainties import ufloat
import sys

from chroma.gpu import GPU, to_float3
from chroma.camera import get_rays
from chroma.event import Photons
from chroma.sample import uniform_sphere
from chroma.generator.photon import G4ParallelGenerator
from chroma.optics import water_wcsim
from chroma.generator.vertex import constant_particle_gun
from chroma.tools import profile_if_possible
# Generator processes need to fork BEFORE the GPU context is setup
g4generator = G4ParallelGenerator(4, water_wcsim)

def progress(seq):
    "Print progress while iterating over `seq`."
    n = len(seq)
    print '[' + ' '*21 + ']\r[',
    sys.stdout.flush()
    for i, item in enumerate(seq):
        if i % (n//10) == 0:
            print '.',
            sys.stdout.flush()
        yield item
    print ']'
    sys.stdout.flush()

def ray_trace(gpu, number=1000):
    """
    Return the number of mean and standard deviation of the number of ray
    intersections per second as a ufloat for the geometry loaded onto `gpu`.

    .. note::
        The rays are thrown from a camera sitting *outside* of the geometry.

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - number, int
            The number of kernel calls to average.
    """
    lb, ub = gpu.geometry.mesh.get_bounds()
    scale = np.linalg.norm(ub-lb)
    point = [0,scale,(lb[2]+ub[2])/2]

    size = (800,600)
    width, height = size

    origins, directions = get_rays(point, size, 0.035, focal_length=0.018)

    origins_gpu = ga.to_gpu(to_float3(origins))
    directions_gpu = ga.to_gpu(to_float3(directions))
    pixels_gpu = ga.zeros(width*height, dtype=np.int32)

    run_times = []
    for i in progress(range(number)):
        t0 = time.time()
        gpu.kernels.ray_trace(np.int32(pixels_gpu.size), origins_gpu, directions_gpu, pixels_gpu, block=(gpu.nthreads_per_block,1,1), grid=(pixels_gpu.size//gpu.nthreads_per_block+1,1))
        gpu.context.synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return pixels_gpu.size/ufloat((np.mean(run_times),np.std(run_times)))

def load_photons(gpu, number=10, nphotons=500000):
    """
    Return the mean and standard deviation of the number of photons loaded
    onto `gpu` per second.

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - number, int
            The number of loads to average
        - nphotons, int
            The number of photons to load per trial
    """
    gpu.setup_propagate()
    photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons))

    run_times = []
    for i in progress(range(number)):
        t0 = time.time()
        gpu.load_photons(photons)
        gpu.context.synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nphotons/ufloat((np.mean(run_times),np.std(run_times)))

def propagate(gpu, number=10, nphotons=500000):
    """
    Return the mean and standard deviation of the number of photons propagated
    per second as a ufloat for the geometry loaded onto `gpu`.

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - number, int
            The number of kernel calls to average.
        - nphotons, int
            The number of photons to propagate per kernel call.
    """
    gpu.setup_propagate()

    run_times = []
    for i in progress(range(number)):
        photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons))
        gpu.load_photons(photons)
        t0 = time.time()
        gpu.propagate()
        gpu.context.synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nphotons/ufloat((np.mean(run_times),np.std(run_times)))

@profile_if_possible
def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1):
    """
    Return the mean and standard deviation of the number of 100 MeV
    events per second that can be histogrammed a ufloat for the
    geometry loaded onto `gpu`.

    Args:
        - gpu, chroma.gpu.GPU
            The GPU object with a geometry already loaded.
        - max_pmt_id, int
            The channel number of the highest PMT
        - npdfs, int
            The number of pdf generations to average.
        - nevents, int
            The number of 100 MeV events to generate for each PDF.
        - nreps, int
            The number of times to propagate each event and add to PDF
    """
    gpu.setup_propagate()

    run_times = []

    gpu.setup_propagate()
    gpu.setup_daq(max_pmt_id)
    gpu.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5))
    vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 100)

    for i in progress(range(npdfs)):
        t0 = time.time()
        gpu.clear_pdf()

        for ev in g4generator.generate_events(nevents, vertex_gen):
            for j in xrange(nreps):
                gpu.load_photons(ev.photon_start)
                gpu.propagate()
                gpu.run_daq()
                gpu.add_hits_to_pdf()

        hitcount, pdf = gpu.get_pdfs()

        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nevents*nreps/ufloat((np.mean(run_times),np.std(run_times)))

if __name__ == '__main__':
    from chroma.detectors import build_lbne_200kton, build_minilbne

    lbne = build_lbne_200kton()
    lbne.build(bits=11)

    gpu = GPU()
    gpu.load_geometry(lbne, print_usage=False)
    
    #print '%s track steps/s' % ray_trace(gpu)
    #print '%s loaded photons/s' % load_photons(gpu)
    #print '%s propagated photons/s' % propagate(gpu)
    print '%s histogrammed 100 MeV events/s' % pdf(gpu, max(lbne.pmtids))