diff options
Diffstat (limited to 'benchmark.py')
-rwxr-xr-x | benchmark.py | 47 |
1 files changed, 28 insertions, 19 deletions
diff --git a/benchmark.py b/benchmark.py index f839f61..31c145c 100755 --- a/benchmark.py +++ b/benchmark.py @@ -17,21 +17,21 @@ from chroma import optics # Generator processes need to fork BEFORE the GPU context is setup g4generator = generator.photon.G4ParallelGenerator(4, optics.water_wcsim) -def intersect(gpu_instance, number=100, nphotons=500000, nthreads_per_block=64, max_blocks=1024): +def intersect(gpu_geometry, number=100, nphotons=500000, nthreads_per_block=64, + max_blocks=1024): "Returns the average number of ray intersections per second." distances_gpu = ga.empty(nphotons, dtype=np.float32) + module = gpu.get_cu_module('mesh.h', options=('--use_fast_math',)) + gpu_funcs = gpu.GPUFuncs(module) + run_times = [] for i in tools.progress(range(number)): - pos = np.zeros((nphotons,3), dtype=np.float32) - dir = sample.uniform_sphere(nphotons) - - gpu_rays = gpu.GPURays(pos, dir) - - gpu_funcs = gpu.GPUFuncs(gpu_instance.module) + pos = ga.zeros(nphotons, dtype=ga.vec.float3) + dir = ga.to_gpu(gpu.to_float3(sample.uniform_sphere(nphotons))) t0 = time.time() - gpu_funcs.distance_to_mesh(np.int32(gpu_rays.pos.size), gpu_rays.pos, gpu_rays.dir, distances_gpu, block=(nthreads_per_block,1,1), grid=(gpu_rays.pos.size//nthreads_per_block+1,1)) + gpu_funcs.distance_to_mesh(np.int32(pos.size), pos, dir, gpu_geometry.gpudata, distances_gpu, block=(nthreads_per_block,1,1), grid=(pos.size//nthreads_per_block+1,1)) cuda.Context.get_current().synchronize() elapsed = time.time() - t0 @@ -39,7 +39,7 @@ def intersect(gpu_instance, number=100, nphotons=500000, nthreads_per_block=64, # first kernel call incurs some driver overhead run_times.append(elapsed) - return gpu_rays.pos.size/ufloat((np.mean(run_times),np.std(run_times))) + return nphotons/ufloat((np.mean(run_times),np.std(run_times))) def load_photons(number=100, nphotons=500000): """Returns the average number of photons moved to the GPU device memory @@ -64,7 +64,8 @@ def load_photons(number=100, nphotons=500000): return nphotons/ufloat((np.mean(run_times),np.std(run_times))) -def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, max_blocks=1024): +def propagate(gpu_geometry, number=10, nphotons=500000, nthreads_per_block=64, + max_blocks=1024): "Returns the average number of photons propagated on the GPU per second." rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) @@ -79,7 +80,8 @@ def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, m gpu_photons = gpu.GPUPhotons(photons) t0 = time.time() - gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block, + max_blocks) cuda.Context.get_current().synchronize() elapsed = time.time() - t0 @@ -90,7 +92,8 @@ def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, m return nphotons/ufloat((np.mean(run_times),np.std(run_times))) @tools.profile_if_possible -def pdf(gpu_instance, gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, nthreads_per_block=64, max_blocks=1024): +def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, + nthreads_per_block=64, max_blocks=1024): """ Returns the average number of 100 MeV events per second that can be histogrammed per second. @@ -118,14 +121,17 @@ def pdf(gpu_instance, gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, t0 = time.time() gpu_pdf.clear_pdf() - vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0), + (1,0,0), 100) vertex_iter = itertools.islice(vertex_gen, nevents) for ev in g4generator.generate_events(vertex_iter): for j in xrange(nreps): gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) - gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_photons.propagate(gpu_geometry, rng_states, + nthreads_per_block, max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, + nthreads_per_block, max_blocks) gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block) hitcount, pdf = gpu_pdf.get_pdfs() @@ -146,16 +152,19 @@ if __name__ == '__main__': lbne.build(bits=11) gpu_instance = gpu.GPU() - gpu_geometry = gpu.GPUGeometry(gpu_instance, lbne) + gpu_geometry = gpu.GPUGeometry(lbne) gpu_instance.print_mem_info() - print '%s ray intersections/sec.' % tools.ufloat_to_str(intersect(gpu_instance)) + print '%s ray intersections/sec.' % \ + tools.ufloat_to_str(intersect(gpu_geometry)) gc.collect() gpu_instance.print_mem_info() print '%s photons loaded/sec.' % tools.ufloat_to_str(load_photons()) gc.collect() gpu_instance.print_mem_info() - print '%s photons propagated/sec.' % tools.ufloat_to_str(propagate(gpu_instance)) + print '%s photons propagated/sec.' % \ + tools.ufloat_to_str(propagate(gpu_geometry)) gc.collect() gpu_instance.print_mem_info() - print '%s 100 MeV events histogrammed/s' % tools.ufloat_to_str(pdf(gpu_instance, gpu_geometry, max(lbne.pmtids))) + print '%s 100 MeV events histogrammed/s' % \ + tools.ufloat_to_str(pdf(gpu_geometry, max(lbne.pmtids))) |