diff options
author | Stan Seibert <stan@mtrr.org> | 2011-09-03 09:21:36 -0400 |
---|---|---|
committer | Stan Seibert <stan@mtrr.org> | 2011-09-03 09:21:36 -0400 |
commit | 38f05bf761490def1886016524f328528b08f549 (patch) | |
tree | e0ee6555ee8bdf02a9e0b832a33707bcee06a3fa /benchmark.py | |
parent | 48550062440c5b7f1479ecbe17fd4b024a90fca2 (diff) | |
parent | 707ca1b366f11032682cc864ca2848905e6b485c (diff) | |
download | chroma-38f05bf761490def1886016524f328528b08f549.tar.gz chroma-38f05bf761490def1886016524f328528b08f549.tar.bz2 chroma-38f05bf761490def1886016524f328528b08f549.zip |
merge
Diffstat (limited to 'benchmark.py')
-rwxr-xr-x | benchmark.py | 205 |
1 files changed, 87 insertions, 118 deletions
diff --git a/benchmark.py b/benchmark.py index 4c46a1f..f839f61 100755 --- a/benchmark.py +++ b/benchmark.py @@ -1,95 +1,61 @@ #!/usr/bin/env python import numpy as np +import pycuda.driver as cuda from pycuda import gpuarray as ga import time from uncertainties import ufloat import sys - -from chroma.gpu import GPU, to_float3 -from chroma.camera import get_rays -from chroma.event import Photons -from chroma.sample import uniform_sphere -from chroma.generator.photon import G4ParallelGenerator -from chroma.optics import water_wcsim -from chroma.generator.vertex import constant_particle_gun -from chroma.tools import profile_if_possible +import itertools + +from chroma import gpu +from chroma import camera +from chroma import event +from chroma import sample +from chroma import generator +from chroma import tools +from chroma import optics # Generator processes need to fork BEFORE the GPU context is setup -g4generator = G4ParallelGenerator(4, water_wcsim) - -def progress(seq): - "Print progress while iterating over `seq`." - n = len(seq) - print '[' + ' '*21 + ']\r[', - sys.stdout.flush() - for i, item in enumerate(seq): - if i % (n//10) == 0: - print '.', - sys.stdout.flush() - yield item - print ']' - sys.stdout.flush() - -def ray_trace(gpu, number=1000): - """ - Return the number of mean and standard deviation of the number of ray - intersections per second as a ufloat for the geometry loaded onto `gpu`. - - .. note:: - The rays are thrown from a camera sitting *outside* of the geometry. +g4generator = generator.photon.G4ParallelGenerator(4, optics.water_wcsim) - Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. - - number, int - The number of kernel calls to average. - """ - lb, ub = gpu.geometry.mesh.get_bounds() - scale = np.linalg.norm(ub-lb) - point = [0,scale,(lb[2]+ub[2])/2] +def intersect(gpu_instance, number=100, nphotons=500000, nthreads_per_block=64, max_blocks=1024): + "Returns the average number of ray intersections per second." + distances_gpu = ga.empty(nphotons, dtype=np.float32) - size = (800,600) - width, height = size + run_times = [] + for i in tools.progress(range(number)): + pos = np.zeros((nphotons,3), dtype=np.float32) + dir = sample.uniform_sphere(nphotons) - origins, directions = get_rays(point, size, 0.035, focal_length=0.018) + gpu_rays = gpu.GPURays(pos, dir) - origins_gpu = ga.to_gpu(to_float3(origins)) - directions_gpu = ga.to_gpu(to_float3(directions)) - pixels_gpu = ga.zeros(width*height, dtype=np.int32) + gpu_funcs = gpu.GPUFuncs(gpu_instance.module) - run_times = [] - for i in progress(range(number)): t0 = time.time() - gpu.kernels.ray_trace(np.int32(pixels_gpu.size), origins_gpu, directions_gpu, pixels_gpu, block=(gpu.nthreads_per_block,1,1), grid=(pixels_gpu.size//gpu.nthreads_per_block+1,1)) - gpu.context.synchronize() + gpu_funcs.distance_to_mesh(np.int32(gpu_rays.pos.size), gpu_rays.pos, gpu_rays.dir, distances_gpu, block=(nthreads_per_block,1,1), grid=(gpu_rays.pos.size//nthreads_per_block+1,1)) + cuda.Context.get_current().synchronize() elapsed = time.time() - t0 if i > 0: # first kernel call incurs some driver overhead run_times.append(elapsed) - return pixels_gpu.size/ufloat((np.mean(run_times),np.std(run_times))) + return gpu_rays.pos.size/ufloat((np.mean(run_times),np.std(run_times))) -def load_photons(gpu, number=10, nphotons=500000): - """ - Return the mean and standard deviation of the number of photons loaded - onto `gpu` per second. - - Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. - - number, int - The number of loads to average - - nphotons, int - The number of photons to load per trial - """ - gpu.setup_propagate() - photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons)) +def load_photons(number=100, nphotons=500000): + """Returns the average number of photons moved to the GPU device memory + per second.""" + pos = np.zeros((nphotons,3)) + dir = sample.uniform_sphere(nphotons) + pol = np.cross(sample.uniform_sphere(nphotons), dir) + pol /= np.apply_along_axis(np.linalg.norm,1,pol)[:,np.newaxis] + wavelengths = np.random.uniform(400,800,size=nphotons) + photons = event.Photons(pos, dir, pol, wavelengths) run_times = [] - for i in progress(range(number)): + for i in tools.progress(range(number)): t0 = time.time() - gpu.load_photons(photons) - gpu.context.synchronize() + gpu_photons = gpu.GPUPhotons(photons) + cuda.Context.get_current().synchronize() elapsed = time.time() - t0 if i > 0: @@ -98,28 +64,23 @@ def load_photons(gpu, number=10, nphotons=500000): return nphotons/ufloat((np.mean(run_times),np.std(run_times))) -def propagate(gpu, number=10, nphotons=500000): - """ - Return the mean and standard deviation of the number of photons propagated - per second as a ufloat for the geometry loaded onto `gpu`. - - Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. - - number, int - The number of kernel calls to average. - - nphotons, int - The number of photons to propagate per kernel call. - """ - gpu.setup_propagate() +def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, max_blocks=1024): + "Returns the average number of photons propagated on the GPU per second." + rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) run_times = [] - for i in progress(range(number)): - photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons)) - gpu.load_photons(photons) + for i in tools.progress(range(number)): + pos = np.zeros((nphotons,3)) + dir = sample.uniform_sphere(nphotons) + pol = np.cross(sample.uniform_sphere(nphotons), dir) + pol /= np.apply_along_axis(np.linalg.norm,1,pol)[:,np.newaxis] + wavelengths = np.random.uniform(400,800,size=nphotons) + photons = event.Photons(pos, dir, pol, wavelengths) + gpu_photons = gpu.GPUPhotons(photons) + t0 = time.time() - gpu.propagate() - gpu.context.synchronize() + gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + cuda.Context.get_current().synchronize() elapsed = time.time() - t0 if i > 0: @@ -128,16 +89,15 @@ def propagate(gpu, number=10, nphotons=500000): return nphotons/ufloat((np.mean(run_times),np.std(run_times))) -@profile_if_possible -def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): +@tools.profile_if_possible +def pdf(gpu_instance, gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, nthreads_per_block=64, max_blocks=1024): """ - Return the mean and standard deviation of the number of 100 MeV - events per second that can be histogrammed a ufloat for the - geometry loaded onto `gpu`. + Returns the average number of 100 MeV events per second that can be + histogrammed per second. Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. + - gpu_instance, chroma.gpu.GPU + The GPU instance passed to the GPUGeometry constructor. - max_pmt_id, int The channel number of the highest PMT - npdfs, int @@ -147,27 +107,28 @@ def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): - nreps, int The number of times to propagate each event and add to PDF """ - gpu.setup_propagate() - - run_times = [] + rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) - gpu.setup_propagate() - gpu.setup_daq(max_pmt_id) - gpu.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + gpu_daq = gpu.GPUDaq(gpu_geometry, max_pmt_id) + gpu_pdf = gpu.GPUPDF() + gpu_pdf.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - for i in progress(range(npdfs)): + run_times = [] + for i in tools.progress(range(npdfs)): t0 = time.time() - gpu.clear_pdf() + gpu_pdf.clear_pdf() + + vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + vertex_iter = itertools.islice(vertex_gen, nevents) - for ev in g4generator.generate_events(nevents, vertex_gen): + for ev in g4generator.generate_events(vertex_iter): for j in xrange(nreps): - gpu.load_photons(ev.photon_start) - gpu.propagate() - gpu.run_daq() - gpu.add_hits_to_pdf() + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block) - hitcount, pdf = gpu.get_pdfs() + hitcount, pdf = gpu_pdf.get_pdfs() elapsed = time.time() - t0 @@ -178,15 +139,23 @@ def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): return nevents*nreps/ufloat((np.mean(run_times),np.std(run_times))) if __name__ == '__main__': - from chroma.detectors import build_lbne_200kton, build_minilbne + from chroma import detectors + import gc - lbne = build_lbne_200kton() + lbne = detectors.build_lbne_200kton() lbne.build(bits=11) - gpu = GPU() - gpu.load_geometry(lbne, print_usage=False) + gpu_instance = gpu.GPU() + gpu_geometry = gpu.GPUGeometry(gpu_instance, lbne) - print '%s track steps/s' % ray_trace(gpu) - print '%s loaded photons/s' % load_photons(gpu) - print '%s propagated photons/s' % propagate(gpu) - print '%s histogrammed 100 MeV events/s' % pdf(gpu, max(lbne.pmtids)) + gpu_instance.print_mem_info() + print '%s ray intersections/sec.' % tools.ufloat_to_str(intersect(gpu_instance)) + gc.collect() + gpu_instance.print_mem_info() + print '%s photons loaded/sec.' % tools.ufloat_to_str(load_photons()) + gc.collect() + gpu_instance.print_mem_info() + print '%s photons propagated/sec.' % tools.ufloat_to_str(propagate(gpu_instance)) + gc.collect() + gpu_instance.print_mem_info() + print '%s 100 MeV events histogrammed/s' % tools.ufloat_to_str(pdf(gpu_instance, gpu_geometry, max(lbne.pmtids))) |