From 4c554d621d9b66c595397a0667d212bc4ea57be1 Mon Sep 17 00:00:00 2001 From: Stan Seibert Date: Sat, 10 Sep 2011 15:05:49 -0400 Subject: Add the ability to propagate the same photons multiple times on the GPU, and run the DAQ multiple times on the same photons in a likelihood calculation. Propagating the same photons in a warp speeds up propagation by a factor of 3 (and we could do this even better if we wanted), and this improves the statistics in a likelihood evaluation quite a bit. Running the DAQ multiple times is also an inexpensive way to improve the quality of the PDF estimates. --- benchmark.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'benchmark.py') diff --git a/benchmark.py b/benchmark.py index 329986c..9f90c7b 100755 --- a/benchmark.py +++ b/benchmark.py @@ -91,7 +91,7 @@ def propagate(gpu_geometry, number=10, nphotons=500000, nthreads_per_block=64, return nphotons/ufloat((np.mean(run_times),np.std(run_times))) @tools.profile_if_possible -def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, +def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=16, ndaq=1, nthreads_per_block=64, max_blocks=1024): """ Returns the average number of 100 MeV events per second that can be @@ -108,6 +108,9 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, The number of 100 MeV events to generate for each PDF. - nreps, int The number of times to propagate each event and add to PDF + - ndaq, int + The number of times to run the DAQ simulation on the propagated + event and add it to the PDF. 
""" rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) @@ -125,13 +128,15 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, vertex_iter = itertools.islice(vertex_gen, nevents) for ev in g4generator.generate_events(vertex_iter): - for j in xrange(nreps): - gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu_photons.propagate(gpu_geometry, rng_states, - nthreads_per_block, max_blocks) - gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, - nthreads_per_block, max_blocks) - gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block) + gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) + + gpu_photons.propagate(gpu_geometry, rng_states, + nthreads_per_block, max_blocks) + for gpu_photon_slice in gpu_photons.iterate_copies(): + for idaq in xrange(ndaq): + gpu_channels = gpu_daq.acquire(gpu_photon_slice, rng_states, + nthreads_per_block, max_blocks) + gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block) hitcount, pdf = gpu_pdf.get_pdfs() @@ -141,7 +146,7 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, # first kernel call incurs some driver overhead run_times.append(elapsed) - return nevents*nreps/ufloat((np.mean(run_times),np.std(run_times))) + return nevents*nreps*ndaq/ufloat((np.mean(run_times),np.std(run_times))) if __name__ == '__main__': from chroma import detectors -- cgit