From 4c554d621d9b66c595397a0667d212bc4ea57be1 Mon Sep 17 00:00:00 2001
From: Stan Seibert <stan@mtrr.org>
Date: Sat, 10 Sep 2011 15:05:49 -0400
Subject: Add the ability to propagate the same photons multiple times on the
 GPU, and run the DAQ multiple times on the same photons in a likelihood
 calculation.

Propagating the same photons in a warp speeds up propagation by a
factor of 3 (and we could do this even better if we wanted), and this
improves the statistics in a likelihood evaluation quite a bit.
Running the DAQ multiple times is also an inexpensive way to improve
the quality of the PDF estimates.
---
 benchmark.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'benchmark.py')

diff --git a/benchmark.py b/benchmark.py
index 329986c..9f90c7b 100755
--- a/benchmark.py
+++ b/benchmark.py
@@ -91,7 +91,7 @@ def propagate(gpu_geometry, number=10, nphotons=500000, nthreads_per_block=64,
     return nphotons/ufloat((np.mean(run_times),np.std(run_times)))
 
 @tools.profile_if_possible
-def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1,
+def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=16, ndaq=1,
         nthreads_per_block=64, max_blocks=1024):
     """
     Returns the average number of 100 MeV events per second that can be
@@ -108,6 +108,9 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1,
             The number of 100 MeV events to generate for each PDF.
         - nreps, int
             The number of times to propagate each event and add to PDF
+        - ndaq, int
+            The number of times to run the DAQ simulation on the propagated
+            event and add it to the PDF.
     """
     rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)
 
@@ -125,13 +128,15 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1,
         vertex_iter = itertools.islice(vertex_gen, nevents)
 
         for ev in g4generator.generate_events(vertex_iter):
-            for j in xrange(nreps):
-                gpu_photons = gpu.GPUPhotons(ev.photons_beg)
-                gpu_photons.propagate(gpu_geometry, rng_states,
-                                      nthreads_per_block, max_blocks)
-                gpu_channels = gpu_daq.acquire(gpu_photons, rng_states,
-                                               nthreads_per_block, max_blocks)
-                gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block)
+            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
+
+            gpu_photons.propagate(gpu_geometry, rng_states,
+                                  nthreads_per_block, max_blocks)
+            for gpu_photon_slice in gpu_photons.iterate_copies():
+                for idaq in xrange(ndaq):
+                    gpu_channels = gpu_daq.acquire(gpu_photon_slice, rng_states,
+                                                   nthreads_per_block, max_blocks)
+                    gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block)
 
         hitcount, pdf = gpu_pdf.get_pdfs()
 
@@ -141,7 +146,7 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1,
             # first kernel call incurs some driver overhead
             run_times.append(elapsed)
 
-    return nevents*nreps/ufloat((np.mean(run_times),np.std(run_times)))
+    return nevents*nreps*ndaq/ufloat((np.mean(run_times),np.std(run_times)))
 
 if __name__ == '__main__':
     from chroma import detectors
-- 
cgit