1 files changed, 79 insertions, 0 deletions
diff --git a/benchmark.py b/benchmark.py
index 5a344bf..8c75d35 100755
--- a/benchmark.py
+++ b/benchmark.py
@@ -148,6 +148,82 @@ def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=16, ndaq=1,
 
     return nevents*nreps*ndaq/ufloat((np.mean(run_times),np.std(run_times)))
 
+def pdf_eval(gpu_geometry, max_pmt_id, npdfs=10, nevents=25, nreps=16, ndaq=128,
+             nthreads_per_block=64, max_blocks=1024):
+    """
+    Returns the average rate (events/s, as a ufloat) at which 100 MeV
+    events are accumulated into the gpu.GPUPDF evaluation structure.
+
+    Args:
+        - gpu_geometry
+            Geometry object passed to propagate() and to gpu.GPUDaq().
+        - max_pmt_id, int
+            The channel number of the highest PMT
+        - npdfs, int
+            The number of pdf generations to run; pass 0 is left out of the average.
+        - nevents, int
+            The number of 100 MeV events to generate for each PDF.
+        - nreps, int
+            The number of times to propagate each event and add to PDF
+        - ndaq, int
+            The number of DAQ simulations run on each propagated copy.
+        - nthreads_per_block, max_blocks, int: sizes forwarded to the GPU calls.
+    """
+    rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)
+
+    # Make data event: a single 'e-' vertex, propagated and run through the DAQ once
+    data_ev = g4generator.generate_events(itertools.islice(generator.vertex.constant_particle_gun('e-', (0,0,0),
+                                                                                                  (1,0,0), 100),
+                                                           1)).next()
+    gpu_photons = gpu.GPUPhotons(data_ev.photons_beg)
+
+    gpu_photons.propagate(gpu_geometry, rng_states,
+                          nthreads_per_block, max_blocks)
+    gpu_daq = gpu.GPUDaq(gpu_geometry, max_pmt_id)
+    data_ev_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks).get()
+
+    # Setup PDF evaluation against the data event's hit, t and q arrays
+    gpu_pdf = gpu.GPUPDF()
+    gpu_pdf.setup_pdf_eval(data_ev_channels.hit,
+                           data_ev_channels.t,
+                           data_ev_channels.q,
+                           0.05,  # NOTE(review): presumably time/charge widths and ranges follow -- confirm
+                           (-0.5, 999.5),
+                           1.0,
+                           (-0.5, 20),
+                           min_bin_content=20,
+                           time_only=True)
+
+    run_times = []  # elapsed seconds per PDF pass, pass 0 excluded
+    for i in tools.progress(range(npdfs)):
+        t0 = time.time()
+        gpu_pdf.clear_pdf_eval()
+
+        vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0),
+                                                            (1,0,0), 100)
+        vertex_iter = itertools.islice(vertex_gen, nevents)
+
+        for ev in g4generator.generate_events(vertex_iter):
+            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
+
+            gpu_photons.propagate(gpu_geometry, rng_states,
+                                  nthreads_per_block, max_blocks)
+            for gpu_photon_slice in gpu_photons.iterate_copies():
+                for idaq in xrange(ndaq):  # idaq is only a repeat counter
+                    gpu_channels = gpu_daq.acquire(gpu_photon_slice, rng_states,
+                                                   nthreads_per_block, max_blocks)
+                    gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block)
+
+        cuda.Context.get_current().synchronize()
+        elapsed = time.time() - t0
+
+        if i > 0:
+            # first kernel call incurs some driver overhead
+            run_times.append(elapsed)
+    # NOTE(review): run_times stays empty if npdfs <= 1 (only i > 0 is kept)
+    return nevents*nreps*ndaq/ufloat((np.mean(run_times),np.std(run_times)))
+
+
 if __name__ == '__main__':
     from chroma import detectors
     import gc
@@ -171,4 +247,7 @@ if __name__ == '__main__':
     print '%s 100 MeV events histogrammed/s' % \
         tools.ufloat_to_str(pdf(gpu_geometry, max(lbne.pmtids)))
 
+    print '%s 100 MeV events/s accumulated in PDF evaluation data structure (25 GEANT4 x 16 Chroma x 128 DAQ)' % \
+        tools.ufloat_to_str(pdf_eval(gpu_geometry, max(lbne.pmtids)))  # label mirrors pdf_eval defaults: nevents=25, nreps=16, ndaq=128
+
     context.pop()