Add a GPU-side "DAQ" implementation that identifies the first photon on each channel, which is presumed to trigger that channel. This gives a major speedup in the conversion of detection times to time PDFs.

author: Stan Seibert <stan@mtrr.org> 2011-06-21 10:32:46 -0400
committer: Stan Seibert <stan@mtrr.org> 2011-06-21 10:32:46 -0400
commit: 93ad18d540a351f628726bfff9bf16f3dcf9ceb3 (patch)
tree: fcf91ce4b2229dff4fdeece4de465e24161f223b /gputhread.py
parent: 02beb07ecc3eb0a914e325928fffb3ae3d6e3878 (diff)
download: chroma-93ad18d540a351f628726bfff9bf16f3dcf9ceb3.tar.gz
chroma-93ad18d540a351f628726bfff9bf16f3dcf9ceb3.tar.bz2
chroma-93ad18d540a351f628726bfff9bf16f3dcf9ceb3.zip
1 files changed, 31 insertions, 4 deletions
diff --git a/gputhread.py b/gputhread.py
index 6271607..8258f63 100644
--- a/gputhread.py
+++ b/gputhread.py
@@ -77,12 +77,26 @@ class GPUThread(threading.Thread):
         propagate = module.get_function('propagate')
         init_rng = module.get_function('init_rng')
         texrefs = self.geometry.load(module)
+        
 
-        init_rng(np.int32(100000), np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
+        daq_module = SourceModule(src.daq, options=['-I' + src.dir],
+                                  no_extern_c=True, cache_dir=False)
+        init_daq_rng = daq_module.get_function('init_daq_rng')
+        reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int')
+        run_daq = daq_module.get_function('run_daq')
+        convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float')
+
+        earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32)
+        earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32)
+
+        solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32))
+        
+        init_rng(np.int32(100000), np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
+        init_daq_rng(np.int32(100000), np.int32(10000+self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
 
         while not self.stopped():
             try:
-                job = self.jobs.get(block=False, timeout=0.5)
+                job = self.jobs.get(block=False, timeout=0.01)
             except Queue.Empty:
                 continue
 
@@ -93,16 +107,28 @@ class GPUThread(threading.Thread):
             times_gpu = cuda.to_device(job.times)
             states_gpu = cuda.to_device(job.states)
             last_hit_triangles_gpu = cuda.to_device(job.last_hit_triangles)
-
+            
             nphotons = len(job.positions)
 
             t0 = time.time()
             propagate(np.int32(nphotons), positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, states_gpu, last_hit_triangles_gpu, np.int32(self.geometry.node_map.size-1), np.int32(self.geometry.first_node), np.int32(job.max_steps), block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))
+
+            reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+                                    block=(self.nblocks,1,1), 
+                                    grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
+            run_daq(np.int32(2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, states_gpu,
+                    last_hit_triangles_gpu, solid_map_gpu,
+                    np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+                    block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))
+            convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+                                          earliest_time_gpu,
+                                          block=(self.nblocks,1,1), 
+                                          grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
+
             cuda.Context.synchronize()
             elapsed = time.time() - t0
             
             #print 'device %i; elapsed %f sec' % (self.device_id, elapsed)
-
             cuda.memcpy_dtoh(job.positions, positions_gpu)
             cuda.memcpy_dtoh(job.directions, directions_gpu)
             cuda.memcpy_dtoh(job.wavelengths, wavelengths_gpu)
@@ -110,6 +136,7 @@ class GPUThread(threading.Thread):
             cuda.memcpy_dtoh(job.times, times_gpu)
             cuda.memcpy_dtoh(job.states, states_gpu)
             cuda.memcpy_dtoh(job.last_hit_triangles, last_hit_triangles_gpu)
+            job.earliest_times = earliest_time_gpu.get()
 
             self.output.put(job)
             self.jobs.task_done()
author	Stan Seibert <stan@mtrr.org>	2011-06-21 10:32:46 -0400
committer	Stan Seibert <stan@mtrr.org>	2011-06-21 10:32:46 -0400
commit	93ad18d540a351f628726bfff9bf16f3dcf9ceb3 (patch)
tree	fcf91ce4b2229dff4fdeece4de465e24161f223b /gputhread.py
parent	02beb07ecc3eb0a914e325928fffb3ae3d6e3878 (diff)
download	chroma-93ad18d540a351f628726bfff9bf16f3dcf9ceb3.tar.gz chroma-93ad18d540a351f628726bfff9bf16f3dcf9ceb3.tar.bz2 chroma-93ad18d540a351f628726bfff9bf16f3dcf9ceb3.zip