2 files changed, 8 insertions, 3 deletions
diff --git a/gpu.py b/gpu.py
index 7b08473..e4eecbc 100644
--- a/gpu.py
+++ b/gpu.py
@@ -72,13 +72,13 @@ class GPU(object):
             device = cuda.Device(device_id)
             self.context = device.make_context()
         print 'device %s' % self.context.get_device().name()
-        self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+        self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True)
         self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids'])
 
         self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate'])
         self.nthread_per_block = 64
         self.max_blocks = 1024
-        self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+        self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True)
         self.daq_funcs = CUDAFuncs(self.daq_module, 
                               ['reset_earliest_time_int', 'run_daq',
                                'convert_sortable_int_to_float'])
@@ -279,6 +279,7 @@ class GPU(object):
         self.earliest_time_int_gpu = gpuarray.GPUArray(shape=self.earliest_time_gpu.shape, 
                                                        dtype=np.uint32)
         self.channel_history_gpu = gpuarray.zeros_like(self.earliest_time_int_gpu)
+        self.channel_q_gpu = gpuarray.zeros_like(self.earliest_time_int_gpu)
         self.daq_pmt_rms = pmt_rms
 
     def run_daq(self):
@@ -287,6 +288,7 @@ class GPU(object):
                                                self.earliest_time_int_gpu, 
                                                block=(self.nthread_per_block,1,1), 
                                                grid=(len(self.earliest_time_int_gpu)//self.nthread_per_block+1,1))
+        self.channel_q_gpu.fill(0)
         self.channel_history_gpu.fill(0)
 
         #self.context.synchronize()
@@ -300,6 +302,7 @@ class GPU(object):
                                    self.solid_id_map_gpu, 
                                    np.int32(len(self.earliest_time_int_gpu)), 
                                    self.earliest_time_int_gpu,
+                                   self.channel_q_gpu,
                                    self.channel_history_gpu,
                                    block=(self.nthread_per_block,1,1), grid=(blocks,1))
         #self.context.synchronize()
@@ -313,7 +316,7 @@ class GPU(object):
 
     def get_hits(self):
         return { 't': self.earliest_time_gpu.get(), 
-                 'q': np.ones(shape=len(self.earliest_time_int_gpu), dtype=np.float32),
+                 'q': self.channel_q_gpu.get().astype(np.float32),
                  'history': self.channel_history_gpu.get()}
 
     def __del__(self):
diff --git a/src/daq.cu b/src/daq.cu
index 9824d7d..2b95560 100644
--- a/src/daq.cu
+++ b/src/daq.cu
@@ -35,6 +35,7 @@ __global__ void run_daq(curandState *s, unsigned int detection_state,
 			unsigned int *photon_histories,
 			int *last_hit_triangles, int *solid_map,
 			int nsolids, unsigned int *earliest_time_int,
+			unsigned int *channel_q,
 			unsigned int *channel_histories)
 {
 
@@ -57,6 +58,7 @@ __global__ void run_daq(curandState *s, unsigned int detection_state,
 			  unsigned int time_int = float_to_sortable_int(time);
 			  
 			  atomicMin(earliest_time_int + solid_id, time_int);
+			  atomicAdd(channel_q + solid_id, 1);
 			  atomicOr(channel_histories + solid_id, history);
 			}