8 files changed, 132 insertions, 51 deletions
diff --git a/chroma/cuda/daq.cu b/chroma/cuda/daq.cu
index 3ed402d..719d928 100644
--- a/chroma/cuda/daq.cu
+++ b/chroma/cuda/daq.cu
@@ -36,6 +36,7 @@ __global__ void
 run_daq(curandState *s, unsigned int detection_state,
 	int first_photon, int nphotons, float *photon_times,
 	unsigned int *photon_histories, int *last_hit_triangles,
+	float *weights,
 	int *solid_map,
 	Detector *detector,
 	unsigned int *earliest_time_int,
@@ -55,22 +56,27 @@ run_daq(curandState *s, unsigned int detection_state,
 	    int channel_index = detector->solid_id_to_channel_index[solid_id];
 
 	    if (channel_index >= 0 && (history & detection_state)) {
-		float time = photon_times[photon_id] + 
-		    sample_cdf(&rng, detector->time_cdf_len,
-			       detector->time_cdf_x, detector->time_cdf_y);
-		unsigned int time_int = float_to_sortable_int(time);
+
+		float weight = weights[photon_id];
+		if (curand_uniform(&rng) < weight) {
+		    float time = photon_times[photon_id] + 
+			sample_cdf(&rng, detector->time_cdf_len,
+				   detector->time_cdf_x, detector->time_cdf_y);
+		    unsigned int time_int = float_to_sortable_int(time);
 			  
-		float charge = sample_cdf(&rng, detector->charge_cdf_len,
+		    float charge = sample_cdf(&rng, detector->charge_cdf_len,
 					  detector->charge_cdf_x,
 					  detector->charge_cdf_y);
-		unsigned int charge_int = roundf(charge / detector->charge_unit);
+		    unsigned int charge_int = roundf(charge / detector->charge_unit);
 
-		atomicMin(earliest_time_int + channel_index, time_int);
-		atomicAdd(channel_q_int + channel_index, charge_int);
-		atomicOr(channel_histories + channel_index, history);
-	    }
+		    atomicMin(earliest_time_int + channel_index, time_int);
+		    atomicAdd(channel_q_int + channel_index, charge_int);
+		    atomicOr(channel_histories + channel_index, history);
+		} // if weighted photon contributes
 
-	}
+	    } // if photon detected by a channel
+
+	} // if photon terminated on surface
 
 	s[id] = rng;
 	
@@ -82,6 +88,7 @@ __global__ void
 run_daq_many(curandState *s, unsigned int detection_state,
 	     int first_photon, int nphotons, float *photon_times,
 	     unsigned int *photon_histories, int *last_hit_triangles,
+	     float *weights,
 	     int *solid_map,
 	     Detector *detector,
 	     unsigned int *earliest_time_int,
@@ -94,7 +101,7 @@ run_daq_many(curandState *s, unsigned int detection_state,
     __shared__ int channel_index;
     __shared__ unsigned int history;
     __shared__ float photon_time;
-    
+    __shared__ float weight;
 
     if (threadIdx.x == 0) {
 	photon_id = first_photon + blockIdx.x;
@@ -105,6 +112,7 @@ run_daq_many(curandState *s, unsigned int detection_state,
 	    history = photon_histories[photon_id];
 	    channel_index = detector->solid_id_to_channel_index[solid_id];
 	    photon_time = photon_times[photon_id];
+	    weight = weights[photon_id];
 	}
     }
 
@@ -119,19 +127,21 @@ run_daq_many(curandState *s, unsigned int detection_state,
     for (int i = threadIdx.x; i < ndaq; i += blockDim.x) {
 	int channel_offset = channel_index + i * channel_stride;
 
-	float time = photon_time + curand_normal(&rng) * 1.2f;// + 
-	//sample_cdf(&rng, detector->time_cdf_len,
-	//	       detector->time_cdf_x, detector->time_cdf_y);
-	unsigned int time_int = float_to_sortable_int(time);
-	
-	float charge = 1.0f; //sample_cdf(&rng, detector->charge_cdf_len,
-	//detector->charge_cdf_x,
-	//detector->charge_cdf_y);
-	unsigned int charge_int = roundf(charge / detector->charge_unit);
+	if (curand_uniform(&rng) < weight) {
+	    float time = photon_time + curand_normal(&rng) * 1.2f;// + 
+	    //sample_cdf(&rng, detector->time_cdf_len,
+	    //	       detector->time_cdf_x, detector->time_cdf_y);
+	    unsigned int time_int = float_to_sortable_int(time);
 	
-	atomicMin(earliest_time_int + channel_offset, time_int);
-	atomicAdd(channel_q_int + channel_offset, charge_int);
-	atomicOr(channel_histories + channel_offset, history);
+	    float charge = 1.0f; //sample_cdf(&rng, detector->charge_cdf_len,
+	    //detector->charge_cdf_x,
+	    //detector->charge_cdf_y);
+	    unsigned int charge_int = roundf(charge / detector->charge_unit);
+	    
+	    atomicMin(earliest_time_int + channel_offset, time_int);
+	    atomicAdd(channel_q_int + channel_offset, charge_int);
+	    atomicOr(channel_histories + channel_offset, history);
+	}
     }
 
     s[id] = rng;
diff --git a/chroma/cuda/photon.h b/chroma/cuda/photon.h
index 8b7e588..79a87f0 100644
--- a/chroma/cuda/photon.h
+++ b/chroma/cuda/photon.h
@@ -9,6 +9,8 @@
 #include "mesh.h"
 #include "geometry.h"
 
+#define WEIGHT_LOWER_THRESHOLD 0.01f
+
 struct Photon
 {
     float3 position;
@@ -16,7 +18,9 @@ struct Photon
     float3 polarization;
     float wavelength;
     float time;
-
+  
+    float weight;
+  
     unsigned int history;
 
     int last_hit_triangle;
@@ -175,11 +179,17 @@ rayleigh_scatter(Photon &p, curandState &rng)
     p.polarization /= norm(p.polarization);
 } // scatter
 
-__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng)
+__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng,
+                                     bool use_weights=false)
 {
     float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng));
     float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng));
 
+    if (use_weights && p.weight > WEIGHT_LOWER_THRESHOLD) // Prevent absorption
+	absorption_distance = 1e30;
+    else
+	use_weights = false;
+
     if (absorption_distance <= scattering_distance) {
 	if (absorption_distance <= s.distance_to_boundary) {
 	    p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1);
@@ -193,6 +203,11 @@ __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng)
     }
     else {
 	if (scattering_distance <= s.distance_to_boundary) {
+
+	    // Scale weight by absorption probability along this distance
+	    if (use_weights)
+		p.weight *= expf(-scattering_distance/s.absorption_length);
+
 	    p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1);
 	    p.position += scattering_distance*p.direction;
 
@@ -206,6 +221,10 @@ __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng)
 	} // photon is scattered in material1
     } // if scattering_distance < absorption_distance
 
+    // Scale weight by absorption probability along this distance
+    if (use_weights)
+	p.weight *= expf(-s.distance_to_boundary/s.absorption_length);
+    
     p.position += s.distance_to_boundary*p.direction;
     p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1);
 
@@ -269,7 +288,8 @@ propagate_at_boundary(Photon &p, State &s, curandState &rng)
 } // propagate_at_boundary
 
 __device__ int
-propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry)
+propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry,
+                     bool use_weights=false)
 {
     Surface *surface = geometry->surfaces[s.surface_index];
 
@@ -283,6 +303,25 @@ propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry)
 
     float uniform_sample = curand_uniform(&rng);
 
+    if (use_weights && p.weight > WEIGHT_LOWER_THRESHOLD 
+	&& absorb < (1.0f - WEIGHT_LOWER_THRESHOLD)) {
+	// Prevent absorption and reweight accordingly
+	float survive = 1.0f - absorb;
+	absorb = 0.0f;
+	p.weight *= survive;
+
+	// Renormalize remaining probabilities
+	detect /= survive;
+	reflect_diffuse /= survive;
+	reflect_specular /= survive;
+    }
+
+    if (use_weights && detect > 0.0f) {
+	p.history |= SURFACE_DETECT;
+	p.weight *= detect;
+	return BREAK;
+    }
+
     if (uniform_sample < absorb) {
 	p.history |= SURFACE_ABSORB;
 	return BREAK;
diff --git a/chroma/cuda/propagate.cu b/chroma/cuda/propagate.cu
index 87ba9f5..29f8aea 100644
--- a/chroma/cuda/propagate.cu
+++ b/chroma/cuda/propagate.cu
@@ -14,7 +14,7 @@ photon_duplicate(int first_photon, int nthreads,
 		 float3 *positions, float3 *directions,
 		 float *wavelengths, float3 *polarizations,
 		 float *times, unsigned int *histories,
-		 int *last_hit_triangles, 
+		 int *last_hit_triangles, float *weights,
 		 int copies, int stride)
 {
     int id = blockIdx.x*blockDim.x + threadIdx.x;
@@ -32,6 +32,7 @@ photon_duplicate(int first_photon, int nthreads,
     p.time = times[photon_id];
     p.last_hit_triangle = last_hit_triangles[photon_id];
     p.history = histories[photon_id];
+    p.weight = weights[photon_id];
 
     for (int i=1; i <= copies; i++) {
       int target_photon_id = photon_id + stride * i;
@@ -43,6 +44,7 @@ photon_duplicate(int first_photon, int nthreads,
       times[target_photon_id] = p.time;
       last_hit_triangles[target_photon_id] = p.last_hit_triangle;
       histories[target_photon_id] = p.history;
+      weights[target_photon_id] = p.weight;
     }
 }
 
@@ -79,11 +81,11 @@ copy_photons(int first_photon, int nthreads, unsigned int target_flag,
 	     float3 *positions, float3 *directions,
 	     float *wavelengths, float3 *polarizations,
 	     float *times, unsigned int *histories,
-	     int *last_hit_triangles, 
+	     int *last_hit_triangles, float *weights,
 	     float3 *new_positions, float3 *new_directions,
 	     float *new_wavelengths, float3 *new_polarizations,
 	     float *new_times, unsigned int *new_histories,
-	     int *new_last_hit_triangles)
+	     int *new_last_hit_triangles, float *new_weights)
 {
     int id = blockIdx.x*blockDim.x + threadIdx.x;
     
@@ -102,6 +104,7 @@ copy_photons(int first_photon, int nthreads, unsigned int target_flag,
 	new_times[offset] = times[photon_id];
 	new_histories[offset] = histories[photon_id];
 	new_last_hit_triangles[offset] = last_hit_triangles[photon_id];
+	new_weights[offset] = weights[photon_id];
     }
 }
 
@@ -112,7 +115,8 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue,
 	  float3 *positions, float3 *directions,
 	  float *wavelengths, float3 *polarizations,
 	  float *times, unsigned int *histories,
-	  int *last_hit_triangles, int max_steps,
+	  int *last_hit_triangles, float *weights,
+	  int max_steps, int use_weights,
 	  Geometry *g)
 {
     __shared__ Geometry sg;
@@ -143,6 +147,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue,
     p.time = times[photon_id];
     p.last_hit_triangle = last_hit_triangles[photon_id];
     p.history = histories[photon_id];
+    p.weight = weights[photon_id];
 
     if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB))
 	return;
@@ -166,7 +171,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue,
 	if (p.last_hit_triangle == -1)
 	    break;
 
-	command = propagate_to_boundary(p, s, rng);
+	command = propagate_to_boundary(p, s, rng, use_weights);
 
 	if (command == BREAK)
 	    break;
@@ -175,7 +180,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue,
 	    continue;
 
 	if (s.surface_index != -1) {
-	    command = propagate_at_surface(p, s, rng, g);
+	  command = propagate_at_surface(p, s, rng, g, use_weights);
 
 	    if (command == BREAK)
 		break;
@@ -196,7 +201,8 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue,
     times[photon_id] = p.time;
     histories[photon_id] = p.history;
     last_hit_triangles[photon_id] = p.last_hit_triangle;
-	
+    weights[photon_id] = p.weight;
+
     // Not done, put photon in output queue
     if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) {
 	int out_idx = atomicAdd(output_queue, 1);
diff --git a/chroma/event.py b/chroma/event.py
index 334df9a..e65c543 100644
--- a/chroma/event.py
+++ b/chroma/event.py
@@ -42,7 +42,7 @@ class Vertex(object):
         self.t0 = t0
 
 class Photons(object):
-    def __init__(self, pos, dir, pol, wavelengths, t=None, last_hit_triangles=None, flags=None):
+    def __init__(self, pos, dir, pol, wavelengths, t=None, last_hit_triangles=None, flags=None, weights=None):
         '''Create a new list of n photons.
 
             pos: numpy.ndarray(dtype=numpy.float32, shape=(n,3))
@@ -67,6 +67,10 @@ class Photons(object):
             flags: numpy.ndarray(dtype=numpy.uint32, shape=n)
                Bit-field indicating the physics interaction history of the photon.  See 
                history bit constants in chroma.event for definition.
+
+            weights: numpy.ndarray(dtype=numpy.float32, shape=n)
+               Survival probability for each photon.  Used by 
+               photon propagation code when computing likelihood functions.
         '''
         self.pos = pos
         self.dir = dir
@@ -89,6 +93,11 @@ class Photons(object):
         else:
             self.flags = flags
 
+        if weights is None:
+            self.weights = np.ones(len(pos), dtype=np.float32)
+        else:
+            self.weights = weights
+
     def __add__(self, other):
         '''Concatenate two Photons objects into one list of photons.
 
@@ -104,7 +113,9 @@ class Photons(object):
         t = np.concatenate((self.t, other.t))
         last_hit_triangles = np.concatenate((self.last_hit_triangles, other.last_hit_triangles))
         flags = np.concatenate((self.flags, other.flags))
-        return Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags)
+        weights = np.concatenate((self.weights, other.weights))
+        return Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags,
+                       weights)
 
     def __len__(self):
         '''Returns the number of photons in self.'''
diff --git a/chroma/gpu/daq.py b/chroma/gpu/daq.py
index ec5add4..10b4b00 100644
--- a/chroma/gpu/daq.py
+++ b/chroma/gpu/daq.py
@@ -67,7 +67,7 @@ class GPUDaq(object):
                     chunk_iterator(nphotons, nthreads_per_block, max_blocks):
                 self.gpu_funcs.run_daq(rng_states, np.uint32(0x1 << 2), 
                                        np.int32(start_photon+first_photon), np.int32(photons_this_round), gpuphotons.t, 
-                                       gpuphotons.flags, gpuphotons.last_hit_triangles, 
+                                       gpuphotons.flags, gpuphotons.last_hit_triangles, gpuphotons.weights,
                                        self.solid_id_map_gpu,
                                        self.detector_gpu,
                                        self.earliest_time_int_gpu, 
@@ -78,7 +78,7 @@ class GPUDaq(object):
                     chunk_iterator(nphotons, 1, max_blocks):
                 self.gpu_funcs.run_daq_many(rng_states, np.uint32(0x1 << 2), 
                                             np.int32(start_photon), np.int32(photons_this_round), gpuphotons.t, 
-                                            gpuphotons.flags, gpuphotons.last_hit_triangles, 
+                                            gpuphotons.flags, gpuphotons.last_hit_triangles, gpuphotons.weights,
                                             self.solid_id_map_gpu,
                                             self.detector_gpu,
                                             self.earliest_time_int_gpu, 
diff --git a/chroma/gpu/pdf.py b/chroma/gpu/pdf.py
index 5d27f63..fe718a7 100644
--- a/chroma/gpu/pdf.py
+++ b/chroma/gpu/pdf.py
@@ -313,6 +313,9 @@ class GPUPDF(object):
                                            self.work_queues,
                                            block=(nthreads_per_block,1,1), 
                                            grid=(self.event_hit_gpu.size//nthreads_per_block+1,1))
+        cuda.Context.get_current().synchronize()
+        return # FIXME
+
         self.gpu_funcs.accumulate_nearest_neighbor_block(np.int32(self.event_nhit),
                                                          np.int32(gpuchannels.ndaq),
                                                          self.map_hit_offset_to_channel_id_gpu,
diff --git a/chroma/gpu/photon.py b/chroma/gpu/photon.py
index f55b8e8..80fe77c 100644
--- a/chroma/gpu/photon.py
+++ b/chroma/gpu/photon.py
@@ -34,6 +34,7 @@ class GPUPhotons(object):
         self.t = ga.empty(shape=nphotons*ncopies, dtype=np.float32)
         self.last_hit_triangles = ga.empty(shape=nphotons*ncopies, dtype=np.int32)
         self.flags = ga.empty(shape=nphotons*ncopies, dtype=np.uint32)
+        self.weights = ga.empty(shape=nphotons*ncopies, dtype=np.float32)
 
         # Assign the provided photons to the beginning (possibly
         # the entire array if ncopies is 1
@@ -44,6 +45,7 @@ class GPUPhotons(object):
         self.t[:nphotons].set(photons.t.astype(np.float32))
         self.last_hit_triangles[:nphotons].set(photons.last_hit_triangles.astype(np.int32))
         self.flags[:nphotons].set(photons.flags.astype(np.uint32))
+        self.weights[:nphotons].set(photons.weights.astype(np.float32))
 
         module = get_cu_module('propagate.cu', options=cuda_options)
         self.gpu_funcs = GPUFuncs(module)
@@ -56,7 +58,7 @@ class GPUPhotons(object):
                     chunk_iterator(nphotons, nthreads_per_block, max_blocks):
                 self.gpu_funcs.photon_duplicate(np.int32(first_photon), np.int32(photons_this_round),
                                                 self.pos, self.dir, self.wavelengths, self.pol, self.t, 
-                                                self.flags, self.last_hit_triangles, 
+                                                self.flags, self.last_hit_triangles, self.weights,
                                                 np.int32(ncopies-1), 
                                                 np.int32(nphotons),
                                                 block=(nthreads_per_block,1,1), grid=(blocks, 1))
@@ -74,7 +76,8 @@ class GPUPhotons(object):
         t = self.t.get()
         last_hit_triangles = self.last_hit_triangles.get()
         flags = self.flags.get()
-        return event.Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags)
+        weights = self.weights.get()
+        return event.Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags, weights)
 
     def iterate_copies(self):
         '''Returns an iterator that yields GPUPhotonsSlice objects
@@ -87,11 +90,12 @@ class GPUPhotons(object):
                                   wavelengths=self.wavelengths[window],
                                   t=self.t[window],
                                   last_hit_triangles=self.last_hit_triangles[window],
-                                  flags=self.flags[window])
+                                  flags=self.flags[window],
+                                  weights=self.weights[window])
 
     @profile_if_possible
     def propagate(self, gpu_geometry, rng_states, nthreads_per_block=64,
-                  max_blocks=1024, max_steps=10):
+                  max_blocks=1024, max_steps=10, use_weights=False):
         """Propagate photons on GPU to termination or max_steps, whichever
         comes first.
 
@@ -123,7 +127,7 @@ class GPUPhotons(object):
 
             for first_photon, photons_this_round, blocks in \
                     chunk_iterator(nphotons, nthreads_per_block, max_blocks):
-                self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, np.int32(nsteps), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1))
+                self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, self.weights, np.int32(nsteps), np.int32(use_weights), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1))
 
             step += nsteps
 
@@ -171,6 +175,7 @@ class GPUPhotons(object):
         t = ga.empty(shape=reduced_nphotons, dtype=np.float32)
         last_hit_triangles = ga.empty(shape=reduced_nphotons, dtype=np.int32)
         flags = ga.empty(shape=reduced_nphotons, dtype=np.uint32)
+        weights = ga.empty(shape=reduced_nphotons, dtype=np.float32)
 
         # And finaly copy photons, if there are any
         if reduced_nphotons > 0:
@@ -181,12 +186,12 @@ class GPUPhotons(object):
                                             np.int32(photons_this_round), 
                                             np.uint32(target_flag),
                                             index_counter_gpu, 
-                                            self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles,
-                                            pos, dir, wavelengths, pol, t, flags, last_hit_triangles,
+                                            self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, self.weights,
+                                            pos, dir, wavelengths, pol, t, flags, last_hit_triangles, weights,
                                             block=(nthreads_per_block,1,1), 
                                             grid=(blocks, 1))
             assert index_counter_gpu.get()[0] == reduced_nphotons
-        return GPUPhotonsSlice(pos, dir, pol, wavelengths, t, last_hit_triangles, flags)
+        return GPUPhotonsSlice(pos, dir, pol, wavelengths, t, last_hit_triangles, flags, weights)
 
     def __del__(self):
         del self.pos
@@ -210,7 +215,7 @@ class GPUPhotonsSlice(GPUPhotons):
 
     Returned by the GPUPhotons.iterate_copies() iterator.'''
     def __init__(self, pos, dir, pol, wavelengths, t, last_hit_triangles,
-                 flags):
+                 flags, weights):
         '''Create new object using slices of GPUArrays from an instance
         of GPUPhotons.  NOTE THESE ARE NOT CPU ARRAYS!'''
         self.pos = pos
@@ -220,6 +225,7 @@ class GPUPhotonsSlice(GPUPhotons):
         self.t = t
         self.last_hit_triangles = last_hit_triangles
         self.flags = flags
+        self.weights = weights
 
         module = get_cu_module('propagate.cu', options=cuda_options)
         self.gpu_funcs = GPUFuncs(module)
diff --git a/chroma/sim.py b/chroma/sim.py
index a82f1f8..47f1820 100644
--- a/chroma/sim.py
+++ b/chroma/sim.py
@@ -127,7 +127,8 @@ class Simulation(object):
     def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, time_only=True):
         """Returns tuple: 1D array of channel hit counts, 1D array of PDF
         probability densities."""
-        gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=ndaq)
+        ndaq_reps = ndaq // 32
+        gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=32)
 
         self.gpu_pdf.setup_pdf_eval(event_channels.hit,
                                     event_channels.t,
@@ -148,7 +149,8 @@ class Simulation(object):
             gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
             gpu_photons.propagate(self.gpu_geometry, self.rng_states,
                                   nthreads_per_block=self.nthreads_per_block,
-                                  max_blocks=self.max_blocks)
+                                  max_blocks=self.max_blocks,
+                                  use_weights=True)
             nphotons = gpu_photons.true_nphotons
             for i in xrange(gpu_photons.ncopies):
                 start_photon = i * nphotons
@@ -157,8 +159,12 @@ class Simulation(object):
                                                            nphotons=nphotons)
                 if len(gpu_photon_slice) == 0:
                     continue
-                gpu_channels = gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
-                self.gpu_pdf.accumulate_pdf_eval(gpu_channels, 32)
+
+                #weights = gpu_photon_slice.weights.get()
+                #print 'weights', weights.min(), weights.max()
+                for j in xrange(ndaq_reps):
+                    gpu_channels = gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
+                    self.gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block=32)
         
         return self.gpu_pdf.get_pdf_eval()