diff options
-rw-r--r-- | chroma/cuda/photon.h | 31 | ||||
-rw-r--r-- | chroma/cuda/propagate.cu | 5 | ||||
-rw-r--r-- | chroma/gpu/photon.py | 6 |
3 files changed, 37 insertions, 5 deletions
diff --git a/chroma/cuda/photon.h b/chroma/cuda/photon.h index 79a87f0..371c122 100644 --- a/chroma/cuda/photon.h +++ b/chroma/cuda/photon.h @@ -180,7 +180,7 @@ rayleigh_scatter(Photon &p, curandState &rng) } // scatter __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng, - bool use_weights=false) + bool use_weights=false, int scatter_first=0) { float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); @@ -190,6 +190,35 @@ __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng, else use_weights = false; + if (scatter_first == 1) { + // Force scatter + float scatter_prob = 1.0f - expf(-s.distance_to_boundary/s.scattering_length); + + if (scatter_prob > WEIGHT_LOWER_THRESHOLD) { + int i=0; + const int max_i = 1000; + while (i < max_i && scattering_distance > s.distance_to_boundary) { + scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); + i++; + } + p.weight *= scatter_prob; + } + + } else if (scatter_first == -1) { + // Prevent scatter + float no_scatter_prob = expf(-s.distance_to_boundary/s.scattering_length); + + if (no_scatter_prob > WEIGHT_LOWER_THRESHOLD) { + int i=0; + const int max_i = 1000; + while (i < max_i && scattering_distance <= s.distance_to_boundary) { + scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); + i++; + } + p.weight *= no_scatter_prob; + } + } + if (absorption_distance <= scattering_distance) { if (absorption_distance <= s.distance_to_boundary) { p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); diff --git a/chroma/cuda/propagate.cu b/chroma/cuda/propagate.cu index 29f8aea..9520cf8 100644 --- a/chroma/cuda/propagate.cu +++ b/chroma/cuda/propagate.cu @@ -116,7 +116,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, float *wavelengths, float3 *polarizations, float *times, unsigned int *histories, int *last_hit_triangles, float *weights, - int max_steps, int use_weights, + int max_steps, int use_weights, int scatter_first, Geometry *g) { __shared__ Geometry sg; @@ -171,7 +171,8 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, if (p.last_hit_triangle == -1) break; - command = propagate_to_boundary(p, s, rng, use_weights); + command = propagate_to_boundary(p, s, rng, use_weights, scatter_first); + scatter_first = 0; // Only use the scatter_first value once if (command == BREAK) break; diff --git a/chroma/gpu/photon.py b/chroma/gpu/photon.py index 83dfcd4..fbb064b 100644 --- a/chroma/gpu/photon.py +++ b/chroma/gpu/photon.py @@ -95,7 +95,8 @@ class GPUPhotons(object): @profile_if_possible def propagate(self, gpu_geometry, rng_states, nthreads_per_block=64, - max_blocks=1024, max_steps=10, use_weights=False): + max_blocks=1024, max_steps=10, use_weights=False, + scatter_first=0): """Propagate photons on GPU to termination or max_steps, whichever comes first. @@ -127,9 +128,10 @@ class GPUPhotons(object): for first_photon, photons_this_round, blocks in \ chunk_iterator(nphotons, nthreads_per_block, max_blocks): - self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, self.weights, np.int32(nsteps), np.int32(use_weights), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1)) + self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, self.weights, np.int32(nsteps), np.int32(use_weights), np.int32(scatter_first), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1)) step += nsteps + scatter_first = 0 # Only allow non-zero in first pass if step < max_steps: temp = input_queue_gpu |