summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--chroma/cuda/propagate.cu62
-rw-r--r--chroma/gpu/photon.py56
2 files changed, 117 insertions, 1 deletions
diff --git a/chroma/cuda/propagate.cu b/chroma/cuda/propagate.cu
index fdcf532..87ba9f5 100644
--- a/chroma/cuda/propagate.cu
+++ b/chroma/cuda/propagate.cu
@@ -45,7 +45,67 @@ photon_duplicate(int first_photon, int nthreads,
histories[target_photon_id] = p.history;
}
}
-
+
+__global__ void
+count_photons(int first_photon, int nthreads, unsigned int target_flag,
+ unsigned int *index_counter,
+ unsigned int *histories)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+ __shared__ unsigned int counter;
+
+ if (threadIdx.x == 0)
+ counter = 0;
+ __syncthreads();
+
+ if (id < nthreads) {
+ int photon_id = first_photon + id;
+
+ if (histories[photon_id] & target_flag) {
+ atomicAdd(&counter, 1);
+ }
+
+ }
+
+ __syncthreads();
+
+ if (threadIdx.x == 0)
+ atomicAdd(index_counter, counter);
+}
+
+__global__ void
+copy_photons(int first_photon, int nthreads, unsigned int target_flag,
+ unsigned int *index_counter,
+ float3 *positions, float3 *directions,
+ float *wavelengths, float3 *polarizations,
+ float *times, unsigned int *histories,
+ int *last_hit_triangles,
+ float3 *new_positions, float3 *new_directions,
+ float *new_wavelengths, float3 *new_polarizations,
+ float *new_times, unsigned int *new_histories,
+ int *new_last_hit_triangles)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ int photon_id = first_photon + id;
+
+ if (histories[photon_id] & target_flag) {
+ int offset = atomicAdd(index_counter, 1);
+
+ new_positions[offset] = positions[photon_id];
+ new_directions[offset] = directions[photon_id];
+ new_polarizations[offset] = polarizations[photon_id];
+ new_wavelengths[offset] = wavelengths[photon_id];
+ new_times[offset] = times[photon_id];
+ new_histories[offset] = histories[photon_id];
+ new_last_hit_triangles[offset] = last_hit_triangles[photon_id];
+ }
+}
+
+
__global__ void
propagate(int first_photon, int nthreads, unsigned int *input_queue,
unsigned int *output_queue, curandState *rng_states,
diff --git a/chroma/gpu/photon.py b/chroma/gpu/photon.py
index 2a9a1ff..f55b8e8 100644
--- a/chroma/gpu/photon.py
+++ b/chroma/gpu/photon.py
@@ -2,6 +2,7 @@ import numpy as np
import sys
import gc
from pycuda import gpuarray as ga
+import pycuda.driver as cuda
from chroma.tools import profile_if_possible
from chroma import event
@@ -135,6 +136,57 @@ class GPUPhotons(object):
if ga.max(self.flags).get() & (1 << 31):
print >>sys.stderr, "WARNING: ABORTED PHOTONS"
+ cuda.Context.get_current().synchronize()
+
+
+ @profile_if_possible
+ def select(self, target_flag, nthreads_per_block=64, max_blocks=1024,
+ start_photon=None, nphotons=None):
+ '''Return a new GPUPhoton object containing only photons that
+ have a particular bit set in their history word.'''
+ cuda.Context.get_current().synchronize()
+ index_counter_gpu = ga.zeros(shape=1, dtype=np.uint32)
+ cuda.Context.get_current().synchronize()
+ if start_photon is None:
+ start_photon = 0
+ if nphotons is None:
+ nphotons = self.pos.size - start_photon
+
+ # First count how much space we need
+ for first_photon, photons_this_round, blocks in \
+ chunk_iterator(nphotons, nthreads_per_block, max_blocks):
+ self.gpu_funcs.count_photons(np.int32(start_photon+first_photon),
+ np.int32(photons_this_round),
+ np.uint32(target_flag),
+ index_counter_gpu, self.flags,
+ block=(nthreads_per_block,1,1),
+ grid=(blocks, 1))
+ cuda.Context.get_current().synchronize()
+ reduced_nphotons = int(index_counter_gpu.get()[0])
+ # Then allocate new storage space
+ pos = ga.empty(shape=reduced_nphotons, dtype=ga.vec.float3)
+ dir = ga.empty(shape=reduced_nphotons, dtype=ga.vec.float3)
+ pol = ga.empty(shape=reduced_nphotons, dtype=ga.vec.float3)
+ wavelengths = ga.empty(shape=reduced_nphotons, dtype=np.float32)
+ t = ga.empty(shape=reduced_nphotons, dtype=np.float32)
+ last_hit_triangles = ga.empty(shape=reduced_nphotons, dtype=np.int32)
+ flags = ga.empty(shape=reduced_nphotons, dtype=np.uint32)
+
+ # And finaly copy photons, if there are any
+ if reduced_nphotons > 0:
+ index_counter_gpu.fill(0)
+ for first_photon, photons_this_round, blocks in \
+ chunk_iterator(nphotons, nthreads_per_block, max_blocks):
+ self.gpu_funcs.copy_photons(np.int32(start_photon+first_photon),
+ np.int32(photons_this_round),
+ np.uint32(target_flag),
+ index_counter_gpu,
+ self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles,
+ pos, dir, wavelengths, pol, t, flags, last_hit_triangles,
+ block=(nthreads_per_block,1,1),
+ grid=(blocks, 1))
+ assert index_counter_gpu.get()[0] == reduced_nphotons
+ return GPUPhotonsSlice(pos, dir, pol, wavelengths, t, last_hit_triangles, flags)
def __del__(self):
del self.pos
@@ -147,6 +199,10 @@ class GPUPhotons(object):
# Free up GPU memory quickly if now available
gc.collect()
+
+ def __len__(self):
+ return self.pos.size
+
class GPUPhotonsSlice(GPUPhotons):
'''A `slice`-like view of a subrange of another GPU photons array.
Works exactly like an instance of GPUPhotons, but the GPU storage