diff options
author | Anthony LaTorre <tlatorre9@gmail.com> | 2011-06-23 19:26:24 -0400 |
---|---|---|
committer | Anthony LaTorre <tlatorre9@gmail.com> | 2011-06-23 19:26:24 -0400 |
commit | ac9f568017e9a7cf8da04acdacc342b4db6576fa (patch) | |
tree | a0978984be2d045781fa3603376b7d52e065b5a6 /src | |
parent | a7b94c352cc70fbc79d78e3ca6cec334aec2e258 (diff) | |
download | chroma-ac9f568017e9a7cf8da04acdacc342b4db6576fa.tar.gz chroma-ac9f568017e9a7cf8da04acdacc342b4db6576fa.tar.bz2 chroma-ac9f568017e9a7cf8da04acdacc342b4db6576fa.zip |
move photon initialization to the gpu. it's unclear if this is a speed improvement.
Diffstat (limited to 'src')
-rw-r--r-- | src/daq.cu | 119 | ||||
-rw-r--r-- | src/kernel.cu | 27 | ||||
-rw-r--r-- | src/random.h | 48 |
3 files changed, 121 insertions, 73 deletions
@@ -16,65 +16,70 @@ __device__ float sortable_int_to_float(unsigned int i) //return __int_as_float(i ^ mask); } +extern "C" { -__device__ curandState daq_rng_states[100000]; +__global__ void reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + if (id < ntime_ints) { + unsigned int maxtime_int = float_to_sortable_int(maxtime); + time_ints[id] = maxtime_int; + } +} + +__global__ void run_daq(curandState *s, int detection_state, float time_rms, + int nphotons, float *photon_times, int *photon_states, + int *last_hit_triangles, int *solid_map, + int nsolids, unsigned int *earliest_time_int) +{ + + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < nphotons) + { + curandState rng = s[id]; + + + + int triangle_id = last_hit_triangles[id]; + + if (triangle_id > -1) + { + int solid_id = solid_map[triangle_id]; + int state = photon_states[id]; + float time = photon_times[id] + curand_normal(&rng) * time_rms; + unsigned int time_int = float_to_sortable_int(time); + + + + if (solid_id < nsolids && state == detection_state) { + atomicMin(earliest_time_int + solid_id, time_int); + } -extern "C" { - __global__ void init_daq_rng(int nthreads, - unsigned long long seed, unsigned long long offset) - { - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - curand_init(seed, id, offset, daq_rng_states+id); - } - - __global__ void reset_earliest_time_int(float maxtime, - int ntime_ints, unsigned int *time_ints) - { - int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id < ntime_ints) { - unsigned int maxtime_int = float_to_sortable_int(maxtime); - time_ints[id] = maxtime_int; - } - } - - __global__ void run_daq(int detection_state, float time_rms, - int nphotons, float *photon_times, int *photon_states, - int *last_hit_triangles, int *solid_map, - int nsolids, unsigned int *earliest_time_int) - { - int id = threadIdx.x + blockDim.x * blockIdx.x; - - curandState_t rng = daq_rng_states[id]; - - if (id < nphotons) { - int triangle_id = last_hit_triangles[id]; - - if (triangle_id > -1) { - int solid_id = solid_map[triangle_id]; - int state = photon_states[id]; - float time = photon_times[id] + curand_normal(&rng) * time_rms; - unsigned int time_int = float_to_sortable_int(time); - if (solid_id < nsolids && state == detection_state) - atomicMin(earliest_time_int + solid_id, time_int); - } - } - - daq_rng_states[id] = rng; - } - - __global__ void convert_sortable_int_to_float(int n, - unsigned int *sortable_ints, - float *float_output) - { - int id = threadIdx.x + blockDim.x * blockIdx.x; - - if (id < n) - float_output[id] = sortable_int_to_float(sortable_ints[id]); - } + } + + + + s[id] = rng; + + + + } + + + + +} + +__global__ void convert_sortable_int_to_float(int n, + unsigned int *sortable_ints, + float *float_output) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < n) + float_output[id] = sortable_int_to_float(sortable_ints[id]); +} } // extern "C" diff --git a/src/kernel.cu b/src/kernel.cu index 584c9e4..55fde70 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -141,26 +141,33 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con return triangle_index; } -__device__ curandState rng_states[100000]; - extern "C" { - __global__ void set_pointer(uint4 *triangle_ptr, float3 *vertex_ptr) +__global__ void fill_float(int nthreads, float *a, float value) { - triangles = triangle_ptr; - vertices = vertex_ptr; + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = value; } -/* Initialize random number states */ -__global__ void init_rng(int nthreads, unsigned long long seed, unsigned long long offset) +__global__ void fill_float3(int nthreads, float3 *a, float3 value) { int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; - curand_init(seed, id, offset, rng_states+id); + a[id] = value; +} + +__global__ void set_pointer(uint4 *triangle_ptr, float3 *vertex_ptr) +{ + triangles = triangle_ptr; + vertices = vertex_ptr; } /* Translate `points` by the vector `v` */ @@ -223,7 +230,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i } // ray_trace -__global__ void propagate(int nthreads, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, int *states, int *last_hit_triangles, int start_node, int first_node, int max_steps) +__global__ void propagate(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, int *states, int *last_hit_triangles, int start_node, int first_node, int max_steps) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -445,8 +452,8 @@ __global__ void propagate(int nthreads, float3 *positions, float3 *directions, f } // while(nsteps < max_steps) - states[id] = state; rng_states[id] = rng; + states[id] = state; positions[id] = position; directions[id] = direction; polarizations[id] = polarization; diff --git a/src/random.h b/src/random.h index 74329da..b084c19 100644 --- a/src/random.h +++ b/src/random.h @@ -5,18 +5,54 @@ #include "physical_constants.h" -__device__ float uniform(curandState *rng, float a=0.0, float b=1.0) +__device__ float uniform(curandState *s, const float &low, const float &high) { - return a + curand_uniform(rng)*(b-a); + return low + curand_uniform(s)*(high-low); } -__device__ float3 uniform_sphere(curandState *rng) +__device__ float3 uniform_sphere(curandState *s) { - float theta = uniform(rng, 0, 2*PI); - float u = uniform(rng, -1, 1); - float c = sqrtf(1-u*u); + float theta = uniform(s, 0.0f, 2*PI); + float u = uniform(s, -1.0f, 1.0f); + float c = sqrtf(1.0f-u*u); return make_float3(c*cosf(theta), c*sinf(theta), u); } +extern "C" +{ + +__global__ void init_rng(int nthreads, curandState *s, unsigned long long seed, unsigned long long offset) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curand_init(seed, id, offset, &s[id]); +} + +__global__ void fill_uniform(int nthreads, curandState *s, float *a, float low, float high) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = uniform(&s[id], low, high); + +} + +__global__ void fill_uniform_sphere(int nthreads, curandState *s, float3 *a) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = uniform_sphere(&s[id]); +} + +} // extern "c" + #endif |