summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAnthony LaTorre <tlatorre9@gmail.com>2011-06-23 19:26:24 -0400
committerAnthony LaTorre <tlatorre9@gmail.com>2011-06-23 19:26:24 -0400
commitac9f568017e9a7cf8da04acdacc342b4db6576fa (patch)
treea0978984be2d045781fa3603376b7d52e065b5a6 /src
parenta7b94c352cc70fbc79d78e3ca6cec334aec2e258 (diff)
downloadchroma-ac9f568017e9a7cf8da04acdacc342b4db6576fa.tar.gz
chroma-ac9f568017e9a7cf8da04acdacc342b4db6576fa.tar.bz2
chroma-ac9f568017e9a7cf8da04acdacc342b4db6576fa.zip
move photon initialization to the gpu. it's unclear if this is a speed improvement.
Diffstat (limited to 'src')
-rw-r--r--src/daq.cu119
-rw-r--r--src/kernel.cu27
-rw-r--r--src/random.h48
3 files changed, 121 insertions, 73 deletions
diff --git a/src/daq.cu b/src/daq.cu
index 99490bd..a7f5120 100644
--- a/src/daq.cu
+++ b/src/daq.cu
@@ -16,65 +16,70 @@ __device__ float sortable_int_to_float(unsigned int i)
//return __int_as_float(i ^ mask);
}
+extern "C" {
-__device__ curandState daq_rng_states[100000];
+__global__ void reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints)
+{
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+ if (id < ntime_ints) {
+ unsigned int maxtime_int = float_to_sortable_int(maxtime);
+ time_ints[id] = maxtime_int;
+ }
+}
+
+__global__ void run_daq(curandState *s, int detection_state, float time_rms,
+ int nphotons, float *photon_times, int *photon_states,
+ int *last_hit_triangles, int *solid_map,
+ int nsolids, unsigned int *earliest_time_int)
+{
+
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+
+ if (id < nphotons)
+ {
+ curandState rng = s[id];
+
+
+
+ int triangle_id = last_hit_triangles[id];
+
+ if (triangle_id > -1)
+ {
+ int solid_id = solid_map[triangle_id];
+ int state = photon_states[id];
+ float time = photon_times[id] + curand_normal(&rng) * time_rms;
+ unsigned int time_int = float_to_sortable_int(time);
+
+
+
+ if (solid_id < nsolids && state == detection_state) {
+ atomicMin(earliest_time_int + solid_id, time_int);
+ }
-extern "C" {
- __global__ void init_daq_rng(int nthreads,
- unsigned long long seed, unsigned long long offset)
- {
- int id = blockIdx.x*blockDim.x + threadIdx.x;
-
- if (id >= nthreads)
- return;
-
- curand_init(seed, id, offset, daq_rng_states+id);
- }
-
- __global__ void reset_earliest_time_int(float maxtime,
- int ntime_ints, unsigned int *time_ints)
- {
- int id = threadIdx.x + blockDim.x * blockIdx.x;
- if (id < ntime_ints) {
- unsigned int maxtime_int = float_to_sortable_int(maxtime);
- time_ints[id] = maxtime_int;
- }
- }
-
- __global__ void run_daq(int detection_state, float time_rms,
- int nphotons, float *photon_times, int *photon_states,
- int *last_hit_triangles, int *solid_map,
- int nsolids, unsigned int *earliest_time_int)
- {
- int id = threadIdx.x + blockDim.x * blockIdx.x;
-
- curandState_t rng = daq_rng_states[id];
-
- if (id < nphotons) {
- int triangle_id = last_hit_triangles[id];
-
- if (triangle_id > -1) {
- int solid_id = solid_map[triangle_id];
- int state = photon_states[id];
- float time = photon_times[id] + curand_normal(&rng) * time_rms;
- unsigned int time_int = float_to_sortable_int(time);
- if (solid_id < nsolids && state == detection_state)
- atomicMin(earliest_time_int + solid_id, time_int);
- }
- }
-
- daq_rng_states[id] = rng;
- }
-
- __global__ void convert_sortable_int_to_float(int n,
- unsigned int *sortable_ints,
- float *float_output)
- {
- int id = threadIdx.x + blockDim.x * blockIdx.x;
-
- if (id < n)
- float_output[id] = sortable_int_to_float(sortable_ints[id]);
- }
+ }
+
+
+
+ s[id] = rng;
+
+
+
+ }
+
+
+
+
+}
+
+__global__ void convert_sortable_int_to_float(int n,
+ unsigned int *sortable_ints,
+ float *float_output)
+{
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+
+ if (id < n)
+ float_output[id] = sortable_int_to_float(sortable_ints[id]);
+}
} // extern "C"
diff --git a/src/kernel.cu b/src/kernel.cu
index 584c9e4..55fde70 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -141,26 +141,33 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con
return triangle_index;
}
-__device__ curandState rng_states[100000];
-
extern "C"
{
- __global__ void set_pointer(uint4 *triangle_ptr, float3 *vertex_ptr)
+__global__ void fill_float(int nthreads, float *a, float value)
{
- triangles = triangle_ptr;
- vertices = vertex_ptr;
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ a[id] = value;
}
-/* Initialize random number states */
-__global__ void init_rng(int nthreads, unsigned long long seed, unsigned long long offset)
+__global__ void fill_float3(int nthreads, float3 *a, float3 value)
{
int id = blockIdx.x*blockDim.x + threadIdx.x;
if (id >= nthreads)
return;
- curand_init(seed, id, offset, rng_states+id);
+ a[id] = value;
+}
+
+__global__ void set_pointer(uint4 *triangle_ptr, float3 *vertex_ptr)
+{
+ triangles = triangle_ptr;
+ vertices = vertex_ptr;
}
/* Translate `points` by the vector `v` */
@@ -223,7 +230,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
} // ray_trace
-__global__ void propagate(int nthreads, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, int *states, int *last_hit_triangles, int start_node, int first_node, int max_steps)
+__global__ void propagate(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, int *states, int *last_hit_triangles, int start_node, int first_node, int max_steps)
{
int id = blockIdx.x*blockDim.x + threadIdx.x;
@@ -445,8 +452,8 @@ __global__ void propagate(int nthreads, float3 *positions, float3 *directions, f
} // while(nsteps < max_steps)
- states[id] = state;
rng_states[id] = rng;
+ states[id] = state;
positions[id] = position;
directions[id] = direction;
polarizations[id] = polarization;
diff --git a/src/random.h b/src/random.h
index 74329da..b084c19 100644
--- a/src/random.h
+++ b/src/random.h
@@ -5,18 +5,54 @@
#include "physical_constants.h"
-__device__ float uniform(curandState *rng, float a=0.0, float b=1.0)
+__device__ float uniform(curandState *s, const float &low, const float &high)
{
- return a + curand_uniform(rng)*(b-a);
+ return low + curand_uniform(s)*(high-low);
}
-__device__ float3 uniform_sphere(curandState *rng)
+__device__ float3 uniform_sphere(curandState *s)
{
- float theta = uniform(rng, 0, 2*PI);
- float u = uniform(rng, -1, 1);
- float c = sqrtf(1-u*u);
+ float theta = uniform(s, 0.0f, 2*PI);
+ float u = uniform(s, -1.0f, 1.0f);
+ float c = sqrtf(1.0f-u*u);
return make_float3(c*cosf(theta), c*sinf(theta), u);
}
+extern "C"
+{
+
+__global__ void init_rng(int nthreads, curandState *s, unsigned long long seed, unsigned long long offset)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ curand_init(seed, id, offset, &s[id]);
+}
+
+__global__ void fill_uniform(int nthreads, curandState *s, float *a, float low, float high)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ a[id] = uniform(&s[id], low, high);
+
+}
+
+__global__ void fill_uniform_sphere(int nthreads, curandState *s, float3 *a)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ a[id] = uniform_sphere(&s[id]);
+}
+
+} // extern "c"
+
#endif