diff options
-rw-r--r-- | chroma/cuda/interpolate.h | 50 | ||||
-rw-r--r-- | chroma/cuda/random.h | 42 |
2 files changed, 73 insertions, 19 deletions
diff --git a/chroma/cuda/interpolate.h b/chroma/cuda/interpolate.h new file mode 100644 index 0000000..a866272 --- /dev/null +++ b/chroma/cuda/interpolate.h @@ -0,0 +1,50 @@ +#ifndef __INTERPOLATE_H__ +#define __INTERPOLATE_H__ + +__device__ float +interp(float x, int n, float *xp, float *fp) +{ + int lower = 0; + int upper = n-1; + + if (x <= xp[lower]) + return fp[lower]; + + if (x >= xp[upper]) + return fp[upper]; + + while (lower < upper-1) + { + int half = (lower+upper)/2; + + if (x < xp[half]) + upper = half; + else + lower = half; + } + + float df = fp[upper] - fp[lower]; + float dx = xp[upper] - xp[lower]; + + return fp[lower] + df*(x-xp[lower])/dx; +} + +__device__ float +interp_uniform(float x, int n, float x0, float dx, float *fp) +{ + if (x <= xp[0]) + return xp[0]; + + if (x >= xp[n-1]) + return xp[n-1]; + + int lower = (x - x0)/dx; + int upper = lower + 1; + + float df = fp[upper] - fp[lower]; + float dx = xp[upper] - xp[lower]; + + return fp[lower] + df*(x-xp[lower])/dx; +} + +#endif diff --git a/chroma/cuda/random.h b/chroma/cuda/random.h index ee528ea..498f6ab 100644 --- a/chroma/cuda/random.h +++ b/chroma/cuda/random.h @@ -4,6 +4,7 @@ #include <curand_kernel.h> #include "physical_constants.h" +#include "interpolate.h" __device__ float uniform(curandState *s, const float &low, const float &high) @@ -26,22 +27,7 @@ uniform_sphere(curandState *s) __device__ float sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y) { - float u = curand_uniform(rng); - - // Find u in cdf_y with binary search - // list must contain at least 2 elements: 0.0 and 1.0 - int lower=0; - int upper=ncdf-1; - while(lower < upper-1) { - int half = (lower+upper) / 2; - if (u < cdf_y[half]) - upper = half; - else - lower = half; - } - - float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); - return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac); + return interp(curand_uniform(rng),ncdf,cdf_y,cdf_x); } // Sample from a uniformly-sampled CDF @@ -52,16 +38,20 @@ sample_cdf(curandState *rng, int ncdf, float x0, float delta, float *cdf_y) int lower = 0; int upper = ncdf - 1; - while(lower < upper-1) { + + while(lower < upper-1) + { int half = (lower + upper) / 2; + if (u < cdf_y[half]) upper = half; else lower = half; } - float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); - return (x0 + delta * lower) * frac + (x0 + delta * upper) * (1.0f - frac); + float delta_cdf_y = cdf_y[upper] - cdf_y[lower]; + + return x0 + delta*lower + delta*(u-cdf_y[lower])/delta_cdf_y; } extern "C" @@ -102,6 +92,20 @@ fill_uniform_sphere(int nthreads, curandState *s, float3 *a) a[id] = uniform_sphere(&s[id]); } +__global__ void +fill_sample_cdf(int offset, int nthreads, curandState *rng_states, + int ncdf, float *cdf_x, float *cdf_y, float *x) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curandState *s = rng_states+id; + + x[id+offset] = sample_cdf(s,ncdf,cdf_x,cdf_y); +} + } // extern "c" #endif |