summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--chroma/cuda/interpolate.h50
-rw-r--r--chroma/cuda/random.h42
2 files changed, 73 insertions, 19 deletions
diff --git a/chroma/cuda/interpolate.h b/chroma/cuda/interpolate.h
new file mode 100644
index 0000000..a866272
--- /dev/null
+++ b/chroma/cuda/interpolate.h
@@ -0,0 +1,50 @@
+#ifndef __INTERPOLATE_H__
+#define __INTERPOLATE_H__
+
+__device__ float
+interp(float x, int n, float *xp, float *fp)
+{
+ int lower = 0;
+ int upper = n-1;
+
+ if (x <= xp[lower])
+ return fp[lower];
+
+ if (x >= xp[upper])
+ return fp[upper];
+
+ while (lower < upper-1)
+ {
+ int half = (lower+upper)/2;
+
+ if (x < xp[half])
+ upper = half;
+ else
+ lower = half;
+ }
+
+ float df = fp[upper] - fp[lower];
+ float dx = xp[upper] - xp[lower];
+
+ return fp[lower] + df*(x-xp[lower])/dx;
+}
+
+__device__ float
+interp_uniform(float x, int n, float x0, float dx, float *fp)
+{
+ if (x <= xp[0])
+ return xp[0];
+
+ if (x >= xp[n-1])
+ return xp[n-1];
+
+ int lower = (x - x0)/dx;
+ int upper = lower + 1;
+
+ float df = fp[upper] - fp[lower];
+ float dx = xp[upper] - xp[lower];
+
+ return fp[lower] + df*(x-xp[lower])/dx;
+}
+
+#endif
diff --git a/chroma/cuda/random.h b/chroma/cuda/random.h
index ee528ea..498f6ab 100644
--- a/chroma/cuda/random.h
+++ b/chroma/cuda/random.h
@@ -4,6 +4,7 @@
#include <curand_kernel.h>
#include "physical_constants.h"
+#include "interpolate.h"
__device__ float
uniform(curandState *s, const float &low, const float &high)
@@ -26,22 +27,7 @@ uniform_sphere(curandState *s)
__device__ float
sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y)
{
- float u = curand_uniform(rng);
-
- // Find u in cdf_y with binary search
- // list must contain at least 2 elements: 0.0 and 1.0
- int lower=0;
- int upper=ncdf-1;
- while(lower < upper-1) {
- int half = (lower+upper) / 2;
- if (u < cdf_y[half])
- upper = half;
- else
- lower = half;
- }
-
- float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]);
- return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac);
+ return interp(curand_uniform(rng),ncdf,cdf_y,cdf_x);
}
// Sample from a uniformly-sampled CDF
@@ -52,16 +38,20 @@ sample_cdf(curandState *rng, int ncdf, float x0, float delta, float *cdf_y)
int lower = 0;
int upper = ncdf - 1;
- while(lower < upper-1) {
+
+ while(lower < upper-1)
+ {
int half = (lower + upper) / 2;
+
if (u < cdf_y[half])
upper = half;
else
lower = half;
}
- float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]);
- return (x0 + delta * lower) * frac + (x0 + delta * upper) * (1.0f - frac);
+ float delta_cdf_y = cdf_y[upper] - cdf_y[lower];
+
+ return x0 + delta*lower + delta*(u-cdf_y[lower])/delta_cdf_y;
}
extern "C"
@@ -102,6 +92,20 @@ fill_uniform_sphere(int nthreads, curandState *s, float3 *a)
a[id] = uniform_sphere(&s[id]);
}
+__global__ void
+fill_sample_cdf(int offset, int nthreads, curandState *rng_states,
+ int ncdf, float *cdf_x, float *cdf_y, float *x)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ curandState *s = rng_states+id;
+
+ x[id+offset] = sample_cdf(s,ncdf,cdf_x,cdf_y);
+}
+
} // extern "c"
#endif