diff options
Diffstat (limited to 'chroma/cuda')
| -rw-r--r-- | chroma/cuda/__init__.py | 2 | ||||
| -rw-r--r-- | chroma/cuda/daq.cu | 198 | ||||
| -rw-r--r-- | chroma/cuda/geometry.h | 74 | ||||
| -rw-r--r-- | chroma/cuda/hybrid_render.cu | 202 | ||||
| -rw-r--r-- | chroma/cuda/intersect.h | 141 | ||||
| -rw-r--r-- | chroma/cuda/linalg.h | 170 | ||||
| -rw-r--r-- | chroma/cuda/matrix.h | 249 | ||||
| -rw-r--r-- | chroma/cuda/mesh.h | 175 | ||||
| -rw-r--r-- | chroma/cuda/photon.h | 325 | ||||
| -rw-r--r-- | chroma/cuda/physical_constants.h | 7 | ||||
| -rw-r--r-- | chroma/cuda/propagate.cu | 147 | ||||
| -rw-r--r-- | chroma/cuda/random.h | 87 | ||||
| -rw-r--r-- | chroma/cuda/render.cu | 171 | ||||
| -rw-r--r-- | chroma/cuda/rotate.h | 27 | ||||
| -rw-r--r-- | chroma/cuda/sorting.h | 107 | ||||
| -rw-r--r-- | chroma/cuda/tools.cu | 21 | ||||
| -rw-r--r-- | chroma/cuda/transform.cu | 51 |
17 files changed, 2154 insertions, 0 deletions
diff --git a/chroma/cuda/__init__.py b/chroma/cuda/__init__.py new file mode 100644 index 0000000..dd87cbc --- /dev/null +++ b/chroma/cuda/__init__.py @@ -0,0 +1,2 @@ +import os.path +srcdir = os.path.dirname(os.path.abspath(__file__)) diff --git a/chroma/cuda/daq.cu b/chroma/cuda/daq.cu new file mode 100644 index 0000000..5a68846 --- /dev/null +++ b/chroma/cuda/daq.cu @@ -0,0 +1,198 @@ +// -*-c++-*- +#include <curand_kernel.h> + +__device__ unsigned int +float_to_sortable_int(float f) +{ + return __float_as_int(f); + //int i = __float_as_int(f); + //unsigned int mask = -(int)(i >> 31) | 0x80000000; + //return i ^ mask; +} + +__device__ float +sortable_int_to_float(unsigned int i) +{ + return __int_as_float(i); + //unsigned int mask = ((i >> 31) - 1) | 0x80000000; + //return __int_as_float(i ^ mask); +} + +extern "C" +{ + +__global__ void +reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + if (id < ntime_ints) { + unsigned int maxtime_int = float_to_sortable_int(maxtime); + time_ints[id] = maxtime_int; + } +} + +__global__ void +run_daq(curandState *s, unsigned int detection_state, float time_rms, + int first_photon, int nphotons, float *photon_times, + unsigned int *photon_histories, int *last_hit_triangles, + int *solid_map, int nsolids, unsigned int *earliest_time_int, + unsigned int *channel_q, unsigned int *channel_histories) +{ + + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < nphotons) { + curandState rng = s[id]; + int photon_id = id + first_photon; + int triangle_id = last_hit_triangles[photon_id]; + + if (triangle_id > -1) { + int solid_id = solid_map[triangle_id]; + unsigned int history = photon_histories[photon_id]; + + if (solid_id < nsolids && (history & detection_state)) { + float time = photon_times[photon_id] + curand_normal(&rng) * time_rms; + unsigned int time_int = float_to_sortable_int(time); + + atomicMin(earliest_time_int + solid_id, time_int); + atomicAdd(channel_q + solid_id, 1); + atomicOr(channel_histories + solid_id, history); + } + + } + + s[id] = rng; + + } + +} + +__global__ void +convert_sortable_int_to_float(int n, unsigned int *sortable_ints, + float *float_output) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < n) + float_output[id] = sortable_int_to_float(sortable_ints[id]); +} + +__global__ void +bin_hits(int nchannels, unsigned int *channel_q, float *channel_time, + unsigned int *hitcount, int tbins, float tmin, float tmax, int qbins, + float qmin, float qmax, unsigned int *pdf) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id >= nchannels) + return; + + unsigned int q = channel_q[id]; + float t = channel_time[id]; + + if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) { + hitcount[id] += 1; + + int tbin = (t - tmin) / (tmax - tmin) * tbins; + int qbin = (q - qmin) / (qmax - qmin) * qbins; + + // row major order (channel, t, q) + int bin = id * (tbins * qbins) + tbin * qbins + qbin; + pdf[bin] += 1; + } +} + +__global__ void +accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit, + float *event_time, float *event_charge, float *mc_time, + unsigned int *mc_charge, // quirk of DAQ! + unsigned int *hitcount, unsigned int *bincount, + float min_twidth, float tmin, float tmax, + float min_qwidth, float qmin, float qmax, + float *nearest_mc, int min_bin_content) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id >= nchannels) + return; + + // Was this channel hit in the Monte Carlo? + float channel_mc_time = mc_time[id]; + if (channel_mc_time >= 1e8) + return; // Nothing else to do + + // Is this channel inside the range of the PDF? + float distance; + int channel_bincount = bincount[id]; + if (time_only) { + if (channel_mc_time < tmin || channel_mc_time > tmax) + return; // Nothing else to do + + // This MC information is contained in the PDF + hitcount[id] += 1; + + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel + + // Are we inside the minimum size bin? + float channel_event_time = event_time[id]; + distance = fabsf(channel_mc_time - channel_event_time); + if (distance < min_twidth/2.0) + bincount[id] = channel_bincount + 1; + + } + else { // time and charge PDF + float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer + + if (channel_mc_time < tmin || channel_mc_time > tmax || + channel_mc_charge < qmin || channel_mc_charge > qmax) + return; // Nothing else to do + + // This MC information is contained in the PDF + hitcount[id] += 1; + + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel + + // Are we inside the minimum size bin? + float channel_event_time = event_time[id]; + float channel_event_charge = event_charge[id]; + float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0; + float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0; + distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance); + + if (distance < 1.0f) + bincount[id] = channel_bincount + 1; + } + + // Do we need to keep updating the nearest_mc list? + if (channel_bincount + 1 >= min_bin_content) + return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value + + // insertion sort the distance into the array of nearest MC points + int offset = min_bin_content * id; + int entry = min_bin_content - 1; + + // If last entry less than new entry, nothing to update + if (distance > nearest_mc[offset + entry]) + return; + + // Find where to insert the new entry while sliding the rest + // to the right + entry--; + while (entry >= 0) { + if (nearest_mc[offset+entry] >= distance) + nearest_mc[offset+entry+1] = nearest_mc[offset+entry]; + else + break; + entry--; + } + + nearest_mc[offset+entry+1] = distance; +} + +} // extern "C" diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h new file mode 100644 index 0000000..2b5eacb --- /dev/null +++ b/chroma/cuda/geometry.h @@ -0,0 +1,74 @@ +#ifndef __GEOMETRY_H__ +#define __GEOMETRY_H__ + +struct Material +{ + float *refractive_index; + float *absorption_length; + float *scattering_length; + unsigned int n; + float step; + float wavelength0; +}; + +struct Surface +{ + float *detect; + float *absorb; + float *reflect_diffuse; + float *reflect_specular; + unsigned int n; + float step; + float wavelength0; +}; + +struct Triangle +{ + float3 v0, v1, v2; +}; + +struct Geometry +{ + float3 *vertices; + uint3 *triangles; + unsigned int *material_codes; + unsigned int *colors; + float3 *lower_bounds; + float3 *upper_bounds; + unsigned int *node_map; + unsigned int *node_map_end; + Material **materials; + Surface **surfaces; + unsigned int start_node; + unsigned int first_node; +}; + +__device__ Triangle +get_triangle(Geometry *geometry, const unsigned int &i) +{ + uint3 triangle_data = geometry->triangles[i]; + + Triangle triangle; + triangle.v0 = geometry->vertices[triangle_data.x]; + triangle.v1 = geometry->vertices[triangle_data.y]; + triangle.v2 = geometry->vertices[triangle_data.z]; + + return triangle; +} + +template <class T> +__device__ float +interp_property(T *m, const float &x, const float *fp) +{ + if (x < m->wavelength0) + return fp[0]; + + if (x > (m->wavelength0 + (m->n-1)*m->step)) + return fp[m->n-1]; + + int jl = (x-m->wavelength0)/m->step; + + return fp[jl] + (x-(m->wavelength0 + jl*m->step))*(fp[jl+1]-fp[jl])/m->step; +} + +#endif diff --git a/chroma/cuda/hybrid_render.cu b/chroma/cuda/hybrid_render.cu new file mode 100644 index 0000000..29edefa --- /dev/null +++ b/chroma/cuda/hybrid_render.cu @@ -0,0 +1,202 @@ +//-*-c-*- +#include <math_constants.h> +#include <curand_kernel.h> + +#include "linalg.h" +#include "matrix.h" +#include "rotate.h" +#include "mesh.h" +#include "geometry.h" +#include "photon.h" + +__device__ void +fAtomicAdd(float *addr, float data) +{ + while (data) + data = atomicExch(addr, data+atomicExch(addr, 0.0f)); +} + +__device__ void +to_diffuse(Photon &p, State &s, Geometry *g, curandState &rng, int max_steps) +{ + int steps = 0; + while (steps < max_steps) { + steps++; + + int command; + + fill_state(s, p, g); + + if (p.last_hit_triangle == -1) + break; + + command = propagate_to_boundary(p, s, rng); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + + if (s.surface_index != -1) { + command = propagate_at_surface(p, s, rng, g); + + if (p.history & REFLECT_DIFFUSE) + break; + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + } + + propagate_at_boundary(p, s, rng); + + } // while (steps < max_steps) + +} // to_diffuse + +extern "C" +{ + +__global__ void +update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, + curandState *rng_states, float wavelength, float3 xyz, + float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps, + Geometry *g) +{ + int kernel_id = blockIdx.x*blockDim.x + threadIdx.x; + int id = kernel_id + offset; + + if (kernel_id >= nthreads || id >= total_threads) + return; + + curandState rng = rng_states[kernel_id]; + + Triangle t = get_triangle(g, id); + + float a = curand_uniform(&rng); + float b = uniform(&rng, 0.0f, (1.0f - a)); + float c = 1.0f - a - b; + + float3 direction = a*t.v0 + b*t.v1 + c*t.v2 - position; + direction /= norm(direction); + + float distance; + int triangle_index = intersect_mesh(position, direction, g, distance); + + if (triangle_index != id) { + rng_states[kernel_id] = rng; + return; + } + + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + float3 surface_normal = normalize(cross(v01,v12)); + + float cos_theta = dot(surface_normal,-direction); + + if (cos_theta < 0.0f) + cos_theta = dot(-surface_normal,-direction); + + Photon p; + p.position = position; + p.direction = direction; + p.wavelength = wavelength; + p.polarization = uniform_sphere(&rng); + p.last_hit_triangle = -1; + p.time = 0; + p.history = 0; + + State s; + to_diffuse(p, s, g, rng, max_steps); + + if (p.history & REFLECT_DIFFUSE) { + if (s.inside_to_outside) { + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x); + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y); + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z); + } + else { + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x); + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y); + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z); + } + } + + rng_states[kernel_id] = rng; + +} // update_xyz_lookup + +__global__ void +update_xyz_image(int nthreads, curandState *rng_states, float3 *positions, + float3 *directions, float wavelength, float3 xyz, + float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image, + int nlookup_calls, int max_steps, Geometry *g) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curandState rng = rng_states[id]; + + Photon p; + p.position = positions[id]; + p.direction = directions[id]; + p.direction /= norm(p.direction); + p.wavelength = wavelength; + p.polarization = uniform_sphere(&rng); + p.last_hit_triangle = -1; + p.time = 0; + p.history = 0; + + State s; + to_diffuse(p, s, g, rng, max_steps); + + if (p.history & REFLECT_DIFFUSE) { + if (s.inside_to_outside) + image[id] += xyz*xyz_lookup1[p.last_hit_triangle]/nlookup_calls; + else + image[id] += xyz*xyz_lookup2[p.last_hit_triangle]/nlookup_calls; + } + + rng_states[id] = rng; + +} // update_xyz_image + +__global__ void +process_image(int nthreads, float3 *image, unsigned int *pixels, int nimages) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + float3 rgb = image[id]/nimages; + + if (rgb.x < 0.0f) + rgb.x = 0.0f; + if (rgb.y < 0.0f) + rgb.y = 0.0f; + if (rgb.z < 0.0f) + rgb.z = 0.0f; + + if (rgb.x > 1.0f) + rgb.x = 1.0f; + if (rgb.y > 1.0f) + rgb.y = 1.0f; + if (rgb.z > 1.0f) + rgb.z = 1.0f; + + unsigned int r = floorf(rgb.x*255.0f); + unsigned int g = floorf(rgb.y*255.0f); + unsigned int b = floorf(rgb.z*255.0f); + + pixels[id] = 255 << 24 | r << 16 | g << 8 | b; + +} // process_image + +} // extern "c" diff --git a/chroma/cuda/intersect.h b/chroma/cuda/intersect.h new file mode 100644 index 0000000..978bde8 --- /dev/null +++ b/chroma/cuda/intersect.h @@ -0,0 +1,141 @@ +//-*-c-*- + +#ifndef __INTERSECT_H__ +#define __INTERSECT_H__ + +#include "linalg.h" +#include "matrix.h" +#include "geometry.h" + +#define EPSILON 0.0f + +/* Tests the intersection between a ray and a triangle. + If the ray intersects the triangle, set `distance` to the distance from + `origin` to the intersection and return true, else return false. + `direction` must be normalized to one. */ +__device__ bool +intersect_triangle(const float3 &origin, const float3 &direction, + const Triangle &triangle, float &distance) +{ + float3 m1 = triangle.v1-triangle.v0; + float3 m2 = triangle.v2-triangle.v0; + float3 m3 = -direction; + + Matrix m = make_matrix(m1, m2, m3); + + float determinant = det(m); + + if (determinant == 0.0f) + return false; + + float3 b = origin-triangle.v0; + + float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + + (m.a02*m.a21 - m.a01*m.a22)*b.y + + (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant; + + if (u1 < -EPSILON || u1 > 1.0f) + return false; + + float u2 = ((m.a12*m.a20 - m.a10*m.a22)*b.x + + (m.a00*m.a22 - m.a02*m.a20)*b.y + + (m.a02*m.a10 - m.a00*m.a12)*b.z)/determinant; + + if (u2 < -EPSILON || u2 > 1.0f) + return false; + + float u3 = ((m.a10*m.a21 - m.a11*m.a20)*b.x + + (m.a01*m.a20 - m.a00*m.a21)*b.y + + (m.a00*m.a11 - m.a01*m.a10)*b.z)/determinant; + + if (u3 <= 0.0f || (1.0f-u1-u2) < -EPSILON) + return false; + + distance = u3; + + return true; +} + +/* Tests the intersection between a ray and an axis-aligned box defined by + an upper and lower bound. If the ray intersects the box, set + `distance_to_box` to the distance from `origin` to the intersection and + return true, else return false. `direction` must be normalized to one. + + Source: "An Efficient and Robust Ray-Box Intersection Algorithm." + by Williams, et. al. */ +__device__ bool +intersect_box(const float3 &origin, const float3 &direction, + const float3 &lower_bound, const float3 &upper_bound, + float& distance_to_box) +{ + float kmin, kmax, kymin, kymax, kzmin, kzmax; + + float divx = 1.0f/direction.x; + if (divx >= 0.0f) + { + kmin = (lower_bound.x - origin.x)*divx; + kmax = (upper_bound.x - origin.x)*divx; + } + else + { + kmin = (upper_bound.x - origin.x)*divx; + kmax = (lower_bound.x - origin.x)*divx; + } + + if (kmax < 0.0f) + return false; + + float divy = 1.0f/direction.y; + if (divy >= 0.0f) + { + kymin = (lower_bound.y - origin.y)*divy; + kymax = (upper_bound.y - origin.y)*divy; + } + else + { + kymin = (upper_bound.y - origin.y)*divy; + kymax = (lower_bound.y - origin.y)*divy; + } + + if (kymax < 0.0f) + return false; + + if (kymin > kmin) + kmin = kymin; + + if (kymax < kmax) + kmax = kymax; + + if (kmin > kmax) + return false; + + float divz = 1.0f/direction.z; + if (divz >= 0.0f) + { + kzmin = (lower_bound.z - origin.z)*divz; + kzmax = (upper_bound.z - origin.z)*divz; + } + else + { + kzmin = (upper_bound.z - origin.z)*divz; + kzmax = (lower_bound.z - origin.z)*divz; + } + + if (kzmax < 0.0f) + return false; + + if (kzmin > kmin) + kmin = kzmin; + + if (kzmax < kmax) + kmax = kzmax; + + if (kmin > kmax) + return false; + + distance_to_box = kmin; + + return true; +} + +#endif diff --git a/chroma/cuda/linalg.h b/chroma/cuda/linalg.h new file mode 100644 index 0000000..35b2423 --- /dev/null +++ b/chroma/cuda/linalg.h @@ -0,0 +1,170 @@ +#ifndef __LINALG_H__ +#define __LINALG_H__ + +__device__ float3 +operator- (const float3 &a) +{ + return make_float3(-a.x, -a.y, -a.z); +} + +__device__ float3 +operator* (const float3 &a, const float3 &b) +{ + return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); +} + +__device__ float3 +operator/ (const float3 &a, const float3 &b) +{ + return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); +} + +__device__ void +operator*= (float3 &a, const float3 &b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; +} + +__device__ void +operator/= (float3 &a, const float3 &b) +{ + a.x /= b.x; + a.y /= b.y; + a.z /= b.z; +} + +__device__ float3 +operator+ (const float3 &a, const float3 &b) +{ + return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); +} + +__device__ void +operator+= (float3 &a, const float3 &b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; +} + +__device__ float3 +operator- (const float3 &a, const float3 &b) +{ + return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); +} + +__device__ void +operator-= (float3 &a, const float3 &b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; +} + +__device__ float3 +operator+ (const float3 &a, const float &c) +{ + return make_float3(a.x+c, a.y+c, a.z+c); +} + +__device__ void +operator+= (float3 &a, const float &c) +{ + a.x += c; + a.y += c; + a.z += c; +} + +__device__ float3 +operator+ (const float &c, const float3 &a) +{ + return make_float3(c+a.x, c+a.y, c+a.z); +} + +__device__ float3 +operator- (const float3 &a, const float &c) +{ + return make_float3(a.x-c, a.y-c, a.z-c); +} + +__device__ void +operator-= (float3 &a, const float &c) +{ + a.x -= c; + a.y -= c; + a.z -= c; +} + +__device__ float3 +operator- (const float &c, const float3& a) +{ + return make_float3(c-a.x, c-a.y, c-a.z); +} + +__device__ float3 +operator* (const float3 &a, const float &c) +{ + return make_float3(a.x*c, a.y*c, a.z*c); +} + +__device__ void +operator*= (float3 &a, const float &c) +{ + a.x *= c; + a.y *= c; + a.z *= c; +} + +__device__ float3 +operator* (const float &c, const float3& a) +{ + return make_float3(c*a.x, c*a.y, c*a.z); +} + +__device__ float3 +operator/ (const float3 &a, const float &c) +{ + return make_float3(a.x/c, a.y/c, a.z/c); +} + +__device__ void +operator/= (float3 &a, const float &c) +{ + a.x /= c; + a.y /= c; + a.z /= c; +} + +__device__ float3 +operator/ (const float &c, const float3 &a) +{ + return make_float3(c/a.x, c/a.y, c/a.z); +} + +__device__ float +dot(const float3 &a, const float3 &b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + +__device__ float3 +cross(const float3 &a, const float3 &b) +{ + return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); +} + +__device__ float +norm(const float3 &a) +{ + return sqrtf(dot(a,a)); +} + +__device__ float3 +normalize(const float3 &a) +{ + return a/norm(a); +} + +#endif diff --git a/chroma/cuda/matrix.h b/chroma/cuda/matrix.h new file mode 100644 index 0000000..0a66e58 --- /dev/null +++ b/chroma/cuda/matrix.h @@ -0,0 +1,249 @@ +#ifndef __MATRIX_H__ +#define __MATRIX_H__ + +struct Matrix +{ + float a00, a01, a02, a10, a11, a12, a20, a21, a22; +}; + +__device__ Matrix +make_matrix(float a00, float a01, float a02, + float a10, float a11, float a12, + float a20, float a21, float a22) +{ + Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22}; + return m; +} + +__device__ Matrix +make_matrix(const float3 &u1, const float3 &u2, const float3 &u3) +{ + Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z}; + return m; +} + +__device__ Matrix +operator- (const Matrix &m) +{ + return make_matrix(-m.a00, -m.a01, -m.a02, + -m.a10, -m.a11, -m.a12, + -m.a20, -m.a21, -m.a22); +} + +__device__ float3 +operator* (const Matrix &m, const float3 &a) +{ + return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z, + m.a10*a.x + m.a11*a.y + m.a12*a.z, + m.a20*a.x + m.a21*a.y + m.a22*a.z); +} + +__device__ Matrix +operator+ (const Matrix &m, const Matrix &n) +{ + return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02, + m.a10+n.a10, m.a11+n.a11, m.a12+n.a12, + m.a20+n.a20, m.a21+n.a21, m.a22+n.a22); +} + +__device__ void +operator+= (Matrix &m, const Matrix &n) +{ + m.a00 += n.a00; + m.a01 += n.a01; + m.a02 += n.a02; + m.a10 += n.a10; + m.a11 += n.a11; + m.a12 += n.a12; + m.a20 += n.a20; + m.a21 += n.a21; + m.a22 += n.a22; +} + +__device__ Matrix +operator- (const Matrix &m, const Matrix &n) +{ + return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02, + m.a10-n.a10, m.a11-n.a11, m.a12-n.a12, + m.a20-n.a20, m.a21-n.a21, m.a22-n.a22); +} + +__device__ void +operator-= (Matrix &m, const Matrix& n) +{ + m.a00 -= n.a00; + m.a01 -= n.a01; + m.a02 -= n.a02; + m.a10 -= n.a10; + m.a11 -= n.a11; + m.a12 -= n.a12; + m.a20 -= n.a20; + m.a21 -= n.a21; + m.a22 -= n.a22; +} + +__device__ Matrix +operator* (const Matrix &m, const Matrix &n) +{ + return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20, + m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21, + m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22, + m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20, + m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21, + m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22, + m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20, + m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21, + m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22); +} + +__device__ Matrix +operator+ (const Matrix &m, const float &c) +{ + return make_matrix(m.a00+c, m.a01+c, m.a02+c, + m.a10+c, m.a11+c, m.a12+c, + m.a20+c, m.a21+c, m.a22+c); +} + +__device__ void +operator+= (Matrix &m, const float &c) +{ + m.a00 += c; + m.a01 += c; + m.a02 += c; + m.a10 += c; + m.a11 += c; + m.a12 += c; + m.a20 += c; + m.a21 += c; + m.a22 += c; +} + +__device__ Matrix +operator+ (const float &c, const Matrix &m) +{ + return make_matrix(c+m.a00, c+m.a01, c+m.a02, + c+m.a10, c+m.a11, c+m.a12, + c+m.a20, c+m.a21, c+m.a22); +} + +__device__ Matrix +operator- (const Matrix &m, const float &c) +{ + return make_matrix(m.a00-c, m.a01-c, m.a02-c, + m.a10-c, m.a11-c, m.a12-c, + m.a20-c, m.a21-c, m.a22-c); +} + +__device__ void +operator-= (Matrix &m, const float &c) +{ + m.a00 -= c; + m.a01 -= c; + m.a02 -= c; + m.a10 -= c; + m.a11 -= c; + m.a12 -= c; + m.a20 -= c; + m.a21 -= c; + m.a22 -= c; +} + +__device__ Matrix +operator- (const float &c, const Matrix &m) +{ + return make_matrix(c-m.a00, c-m.a01, c-m.a02, + c-m.a10, c-m.a11, c-m.a12, + c-m.a20, c-m.a21, c-m.a22); +} + +__device__ Matrix +operator* (const Matrix &m, const float &c) +{ + return make_matrix(m.a00*c, m.a01*c, m.a02*c, + m.a10*c, m.a11*c, m.a12*c, + m.a20*c, m.a21*c, m.a22*c); +} + +__device__ void +operator*= (Matrix &m, const float &c) +{ + m.a00 *= c; + m.a01 *= c; + m.a02 *= c; + m.a10 *= c; + m.a11 *= c; + m.a12 *= c; + m.a20 *= c; + m.a21 *= c; + m.a22 *= c; +} + +__device__ Matrix +operator* (const float &c, const Matrix &m) +{ + return make_matrix(c*m.a00, c*m.a01, c*m.a02, + c*m.a10, c*m.a11, c*m.a12, + c*m.a20, c*m.a21, c*m.a22); +} + +__device__ Matrix +operator/ (const Matrix &m, const float &c) +{ + return make_matrix(m.a00/c, m.a01/c, m.a02/c, + m.a10/c, m.a11/c, m.a12/c, + m.a20/c, m.a21/c, m.a22/c); +} + +__device__ void +operator/= (Matrix &m, const float &c) +{ + m.a00 /= c; + m.a01 /= c; + m.a02 /= c; + m.a10 /= c; + m.a11 /= c; + m.a12 /= c; + m.a20 /= c; + m.a21 /= c; + m.a22 /= c; +} + +__device__ Matrix +operator/ (const float &c, const Matrix &m) +{ + return make_matrix(c/m.a00, c/m.a01, c/m.a02, + c/m.a10, c/m.a11, c/m.a12, + c/m.a20, c/m.a21, c/m.a22); +} + +__device__ float +det(const Matrix &m) +{ + return m.a00*(m.a11*m.a22 - m.a12*m.a21) - + m.a10*(m.a01*m.a22 - m.a02*m.a21) + + m.a20*(m.a01*m.a12 - m.a02*m.a11); +} + +__device__ Matrix +inv(const Matrix &m) +{ + return make_matrix(m.a11*m.a22 - m.a12*m.a21, + m.a02*m.a21 - m.a01*m.a22, + m.a01*m.a12 - m.a02*m.a11, + m.a12*m.a20 - m.a10*m.a22, + m.a00*m.a22 - m.a02*m.a20, + m.a02*m.a10 - m.a00*m.a12, + m.a10*m.a21 - m.a11*m.a20, + m.a01*m.a20 - m.a00*m.a21, + m.a00*m.a11 - m.a01*m.a10)/det(m); +} + +__device__ Matrix +outer(const float3 &a, const float3 &b) +{ + return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z, + a.y*b.x, a.y*b.y, a.y*b.z, + a.z*b.x, a.z*b.y, a.z*b.z); +} + +#endif diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h new file mode 100644 index 0000000..d60d801 --- /dev/null +++ b/chroma/cuda/mesh.h @@ -0,0 +1,175 @@ +#ifndef __MESH_H__ +#define __MESH_H__ + +#include "intersect.h" +#include "geometry.h" + +#include "stdio.h" + +#define STACK_SIZE 100 + +/* Tests the intersection between a ray and a node in the bounding volume + hierarchy. If the ray intersects the bounding volume and `min_distance` + is less than zero or the distance from `origin` to the intersection is + less than `min_distance`, return true, else return false. */ +__device__ bool +intersect_node(const float3 &origin, const float3 &direction, + Geometry *g, int i, float min_distance=-1.0f) +{ + /* assigning these to local variables is faster for some reason */ + float3 lower_bound = g->lower_bounds[i]; + float3 upper_bound = g->upper_bounds[i]; + + float distance_to_box; + + if (intersect_box(origin, direction, lower_bound, upper_bound, + distance_to_box)) { + if (min_distance < 0.0f) + return true; + + if (distance_to_box > min_distance) + return false; + + return true; + } + else { + return false; + } + +} + +/* Finds the intersection between a ray and `geometry`. If the ray does + intersect the mesh and the index of the intersected triangle is not equal + to `last_hit_triangle`, set `min_distance` to the distance from `origin` to + the intersection and return the index of the triangle which the ray + intersected, else return -1. */ +__device__ int +intersect_mesh(const float3 &origin, const float3& direction, Geometry *g, + float &min_distance, int last_hit_triangle = -1) +{ + int triangle_index = -1; + + float distance; + min_distance = -1.0f; + + if (!intersect_node(origin, direction, g, g->start_node, min_distance)) + return -1; + + unsigned int stack[STACK_SIZE]; + + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; + *node = g->start_node; + + unsigned int i; + + do + { + unsigned int first_child = g->node_map[*node]; + unsigned int stop = g->node_map_end[*node]; + + while (*node >= g->first_node && stop == first_child+1) { + *node = first_child; + first_child = g->node_map[*node]; + stop = g->node_map_end[*node]; + } + + if (*node >= g->first_node) { + for (i=first_child; i < stop; i++) { + if (intersect_node(origin, direction, g, i, min_distance)) { + *node = i; + node++; + } + } + + node--; + } + else { + // node is a leaf + for (i=first_child; i < stop; i++) { + if (last_hit_triangle == i) + continue; + + Triangle t = get_triangle(g, i); + + if (intersect_triangle(origin, direction, t, distance)) { + if (triangle_index == -1) { + triangle_index = i; + min_distance = distance; + continue; + } + + if (distance < min_distance) { + triangle_index = i; + min_distance = distance; + } + } + } // triangle loop + + node--; + + } // node is a leaf + + if (node > tail) { + printf("warning: intersect_mesh() aborted; node > tail\n"); + break; + } + + } // while loop + while (node != head); + + return triangle_index; +} + +extern "C" +{ + +__global__ void +distance_to_mesh(int nthreads, float3 *_origin, float3 *_direction, + Geometry *g, float *_distance) +{ + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + g = &sg; + + float3 origin = _origin[id]; + float3 direction = _direction[id]; + direction /= norm(direction); + + float distance; + + int triangle_index = intersect_mesh(origin, direction, g, distance); + + if (triangle_index != -1) + _distance[id] = distance; +} + +__global__ void +color_solids(int first_triangle, int nthreads, int *solid_id_map, + bool *solid_hit, unsigned int *solid_colors, Geometry *g) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + int triangle_id = first_triangle + id; + int solid_id = solid_id_map[triangle_id]; + if (solid_hit[solid_id]) + g->colors[triangle_id] = solid_colors[solid_id]; +} + +} // extern "C" + +#endif diff --git a/chroma/cuda/photon.h b/chroma/cuda/photon.h new file mode 100644 index 0000000..8b7e588 --- /dev/null +++ b/chroma/cuda/photon.h @@ -0,0 +1,325 @@ +#ifndef __PHOTON_H__ +#define __PHOTON_H__ + +#include "stdio.h" +#include "linalg.h" +#include "rotate.h" +#include "random.h" +#include "physical_constants.h" +#include "mesh.h" +#include "geometry.h" + +struct Photon +{ + float3 position; + float3 direction; + float3 polarization; + float wavelength; + float time; + + unsigned int history; + + int last_hit_triangle; +}; + +struct State +{ + bool inside_to_outside; + + float3 surface_normal; + + float refractive_index1, refractive_index2; + float absorption_length; + float scattering_length; + + int surface_index; + + float distance_to_boundary; +}; + +enum +{ + NO_HIT = 0x1 << 0, + BULK_ABSORB = 0x1 << 1, + SURFACE_DETECT = 0x1 << 2, + SURFACE_ABSORB = 0x1 << 3, + RAYLEIGH_SCATTER = 0x1 << 4, + REFLECT_DIFFUSE = 0x1 << 5, + REFLECT_SPECULAR = 0x1 << 6, + NAN_ABORT = 0x1 << 31 +}; // processes + +enum {BREAK, CONTINUE, PASS}; // return value from propagate_to_boundary + +__device__ int +convert(int c) +{ + if (c & 0x80) + return (0xFFFFFF00 | c); + else + return c; +} + +__device__ float +get_theta(const float3 &a, const float3 &b) +{ + return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b)))); +} + +__device__ void +fill_state(State &s, Photon &p, Geometry *g) +{ + p.last_hit_triangle = intersect_mesh(p.position, p.direction, g, + s.distance_to_boundary, + p.last_hit_triangle); + + if (p.last_hit_triangle == -1) { + p.history |= NO_HIT; + return; + } + + Triangle t = get_triangle(g, p.last_hit_triangle); + + unsigned int material_code = g->material_codes[p.last_hit_triangle]; + + int inner_material_index = convert(0xFF & (material_code >> 24)); + int outer_material_index = convert(0xFF & (material_code >> 16)); + s.surface_index = convert(0xFF & (material_code >> 8)); + + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + s.surface_normal = normalize(cross(v01, v12)); + + Material *material1, *material2; + if (dot(s.surface_normal,-p.direction) > 0.0f) { + // outside to inside + material1 = g->materials[outer_material_index]; + material2 = g->materials[inner_material_index]; + + s.inside_to_outside = false; + } + else { + // inside to outside + material1 = g->materials[inner_material_index]; + material2 = g->materials[outer_material_index]; + s.surface_normal = -s.surface_normal; + + s.inside_to_outside = true; + } + + s.refractive_index1 = interp_property(material1, p.wavelength, + material1->refractive_index); + s.refractive_index2 = interp_property(material2, p.wavelength, + material2->refractive_index); + s.absorption_length = interp_property(material1, p.wavelength, + material1->absorption_length); + s.scattering_length = interp_property(material1, p.wavelength, + material1->scattering_length); + +} // fill_state + +__device__ float3 +pick_new_direction(float3 axis, float theta, float phi) +{ + // Taken from SNOMAN rayscatter.for + float cos_theta = cosf(theta); + float sin_theta = sinf(theta); + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + + float sin_axis_theta = sqrt(1.0f - axis.z*axis.z); + float cos_axis_phi, sin_axis_phi; + + if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) { + cos_axis_phi = 1.0f; + sin_axis_phi = 0.0f; + } + else { + cos_axis_phi = axis.x / sin_axis_theta; + sin_axis_phi = axis.y / sin_axis_theta; + } + + float dirx = cos_theta*axis.x + + sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi); + float diry = cos_theta*axis.y + + sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi); + float dirz = cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta; + + return make_float3(dirx, diry, dirz); +} + +__device__ void +rayleigh_scatter(Photon &p, curandState &rng) +{ + float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f); + if (cos_theta > 1.0f) + cos_theta = 1.0f; + else if (cos_theta < -1.0f) + cos_theta = -1.0f; + + float theta = acosf(cos_theta); + float phi = uniform(&rng, 0.0f, 2.0f * PI); + + p.direction = pick_new_direction(p.polarization, theta, phi); + + if (1.0f - fabsf(cos_theta) < 1e-6f) { + p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi); + } + else { + // linear combination of old polarization and new direction + p.polarization = p.polarization - cos_theta * p.direction; + } + + p.direction /= norm(p.direction); + p.polarization /= norm(p.polarization); +} // scatter + +__device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) +{ + float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); + float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); + + if (absorption_distance <= scattering_distance) { + if (absorption_distance <= s.distance_to_boundary) { + p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += absorption_distance*p.direction; + p.history |= BULK_ABSORB; + + p.last_hit_triangle = -1; + + return BREAK; + } // photon is absorbed in material1 + } + else { + if (scattering_distance <= s.distance_to_boundary) { + p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += scattering_distance*p.direction; + + rayleigh_scatter(p, rng); + + p.history |= RAYLEIGH_SCATTER; + + p.last_hit_triangle = -1; + + return CONTINUE; + } // photon is scattered in material1 + } // if scattering_distance < absorption_distance + + p.position += s.distance_to_boundary*p.direction; + p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1); + + return PASS; + +} // propagate_to_boundary + +__device__ void +propagate_at_boundary(Photon &p, State &s, curandState &rng) +{ + float incident_angle = get_theta(s.surface_normal,-p.direction); + float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2); + + float3 incident_plane_normal = cross(p.direction, s.surface_normal); + float incident_plane_normal_length = norm(incident_plane_normal); + + // Photons at normal incidence do not have a unique plane of incidence, + // so we have to pick the plane normal to be the polarization vector + // to get the correct logic below + if (incident_plane_normal_length < 1e-6f) + incident_plane_normal = p.polarization; + else + incident_plane_normal /= incident_plane_normal_length; + + float normal_coefficient = dot(p.polarization, incident_plane_normal); + float normal_probability = normal_coefficient*normal_coefficient; + + float reflection_coefficient; + if (curand_uniform(&rng) < normal_probability) { + // photon polarization normal to plane of incidence + reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle); + + if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { + p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + + p.history |= REFLECT_SPECULAR; + } + else { + p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); + } + + p.polarization = incident_plane_normal; + } + else { + // photon polarization parallel to plane of incidence + reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle); + + if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { + p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + + p.history |= REFLECT_SPECULAR; + } + else { + p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); + } + + p.polarization = cross(incident_plane_normal, p.direction); + p.polarization /= norm(p.polarization); + } + +} // propagate_at_boundary + +__device__ int +propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry) +{ + Surface *surface = geometry->surfaces[s.surface_index]; + + /* since the surface properties are interpolated linearly, we are + guaranteed that they still sum to 1.0. */ + + float detect = interp_property(surface, p.wavelength, surface->detect); + float absorb = interp_property(surface, p.wavelength, surface->absorb); + float reflect_diffuse = interp_property(surface, p.wavelength, surface->reflect_diffuse); + float reflect_specular = interp_property(surface, p.wavelength, surface->reflect_specular); + + float uniform_sample = curand_uniform(&rng); + + if (uniform_sample < absorb) { + p.history |= SURFACE_ABSORB; + return BREAK; + } + else if (uniform_sample < absorb + detect) { + p.history |= SURFACE_DETECT; + return BREAK; + } + else if (uniform_sample < absorb + detect + reflect_diffuse) { + // diffusely reflect + p.direction = uniform_sphere(&rng); + + if (dot(p.direction, s.surface_normal) < 0.0f) + p.direction = -p.direction; + + // randomize polarization? + p.polarization = cross(uniform_sphere(&rng), p.direction); + p.polarization /= norm(p.polarization); + + p.history |= REFLECT_DIFFUSE; + + return CONTINUE; + } + else { + // specularly reflect + float incident_angle = get_theta(s.surface_normal,-p.direction); + float3 incident_plane_normal = cross(p.direction, s.surface_normal); + incident_plane_normal /= norm(incident_plane_normal); + + p.direction = rotate(s.surface_normal, incident_angle, + incident_plane_normal); + + p.history |= REFLECT_SPECULAR; + + return CONTINUE; + } + +} // propagate_at_surface + +#endif diff --git a/chroma/cuda/physical_constants.h b/chroma/cuda/physical_constants.h new file mode 100644 index 0000000..2ff87cd --- /dev/null +++ b/chroma/cuda/physical_constants.h @@ -0,0 +1,7 @@ +#ifndef __PHYSICAL_CONSTANTS_H__ +#define __PHYSICAL_CONSTANTS_H__ + +#define SPEED_OF_LIGHT 2.99792458e8 +#define PI 3.141592653589793 + +#endif diff --git a/chroma/cuda/propagate.cu b/chroma/cuda/propagate.cu new file mode 100644 index 0000000..fdcf532 --- /dev/null +++ b/chroma/cuda/propagate.cu @@ -0,0 +1,147 @@ +//-*-c-*- + +#include "linalg.h" +#include "geometry.h" +#include "photon.h" + +#include "stdio.h" + +extern "C" +{ + +__global__ void +photon_duplicate(int first_photon, int nthreads, + float3 *positions, float3 *directions, + float *wavelengths, float3 *polarizations, + float *times, unsigned int *histories, + int *last_hit_triangles, + int copies, int stride) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + int photon_id = first_photon + id; + + Photon p; + p.position = positions[photon_id]; + p.direction = directions[photon_id]; + p.polarization = polarizations[photon_id]; + p.wavelength = wavelengths[photon_id]; + p.time = times[photon_id]; + p.last_hit_triangle = last_hit_triangles[photon_id]; + p.history = histories[photon_id]; + + for (int i=1; i <= copies; i++) { + int target_photon_id = photon_id + stride * i; + + positions[target_photon_id] = p.position; + directions[target_photon_id] = p.direction; + polarizations[target_photon_id] = p.polarization; + wavelengths[target_photon_id] = p.wavelength; + times[target_photon_id] = p.time; + last_hit_triangles[target_photon_id] = p.last_hit_triangle; + histories[target_photon_id] = p.history; + } +} + +__global__ void +propagate(int first_photon, int nthreads, unsigned int *input_queue, + unsigned int *output_queue, curandState *rng_states, + float3 *positions, float3 *directions, + float *wavelengths, float3 *polarizations, + float *times, unsigned int *histories, + int *last_hit_triangles, int max_steps, + Geometry *g) +{ + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + g = &sg; + + curandState rng = rng_states[id]; + + int photon_id = input_queue[first_photon + id]; + + Photon p; + p.position = positions[photon_id]; + p.direction = directions[photon_id]; + p.direction /= norm(p.direction); + p.polarization = polarizations[photon_id]; + p.polarization /= norm(p.polarization); + p.wavelength = wavelengths[photon_id]; + p.time = times[photon_id]; + p.last_hit_triangle = last_hit_triangles[photon_id]; + p.history = histories[photon_id]; + + if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) + return; + + State s; + + int steps = 0; + while (steps < max_steps) { + steps++; + + int command; + + // check for NaN and fail + if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z)) { + p.history |= NO_HIT | NAN_ABORT; + break; + } + + fill_state(s, p, g); + + if (p.last_hit_triangle == -1) + break; + + command = propagate_to_boundary(p, s, rng); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + + if (s.surface_index != -1) { + command = propagate_at_surface(p, s, rng, g); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + } + + propagate_at_boundary(p, s, rng); + + } // while (steps < max_steps) + + rng_states[id] = rng; + positions[photon_id] = p.position; + directions[photon_id] = p.direction; + polarizations[photon_id] = p.polarization; + wavelengths[photon_id] = p.wavelength; + times[photon_id] = p.time; + histories[photon_id] = p.history; + last_hit_triangles[photon_id] = p.last_hit_triangle; + + // Not done, put photon in output queue + if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) { + int out_idx = atomicAdd(output_queue, 1); + output_queue[out_idx] = photon_id; + } +} // propagate + +} // extern "C" diff --git a/chroma/cuda/random.h b/chroma/cuda/random.h new file mode 100644 index 0000000..e9d2e75 --- /dev/null +++ b/chroma/cuda/random.h @@ -0,0 +1,87 @@ +#ifndef __RANDOM_H__ +#define __RANDOM_H__ + +#include <curand_kernel.h> + +#include "physical_constants.h" + +__device__ float +uniform(curandState *s, const float &low, const float &high) +{ + return low + curand_uniform(s)*(high-low); +} + +__device__ float3 +uniform_sphere(curandState *s) +{ + float theta = uniform(s, 0.0f, 2*PI); + float u = uniform(s, -1.0f, 1.0f); + float c = sqrtf(1.0f-u*u); + + return make_float3(c*cosf(theta), c*sinf(theta), u); +} + +// Draw a random sample given a cumulative distribution function +// Assumptions: ncdf >= 2, cdf_y[0] is 0.0, and cdf_y[ncdf-1] is 1.0 +__device__ float +sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y) +{ + float u = curand_uniform(rng); + + // Find u in cdf_y with binary search + // list must contain at least 2 elements: 0.0 and 1.0 + int lower=0; + int upper=ncdf-1; + while(lower < upper-1) { + int half = (lower+upper) / 2; + if (u < cdf_y[half]) + upper = half; + else + lower = half; + } + + float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); + return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac); +} + +extern "C" +{ + +__global__ void +init_rng(int nthreads, curandState *s, unsigned long long seed, + unsigned long long offset) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curand_init(seed, id, offset, &s[id]); +} + +__global__ void +fill_uniform(int nthreads, curandState *s, float *a, float low, float high) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = uniform(&s[id], low, high); + +} + +__global__ void +fill_uniform_sphere(int nthreads, curandState *s, float3 *a) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = uniform_sphere(&s[id]); +} + +} // extern "c" + +#endif diff --git a/chroma/cuda/render.cu b/chroma/cuda/render.cu new file mode 100644 index 0000000..d4ed443 --- /dev/null +++ b/chroma/cuda/render.cu @@ -0,0 +1,171 @@ +//-*-c-*- + +#include "linalg.h" +#include "intersect.h" +#include "mesh.h" +#include "sorting.h" +#include "geometry.h" + +#include "stdio.h" + +__device__ float4 +get_color(const float3 &direction, const Triangle &t, unsigned int rgba) +{ + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + float3 surface_normal = normalize(cross(v01,v12)); + + float cos_theta = dot(surface_normal,-direction); + + if (cos_theta < 0.0f) + cos_theta = -cos_theta; + + unsigned int a0 = 0xff & (rgba >> 24); + unsigned int r0 = 0xff & (rgba >> 16); + unsigned int g0 = 0xff & (rgba >> 8); + unsigned int b0 = 0xff & rgba; + + float alpha = (255 - a0)/255.0f; + + return make_float4(r0*cos_theta, g0*cos_theta, b0*cos_theta, alpha); +} + +extern "C" +{ + +__global__ void +render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g, + unsigned int alpha_depth, unsigned int *pixels, float *_dx, + unsigned int *dxlen, float4 *_color) +{ + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + g = &sg; + + float3 origin = _origin[id]; + float3 direction = _direction[id]; + unsigned int n = dxlen[id]; + + float distance; + + if (n < 1 && !intersect_node(origin, direction, g, g->start_node)) { + pixels[id] = 0; + return; + } + + unsigned int stack[STACK_SIZE]; + + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; + *node = g->start_node; + + float *dx = _dx + id*alpha_depth; + float4 *color_a = _color + id*alpha_depth; + + unsigned int i; + + do { + unsigned int first_child = g->node_map[*node]; + unsigned int stop = g->node_map_end[*node]; + + while (*node >= g->first_node && stop == first_child+1) { + *node = first_child; + first_child = g->node_map[*node]; + stop = g->node_map_end[*node]; + } + + if (*node >= g->first_node) { + for (i=first_child; i < stop; i++) { + if (intersect_node(origin, direction, g, i)) { + *node = i; + node++; + } + } + + node--; + } + else { + // node is a leaf + for (i=first_child; i < stop; i++) { + Triangle t = get_triangle(g, i); + + if (intersect_triangle(origin, direction, t, distance)) { + if (n < 1) { + dx[0] = distance; + + unsigned int rgba = g->colors[i]; + float4 color = get_color(direction, t, rgba); + + color_a[0] = color; + } + else { + unsigned long j = searchsorted(n, dx, distance); + + if (j <= alpha_depth-1) { + insert(alpha_depth, dx, j, distance); + + unsigned int rgba = g->colors[i]; + float4 color = get_color(direction, t, rgba); + + insert(alpha_depth, color_a, j, color); + } + } + + if (n < alpha_depth) + n++; + } + + } // triangle loop + + node--; + + } // node is a leaf + + } // while loop + while (node != head); + + if (n < 1) { + pixels[id] = 0; + return; + } + + dxlen[id] = n; + + float scale = 1.0f; + float fr = 0.0f; + float fg = 0.0f; + float fb = 0.0f; + for (i=0; i < n; i++) { + float alpha = color_a[i].w; + + fr += scale*color_a[i].x*alpha; + fg += scale*color_a[i].y*alpha; + fb += scale*color_a[i].z*alpha; + + scale *= (1.0f-alpha); + } + unsigned int a; + if (n < alpha_depth) + a = floorf(255*(1.0f-scale)); + else + a = 255; + unsigned int red = floorf(fr/(1.0f-scale)); + unsigned int green = floorf(fg/(1.0f-scale)); + unsigned int blue = floorf(fb/(1.0f-scale)); + + pixels[id] = a << 24 | red << 16 | green << 8 | blue; +} + +} // extern "C" diff --git a/chroma/cuda/rotate.h b/chroma/cuda/rotate.h new file mode 100644 index 0000000..15f8037 --- /dev/null +++ b/chroma/cuda/rotate.h @@ -0,0 +1,27 @@ +#ifndef __ROTATE_H__ +#define __ROTATE_H__ + +#include "linalg.h" +#include "matrix.h" + +__device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1}; + +__device__ Matrix +make_rotation_matrix(float phi, const float3 &n) +{ + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + + return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) + + sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0); +} + +/* rotate points counterclockwise, when looking towards +infinity, + through an angle `phi` about the axis `n`. */ +__device__ float3 +rotate(const float3 &a, float phi, const float3 &n) +{ + return make_rotation_matrix(phi,n)*a; +} + +#endif diff --git a/chroma/cuda/sorting.h b/chroma/cuda/sorting.h new file mode 100644 index 0000000..2bf1a5a --- /dev/null +++ b/chroma/cuda/sorting.h @@ -0,0 +1,107 @@ +template <class T> +__device__ void +swap(T &a, T &b) +{ + T tmp = a; + a = b; + b = tmp; +} + +template <class T> +__device__ void +reverse(int n, T *a) +{ + for (int i=0; i < n/2; i++) + swap(a[i],a[n-1-i]); +} + +template <class T> +__device__ void +piksrt(int n, T *arr) +{ + int i,j; + T a; + + for (j=1; j < n; j++) { + a = arr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) { + arr[i+1] = arr[i]; + i--; + } + arr[i+1] = a; + } +} + +template <class T, class U> +__device__ void +piksrt2(int n, T *arr, U *brr) +{ + int i,j; + T a; + U b; + + for (j=1; j < n; j++) { + a = arr[j]; + b = brr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) { + arr[i+1] = arr[i]; + brr[i+1] = brr[i]; + i--; + } + arr[i+1] = a; + brr[i+1] = b; + } +} + +/* Returns the index in `arr` where `x` should be inserted in order to + maintain order. If `n` equals one, return the index such that, when + `x` is inserted, `arr` will be in ascending order. +*/ +template <class T> +__device__ unsigned long +searchsorted(unsigned long n, T *arr, const T &x) +{ + unsigned long ju,jm,jl; + int ascnd; + + jl = 0; + ju = n; + + ascnd = (arr[n-1] >= arr[0]); + + while (ju-jl > 1) { + jm = (ju+jl) >> 1; + + if ((x > arr[jm]) == ascnd) + jl = jm; + else + ju = jm; + } + + if ((x <= arr[0]) == ascnd) + return 0; + else + return ju; +} + +template <class T> +__device__ void +insert(unsigned long n, T *arr, unsigned long i, const T &x) +{ + unsigned long j; + for (j=n-1; j > i; j--) + arr[j] = arr[j-1]; + arr[i] = x; +} + +template <class T> +__device__ void +add_sorted(unsigned long n, T *arr, const T &x) +{ + unsigned long i = searchsorted(n, arr, x); + + if (i < n) + insert(n, arr, i, x); +} diff --git a/chroma/cuda/tools.cu b/chroma/cuda/tools.cu new file mode 100644 index 0000000..80d06ec --- /dev/null +++ b/chroma/cuda/tools.cu @@ -0,0 +1,21 @@ +//--*-c-*- + +extern "C" +{ + +__global__ void +interleave(int nthreads, unsigned long long *x, unsigned long long *y, + unsigned long long *z, int bits, unsigned long long *dest) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + for (int i=0; i < bits; i++) + dest[id] |= (x[id] & 1 << i) << (2*i) | + (y[id] & 1 << i) << (2*i+1) | + (z[id] & 1 << i) << (2*i+2); +} + +} // extern "C" diff --git a/chroma/cuda/transform.cu b/chroma/cuda/transform.cu new file mode 100644 index 0000000..1f4405e --- /dev/null +++ b/chroma/cuda/transform.cu @@ -0,0 +1,51 @@ +//-*-c-*- + +#include "linalg.h" +#include "rotate.h" + +extern "C" +{ + +/* Translate the points `a` by the vector `v` */ +__global__ void +translate(int nthreads, float3 *a, float3 v) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] += v; +} + +/* Rotate the points `a` through an angle `phi` counter-clockwise about the + axis `axis` (when looking towards +infinity). */ +__global__ void +rotate(int nthreads, float3 *a, float phi, float3 axis) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = rotate(a[id], phi, axis); +} + +/* Rotate the points `a` through an angle `phi` counter-clockwise + (when looking towards +infinity along `axis`) about the axis defined + by the point `point` and the vector `axis` . */ +__global__ void +rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, + float3 point) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] -= point; + a[id] = rotate(a[id], phi, axis); + a[id] += point; +} + +} // extern "c" |
