diff options
Diffstat (limited to 'src/kernel.cu')
-rw-r--r-- | src/kernel.cu | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/src/kernel.cu b/src/kernel.cu new file mode 100644 index 0000000..02c52cf --- /dev/null +++ b/src/kernel.cu @@ -0,0 +1,272 @@ +//-*-c-*- +#include <math_constants.h> + +#include "linalg.h" +#include "matrix.h" +#include "rotate.h" + +texture<float4, 1, cudaReadModeElementType> mesh; + + + +texture<float4, 1, cudaReadModeElementType> upper_bound_arr; +texture<float4, 1, cudaReadModeElementType> lower_bound_arr; + +texture<uint, 1, cudaReadModeElementType> child_map_arr; +texture<uint, 1, cudaReadModeElementType> child_len_arr; + +__device__ float3 make_float3(const float4 &a) +{ + return make_float3(a.x, a.y, a.z); +} + +__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection) +{ + Matrix m = make_matrix(v1-v0, v2-v0, -direction); + + float determinant = det(m); + + if (determinant == 0.0f) + return false; + + float3 b = origin-v0; + + float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + (m.a02*m.a21 - m.a01*m.a22)*b.y + (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant; + + if (u1 < 0.0f) + return false; + + float u2 = ((m.a12*m.a20 - m.a10*m.a22)*b.x + (m.a00*m.a22 - m.a02*m.a20)*b.y + (m.a02*m.a10 - m.a00*m.a12)*b.z)/determinant; + + if (u2 < 0.0f) + return false; + + float u3 = ((m.a10*m.a21 - m.a11*m.a20)*b.x + (m.a01*m.a20 - m.a00*m.a21)*b.y + (m.a00*m.a11 - m.a01*m.a10)*b.z)/determinant; + + if (u3 < 0.0f || (1-u1-u2) < 0.0f) + return false; + + intersection = origin + direction*u3; + + return true; +} + +__device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2) +{ + float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction); + if (scale < 0.0f) + scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction); + + + int rgb = floorf(scale); + + + return rgb << 16 | rgb << 8 | rgb; +} + + +__device__ bool intersect_box(const float3 &origin, const float3 &direction, const float3 &lower_bound, const float3 &upper_bound) +{ + float kmin, kmax, kymin, kymax, kzmin, kzmax; + + if (direction.x >= 0.0f) + { + kmin = (lower_bound.x - origin.x)/direction.x; + kmax = (upper_bound.x - origin.x)/direction.x; + } + else + { + kmin = (upper_bound.x - origin.x)/direction.x; + kmax = (lower_bound.x - origin.x)/direction.x; + } + + if (kmax < kmin) + return false; + + if (direction.y >= 0.0f) + { + kymin = (lower_bound.y - origin.y)/direction.y; + kymax = (upper_bound.y - origin.y)/direction.y; + } + else + { + kymin = (upper_bound.y - origin.y)/direction.y; + kymax = (lower_bound.y - origin.y)/direction.y; + } + + if (kymax < kymin) + return false; + + if (kymin > kmin) + kmin = kymin; + + if (kymax < kmax) + kmax = kymax; + + if (kmin > kmax) + return false; + + if (direction.z >= 0.0f) + { + kzmin = (lower_bound.z - origin.z)/direction.z; + kzmax = (upper_bound.z - origin.z)/direction.z; + } + else + { + kzmin = (upper_bound.z - origin.z)/direction.z; + kzmax = (lower_bound.z - origin.z)/direction.z; + } + + if (kzmax < kzmin) + return false; + + if (kzmin > kmin) + kmin = kzmin; + + if (kzmax < kmax) + kmax = kzmax; + + if (kmin > kmax) + return false; + + if (kmax < 0.0f) + return false; + + return true; +} + +__device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i) +{ + float3 lower_bound = make_float3(tex1Dfetch(lower_bound_arr, i)); + float3 upper_bound = make_float3(tex1Dfetch(upper_bound_arr, i)); + + return intersect_box(origin, direction, lower_bound, upper_bound); +} + +extern "C" +{ + +__global__ void translate(int max_idx, float3 *pt, float3 v) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx > max_idx) + return; + + pt[idx] += v; +} + +__global__ void rotate(int max_idx, float3 *pt, float phi, float3 axis) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx > max_idx) + return; + + pt[idx] = rotate(pt[idx], phi, axis); +} + +__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx > max_idx) + return; + + float3 origin = origin_arr[idx]; + float3 direction = direction_arr[idx]; + + int *pixel = pixel_arr+idx; + + bool hit = false; + + float distance, min_distance; + float3 intersection, min_intersection; + + if (!intersect_node(origin, direction, 0)) + { + *pixel = 0; + return; + } + + int stack[100]; + + int *head = &stack[0]; + int *node = &stack[1]; + *node = 0; + + int i; + + bool show_leafs = false; + + do + { + int child_map = tex1Dfetch(child_map_arr, *node); + int child_len = tex1Dfetch(child_len_arr, *node); + + if (*node < first_leaf) + { + for (i=0; i < child_len; i++) + if (intersect_node(origin, direction, child_map+i)) + *node++ = child_map+i; + + if (node == head) + break; + + node--; + + } + else if (show_leafs) + { + hit = true; + *pixel = 255; + node--; + } + else // node is a leaf + { + for (i=0; i < child_len; i++) + { + int mesh_idx = child_map + 3*i; + + float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx)); + float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1)); + float3 v2 = make_float3(tex1Dfetch(mesh, mesh_idx+2)); + + if (intersect_triangle(origin, direction, v0, v1, v2, intersection)) + { + if (!hit) + { + *pixel = get_color(direction, v0, v1, v2); + + min_distance = norm(intersection-origin); + min_intersection = intersection; + hit = true; + continue; + } + + distance = norm(intersection-origin); + + if (distance < min_distance) + { + *pixel = get_color(direction, v0, v1, v2); + + min_distance = distance; + min_intersection = intersection; + } + } + + } // triangle loop + + node--; + + } // node is a leaf + + } // while loop + while (node != head); + + if (!hit) + *pixel = 0; + +} // intersect mesh + +} // extern "c" |