summaryrefslogtreecommitdiff
path: root/src/kernel.cu
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernel.cu')
-rw-r--r--src/kernel.cu272
1 files changed, 272 insertions, 0 deletions
diff --git a/src/kernel.cu b/src/kernel.cu
new file mode 100644
index 0000000..02c52cf
--- /dev/null
+++ b/src/kernel.cu
@@ -0,0 +1,272 @@
+//-*-c-*-
+#include <math_constants.h>
+
+#include "linalg.h"
+#include "matrix.h"
+#include "rotate.h"
+
+texture<float4, 1, cudaReadModeElementType> mesh;
+
+
+
+texture<float4, 1, cudaReadModeElementType> upper_bound_arr;
+texture<float4, 1, cudaReadModeElementType> lower_bound_arr;
+
+texture<uint, 1, cudaReadModeElementType> child_map_arr;
+texture<uint, 1, cudaReadModeElementType> child_len_arr;
+
+__device__ float3 make_float3(const float4 &a)
+{
+ return make_float3(a.x, a.y, a.z);
+}
+
+__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection)
+{
+ Matrix m = make_matrix(v1-v0, v2-v0, -direction);
+
+ float determinant = det(m);
+
+ if (determinant == 0.0f)
+ return false;
+
+ float3 b = origin-v0;
+
+ float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + (m.a02*m.a21 - m.a01*m.a22)*b.y + (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant;
+
+ if (u1 < 0.0f)
+ return false;
+
+ float u2 = ((m.a12*m.a20 - m.a10*m.a22)*b.x + (m.a00*m.a22 - m.a02*m.a20)*b.y + (m.a02*m.a10 - m.a00*m.a12)*b.z)/determinant;
+
+ if (u2 < 0.0f)
+ return false;
+
+ float u3 = ((m.a10*m.a21 - m.a11*m.a20)*b.x + (m.a01*m.a20 - m.a00*m.a21)*b.y + (m.a00*m.a11 - m.a01*m.a10)*b.z)/determinant;
+
+ if (u3 < 0.0f || (1-u1-u2) < 0.0f)
+ return false;
+
+ intersection = origin + direction*u3;
+
+ return true;
+}
+
+__device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2)
+{
+ float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction);
+ if (scale < 0.0f)
+ scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction);
+
+
+ int rgb = floorf(scale);
+
+
+ return rgb << 16 | rgb << 8 | rgb;
+}
+
+
+__device__ bool intersect_box(const float3 &origin, const float3 &direction, const float3 &lower_bound, const float3 &upper_bound)
+{
+ float kmin, kmax, kymin, kymax, kzmin, kzmax;
+
+ if (direction.x >= 0.0f)
+ {
+ kmin = (lower_bound.x - origin.x)/direction.x;
+ kmax = (upper_bound.x - origin.x)/direction.x;
+ }
+ else
+ {
+ kmin = (upper_bound.x - origin.x)/direction.x;
+ kmax = (lower_bound.x - origin.x)/direction.x;
+ }
+
+ if (kmax < kmin)
+ return false;
+
+ if (direction.y >= 0.0f)
+ {
+ kymin = (lower_bound.y - origin.y)/direction.y;
+ kymax = (upper_bound.y - origin.y)/direction.y;
+ }
+ else
+ {
+ kymin = (upper_bound.y - origin.y)/direction.y;
+ kymax = (lower_bound.y - origin.y)/direction.y;
+ }
+
+ if (kymax < kymin)
+ return false;
+
+ if (kymin > kmin)
+ kmin = kymin;
+
+ if (kymax < kmax)
+ kmax = kymax;
+
+ if (kmin > kmax)
+ return false;
+
+ if (direction.z >= 0.0f)
+ {
+ kzmin = (lower_bound.z - origin.z)/direction.z;
+ kzmax = (upper_bound.z - origin.z)/direction.z;
+ }
+ else
+ {
+ kzmin = (upper_bound.z - origin.z)/direction.z;
+ kzmax = (lower_bound.z - origin.z)/direction.z;
+ }
+
+ if (kzmax < kzmin)
+ return false;
+
+ if (kzmin > kmin)
+ kmin = kzmin;
+
+ if (kzmax < kmax)
+ kmax = kzmax;
+
+ if (kmin > kmax)
+ return false;
+
+ if (kmax < 0.0f)
+ return false;
+
+ return true;
+}
+
+__device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i)
+{
+ float3 lower_bound = make_float3(tex1Dfetch(lower_bound_arr, i));
+ float3 upper_bound = make_float3(tex1Dfetch(upper_bound_arr, i));
+
+ return intersect_box(origin, direction, lower_bound, upper_bound);
+}
+
+extern "C"
+{
+
+__global__ void translate(int max_idx, float3 *pt, float3 v)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (idx > max_idx)
+ return;
+
+ pt[idx] += v;
+}
+
+__global__ void rotate(int max_idx, float3 *pt, float phi, float3 axis)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (idx > max_idx)
+ return;
+
+ pt[idx] = rotate(pt[idx], phi, axis);
+}
+
+__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (idx > max_idx)
+ return;
+
+ float3 origin = origin_arr[idx];
+ float3 direction = direction_arr[idx];
+
+ int *pixel = pixel_arr+idx;
+
+ bool hit = false;
+
+ float distance, min_distance;
+ float3 intersection, min_intersection;
+
+ if (!intersect_node(origin, direction, 0))
+ {
+ *pixel = 0;
+ return;
+ }
+
+ int stack[100];
+
+ int *head = &stack[0];
+ int *node = &stack[1];
+ *node = 0;
+
+ int i;
+
+ bool show_leafs = false;
+
+ do
+ {
+ int child_map = tex1Dfetch(child_map_arr, *node);
+ int child_len = tex1Dfetch(child_len_arr, *node);
+
+ if (*node < first_leaf)
+ {
+ for (i=0; i < child_len; i++)
+ if (intersect_node(origin, direction, child_map+i))
+ *node++ = child_map+i;
+
+ if (node == head)
+ break;
+
+ node--;
+
+ }
+ else if (show_leafs)
+ {
+ hit = true;
+ *pixel = 255;
+ node--;
+ }
+ else // node is a leaf
+ {
+ for (i=0; i < child_len; i++)
+ {
+ int mesh_idx = child_map + 3*i;
+
+ float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx));
+ float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1));
+ float3 v2 = make_float3(tex1Dfetch(mesh, mesh_idx+2));
+
+ if (intersect_triangle(origin, direction, v0, v1, v2, intersection))
+ {
+ if (!hit)
+ {
+ *pixel = get_color(direction, v0, v1, v2);
+
+ min_distance = norm(intersection-origin);
+ min_intersection = intersection;
+ hit = true;
+ continue;
+ }
+
+ distance = norm(intersection-origin);
+
+ if (distance < min_distance)
+ {
+ *pixel = get_color(direction, v0, v1, v2);
+
+ min_distance = distance;
+ min_intersection = intersection;
+ }
+ }
+
+ } // triangle loop
+
+ node--;
+
+ } // node is a leaf
+
+ } // while loop
+ while (node != head);
+
+ if (!hit)
+ *pixel = 0;
+
+} // intersect mesh
+
+} // extern "c"