diff options
-rw-r--r-- | src/intersect.cu | 23 | ||||
-rw-r--r-- | test.py | 2 |
2 files changed, 11 insertions, 14 deletions
diff --git a/src/intersect.cu b/src/intersect.cu
index f3c5657..0bb73a0 100644
--- a/src/intersect.cu
+++ b/src/intersect.cu
@@ -1,11 +1,7 @@
 //-*-c-*-
 
-__device__ bool intersect_triangle(const float3 &x, const float3 &p, float3 *triangle, float3 &intersection)
+__device__ bool intersect_triangle(const float3 &x, const float3 &p, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection)
 {
-    float3 v0 = triangle[0];
-    float3 v1 = triangle[1];
-    float3 v2 = triangle[2];
-
     Matrix m = make_matrix(v1-v0, v2-v0, -p);
 
     float determinant = det(m);
@@ -35,12 +31,9 @@ __device__ bool intersect_triangle(const float3 &x, const float3 &p, float3 *tri
     return true;
 }
 
-__device__ int get_color(const float3 &p, float3 *vertex)
+__device__ int get_color(const float3 &p, const float3 &v0, const float3& v1, const float3 &v2)
 {
-    float3 v1 = vertex[1] - vertex[0];
-    float3 v2 = vertex[2] - vertex[0];
-
-    float3 normal = cross(v1,v2);
+    float3 normal = cross(v1-v0,v2-v0);
 
     float scale;
     scale = dot(normal,-p)/(norm(normal)*norm(p));
@@ -94,11 +87,15 @@ __global__ void intersect_triangle_mesh(int max_idx, float3 *xarr, float3 *parr,
     int i;
     for (i=0; i < n; i++)
     {
-        if (intersect_triangle(x, p, mesh+3*i, intersection))
+        float3 v0 = *(mesh+3*i);
+        float3 v1 = *(mesh+3*i+1);
+        float3 v2 = *(mesh+3*i+2);
+
+        if (intersect_triangle(x, p, v0, v1, v2, intersection))
         {
             if (!hit)
             {
-                *pixel = get_color(p, mesh+3*i);
+                *pixel = get_color(p, v0, v1, v2);
                 min_distance = norm(intersection-x);
                 min_intersection = intersection;
 
@@ -110,7 +107,7 @@ __global__ void intersect_triangle_mesh(int max_idx, float3 *xarr, float3 *parr,
 
             if (distance < min_distance)
             {
-                *pixel = get_color(p, mesh+3*i);
+                *pixel = get_color(p, v0, v1, v2);
                 min_distance = distance;
                 min_intersection = intersection;
 
diff --git a/test.py b/test.py
--- a/test.py
+++ b/test.py
@@ -71,7 +71,7 @@ for i in range(100):
     rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
 
     t0 = time.time()
-    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), mesh_gpu, pixel_gpu, block=(128,1,1), grid=(width*height//128+1,1))
+    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), mesh_gpu, pixel_gpu, block=(64,1,1), grid=(width*height//64+1,1))
     cuda.Context.synchronize()
     elapsed = time.time() - t0
 