summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Anthony LaTorre <telatorre@gmail.com> 2011-05-09 13:58:12 -0400
committer Anthony LaTorre <telatorre@gmail.com> 2011-05-09 13:58:12 -0400
commit 7e61bfbe7df445ff43abfc802df9471cf66b55ca (patch)
tree be4dc6797a87b64db60c4d9d53c9e63e62e7145c
parent bcaef46bb56feb2f92c4feae1bff9e041a4f84cf (diff)
download chroma-7e61bfbe7df445ff43abfc802df9471cf66b55ca.tar.gz
chroma-7e61bfbe7df445ff43abfc802df9471cf66b55ca.tar.bz2
chroma-7e61bfbe7df445ff43abfc802df9471cf66b55ca.zip
improve triangle intersection algorithm by allowing it to terminate early
-rw-r--r-- src/intersect.cu 38
-rw-r--r-- test.py 15
2 files changed, 21 insertions, 32 deletions
diff --git a/src/intersect.cu b/src/intersect.cu
index 1c157dc..f3c5657 100644
--- a/src/intersect.cu
+++ b/src/intersect.cu
@@ -1,36 +1,36 @@
//-*-c-*-
-__device__ Matrix inv(const Matrix&m, float& determinant)
-{
- determinant = det(m);
-
- return make_matrix(m.a11*m.a22 - m.a12*m.a21,
- m.a02*m.a21 - m.a01*m.a22,
- m.a01*m.a12 - m.a02*m.a11,
- m.a12*m.a20 - m.a10*m.a22,
- m.a00*m.a22 - m.a02*m.a20,
- m.a02*m.a10 - m.a00*m.a12,
- m.a10*m.a21 - m.a11*m.a20,
- m.a01*m.a20 - m.a00*m.a21,
- m.a00*m.a11 - m.a01*m.a10)/determinant;
-}
-
__device__ bool intersect_triangle(const float3 &x, const float3 &p, float3 *triangle, float3 &intersection)
{
float3 v0 = triangle[0];
float3 v1 = triangle[1];
float3 v2 = triangle[2];
- float determinant;
- float3 u = inv(make_matrix(v1-v0,v2-v0,-p), determinant)*(x-v0);
+ Matrix m = make_matrix(v1-v0, v2-v0, -p);
+
+ float determinant = det(m);
if (determinant == 0.0)
return false;
- if (u.x < 0.0 || u.y < 0.0 || u.z < 0.0 || (1-u.x-u.y) < 0.0)
+ float3 b = x-v0;
+
+ float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + (m.a02*m.a21 - m.a01*m.a22)*b.y + (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant;
+
+ if (u1 < 0.0)
+ return false;
+
+ float u2 = ((m.a12*m.a20 - m.a10*m.a22)*b.x + (m.a00*m.a22 - m.a02*m.a20)*b.y + (m.a02*m.a10 - m.a00*m.a12)*b.z)/determinant;
+
+ if (u2 < 0.0)
+ return false;
+
+ float u3 = ((m.a10*m.a21 - m.a11*m.a20)*b.x + (m.a01*m.a20 - m.a00*m.a21)*b.y + (m.a00*m.a11 - m.a01*m.a10)*b.z)/determinant;
+
+ if (u3 < 0.0 || (1-u1-u2) < 0.0)
return false;
- intersection = x + p*u.z;
+ intersection = x + p*u3;
return true;
}
diff --git a/test.py b/test.py
index 5c7c234..ec7c954 100644
--- a/test.py
+++ b/test.py
@@ -17,17 +17,6 @@ def array2float3(arr):
return x
-def array2float4(arr):
- if len(arr.shape) != 2 or arr.shape[-1] != 3:
- raise Exception('shape mismatch')
-
- x = np.empty(arr.shape[0], dtype=gpuarray.vec.float4)
- x['x'] = arr[:,0]
- x['y'] = arr[:,1]
- x['z'] = arr[:,2]
-
- return x
-
print 'device %s' % autoinit.device.name()
source = open('src/linalg.h').read() + open('src/matrix.h').read() + \
@@ -77,9 +66,9 @@ translate(np.int32(mesh.size), mesh_gpu, gpuarray.vec.make_float3(0,30,0), block
for i in range(100):
- rotate(np.int32(x.size), x_gpu, np.float32(np.pi/50), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
+ rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
- rotate(np.int32(p.size), p_gpu, np.float32(np.pi/50), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
+ rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
t0 = time.time()
intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), mesh_gpu, pixel_gpu, block=(256,1,1), grid=(width*height//256+1,1))