11 files changed, 162 insertions, 12 deletions
diff --git a/models/40mmcube.stl b/models/40mmcube.stl
new file mode 100644
index 0000000..779ff9c
--- /dev/null
+++ b/models/40mmcube.stl
diff --git a/models/ccube4.STL b/models/ccube4.STL
new file mode 100644
index 0000000..126dc86
--- /dev/null
+++ b/models/ccube4.STL
diff --git a/models/companioncube.stl b/models/companioncube.stl
new file mode 100644
index 0000000..09ea5a1
--- /dev/null
+++ b/models/companioncube.stl
diff --git a/models/lionsolid.stl b/models/lionsolid.stl
new file mode 100644
index 0000000..e133e8f
--- /dev/null
+++ b/models/lionsolid.stl
diff --git a/models/tie_interceptor6.stl b/models/tie_interceptor6.stl
new file mode 100644
index 0000000..970e8da
--- /dev/null
+++ b/models/tie_interceptor6.stl
diff --git a/intersect.cu b/src/intersect.cu
index d7fdaa5..844f6fd 100644
--- a/intersect.cu
+++ b/src/intersect.cu
@@ -18,8 +18,7 @@ __device__ __host__ Matrix inv(const Matrix&m, float& determinant)
 __device__ bool intersect_triangle(const float3 &x, const float3 &p, float3 *vertex, float3 &intersection)
 {
 	float determinant;
-
-	float3 u = inv(make_matrix(vertex[0]-vertex[2],vertex[1]-vertex[2],-p), determinant)*(x-vertex[2]);
+	float3 u = inv(make_matrix(vertex[1]-vertex[0],vertex[2]-vertex[0],-p), determinant)*(x-vertex[0]);
 
 	if (determinant == 0.0)
 		return false;
@@ -32,13 +31,53 @@ __device__ bool intersect_triangle(const float3 &x, const float3 &p, float3 *ver
 	return true;
 }
 
+__device__ int get_color(const float3 &p, float3 *vertex)
+{
+	float3 v1 = vertex[1] - vertex[0];
+	float3 v2 = vertex[2] - vertex[0];
+
+	float3 normal = cross(v1,v2);
+
+	float scale;
+	scale = dot(normal,-p)/(norm(normal)*norm(p));
+	if (scale < 0.0)
+		scale = dot(-normal,-p)/(norm(normal)*norm(p));
+
+	int rgb = floorf(255*scale);
+
+	return rgb*65536 + rgb*256 + rgb;
+}
+
 extern "C"
 {
 
-__global__ void intersect_triangle_mesh(float3 *x, float3 *p, int n, float3 *mesh, int *state, float3 *w)
+__global__ void translate(int max_idx, float3 *x, float3 v)
 {
 	int idx = blockIdx.x*blockDim.x + threadIdx.x;
 
+	if (idx > max_idx)
+		return;
+
+	x[idx] += v;
+}
+
+__global__ void rotate(int max_idx, float3 *x, float phi, float3 axis)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (idx > max_idx)
+		return;
+
+	x[idx] = rotate(x[idx], phi, axis);
+}
+
+__global__ void intersect_triangle_mesh(int max_idx, float3 *x, float3 *p, int n, float3 *mesh, int *pixel)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (idx > max_idx)
+		return;
+
 	bool hit = false;
 
 	float distance, min_distance;
@@ -51,22 +90,29 @@ __global__ void intersect_triangle_mesh(float3 *x, float3 *p, int n, float3 *mes
 		{
 			if (!hit)
 			{
-				state[idx] = 1;
+				pixel[idx] = get_color(p[idx], mesh+3*i);
+
 				min_distance = norm(intersection-x[idx]);
 				min_intersection = intersection;
+				hit = true;
 				continue;
 			}
 
-			distance = norm(w[idx]-x[idx]);
+			distance = norm(intersection-x[idx]);
 
 			if (distance < min_distance)
 			{
-				state[idx] = 1;
+				pixel[idx] = get_color(p[idx], mesh+3*i);
+
 				min_distance = distance;
 				min_intersection = intersection;
 			}
 		}
 	}
+	
+	if (!hit)
+		pixel[idx] = 0;
+	
 }
 
 } // extern "c"
diff --git a/linalg.h b/src/linalg.h
index 593aa9d..593aa9d 100644
--- a/linalg.h
+++ b/src/linalg.h
diff --git a/matrix.h b/src/matrix.h
index 1de8e8c..5c72344 100644
--- a/matrix.h
+++ b/src/matrix.h
@@ -14,9 +14,10 @@ __device__ __host__ Matrix make_matrix(float a00, float a01, float a02,
 	return m;
 }
 
-__device__ __host__ Matrix make_matrix(float3 &u1, float3 &u2, float3 &u3)
+__device__ __host__ Matrix make_matrix(const float3 &u1, const float3 &u2, const float3 &u3)
 {
 	Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z};
+	return m;
 }
 
 __device__ __host__ Matrix operator- (const Matrix &m)
diff --git a/rotate.h b/src/rotate.h
index 52d6d6a..52d6d6a 100644
--- a/rotate.h
+++ b/src/rotate.h
diff --git a/stl.py b/stl.py
index b300554..076c739 100644
--- a/stl.py
+++ b/stl.py
@@ -1,15 +1,30 @@
 import numpy as np
+import struct
 
 def pull_vertices(filename):
     f = open(filename)
 
-    vertices = []
+    vertex = []
     for line in f:
         if not line.strip().startswith('vertex'):
             continue
-        vertices.append([float(s) for s in line.strip().split()[1:]])
+        vertex.append([float(s) for s in line.strip().split()[1:]])
 
-    return np.array(vertices)
+    f.close()
+    return np.array(vertex)
 
-if __name__ == '__main__':
-    print pull_vertices('models/MiniFig.STL')
+def pull_vertices_binary(filename):
+    f = open(filename)
+
+    f.read(80)
+    triangles = struct.unpack('<I', f.read(4))[0]
+
+    vertex = []
+    for i in range(triangles):
+        f.read(12)
+        for j in range(3):
+            vertex.append(struct.unpack('<fff', f.read(12)))
+        f.read(2)
+
+    f.close()
+    return np.array(vertex)
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..014c2db
--- /dev/null
+++ b/test.py
@@ -0,0 +1,88 @@
+import time
+from stl import *
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+def array2float3(arr):
+    if len(arr.shape) != 2 or arr.shape[-1] != 3:
+        raise Exception('shape mismatch')
+
+    x = np.empty(arr.shape[0], dtype=gpuarray.vec.float3)
+    x['x'] = arr[:,0]
+    x['y'] = arr[:,1]
+    x['z'] = arr[:,2]
+
+    return x
+
+print 'device %s' % autoinit.device.name()
+
+source = open('src/linalg.h').read() + open('src/matrix.h').read() + \
+    open('src/rotate.h').read() + open('src/intersect.cu').read()
+
+mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+intersect = mod.get_function('intersect_triangle_mesh')
+rotate = mod.get_function('rotate')
+translate = mod.get_function('translate')
+
+mesh = array2float3(pull_vertices_binary('models/tie_interceptor6.stl'))
+
+import pygame
+size = width, height = 800, 600
+screen = pygame.display.set_mode(size)
+
+film_size = (0.035, 0.024)
+focal_length = 0.05
+
+grid = []
+for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
+    for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
+        grid.append((x,0,z))
+grid = np.array(grid)
+grid += (0,focal_length,0)
+grid += (0,300,0)
+
+x = array2float3(grid)
+p = array2float3(((0,300,0)-grid))
+
+x_gpu = cuda.mem_alloc(x.nbytes)
+cuda.memcpy_htod(x_gpu,x)
+
+p_gpu = cuda.mem_alloc(p.nbytes)
+cuda.memcpy_htod(p_gpu,p)
+
+mesh_gpu = cuda.mem_alloc(mesh.nbytes)
+cuda.memcpy_htod(mesh_gpu,mesh)
+
+pixel = np.empty(size, dtype=np.int32).flatten()
+pixel_gpu = cuda.mem_alloc(pixel.nbytes)
+cuda.memcpy_htod(pixel_gpu,pixel)
+
+rotate(np.int32(mesh.size), mesh_gpu, np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh.size//256+1,1))
+
+translate(np.int32(mesh.size), mesh_gpu, gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh.size//256+1,1))
+
+t0 = time.time()
+for i in range(100):
+    rotate(np.int32(x.size), x_gpu, np.float32(np.pi/50), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
+
+    rotate(np.int32(p.size), p_gpu, np.float32(np.pi/50), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
+
+    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), mesh_gpu, pixel_gpu, block=(256,1,1), grid=(width*height//256+1,1))
+
+    cuda.Context.synchronize()
+
+    cuda.memcpy_dtoh(pixel, pixel_gpu)
+    pygame.surfarray.blit_array(screen, pixel.reshape(size))
+    pygame.display.flip()
+
+
+elapsed = time.time() - t0
+
+print '%i triangles, %i photons, %f sec; (%f photons/s)' % \
+    (mesh.size//3, pixel.size, elapsed, pixel.size/elapsed)
+
+
+raw_input('press enter to exit')