1 files changed, 88 insertions, 0 deletions
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..014c2db
--- /dev/null
+++ b/test.py
@@ -0,0 +1,88 @@
+import time
+from stl import *
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+def array2float3(arr):
+    if len(arr.shape) != 2 or arr.shape[-1] != 3:
+        raise Exception('shape mismatch')
+
+    x = np.empty(arr.shape[0], dtype=gpuarray.vec.float3)
+    x['x'] = arr[:,0]
+    x['y'] = arr[:,1]
+    x['z'] = arr[:,2]
+
+    return x
+
+print 'device %s' % autoinit.device.name()
+
+source = open('src/linalg.h').read() + open('src/matrix.h').read() + \
+    open('src/rotate.h').read() + open('src/intersect.cu').read()
+
+mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+intersect = mod.get_function('intersect_triangle_mesh')
+rotate = mod.get_function('rotate')
+translate = mod.get_function('translate')
+
+mesh = array2float3(pull_vertices_binary('models/tie_interceptor6.stl'))
+
+import pygame
+size = width, height = 800, 600
+screen = pygame.display.set_mode(size)
+
+film_size = (0.035, 0.024)
+focal_length = 0.05
+
+grid = []
+for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
+    for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
+        grid.append((x,0,z))
+grid = np.array(grid)
+grid += (0,focal_length,0)
+grid += (0,300,0)
+
+x = array2float3(grid)
+p = array2float3(((0,300,0)-grid))
+
+x_gpu = cuda.mem_alloc(x.nbytes)
+cuda.memcpy_htod(x_gpu,x)
+
+p_gpu = cuda.mem_alloc(p.nbytes)
+cuda.memcpy_htod(p_gpu,p)
+
+mesh_gpu = cuda.mem_alloc(mesh.nbytes)
+cuda.memcpy_htod(mesh_gpu,mesh)
+
+pixel = np.empty(size, dtype=np.int32).flatten()
+pixel_gpu = cuda.mem_alloc(pixel.nbytes)
+cuda.memcpy_htod(pixel_gpu,pixel)
+
+rotate(np.int32(mesh.size), mesh_gpu, np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh.size//256+1,1))
+
+translate(np.int32(mesh.size), mesh_gpu, gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh.size//256+1,1))
+
+t0 = time.time()
+for i in range(100):
+    rotate(np.int32(x.size), x_gpu, np.float32(np.pi/50), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
+
+    rotate(np.int32(p.size), p_gpu, np.float32(np.pi/50), gpuarray.vec.make_float3(0,0,1), block=(256,1,1), grid=(width*height//256+1,1))
+
+    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), mesh_gpu, pixel_gpu, block=(256,1,1), grid=(width*height//256+1,1))
+
+    cuda.Context.synchronize()
+
+    cuda.memcpy_dtoh(pixel, pixel_gpu)
+    pygame.surfarray.blit_array(screen, pixel.reshape(size))
+    pygame.display.flip()
+
+
+elapsed = time.time() - t0
+
+print '%i triangles, %i photons, %f sec; (%f photons/s)' % \
+    (mesh.size//3, pixel.size, elapsed, pixel.size/elapsed)
+
+
+raw_input('press enter to exit')