1 files changed, 57 insertions, 110 deletions
diff --git a/test.py b/test.py
index 15b7102..2aef412 100644
--- a/test.py
+++ b/test.py
@@ -1,152 +1,99 @@
+import os
 import time
-from stl import *
 import numpy as np
+
 from pycuda import autoinit
 from pycuda.compiler import SourceModule
 import pycuda.driver as cuda
 from pycuda import gpuarray
-from string import Template
-
-def array2vector(arr, dtype=gpuarray.vec.float3):
-    if len(arr.shape) != 2 or arr.shape[-1] != 3:
-        raise Exception('shape mismatch')
 
-    x = np.empty(arr.shape[0], dtype=dtype)
-    x['x'] = arr[:,0]
-    x['y'] = arr[:,1]
-    x['z'] = arr[:,2]
+from stl import *
+from geometry import *
+from materials import *
+from camera import *
+from vector import *
 
-    return x
+import pygame
 
 print 'device %s' % autoinit.device.name()
 
-source = open('src/intersect.cu').read()
-mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13')
+source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src'  # 'src' directory beside this script, with symlinks resolved
 
-rotate = mod.get_function('rotate')
-translate = mod.get_function('translate')
+source = open(source_directory + '/kernel.cu').read()  # kernel source text handed to SourceModule below
+mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13')  # '-I' adds source_directory to the compiler's include path
 intersect_mesh = mod.get_function('intersect_mesh')
+rotate = mod.get_function('rotate')  # kernel handle launched on both ray arrays in the loop further down
 
-import pygame
 size = width, height = 800, 600
-screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))
-
-film_size = (0.035, 0.024)
-focal_length = 0.05
-
-grid = []
-for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
-    for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
-        grid.append((x,0,z))
-grid = np.array(grid)
-grid += (0,focal_length,0)
-grid += (0,300,0)
-
-x = array2vector(grid)
-x_gpu = cuda.to_device(x)
-
-p = (0,300,0)-grid
-
-for i in range(p.shape[0]):
-    p[i] /= np.linalg.norm(p[i])
-
-p = array2vector(p)
-p_gpu = cuda.to_device(p)
-
-
-from zcurve import *
-
-mesh = read_stl('models/tie_interceptor6.stl')
-mesh = mesh.reshape(mesh.shape[0]//3,3,3)
-mesh = morton_order(mesh)
-mesh = mesh.reshape(mesh.shape[0]*3, 3)
-
-mesh3 = array2vector(mesh)
-
-from build import Graph
-
+screen = pygame.display.set_mode(size)  # display surface of `size` pixels that each frame is blitted to
+camera = Camera(size)  # Camera comes from one of the star imports above (presumably camera.py)
+camera.position((0,300,0))  # presumably places the camera at (0, 300, 0) -- confirm against Camera.position
 
-rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+origin, direction = camera.get_pixels()  # ray start points and directions; indexed row-by-row below, so presumably one row per pixel
 
-translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+# Scale every direction row to unit length in one vectorised step (assumes a 2-D float array, one row per ray).
+direction /= np.sqrt((direction**2).sum(axis=1))[:, np.newaxis]
 
-graph = Graph(mesh3)
+origin, direction = make_vector(origin), make_vector(direction)  # repack rows with make_vector (defined outside this file) before upload
 
-lower = array2vector(graph.lower, dtype=gpuarray.vec.float4)
-upper = array2vector(graph.upper, dtype=gpuarray.vec.float4)
-start = graph.start.astype(np.uint32)
-count = graph.count.astype(np.uint32)
-stack = np.zeros(lower.size, dtype=np.int32)
+# Copy both ray arrays into device memory.
+origin_gpu, direction_gpu = cuda.to_device(origin), cuda.to_device(direction)
 
-lower_gpu = cuda.to_device(lower)
-upper_gpu = cuda.to_device(upper)
+geometry = Geometry()
+geometry.add_mesh(read_stl(os.path.dirname(source_directory) + '/models/tie_interceptor6.stl'), vacuum, vacuum)  # model located relative to this script, like the kernel source, not the working directory
+geometry.build(bits=16)  # presumably builds the bounding-volume tree read below (lower_bound/upper_bound/child_map); meaning of bits=16 is not visible here
 
-lower_tex = mod.get_texref('lower_bound_arr')
-upper_tex = mod.get_texref('upper_bound_arr')
+# Flatten the triangles to one vertex per row and pack them as float4 for the 'mesh' texture.
+triangles = geometry.mesh
+mesh = make_vector(triangles.reshape(triangles.shape[0]*3, 3), dtype=gpuarray.vec.float4)
+lower_bound, upper_bound = [make_vector(bound, dtype=gpuarray.vec.float4)
+                            for bound in (geometry.lower_bound, geometry.upper_bound)]
+child_map, child_len = [index.astype(np.uint32)
+                        for index in (geometry.child_map, geometry.child_len)]
+first_leaf = np.int32(geometry.first_leaf)
 
-lower_tex.set_address(lower_gpu, lower.nbytes)
-upper_tex.set_address(upper_gpu, upper.nbytes)
-
-lower_tex.set_format(cuda.array_format.FLOAT, 4)
-upper_tex.set_format(cuda.array_format.FLOAT, 4)
-
-start_gpu = cuda.to_device(start)
-count_gpu = cuda.to_device(count)
-stack_gpu = cuda.mem_alloc(stack.nbytes)
-cuda.memcpy_htod(stack_gpu, stack)
+# Upload the host arrays; each to_device call allocates device memory and
+# copies the array into it.
+mesh_gpu = cuda.to_device(mesh)
+lower_bound_gpu, upper_bound_gpu = cuda.to_device(lower_bound), cuda.to_device(upper_bound)
+child_map_gpu, child_len_gpu = cuda.to_device(child_map), cuda.to_device(child_len)
 
+mesh_tex = mod.get_texref('mesh')  # texture references are looked up by name in the compiled module
+lower_bound_tex = mod.get_texref('lower_bound_arr')
+upper_bound_tex = mod.get_texref('upper_bound_arr')
 child_map_tex = mod.get_texref('child_map_arr')
 child_len_tex = mod.get_texref('child_len_arr')
 
-child_map_tex.set_address(start_gpu, start.nbytes)
-child_len_tex.set_address(count_gpu, count.nbytes)
+mesh_tex.set_address(mesh_gpu, mesh.nbytes)  # bind each texture to its device buffer; size is the host array's byte length
+lower_bound_tex.set_address(lower_bound_gpu, lower_bound.nbytes)
+upper_bound_tex.set_address(upper_bound_gpu, upper_bound.nbytes)
+child_map_tex.set_address(child_map_gpu, child_map.nbytes)
+child_len_tex.set_address(child_len_gpu, child_len.nbytes)
 
+mesh_tex.set_format(cuda.array_format.FLOAT, 4)  # four float components per element, matching the float4 host arrays
+lower_bound_tex.set_format(cuda.array_format.FLOAT, 4)
+upper_bound_tex.set_format(cuda.array_format.FLOAT, 4)
 child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
 child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
 
-mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
-mesh['x'] = mesh3['x']
-mesh['y'] = mesh3['y']
-mesh['z'] = mesh3['z']
-
-mesh_gpu = cuda.to_device(mesh)
-mesh_tex = mod.get_texref('mesh')
-mesh_tex.set_address(mesh_gpu, mesh.nbytes)
-mesh_tex.set_format(cuda.array_format.FLOAT, 4)
-
-pixel = np.empty(size, dtype=np.int32).flatten()
-
-pixel_gpu = cuda.mem_alloc(pixel.nbytes)
-cuda.memcpy_htod(pixel_gpu, pixel)
-
-speed = []
-elapsed = []
-
-t0total = time.time()
+pixels = np.empty(width*height, dtype=np.int32)  # host frame buffer, one int32 per pixel; np.empty leaves it uninitialised
+pixels_gpu = cuda.to_device(pixels)  # device copy passed to intersect_mesh and read back after each launch
 
 block_size = 64
 for i in range(100):
-    rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
-    rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
+    rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))  # each frame applies rotate with angle pi/100 and axis (0,0,1) to the ray origins, then to the directions
+    rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
 
     t0 = time.time()
-    intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])
+    intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex])
     cuda.Context.synchronize()
-
-    elapsed.append(time.time() - t0)
+    elapsed = time.time() - t0  # taken after synchronize(), so it covers kernel execution; NOTE(review): the rotate launches are not synchronized before t0, so their run time may be included -- confirm
 
     print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \
-        (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1])
+        (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed)  # mesh holds one entry per vertex, hence //3 for the triangle count
 
-    speed.append(pixel.size/elapsed[-1])
-
-    cuda.memcpy_dtoh(pixel, pixel_gpu)
-    pygame.surfarray.blit_array(screen, pixel.reshape(size))
+    cuda.memcpy_dtoh(pixels, pixels_gpu)  # copy the frame from device memory into the host buffer
+    pygame.surfarray.blit_array(screen, pixels.reshape(size))  # view as (width, height) to match the display surface
     pygame.display.flip()
 
-print 'average time = %f sec' % np.mean(elapsed)
-print 'average speed = %f photons/sec' % np.mean(speed)
-print 'total time = %f sec' % (time.time() - t0total)
-
 raw_input('press enter to exit')