11 files changed, 280 insertions, 404 deletions
diff --git a/build.py b/build.py
deleted file mode 100644
index 8c05c5d..0000000
--- a/build.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import numpy as np
-
-class Leaf(object):
-    def __init__(self, meshasdf, mesh_idx):
-        self.xmin, self.ymin, self.zmin = meshasdf[0]
-        self.xmax, self.ymax, self.zmax = meshasdf[0]
-
-        for x, y, z in meshasdf[1:]:
-            if x < self.xmin:
-                self.xmin = x
-            if x > self.xmax:
-                self.xmax = x
-            if y < self.ymin:
-                self.ymin = y
-            if y > self.ymax:
-                self.ymax = y
-            if z < self.zmin:
-                self.zmin = z
-            if z > self.zmax:
-                self.zmax = z
-
-        self.mesh_idx = mesh_idx
-        self.size = meshasdf.size//3
-
-class Node(object):
-    def __init__(self, children):
-        self.xmin = children[0].xmin
-        self.xmax = children[0].xmax
-        self.ymin = children[0].ymin
-        self.ymax = children[0].ymax
-        self.zmin = children[0].zmin
-        self.zmax = children[0].zmax
-
-        for child in children[1:]:
-            if child.xmin < self.xmin:
-                self.xmin = child.xmin
-            if child.xmax > self.xmax:
-                self.xmax = child.xmax
-            if child.ymin < self.ymin:
-                self.ymin = child.ymin
-            if child.ymax > self.ymax:
-                self.ymax = child.ymax
-            if child.zmin < self.zmin:
-                self.zmin = child.zmin
-            if child.zmax > self.zmax:
-                self.zmax = child.zmax
-
-        self.children = children
-        self.size = len(children)
-
-class Graph(object):
-    def __init__(self, mesh, triangles_per_leaf=2, children_per_node=4):
-        leafs = []
-        for i in range(0, mesh.size, triangles_per_leaf*3):
-            leafs.append(Leaf(mesh[i:i+triangles_per_leaf*3], i))
-
-        layers = []
-        layers.append(leafs)
-
-        while len(layers[-1]) > 1:
-            nodes = []
-            for i in range(0, len(layers[-1]), children_per_node):
-                nodes.append(Node(layers[-1][i:i+children_per_node]))
-
-            layers.append(nodes)
-
-        # leaves last
-        layers.reverse()
-
-        nodes = []
-        for layer in layers:
-            for node in layer:
-                nodes.append(node)
-
-        lower = []
-        upper = []
-        start = []
-        count = [] # number of child nodes or triangles
-        first_leaf = -1
-
-        for i, node in enumerate(nodes):
-            lower.append([node.xmin, node.ymin, node.zmin])
-            upper.append([node.xmax, node.ymax, node.zmax])
-            count.append(node.size)
-
-            if isinstance(node, Node):
-                for j, child in enumerate(nodes):
-                    if child is node.children[0]:
-                        start.append(j)
-                        break
-
-            if isinstance(node, Leaf):
-                start.append(node.mesh_idx)
-
-                if first_leaf == -1:
-                    first_leaf = i
-
-        self.lower = np.array(lower)
-        self.upper = np.array(upper)
-        self.start = np.array(start)
-        self.count = np.array(count)
-        self.first_leaf = first_leaf
diff --git a/bvh.py b/bvh.py
new file mode 100644
index 0000000..d1e4c1a
--- /dev/null
+++ b/bvh.py
@@ -0,0 +1,47 @@
+import numpy as np
+from itertools import chain
+
+def compress(data, selectors):
+    return (d for d, s in zip(data, selectors) if s)
+
+class Leaf(object):
+    def __init__(self, mesh, mesh_idx, zvalue=None):
+        self.mesh_idx = mesh_idx
+        self.size = mesh.shape[0]
+        self.zvalue = zvalue
+        self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
+        self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
+        self.center = (self.lower_bound + self.upper_bound)/2.0
+
+class Node(object):
+    def __init__(self, children, zvalue=None):
+        self.size = len(children)
+        self.zvalue = zvalue
+
+        lower_pts = np.array([child.lower_bound for child in children])
+        upper_pts = np.array([child.upper_bound for child in children])
+
+        self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])])
+        self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])])
+
+        self.children = children
+
+        self.center = (self.lower_bound + self.upper_bound)/2.0
+
+def interleave(*args):
+    return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2)
+
+def morton_order(mesh, bits=8):
+    lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
+    upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
+    
+    def quantize(x):
+        return np.uint64((x-lower_bound)*bits/(upper_bound-lower_bound))
+
+    zvalues = np.empty(mesh.shape[0], dtype=np.uint64)
+    for i, triangle in enumerate(mesh):
+        zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0)))
+
+    order = np.array(zip(*sorted(zip(zvalues, range(len(zvalues)))))[-1])
+
+    return order, zvalues
diff --git a/camera.py b/camera.py
new file mode 100644
index 0000000..dc3e32e
--- /dev/null
+++ b/camera.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+class Camera(object):
+    def __init__(self, size = (800, 600), film_size = (0.035, 0.024), focal_length=0.05):
+        width, height = size
+
+        grid = []
+        for i, x in enumerate(np.linspace(-film_size[0]/2, film_size[0]/2, width)):
+            for j, z in enumerate(np.linspace(-film_size[1]/2, film_size[1]/2, height)):
+                grid.append((x,0,z))
+
+        self.grid = np.array(grid)
+        self.grid += (0,focal_length,0)
+
+        self.focal_point = np.zeros(3)
+
+    def position(self, position):
+        self.grid += position
+        self.focal_point += position
+
+    def get_pixels(self):
+        return self.grid, self.focal_point-self.grid
diff --git a/geometry.py b/geometry.py
new file mode 100644
index 0000000..d365141
--- /dev/null
+++ b/geometry.py
@@ -0,0 +1,110 @@
+import numpy as np
+from bvh import *
+
+class Geometry(object):
+    """
+    Geometry object.
+    """
+
+    def __init__(self):
+        self.materials = []
+        self.mesh = np.empty(0)
+        self.inside = np.empty(0)
+        self.outside = np.empty(0)
+
+    def add_mesh(self, mesh, inside, outside):
+        """
+        Add a triangle mesh.
+
+        Args:
+            - mesh: array of shape (n,3,3)
+                Array of vertices for a closed mesh with n triangles.
+
+            - inside: Material
+                Material inside the mesh.
+
+            - outside: Material
+                Material outside the mesh.
+         """
+
+        if len(mesh.shape) != 3 or mesh.shape[1] != 3 or mesh.shape[2] != 3:
+            raise Exception('shape mismatch')
+
+        if inside not in self.materials:
+            self.materials.append(inside)
+        if outside not in self.materials:
+            self.materials.append(outside)
+
+        self.inside.resize(self.inside.size+mesh.shape[0])
+        self.inside[-mesh.shape[0]:] = np.tile(self.materials.index(inside), mesh.shape[0])
+
+        self.outside.resize(self.outside.size+mesh.shape[0])
+        self.outside[-mesh.shape[0]:] = np.tile(self.materials.index(outside), mesh.shape[0])
+
+        self.mesh.resize((self.mesh.shape[0]+mesh.shape[0],3,3))
+        self.mesh[-mesh.shape[0]:] = mesh
+
+    def build(self, bits=16):
+        order, zvalues = morton_order(self.mesh, bits)
+
+
+        zvalues = zvalues[order]
+        self.mesh = self.mesh[order]
+        self.inside = self.inside[order]
+        self.outside = self.outside[order]
+
+        leafs = []
+
+        for z in sorted(set(zvalues)):
+            mask = (zvalues == z)
+            leafs.append(Leaf(self.mesh[mask], 3*np.where(mask)[0][0], z))
+
+        layers = []
+        layers.append(leafs)
+
+        while True:
+            zvalues = np.array([node.zvalue for node in layers[-1]])
+
+            bit_shifted_zvalues = zvalues >> 1
+
+            nodes = []
+            for z in sorted(set(bit_shifted_zvalues)):
+                mask = (bit_shifted_zvalues == z)
+                nodes.append(Node(list(compress(layers[-1], mask)), z))
+
+            layers.append(nodes)
+
+            if len(nodes) == 1:
+                break
+
+        layers.reverse()
+
+        nodes = []
+        for layer in layers:
+            for node in layer:
+                nodes.append(node)
+
+
+        self.lower_bound = np.empty((len(nodes),3))
+        self.upper_bound = np.empty((len(nodes),3))
+        self.child_map = np.empty(len(nodes))
+        self.child_len = np.empty(len(nodes))
+        self.first_leaf = -1
+
+        for i, node in enumerate(nodes):
+            self.lower_bound[i] = node.lower_bound
+            self.upper_bound[i] = node.upper_bound
+            
+            self.child_len[i] = node.size
+
+            if isinstance(node, Node):
+                for j, child in enumerate(nodes):
+                    if child is node.children[0]:
+                        self.child_map[i] = j
+                        break
+
+            if isinstance(node, Leaf):
+                self.child_map[i] = node.mesh_idx
+
+                if self.first_leaf == -1:
+                    self.first_leaf = i
diff --git a/materials.py b/materials.py
new file mode 100644
index 0000000..9639a52
--- /dev/null
+++ b/materials.py
@@ -0,0 +1,12 @@
+class Material(object):
+    def __init__(self, name='none'):
+        self.name = name
+
+        self.refractive_index = None
+        self.absorption_length = None
+        self.scattering_length = None
+
+air = Material('air')
+h2o = Material('h2o')
+glass = Material('glass')
+vacuum = Material('vacuum')
diff --git a/src/intersect.cu b/src/kernel.cu
index 73b4483..02c52cf 100644
--- a/src/intersect.cu
+++ b/src/kernel.cu
@@ -7,6 +7,8 @@
 
 texture<float4, 1, cudaReadModeElementType> mesh;
 
+
+
 texture<float4, 1, cudaReadModeElementType> upper_bound_arr;
 texture<float4, 1, cudaReadModeElementType> lower_bound_arr;
 
@@ -51,7 +53,13 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction
 
 __device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2)
 {
-	int rgb = floorf(255*dot(normalize(cross(v1-v0,v2-v0)),-direction));
+	float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction);
+	if (scale < 0.0f)
+		scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction);
+
+
+	int rgb = floorf(scale);
+
 
 	return rgb << 16 | rgb << 8 | rgb;
 }
@@ -189,6 +197,8 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio
 
 	int i;
 
+	bool show_leafs = false;
+
 	do
 	{
 		int child_map = tex1Dfetch(child_map_arr, *node);
@@ -206,6 +216,12 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio
 			node--;
 
 		}
+		else if (show_leafs)
+		{
+			hit = true;
+			*pixel = 255;
+			node--;
+		}
 		else // node is a leaf
 		{
 			for (i=0; i < child_len; i++)
diff --git a/stl.py b/stl.py
index 7989d43..276a645 100644
--- a/stl.py
+++ b/stl.py
@@ -23,7 +23,7 @@ def read_ascii_stl(filename):
         vertex.append([float(s) for s in line.strip().split()[1:]])
 
     f.close()
-    return np.array(vertex)
+    return np.array(vertex).reshape(len(vertex)//3,3,3)
 
 def read_binary_stl(filename):
     f = open(filename)
@@ -39,4 +39,4 @@ def read_binary_stl(filename):
         f.read(2)
 
     f.close()
-    return np.array(vertex)
+    return np.array(vertex).reshape(len(vertex)//3,3,3)
diff --git a/test.py b/test.py
index 15b7102..2aef412 100644
--- a/test.py
+++ b/test.py
@@ -1,152 +1,99 @@
+import os
 import time
-from stl import *
 import numpy as np
+
 from pycuda import autoinit
 from pycuda.compiler import SourceModule
 import pycuda.driver as cuda
 from pycuda import gpuarray
-from string import Template
-
-def array2vector(arr, dtype=gpuarray.vec.float3):
-    if len(arr.shape) != 2 or arr.shape[-1] != 3:
-        raise Exception('shape mismatch')
 
-    x = np.empty(arr.shape[0], dtype=dtype)
-    x['x'] = arr[:,0]
-    x['y'] = arr[:,1]
-    x['z'] = arr[:,2]
+from stl import *
+from geometry import *
+from materials import *
+from camera import *
+from vector import *
 
-    return x
+import pygame
 
 print 'device %s' % autoinit.device.name()
 
-source = open('src/intersect.cu').read()
-mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13')
+source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src'
 
-rotate = mod.get_function('rotate')
-translate = mod.get_function('translate')
+source = open(source_directory + '/kernel.cu').read()
+mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13')
 intersect_mesh = mod.get_function('intersect_mesh')
+rotate = mod.get_function('rotate')
 
-import pygame
 size = width, height = 800, 600
-screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))
-
-film_size = (0.035, 0.024)
-focal_length = 0.05
-
-grid = []
-for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
-    for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
-        grid.append((x,0,z))
-grid = np.array(grid)
-grid += (0,focal_length,0)
-grid += (0,300,0)
-
-x = array2vector(grid)
-x_gpu = cuda.to_device(x)
-
-p = (0,300,0)-grid
-
-for i in range(p.shape[0]):
-    p[i] /= np.linalg.norm(p[i])
-
-p = array2vector(p)
-p_gpu = cuda.to_device(p)
-
-
-from zcurve import *
-
-mesh = read_stl('models/tie_interceptor6.stl')
-mesh = mesh.reshape(mesh.shape[0]//3,3,3)
-mesh = morton_order(mesh)
-mesh = mesh.reshape(mesh.shape[0]*3, 3)
-
-mesh3 = array2vector(mesh)
-
-from build import Graph
-
+screen = pygame.display.set_mode(size)
+camera = Camera(size)
+camera.position((0,300,0))
 
-rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+origin, direction = camera.get_pixels()
 
-translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+for i in range(direction.shape[0]):
+    direction[i] /= np.linalg.norm(direction[i])
 
-graph = Graph(mesh3)
+origin, direction = make_vector(origin), make_vector(direction)
 
-lower = array2vector(graph.lower, dtype=gpuarray.vec.float4)
-upper = array2vector(graph.upper, dtype=gpuarray.vec.float4)
-start = graph.start.astype(np.uint32)
-count = graph.count.astype(np.uint32)
-stack = np.zeros(lower.size, dtype=np.int32)
+origin_gpu = cuda.to_device(origin)
+direction_gpu = cuda.to_device(direction)
 
-lower_gpu = cuda.to_device(lower)
-upper_gpu = cuda.to_device(upper)
+geometry = Geometry()
+geometry.add_mesh(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum)
+geometry.build(bits=16)
 
-lower_tex = mod.get_texref('lower_bound_arr')
-upper_tex = mod.get_texref('upper_bound_arr')
+mesh = geometry.mesh
+mesh = mesh.reshape(mesh.shape[0]*3,3)
+mesh = make_vector(mesh, dtype=gpuarray.vec.float4)
+lower_bound = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4)
+upper_bound = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4)
+child_map = geometry.child_map.astype(np.uint32)
+child_len = geometry.child_len.astype(np.uint32)
+first_leaf = np.int32(geometry.first_leaf)
 
-lower_tex.set_address(lower_gpu, lower.nbytes)
-upper_tex.set_address(upper_gpu, upper.nbytes)
-
-lower_tex.set_format(cuda.array_format.FLOAT, 4)
-upper_tex.set_format(cuda.array_format.FLOAT, 4)
-
-start_gpu = cuda.to_device(start)
-count_gpu = cuda.to_device(count)
-stack_gpu = cuda.mem_alloc(stack.nbytes)
-cuda.memcpy_htod(stack_gpu, stack)
+mesh_gpu = cuda.to_device(mesh)
+lower_bound_gpu = cuda.to_device(lower_bound)
+upper_bound_gpu = cuda.to_device(upper_bound)
+child_map_gpu = cuda.to_device(child_map)
+child_len_gpu = cuda.to_device(child_len)
 
+mesh_tex = mod.get_texref('mesh')
+lower_bound_tex = mod.get_texref('lower_bound_arr')
+upper_bound_tex = mod.get_texref('upper_bound_arr')
 child_map_tex = mod.get_texref('child_map_arr')
 child_len_tex = mod.get_texref('child_len_arr')
 
-child_map_tex.set_address(start_gpu, start.nbytes)
-child_len_tex.set_address(count_gpu, count.nbytes)
+mesh_tex.set_address(mesh_gpu, mesh.nbytes)
+lower_bound_tex.set_address(lower_bound_gpu, lower_bound.nbytes)
+upper_bound_tex.set_address(upper_bound_gpu, upper_bound.nbytes)
+child_map_tex.set_address(child_map_gpu, child_map.nbytes)
+child_len_tex.set_address(child_len_gpu, child_len.nbytes)
 
+mesh_tex.set_format(cuda.array_format.FLOAT, 4)
+lower_bound_tex.set_format(cuda.array_format.FLOAT, 4)
+upper_bound_tex.set_format(cuda.array_format.FLOAT, 4)
 child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
 child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
 
-mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
-mesh['x'] = mesh3['x']
-mesh['y'] = mesh3['y']
-mesh['z'] = mesh3['z']
-
-mesh_gpu = cuda.to_device(mesh)
-mesh_tex = mod.get_texref('mesh')
-mesh_tex.set_address(mesh_gpu, mesh.nbytes)
-mesh_tex.set_format(cuda.array_format.FLOAT, 4)
-
-pixel = np.empty(size, dtype=np.int32).flatten()
-
-pixel_gpu = cuda.mem_alloc(pixel.nbytes)
-cuda.memcpy_htod(pixel_gpu, pixel)
-
-speed = []
-elapsed = []
-
-t0total = time.time()
+pixels = np.empty(width*height, dtype=np.int32)
+pixels_gpu = cuda.to_device(pixels)
 
 block_size = 64
 for i in range(100):
-    rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
-    rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
+    rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
+    rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
 
     t0 = time.time()
-    intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])
+    intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex])
     cuda.Context.synchronize()
-
-    elapsed.append(time.time() - t0)
+    elapsed = time.time() - t0
 
     print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \
-        (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1])
+        (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed)
 
-    speed.append(pixel.size/elapsed[-1])
-
-    cuda.memcpy_dtoh(pixel, pixel_gpu)
-    pygame.surfarray.blit_array(screen, pixel.reshape(size))
+    cuda.memcpy_dtoh(pixels, pixels_gpu)
+    pygame.surfarray.blit_array(screen, pixels.reshape(size))
     pygame.display.flip()
 
-print 'average time = %f sec' % np.mean(elapsed)
-print 'average speed = %f photons/sec' % np.mean(speed)
-print 'total time = %f sec' % (time.time() - t0total)
-
 raw_input('press enter to exit')
diff --git a/test_lbne.py b/test_lbne.py
deleted file mode 100644
index 9fe27ef..0000000
--- a/test_lbne.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import time
-from stl import *
-import numpy as np
-from pycuda import autoinit
-from pycuda.compiler import SourceModule
-import pycuda.driver as cuda
-from pycuda import gpuarray
-from string import Template
-
-def array2vector(arr, dtype=gpuarray.vec.float3):
-    if len(arr.shape) != 2 or arr.shape[-1] != 3:
-        raise Exception('shape mismatch')
-
-    x = np.empty(arr.shape[0], dtype=dtype)
-    x['x'] = arr[:,0]
-    x['y'] = arr[:,1]
-    x['z'] = arr[:,2]
-
-    return x
-
-print 'device %s' % autoinit.device.name()
-
-source = open('src/intersect.cu').read()
-mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13')
-
-rotate = mod.get_function('rotate')
-translate = mod.get_function('translate')
-intersect_mesh = mod.get_function('intersect_mesh')
-
-import pygame
-size = width, height = 800, 600
-screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))
-
-film_size = (0.035, 0.024)
-focal_length = 0.05
-
-grid = []
-for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
-    for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
-        grid.append((x,0,z))
-grid = np.array(grid)
-grid += (0,focal_length,0)
-grid += (0,200,30)
-
-x = array2vector(grid)
-x_gpu = cuda.to_device(x)
-
-p = (0,200,30)-grid
-
-for i in range(p.shape[0]):
-    p[i] /= np.linalg.norm(p[i])
-
-p = array2vector(p)
-p_gpu = cuda.to_device(p)
-
-
-from zcurve import *
-
-#mesh = read_stl('models/tie_interceptor6.stl')
-#mesh = read_stl('models/sphere.stl')
-#mesh = read_stl('models/IV.stl')
-
-#from lbne import build_lbne
-#mesh = build_lbne()
-
-mesh = read_stl('models/lbne_sphere_only.stl')
-
-mesh = mesh.reshape(mesh.shape[0]//3,3,3)
-mesh = morton_order(mesh)
-mesh = mesh.reshape(mesh.shape[0]*3, 3)
-
-mesh3 = array2vector(mesh)
-
-from build import Graph
-
-
-#rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
-
-#translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
-
-graph = Graph(mesh3)
-
-lower = array2vector(graph.lower, dtype=gpuarray.vec.float4)
-upper = array2vector(graph.upper, dtype=gpuarray.vec.float4)
-start = graph.start.astype(np.uint32)
-count = graph.count.astype(np.uint32)
-stack = np.zeros(lower.size, dtype=np.int32)
-
-lower_gpu = cuda.to_device(lower)
-upper_gpu = cuda.to_device(upper)
-
-lower_tex = mod.get_texref('lower_bound_arr')
-upper_tex = mod.get_texref('upper_bound_arr')
-
-lower_tex.set_address(lower_gpu, lower.nbytes)
-upper_tex.set_address(upper_gpu, upper.nbytes)
-
-lower_tex.set_format(cuda.array_format.FLOAT, 4)
-upper_tex.set_format(cuda.array_format.FLOAT, 4)
-
-start_gpu = cuda.to_device(start)
-count_gpu = cuda.to_device(count)
-stack_gpu = cuda.mem_alloc(stack.nbytes)
-cuda.memcpy_htod(stack_gpu, stack)
-
-child_map_tex = mod.get_texref('child_map_arr')
-child_len_tex = mod.get_texref('child_len_arr')
-
-child_map_tex.set_address(start_gpu, start.nbytes)
-child_len_tex.set_address(count_gpu, count.nbytes)
-
-child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-
-mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
-mesh['x'] = mesh3['x']
-mesh['y'] = mesh3['y']
-mesh['z'] = mesh3['z']
-
-mesh_gpu = cuda.to_device(mesh)
-mesh_tex = mod.get_texref('mesh')
-mesh_tex.set_address(mesh_gpu, mesh.nbytes)
-mesh_tex.set_format(cuda.array_format.FLOAT, 4)
-
-pixel = np.empty(size, dtype=np.int32).flatten()
-
-pixel_gpu = cuda.mem_alloc(pixel.nbytes)
-cuda.memcpy_htod(pixel_gpu, pixel)
-
-speed = []
-elapsed = []
-
-t0total = time.time()
-
-block_size = 64
-for i in range(1000):
-    rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
-    rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
-    translate(np.int32(x.size), x_gpu,
-              gpuarray.vec.make_float3(-np.sin(i*np.pi/100),-np.cos(i*np.pi/100),0),
-              block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
-    t0 = time.time()
-    intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])
-    cuda.Context.synchronize()
-
-    elapsed.append(time.time() - t0)
-
-    print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \
-        (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1])
-
-    speed.append(pixel.size/elapsed[-1])
-
-    cuda.memcpy_dtoh(pixel, pixel_gpu)
-    pygame.surfarray.blit_array(screen, pixel.reshape(size))
-    pygame.display.flip()
-
-print 'average time = %f sec' % np.mean(elapsed)
-print 'average speed = %f photons/sec' % np.mean(speed)
-print 'total time = %f sec' % (time.time() - t0total)
-
-raw_input('press enter to exit')
diff --git a/vector.py b/vector.py
new file mode 100644
index 0000000..7b36e07
--- /dev/null
+++ b/vector.py
@@ -0,0 +1,13 @@
+import numpy as np
+from pycuda import gpuarray
+
+def make_vector(arr, dtype=gpuarray.vec.float3):
+    if len(arr.shape) != 2 or arr.shape[-1] != 3:
+        raise Exception('shape mismatch')
+
+    x = np.empty(arr.shape[0], dtype)
+    x['x'] = arr[:,0]
+    x['y'] = arr[:,1]
+    x['z'] = arr[:,2]
+
+    return x
diff --git a/zcurve.py b/zcurve.py
deleted file mode 100644
index 5ef37f3..0000000
--- a/zcurve.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import numpy as np
-from itertools import chain
-
-def interleave(*args):
-    return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2)
-
-def morton_order(mesh, dtype=np.uint8):
-    vertices = mesh.reshape(mesh.shape[0]*3, 3)
-    
-    upper_bound = np.max(vertices, axis=0)
-    lower_bound = np.min(vertices, axis=0)
-
-    quantize = lambda x: dtype((x-lower_bound)*np.iinfo(dtype).max/(upper_bound-lower_bound))
-
-    zvalue = []
-    for triangle in mesh:
-        center = np.mean(np.vstack((triangle[0], triangle[1], triangle[2])), axis=0)
-        zvalue.append(interleave(*quantize(center)))
-
-    ordered_mesh = np.empty(mesh.shape)
-
-    for i, idx in enumerate(zip(*sorted(zip(zvalue, range(len(zvalue)))))[-1]):
-        ordered_mesh[i] = mesh[idx]
-
-    return ordered_mesh