summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--build.py102
-rw-r--r--bvh.py47
-rw-r--r--camera.py22
-rw-r--r--geometry.py110
-rw-r--r--materials.py12
-rw-r--r--src/kernel.cu (renamed from src/intersect.cu)18
-rw-r--r--stl.py4
-rw-r--r--test.py167
-rw-r--r--test_lbne.py164
-rw-r--r--vector.py13
-rw-r--r--zcurve.py25
11 files changed, 280 insertions, 404 deletions
diff --git a/build.py b/build.py
deleted file mode 100644
index 8c05c5d..0000000
--- a/build.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import numpy as np
-
-class Leaf(object):
- def __init__(self, meshasdf, mesh_idx):
- self.xmin, self.ymin, self.zmin = meshasdf[0]
- self.xmax, self.ymax, self.zmax = meshasdf[0]
-
- for x, y, z in meshasdf[1:]:
- if x < self.xmin:
- self.xmin = x
- if x > self.xmax:
- self.xmax = x
- if y < self.ymin:
- self.ymin = y
- if y > self.ymax:
- self.ymax = y
- if z < self.zmin:
- self.zmin = z
- if z > self.zmax:
- self.zmax = z
-
- self.mesh_idx = mesh_idx
- self.size = meshasdf.size//3
-
-class Node(object):
- def __init__(self, children):
- self.xmin = children[0].xmin
- self.xmax = children[0].xmax
- self.ymin = children[0].ymin
- self.ymax = children[0].ymax
- self.zmin = children[0].zmin
- self.zmax = children[0].zmax
-
- for child in children[1:]:
- if child.xmin < self.xmin:
- self.xmin = child.xmin
- if child.xmax > self.xmax:
- self.xmax = child.xmax
- if child.ymin < self.ymin:
- self.ymin = child.ymin
- if child.ymax > self.ymax:
- self.ymax = child.ymax
- if child.zmin < self.zmin:
- self.zmin = child.zmin
- if child.zmax > self.zmax:
- self.zmax = child.zmax
-
- self.children = children
- self.size = len(children)
-
-class Graph(object):
- def __init__(self, mesh, triangles_per_leaf=2, children_per_node=4):
- leafs = []
- for i in range(0, mesh.size, triangles_per_leaf*3):
- leafs.append(Leaf(mesh[i:i+triangles_per_leaf*3], i))
-
- layers = []
- layers.append(leafs)
-
- while len(layers[-1]) > 1:
- nodes = []
- for i in range(0, len(layers[-1]), children_per_node):
- nodes.append(Node(layers[-1][i:i+children_per_node]))
-
- layers.append(nodes)
-
- # leaves last
- layers.reverse()
-
- nodes = []
- for layer in layers:
- for node in layer:
- nodes.append(node)
-
- lower = []
- upper = []
- start = []
- count = [] # number of child nodes or triangles
- first_leaf = -1
-
- for i, node in enumerate(nodes):
- lower.append([node.xmin, node.ymin, node.zmin])
- upper.append([node.xmax, node.ymax, node.zmax])
- count.append(node.size)
-
- if isinstance(node, Node):
- for j, child in enumerate(nodes):
- if child is node.children[0]:
- start.append(j)
- break
-
- if isinstance(node, Leaf):
- start.append(node.mesh_idx)
-
- if first_leaf == -1:
- first_leaf = i
-
- self.lower = np.array(lower)
- self.upper = np.array(upper)
- self.start = np.array(start)
- self.count = np.array(count)
- self.first_leaf = first_leaf
diff --git a/bvh.py b/bvh.py
new file mode 100644
index 0000000..d1e4c1a
--- /dev/null
+++ b/bvh.py
@@ -0,0 +1,47 @@
+import numpy as np
+from itertools import chain
+
+def compress(data, selectors):
+ return (d for d, s in zip(data, selectors) if s)
+
+class Leaf(object):
+ def __init__(self, mesh, mesh_idx, zvalue=None):
+ self.mesh_idx = mesh_idx
+ self.size = mesh.shape[0]
+ self.zvalue = zvalue
+ self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
+ self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
+ self.center = (self.lower_bound + self.upper_bound)/2.0
+
+class Node(object):
+ def __init__(self, children, zvalue=None):
+ self.size = len(children)
+ self.zvalue = zvalue
+
+ lower_pts = np.array([child.lower_bound for child in children])
+ upper_pts = np.array([child.upper_bound for child in children])
+
+ self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])])
+ self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])])
+
+ self.children = children
+
+ self.center = (self.lower_bound + self.upper_bound)/2.0
+
+def interleave(*args):
+ return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2)
+
+def morton_order(mesh, bits=8):
+ lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
+ upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
+
+ def quantize(x):
+ return np.uint64((x-lower_bound)*bits/(upper_bound-lower_bound))
+
+ zvalues = np.empty(mesh.shape[0], dtype=np.uint64)
+ for i, triangle in enumerate(mesh):
+ zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0)))
+
+ order = np.array(zip(*sorted(zip(zvalues, range(len(zvalues)))))[-1])
+
+ return order, zvalues
diff --git a/camera.py b/camera.py
new file mode 100644
index 0000000..dc3e32e
--- /dev/null
+++ b/camera.py
@@ -0,0 +1,22 @@
+import numpy as np
+
+class Camera(object):
+ def __init__(self, size = (800, 600), film_size = (0.035, 0.024), focal_length=0.05):
+ width, height = size
+
+ grid = []
+ for i, x in enumerate(np.linspace(-film_size[0]/2, film_size[0]/2, width)):
+ for j, z in enumerate(np.linspace(-film_size[1]/2, film_size[1]/2, height)):
+ grid.append((x,0,z))
+
+ self.grid = np.array(grid)
+ self.grid += (0,focal_length,0)
+
+ self.focal_point = np.zeros(3)
+
+ def position(self, position):
+ self.grid += position
+ self.focal_point += position
+
+ def get_pixels(self):
+ return self.grid, self.focal_point-self.grid
diff --git a/geometry.py b/geometry.py
new file mode 100644
index 0000000..d365141
--- /dev/null
+++ b/geometry.py
@@ -0,0 +1,110 @@
+import numpy as np
+from bvh import *
+
+class Geometry(object):
+ """
+ Geometry object.
+ """
+
+ def __init__(self):
+ self.materials = []
+ self.mesh = np.empty(0)
+ self.inside = np.empty(0)
+ self.outside = np.empty(0)
+
+ def add_mesh(self, mesh, inside, outside):
+ """
+ Add a triangle mesh.
+
+ Args:
+ - mesh: array of shape (n,3,3)
+ Array of vertices for a closed mesh with n triangles.
+
+ - inside: Material
+ Material inside the mesh.
+
+ - outside: Material
+ Material outside the mesh.
+ """
+
+ if len(mesh.shape) != 3 or mesh.shape[1] != 3 or mesh.shape[2] != 3:
+ raise Exception('shape mismatch')
+
+ if inside not in self.materials:
+ self.materials.append(inside)
+ if outside not in self.materials:
+ self.materials.append(outside)
+
+ self.inside.resize(self.inside.size+mesh.shape[0])
+ self.inside[-mesh.shape[0]:] = np.tile(self.materials.index(inside), mesh.shape[0])
+
+ self.outside.resize(self.outside.size+mesh.shape[0])
+ self.outside[-mesh.shape[0]:] = np.tile(self.materials.index(outside), mesh.shape[0])
+
+ self.mesh.resize((self.mesh.shape[0]+mesh.shape[0],3,3))
+ self.mesh[-mesh.shape[0]:] = mesh
+
+ def build(self, bits=16):
+ order, zvalues = morton_order(self.mesh, bits)
+
+
+ zvalues = zvalues[order]
+ self.mesh = self.mesh[order]
+ self.inside = self.inside[order]
+ self.outside = self.outside[order]
+
+ leafs = []
+
+ for z in sorted(set(zvalues)):
+ mask = (zvalues == z)
+ leafs.append(Leaf(self.mesh[mask], 3*np.where(mask)[0][0], z))
+
+ layers = []
+ layers.append(leafs)
+
+ while True:
+ zvalues = np.array([node.zvalue for node in layers[-1]])
+
+ bit_shifted_zvalues = zvalues >> 1
+
+ nodes = []
+ for z in sorted(set(bit_shifted_zvalues)):
+ mask = (bit_shifted_zvalues == z)
+ nodes.append(Node(list(compress(layers[-1], mask)), z))
+
+ layers.append(nodes)
+
+ if len(nodes) == 1:
+ break
+
+ layers.reverse()
+
+ nodes = []
+ for layer in layers:
+ for node in layer:
+ nodes.append(node)
+
+
+ self.lower_bound = np.empty((len(nodes),3))
+ self.upper_bound = np.empty((len(nodes),3))
+ self.child_map = np.empty(len(nodes))
+ self.child_len = np.empty(len(nodes))
+ self.first_leaf = -1
+
+ for i, node in enumerate(nodes):
+ self.lower_bound[i] = node.lower_bound
+ self.upper_bound[i] = node.upper_bound
+
+ self.child_len[i] = node.size
+
+ if isinstance(node, Node):
+ for j, child in enumerate(nodes):
+ if child is node.children[0]:
+ self.child_map[i] = j
+ break
+
+ if isinstance(node, Leaf):
+ self.child_map[i] = node.mesh_idx
+
+ if self.first_leaf == -1:
+ self.first_leaf = i
diff --git a/materials.py b/materials.py
new file mode 100644
index 0000000..9639a52
--- /dev/null
+++ b/materials.py
@@ -0,0 +1,12 @@
+class Material(object):
+ def __init__(self, name='none'):
+ self.name = name
+
+ self.refractive_index = None
+ self.absorption_length = None
+ self.scattering_length = None
+
+air = Material('air')
+h2o = Material('h2o')
+glass = Material('glass')
+vacuum = Material('vacuum')
diff --git a/src/intersect.cu b/src/kernel.cu
index 73b4483..02c52cf 100644
--- a/src/intersect.cu
+++ b/src/kernel.cu
@@ -7,6 +7,8 @@
texture<float4, 1, cudaReadModeElementType> mesh;
+
+
texture<float4, 1, cudaReadModeElementType> upper_bound_arr;
texture<float4, 1, cudaReadModeElementType> lower_bound_arr;
@@ -51,7 +53,13 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction
__device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2)
{
- int rgb = floorf(255*dot(normalize(cross(v1-v0,v2-v0)),-direction));
+ float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction);
+ if (scale < 0.0f)
+ scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction);
+
+
+ int rgb = floorf(scale);
+
return rgb << 16 | rgb << 8 | rgb;
}
@@ -189,6 +197,8 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio
int i;
+ bool show_leafs = false;
+
do
{
int child_map = tex1Dfetch(child_map_arr, *node);
@@ -206,6 +216,12 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio
node--;
}
+ else if (show_leafs)
+ {
+ hit = true;
+ *pixel = 255;
+ node--;
+ }
else // node is a leaf
{
for (i=0; i < child_len; i++)
diff --git a/stl.py b/stl.py
index 7989d43..276a645 100644
--- a/stl.py
+++ b/stl.py
@@ -23,7 +23,7 @@ def read_ascii_stl(filename):
vertex.append([float(s) for s in line.strip().split()[1:]])
f.close()
- return np.array(vertex)
+ return np.array(vertex).reshape(len(vertex)//3,3,3)
def read_binary_stl(filename):
f = open(filename)
@@ -39,4 +39,4 @@ def read_binary_stl(filename):
f.read(2)
f.close()
- return np.array(vertex)
+ return np.array(vertex).reshape(len(vertex)//3,3,3)
diff --git a/test.py b/test.py
index 15b7102..2aef412 100644
--- a/test.py
+++ b/test.py
@@ -1,152 +1,99 @@
+import os
import time
-from stl import *
import numpy as np
+
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray
-from string import Template
-
-def array2vector(arr, dtype=gpuarray.vec.float3):
- if len(arr.shape) != 2 or arr.shape[-1] != 3:
- raise Exception('shape mismatch')
- x = np.empty(arr.shape[0], dtype=dtype)
- x['x'] = arr[:,0]
- x['y'] = arr[:,1]
- x['z'] = arr[:,2]
+from stl import *
+from geometry import *
+from materials import *
+from camera import *
+from vector import *
- return x
+import pygame
print 'device %s' % autoinit.device.name()
-source = open('src/intersect.cu').read()
-mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13')
+source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src'
-rotate = mod.get_function('rotate')
-translate = mod.get_function('translate')
+source = open(source_directory + '/kernel.cu').read()
+mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13')
intersect_mesh = mod.get_function('intersect_mesh')
+rotate = mod.get_function('rotate')
-import pygame
size = width, height = 800, 600
-screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))
-
-film_size = (0.035, 0.024)
-focal_length = 0.05
-
-grid = []
-for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
- for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
- grid.append((x,0,z))
-grid = np.array(grid)
-grid += (0,focal_length,0)
-grid += (0,300,0)
-
-x = array2vector(grid)
-x_gpu = cuda.to_device(x)
-
-p = (0,300,0)-grid
-
-for i in range(p.shape[0]):
- p[i] /= np.linalg.norm(p[i])
-
-p = array2vector(p)
-p_gpu = cuda.to_device(p)
-
-
-from zcurve import *
-
-mesh = read_stl('models/tie_interceptor6.stl')
-mesh = mesh.reshape(mesh.shape[0]//3,3,3)
-mesh = morton_order(mesh)
-mesh = mesh.reshape(mesh.shape[0]*3, 3)
-
-mesh3 = array2vector(mesh)
-
-from build import Graph
-
+screen = pygame.display.set_mode(size)
+camera = Camera(size)
+camera.position((0,300,0))
-rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+origin, direction = camera.get_pixels()
-translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+for i in range(direction.shape[0]):
+ direction[i] /= np.linalg.norm(direction[i])
-graph = Graph(mesh3)
+origin, direction = make_vector(origin), make_vector(direction)
-lower = array2vector(graph.lower, dtype=gpuarray.vec.float4)
-upper = array2vector(graph.upper, dtype=gpuarray.vec.float4)
-start = graph.start.astype(np.uint32)
-count = graph.count.astype(np.uint32)
-stack = np.zeros(lower.size, dtype=np.int32)
+origin_gpu = cuda.to_device(origin)
+direction_gpu = cuda.to_device(direction)
-lower_gpu = cuda.to_device(lower)
-upper_gpu = cuda.to_device(upper)
+geometry = Geometry()
+geometry.add_mesh(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum)
+geometry.build(bits=16)
-lower_tex = mod.get_texref('lower_bound_arr')
-upper_tex = mod.get_texref('upper_bound_arr')
+mesh = geometry.mesh
+mesh = mesh.reshape(mesh.shape[0]*3,3)
+mesh = make_vector(mesh, dtype=gpuarray.vec.float4)
+lower_bound = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4)
+upper_bound = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4)
+child_map = geometry.child_map.astype(np.uint32)
+child_len = geometry.child_len.astype(np.uint32)
+first_leaf = np.int32(geometry.first_leaf)
-lower_tex.set_address(lower_gpu, lower.nbytes)
-upper_tex.set_address(upper_gpu, upper.nbytes)
-
-lower_tex.set_format(cuda.array_format.FLOAT, 4)
-upper_tex.set_format(cuda.array_format.FLOAT, 4)
-
-start_gpu = cuda.to_device(start)
-count_gpu = cuda.to_device(count)
-stack_gpu = cuda.mem_alloc(stack.nbytes)
-cuda.memcpy_htod(stack_gpu, stack)
+mesh_gpu = cuda.to_device(mesh)
+lower_bound_gpu = cuda.to_device(lower_bound)
+upper_bound_gpu = cuda.to_device(upper_bound)
+child_map_gpu = cuda.to_device(child_map)
+child_len_gpu = cuda.to_device(child_len)
+mesh_tex = mod.get_texref('mesh')
+lower_bound_tex = mod.get_texref('lower_bound_arr')
+upper_bound_tex = mod.get_texref('upper_bound_arr')
child_map_tex = mod.get_texref('child_map_arr')
child_len_tex = mod.get_texref('child_len_arr')
-child_map_tex.set_address(start_gpu, start.nbytes)
-child_len_tex.set_address(count_gpu, count.nbytes)
+mesh_tex.set_address(mesh_gpu, mesh.nbytes)
+lower_bound_tex.set_address(lower_bound_gpu, lower_bound.nbytes)
+upper_bound_tex.set_address(upper_bound_gpu, upper_bound.nbytes)
+child_map_tex.set_address(child_map_gpu, child_map.nbytes)
+child_len_tex.set_address(child_len_gpu, child_len.nbytes)
+mesh_tex.set_format(cuda.array_format.FLOAT, 4)
+lower_bound_tex.set_format(cuda.array_format.FLOAT, 4)
+upper_bound_tex.set_format(cuda.array_format.FLOAT, 4)
child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
-mesh['x'] = mesh3['x']
-mesh['y'] = mesh3['y']
-mesh['z'] = mesh3['z']
-
-mesh_gpu = cuda.to_device(mesh)
-mesh_tex = mod.get_texref('mesh')
-mesh_tex.set_address(mesh_gpu, mesh.nbytes)
-mesh_tex.set_format(cuda.array_format.FLOAT, 4)
-
-pixel = np.empty(size, dtype=np.int32).flatten()
-
-pixel_gpu = cuda.mem_alloc(pixel.nbytes)
-cuda.memcpy_htod(pixel_gpu, pixel)
-
-speed = []
-elapsed = []
-
-t0total = time.time()
+pixels = np.empty(width*height, dtype=np.int32)
+pixels_gpu = cuda.to_device(pixels)
block_size = 64
for i in range(100):
- rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
- rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
+ rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
+ rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
t0 = time.time()
- intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])
+ intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex])
cuda.Context.synchronize()
-
- elapsed.append(time.time() - t0)
+ elapsed = time.time() - t0
print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \
- (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1])
+ (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed)
- speed.append(pixel.size/elapsed[-1])
-
- cuda.memcpy_dtoh(pixel, pixel_gpu)
- pygame.surfarray.blit_array(screen, pixel.reshape(size))
+ cuda.memcpy_dtoh(pixels, pixels_gpu)
+ pygame.surfarray.blit_array(screen, pixels.reshape(size))
pygame.display.flip()
-print 'average time = %f sec' % np.mean(elapsed)
-print 'average speed = %f photons/sec' % np.mean(speed)
-print 'total time = %f sec' % (time.time() - t0total)
-
raw_input('press enter to exit')
diff --git a/test_lbne.py b/test_lbne.py
deleted file mode 100644
index 9fe27ef..0000000
--- a/test_lbne.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import time
-from stl import *
-import numpy as np
-from pycuda import autoinit
-from pycuda.compiler import SourceModule
-import pycuda.driver as cuda
-from pycuda import gpuarray
-from string import Template
-
-def array2vector(arr, dtype=gpuarray.vec.float3):
- if len(arr.shape) != 2 or arr.shape[-1] != 3:
- raise Exception('shape mismatch')
-
- x = np.empty(arr.shape[0], dtype=dtype)
- x['x'] = arr[:,0]
- x['y'] = arr[:,1]
- x['z'] = arr[:,2]
-
- return x
-
-print 'device %s' % autoinit.device.name()
-
-source = open('src/intersect.cu').read()
-mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13')
-
-rotate = mod.get_function('rotate')
-translate = mod.get_function('translate')
-intersect_mesh = mod.get_function('intersect_mesh')
-
-import pygame
-size = width, height = 800, 600
-screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))
-
-film_size = (0.035, 0.024)
-focal_length = 0.05
-
-grid = []
-for x in np.linspace(-film_size[0]/2, film_size[0]/2, width):
- for z in np.linspace(-film_size[1]/2, film_size[1]/2, height):
- grid.append((x,0,z))
-grid = np.array(grid)
-grid += (0,focal_length,0)
-grid += (0,200,30)
-
-x = array2vector(grid)
-x_gpu = cuda.to_device(x)
-
-p = (0,200,30)-grid
-
-for i in range(p.shape[0]):
- p[i] /= np.linalg.norm(p[i])
-
-p = array2vector(p)
-p_gpu = cuda.to_device(p)
-
-
-from zcurve import *
-
-#mesh = read_stl('models/tie_interceptor6.stl')
-#mesh = read_stl('models/sphere.stl')
-#mesh = read_stl('models/IV.stl')
-
-#from lbne import build_lbne
-#mesh = build_lbne()
-
-mesh = read_stl('models/lbne_sphere_only.stl')
-
-mesh = mesh.reshape(mesh.shape[0]//3,3,3)
-mesh = morton_order(mesh)
-mesh = mesh.reshape(mesh.shape[0]*3, 3)
-
-mesh3 = array2vector(mesh)
-
-from build import Graph
-
-
-#rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
-
-#translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
-
-graph = Graph(mesh3)
-
-lower = array2vector(graph.lower, dtype=gpuarray.vec.float4)
-upper = array2vector(graph.upper, dtype=gpuarray.vec.float4)
-start = graph.start.astype(np.uint32)
-count = graph.count.astype(np.uint32)
-stack = np.zeros(lower.size, dtype=np.int32)
-
-lower_gpu = cuda.to_device(lower)
-upper_gpu = cuda.to_device(upper)
-
-lower_tex = mod.get_texref('lower_bound_arr')
-upper_tex = mod.get_texref('upper_bound_arr')
-
-lower_tex.set_address(lower_gpu, lower.nbytes)
-upper_tex.set_address(upper_gpu, upper.nbytes)
-
-lower_tex.set_format(cuda.array_format.FLOAT, 4)
-upper_tex.set_format(cuda.array_format.FLOAT, 4)
-
-start_gpu = cuda.to_device(start)
-count_gpu = cuda.to_device(count)
-stack_gpu = cuda.mem_alloc(stack.nbytes)
-cuda.memcpy_htod(stack_gpu, stack)
-
-child_map_tex = mod.get_texref('child_map_arr')
-child_len_tex = mod.get_texref('child_len_arr')
-
-child_map_tex.set_address(start_gpu, start.nbytes)
-child_len_tex.set_address(count_gpu, count.nbytes)
-
-child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-
-mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
-mesh['x'] = mesh3['x']
-mesh['y'] = mesh3['y']
-mesh['z'] = mesh3['z']
-
-mesh_gpu = cuda.to_device(mesh)
-mesh_tex = mod.get_texref('mesh')
-mesh_tex.set_address(mesh_gpu, mesh.nbytes)
-mesh_tex.set_format(cuda.array_format.FLOAT, 4)
-
-pixel = np.empty(size, dtype=np.int32).flatten()
-
-pixel_gpu = cuda.mem_alloc(pixel.nbytes)
-cuda.memcpy_htod(pixel_gpu, pixel)
-
-speed = []
-elapsed = []
-
-t0total = time.time()
-
-block_size = 64
-for i in range(1000):
- rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
- rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
- translate(np.int32(x.size), x_gpu,
- gpuarray.vec.make_float3(-np.sin(i*np.pi/100),-np.cos(i*np.pi/100),0),
- block=(block_size,1,1), grid=(width*height//block_size+1,1))
-
- t0 = time.time()
- intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])
- cuda.Context.synchronize()
-
- elapsed.append(time.time() - t0)
-
- print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \
- (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1])
-
- speed.append(pixel.size/elapsed[-1])
-
- cuda.memcpy_dtoh(pixel, pixel_gpu)
- pygame.surfarray.blit_array(screen, pixel.reshape(size))
- pygame.display.flip()
-
-print 'average time = %f sec' % np.mean(elapsed)
-print 'average speed = %f photons/sec' % np.mean(speed)
-print 'total time = %f sec' % (time.time() - t0total)
-
-raw_input('press enter to exit')
diff --git a/vector.py b/vector.py
new file mode 100644
index 0000000..7b36e07
--- /dev/null
+++ b/vector.py
@@ -0,0 +1,13 @@
+import numpy as np
+from pycuda import gpuarray
+
+def make_vector(arr, dtype=gpuarray.vec.float3):
+ if len(arr.shape) != 2 or arr.shape[-1] != 3:
+ raise Exception('shape mismatch')
+
+ x = np.empty(arr.shape[0], dtype)
+ x['x'] = arr[:,0]
+ x['y'] = arr[:,1]
+ x['z'] = arr[:,2]
+
+ return x
diff --git a/zcurve.py b/zcurve.py
deleted file mode 100644
index 5ef37f3..0000000
--- a/zcurve.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import numpy as np
-from itertools import chain
-
-def interleave(*args):
- return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2)
-
-def morton_order(mesh, dtype=np.uint8):
- vertices = mesh.reshape(mesh.shape[0]*3, 3)
-
- upper_bound = np.max(vertices, axis=0)
- lower_bound = np.min(vertices, axis=0)
-
- quantize = lambda x: dtype((x-lower_bound)*np.iinfo(dtype).max/(upper_bound-lower_bound))
-
- zvalue = []
- for triangle in mesh:
- center = np.mean(np.vstack((triangle[0], triangle[1], triangle[2])), axis=0)
- zvalue.append(interleave(*quantize(center)))
-
- ordered_mesh = np.empty(mesh.shape)
-
- for i, idx in enumerate(zip(*sorted(zip(zvalue, range(len(zvalue)))))[-1]):
- ordered_mesh[i] = mesh[idx]
-
- return ordered_mesh