summary | refs | log | tree | commit | diff
path: root/test.py
diff options
context:
space:
mode:
author: Anthony LaTorre <telatorre@gmail.com> 2011-05-13 01:03:42 -0400
committer: Anthony LaTorre <telatorre@gmail.com> 2011-05-13 01:03:42 -0400
commit: 519acb39bdb1df9869bb17bcc710108ac8c02983 (patch)
tree: bc4fd6f165df09feb7fdc0b166d38df144ebc9a2 /test.py
parent: 6996620497d0e6382df8e1cb0d07f6746ac3b0f3 (diff)
download: chroma-519acb39bdb1df9869bb17bcc710108ac8c02983.tar.gz
chroma-519acb39bdb1df9869bb17bcc710108ac8c02983.tar.bz2
chroma-519acb39bdb1df9869bb17bcc710108ac8c02983.zip
added a bounding volume hierarchy
Diffstat (limited to 'test.py')
-rw-r--r-- test.py 88
1 files changed, 75 insertions, 13 deletions
diff --git a/test.py b/test.py
index 4390c50..15b7102 100644
--- a/test.py
+++ b/test.py
@@ -5,6 +5,7 @@ from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray
+from string import Template
def array2vector(arr, dtype=gpuarray.vec.float3):
if len(arr.shape) != 2 or arr.shape[-1] != 3:
@@ -19,18 +20,16 @@ def array2vector(arr, dtype=gpuarray.vec.float3):
print 'device %s' % autoinit.device.name()
-source = open('src/linalg.h').read() + open('src/matrix.h').read() + \
- open('src/rotate.h').read() + open('src/intersect.cu').read()
+source = open('src/intersect.cu').read()
+mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13')
-mod = SourceModule(source, no_extern_c=True, arch='sm_13')
-
-intersect = mod.get_function('intersect_triangle_mesh')
rotate = mod.get_function('rotate')
translate = mod.get_function('translate')
+intersect_mesh = mod.get_function('intersect_mesh')
import pygame
size = width, height = 800, 600
-screen = pygame.display.set_mode(size)
+screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))
film_size = (0.035, 0.024)
focal_length = 0.05
@@ -46,15 +45,65 @@ grid += (0,300,0)
x = array2vector(grid)
x_gpu = cuda.to_device(x)
-p = array2vector(((0,300,0)-grid))
+p = (0,300,0)-grid
+
+for i in range(p.shape[0]):
+ p[i] /= np.linalg.norm(p[i])
+
+p = array2vector(p)
p_gpu = cuda.to_device(p)
-mesh3 = array2vector(read_stl('models/tie_interceptor6.stl'))
+
+from zcurve import *
+
+mesh = read_stl('models/tie_interceptor6.stl')
+mesh = mesh.reshape(mesh.shape[0]//3,3,3)
+mesh = morton_order(mesh)
+mesh = mesh.reshape(mesh.shape[0]*3, 3)
+
+mesh3 = array2vector(mesh)
+
+from build import Graph
+
rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1))
+graph = Graph(mesh3)
+
+lower = array2vector(graph.lower, dtype=gpuarray.vec.float4)
+upper = array2vector(graph.upper, dtype=gpuarray.vec.float4)
+start = graph.start.astype(np.uint32)
+count = graph.count.astype(np.uint32)
+stack = np.zeros(lower.size, dtype=np.int32)
+
+lower_gpu = cuda.to_device(lower)
+upper_gpu = cuda.to_device(upper)
+
+lower_tex = mod.get_texref('lower_bound_arr')
+upper_tex = mod.get_texref('upper_bound_arr')
+
+lower_tex.set_address(lower_gpu, lower.nbytes)
+upper_tex.set_address(upper_gpu, upper.nbytes)
+
+lower_tex.set_format(cuda.array_format.FLOAT, 4)
+upper_tex.set_format(cuda.array_format.FLOAT, 4)
+
+start_gpu = cuda.to_device(start)
+count_gpu = cuda.to_device(count)
+stack_gpu = cuda.mem_alloc(stack.nbytes)
+cuda.memcpy_htod(stack_gpu, stack)
+
+child_map_tex = mod.get_texref('child_map_arr')
+child_len_tex = mod.get_texref('child_len_arr')
+
+child_map_tex.set_address(start_gpu, start.nbytes)
+child_len_tex.set_address(count_gpu, count.nbytes)
+
+child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+
mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
mesh['x'] = mesh3['x']
mesh['y'] = mesh3['y']
@@ -66,7 +115,14 @@ mesh_tex.set_address(mesh_gpu, mesh.nbytes)
mesh_tex.set_format(cuda.array_format.FLOAT, 4)
pixel = np.empty(size, dtype=np.int32).flatten()
-pixel_gpu = cuda.to_device(pixel)
+
+pixel_gpu = cuda.mem_alloc(pixel.nbytes)
+cuda.memcpy_htod(pixel_gpu, pixel)
+
+speed = []
+elapsed = []
+
+t0total = time.time()
block_size = 64
for i in range(100):
@@ -75,16 +131,22 @@ for i in range(100):
rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
t0 = time.time()
- intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), pixel_gpu, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex])
+ intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])
cuda.Context.synchronize()
- elapsed = time.time() - t0
+ elapsed.append(time.time() - t0)
- print '%i triangles, %i photons, %f sec; (%f photons/s)' % \
- (mesh.size//3, pixel.size, elapsed, pixel.size/elapsed)
+ print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \
+ (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1])
+
+ speed.append(pixel.size/elapsed[-1])
cuda.memcpy_dtoh(pixel, pixel_gpu)
pygame.surfarray.blit_array(screen, pixel.reshape(size))
pygame.display.flip()
+print 'average time = %f sec' % np.mean(elapsed)
+print 'average speed = %f photons/sec' % np.mean(speed)
+print 'total time = %f sec' % (time.time() - t0total)
+
raw_input('press enter to exit')