diff options
Diffstat (limited to 'test.py')
-rw-r--r-- | test.py | 88 |
1 file changed, 75 insertions, 13 deletions
@@ -5,6 +5,7 @@ from pycuda import autoinit from pycuda.compiler import SourceModule import pycuda.driver as cuda from pycuda import gpuarray +from string import Template def array2vector(arr, dtype=gpuarray.vec.float3): if len(arr.shape) != 2 or arr.shape[-1] != 3: @@ -19,18 +20,16 @@ def array2vector(arr, dtype=gpuarray.vec.float3): print 'device %s' % autoinit.device.name() -source = open('src/linalg.h').read() + open('src/matrix.h').read() + \ - open('src/rotate.h').read() + open('src/intersect.cu').read() +source = open('src/intersect.cu').read() +mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13') -mod = SourceModule(source, no_extern_c=True, arch='sm_13') - -intersect = mod.get_function('intersect_triangle_mesh') rotate = mod.get_function('rotate') translate = mod.get_function('translate') +intersect_mesh = mod.get_function('intersect_mesh') import pygame size = width, height = 800, 600 -screen = pygame.display.set_mode(size) +screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF)) film_size = (0.035, 0.024) focal_length = 0.05 @@ -46,15 +45,65 @@ grid += (0,300,0) x = array2vector(grid) x_gpu = cuda.to_device(x) -p = array2vector(((0,300,0)-grid)) +p = (0,300,0)-grid + +for i in range(p.shape[0]): + p[i] /= np.linalg.norm(p[i]) + +p = array2vector(p) p_gpu = cuda.to_device(p) -mesh3 = array2vector(read_stl('models/tie_interceptor6.stl')) + +from zcurve import * + +mesh = read_stl('models/tie_interceptor6.stl') +mesh = mesh.reshape(mesh.shape[0]//3,3,3) +mesh = morton_order(mesh) +mesh = mesh.reshape(mesh.shape[0]*3, 3) + +mesh3 = array2vector(mesh) + +from build import Graph + rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) +graph = Graph(mesh3) + +lower 
= array2vector(graph.lower, dtype=gpuarray.vec.float4) +upper = array2vector(graph.upper, dtype=gpuarray.vec.float4) +start = graph.start.astype(np.uint32) +count = graph.count.astype(np.uint32) +stack = np.zeros(lower.size, dtype=np.int32) + +lower_gpu = cuda.to_device(lower) +upper_gpu = cuda.to_device(upper) + +lower_tex = mod.get_texref('lower_bound_arr') +upper_tex = mod.get_texref('upper_bound_arr') + +lower_tex.set_address(lower_gpu, lower.nbytes) +upper_tex.set_address(upper_gpu, upper.nbytes) + +lower_tex.set_format(cuda.array_format.FLOAT, 4) +upper_tex.set_format(cuda.array_format.FLOAT, 4) + +start_gpu = cuda.to_device(start) +count_gpu = cuda.to_device(count) +stack_gpu = cuda.mem_alloc(stack.nbytes) +cuda.memcpy_htod(stack_gpu, stack) + +child_map_tex = mod.get_texref('child_map_arr') +child_len_tex = mod.get_texref('child_len_arr') + +child_map_tex.set_address(start_gpu, start.nbytes) +child_len_tex.set_address(count_gpu, count.nbytes) + +child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) +child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4) mesh['x'] = mesh3['x'] mesh['y'] = mesh3['y'] @@ -66,7 +115,14 @@ mesh_tex.set_address(mesh_gpu, mesh.nbytes) mesh_tex.set_format(cuda.array_format.FLOAT, 4) pixel = np.empty(size, dtype=np.int32).flatten() -pixel_gpu = cuda.to_device(pixel) + +pixel_gpu = cuda.mem_alloc(pixel.nbytes) +cuda.memcpy_htod(pixel_gpu, pixel) + +speed = [] +elapsed = [] + +t0total = time.time() block_size = 64 for i in range(100): @@ -75,16 +131,22 @@ for i in range(100): rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) t0 = time.time() - intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), pixel_gpu, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex]) + intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, 
np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex]) cuda.Context.synchronize() - elapsed = time.time() - t0 + elapsed.append(time.time() - t0) - print '%i triangles, %i photons, %f sec; (%f photons/s)' % \ - (mesh.size//3, pixel.size, elapsed, pixel.size/elapsed) + print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \ + (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1]) + + speed.append(pixel.size/elapsed[-1]) cuda.memcpy_dtoh(pixel, pixel_gpu) pygame.surfarray.blit_array(screen, pixel.reshape(size)) pygame.display.flip() +print 'average time = %f sec' % np.mean(elapsed) +print 'average speed = %f photons/sec' % np.mean(speed) +print 'total time = %f sec' % (time.time() - t0total) + raw_input('press enter to exit') |