diff options
Diffstat (limited to 'test.py')
-rw-r--r-- | test.py | 167 |
1 files changed, 57 insertions, 110 deletions
@@ -1,152 +1,99 @@ +import os import time -from stl import * import numpy as np + from pycuda import autoinit from pycuda.compiler import SourceModule import pycuda.driver as cuda from pycuda import gpuarray -from string import Template - -def array2vector(arr, dtype=gpuarray.vec.float3): - if len(arr.shape) != 2 or arr.shape[-1] != 3: - raise Exception('shape mismatch') - x = np.empty(arr.shape[0], dtype=dtype) - x['x'] = arr[:,0] - x['y'] = arr[:,1] - x['z'] = arr[:,2] +from stl import * +from geometry import * +from materials import * +from camera import * +from vector import * - return x +import pygame print 'device %s' % autoinit.device.name() -source = open('src/intersect.cu').read() -mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13') +source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src' -rotate = mod.get_function('rotate') -translate = mod.get_function('translate') +source = open(source_directory + '/kernel.cu').read() +mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13') intersect_mesh = mod.get_function('intersect_mesh') +rotate = mod.get_function('rotate') -import pygame size = width, height = 800, 600 -screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF)) - -film_size = (0.035, 0.024) -focal_length = 0.05 - -grid = [] -for x in np.linspace(-film_size[0]/2, film_size[0]/2, width): - for z in np.linspace(-film_size[1]/2, film_size[1]/2, height): - grid.append((x,0,z)) -grid = np.array(grid) -grid += (0,focal_length,0) -grid += (0,300,0) - -x = array2vector(grid) -x_gpu = cuda.to_device(x) - -p = (0,300,0)-grid - -for i in range(p.shape[0]): - p[i] /= np.linalg.norm(p[i]) - -p = array2vector(p) -p_gpu = cuda.to_device(p) - - -from zcurve import * - -mesh = read_stl('models/tie_interceptor6.stl') -mesh = mesh.reshape(mesh.shape[0]//3,3,3) -mesh = morton_order(mesh) -mesh = mesh.reshape(mesh.shape[0]*3, 3) - -mesh3 = array2vector(mesh) - -from build import Graph - +screen = pygame.display.set_mode(size) +camera = Camera(size) +camera.position((0,300,0)) -rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) +origin, direction = camera.get_pixels() -translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) +for i in range(direction.shape[0]): + direction[i] /= np.linalg.norm(direction[i]) -graph = Graph(mesh3) +origin, direction = make_vector(origin), make_vector(direction) -lower = array2vector(graph.lower, dtype=gpuarray.vec.float4) -upper = array2vector(graph.upper, dtype=gpuarray.vec.float4) -start = graph.start.astype(np.uint32) -count = graph.count.astype(np.uint32) -stack = np.zeros(lower.size, dtype=np.int32) +origin_gpu = cuda.to_device(origin) +direction_gpu = cuda.to_device(direction) -lower_gpu = cuda.to_device(lower) -upper_gpu = cuda.to_device(upper) +geometry = Geometry() +geometry.add_mesh(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum) +geometry.build(bits=16) -lower_tex = mod.get_texref('lower_bound_arr') -upper_tex = mod.get_texref('upper_bound_arr') +mesh = geometry.mesh +mesh = mesh.reshape(mesh.shape[0]*3,3) +mesh = make_vector(mesh, dtype=gpuarray.vec.float4) +lower_bound = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4) +upper_bound = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4) +child_map = geometry.child_map.astype(np.uint32) +child_len = geometry.child_len.astype(np.uint32) +first_leaf = np.int32(geometry.first_leaf) -lower_tex.set_address(lower_gpu, lower.nbytes) -upper_tex.set_address(upper_gpu, upper.nbytes) - -lower_tex.set_format(cuda.array_format.FLOAT, 4) -upper_tex.set_format(cuda.array_format.FLOAT, 4) - -start_gpu = cuda.to_device(start) -count_gpu = cuda.to_device(count) -stack_gpu = cuda.mem_alloc(stack.nbytes) -cuda.memcpy_htod(stack_gpu, stack) +mesh_gpu = cuda.to_device(mesh) +lower_bound_gpu = cuda.to_device(lower_bound) +upper_bound_gpu = cuda.to_device(upper_bound) +child_map_gpu = cuda.to_device(child_map) +child_len_gpu = cuda.to_device(child_len) +mesh_tex = mod.get_texref('mesh') +lower_bound_tex = mod.get_texref('lower_bound_arr') +upper_bound_tex = mod.get_texref('upper_bound_arr') child_map_tex = mod.get_texref('child_map_arr') child_len_tex = mod.get_texref('child_len_arr') -child_map_tex.set_address(start_gpu, start.nbytes) -child_len_tex.set_address(count_gpu, count.nbytes) +mesh_tex.set_address(mesh_gpu, mesh.nbytes) +lower_bound_tex.set_address(lower_bound_gpu, lower_bound.nbytes) +upper_bound_tex.set_address(upper_bound_gpu, upper_bound.nbytes) +child_map_tex.set_address(child_map_gpu, child_map.nbytes) +child_len_tex.set_address(child_len_gpu, child_len.nbytes) +mesh_tex.set_format(cuda.array_format.FLOAT, 4) +lower_bound_tex.set_format(cuda.array_format.FLOAT, 4) +upper_bound_tex.set_format(cuda.array_format.FLOAT, 4) child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) -mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4) -mesh['x'] = mesh3['x'] -mesh['y'] = mesh3['y'] -mesh['z'] = mesh3['z'] - -mesh_gpu = cuda.to_device(mesh) -mesh_tex = mod.get_texref('mesh') -mesh_tex.set_address(mesh_gpu, mesh.nbytes) -mesh_tex.set_format(cuda.array_format.FLOAT, 4) - -pixel = np.empty(size, dtype=np.int32).flatten() - -pixel_gpu = cuda.mem_alloc(pixel.nbytes) -cuda.memcpy_htod(pixel_gpu, pixel) - -speed = [] -elapsed = [] - -t0total = time.time() +pixels = np.empty(width*height, dtype=np.int32) +pixels_gpu = cuda.to_device(pixels) block_size = 64 for i in range(100): - rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) - - rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) + rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) + rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) t0 = time.time() - intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex]) + intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex]) cuda.Context.synchronize() - - elapsed.append(time.time() - t0) + elapsed = time.time() - t0 print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \ - (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1]) + (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed) - speed.append(pixel.size/elapsed[-1]) - - cuda.memcpy_dtoh(pixel, pixel_gpu) - pygame.surfarray.blit_array(screen, pixel.reshape(size)) + cuda.memcpy_dtoh(pixels, pixels_gpu) + pygame.surfarray.blit_array(screen, pixels.reshape(size)) pygame.display.flip() -print 'average time = %f sec' % np.mean(elapsed) -print 'average speed = %f photons/sec' % np.mean(speed) -print 'total time = %f sec' % (time.time() - t0total) - raw_input('press enter to exit') |