diff options
Diffstat (limited to 'test.py')
-rw-r--r-- | test.py | 88 |
1 files changed, 88 insertions, 0 deletions
"""Benchmark/demo: rotate a camera around an STL mesh on the GPU and display
the result of the ``intersect_triangle_mesh`` kernel in a pygame window.

The CUDA source is assembled from the headers under ``src/`` and compiled at
start-up; the mesh is read from ``models/tie_interceptor6.stl``.
"""

import time
from stl import *
import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray
import pygame

# Threads per block used for every kernel launch in this script.
THREADS_PER_BLOCK = 256

# Number of frames rendered (and timed) by the main loop.
N_FRAMES = 100


def array2float3(arr):
    """Pack an (N, 3) array into a 1-D array of ``gpuarray.vec.float3``.

    Args:
        arr: array-like with exactly two dimensions whose last dimension
            has length 3.

    Returns:
        A numpy array of length N with dtype ``gpuarray.vec.float3`` whose
        ``x``, ``y`` and ``z`` fields hold columns 0, 1 and 2 of ``arr``.

    Raises:
        ValueError: if ``arr`` is not two-dimensional with a last dimension
            of length 3.
    """
    arr = np.asarray(arr)
    if arr.ndim != 2 or arr.shape[-1] != 3:
        raise ValueError('shape mismatch')

    packed = np.empty(arr.shape[0], dtype=gpuarray.vec.float3)
    packed['x'] = arr[:, 0]
    packed['y'] = arr[:, 1]
    packed['z'] = arr[:, 2]

    return packed


def _read_sources(paths):
    """Return the concatenated text of ``paths``, closing each file."""
    chunks = []
    for path in paths:
        with open(path) as f:
            chunks.append(f.read())
    return ''.join(chunks)


def _to_device(host_array):
    """Allocate device memory for ``host_array`` and copy it over."""
    device_ptr = cuda.mem_alloc(host_array.nbytes)
    cuda.memcpy_htod(device_ptr, host_array)
    return device_ptr


def _num_blocks(n):
    """Number of blocks launched for ``n`` work items."""
    return n // THREADS_PER_BLOCK + 1


print('device %s' % autoinit.device.name())

source = _read_sources(['src/linalg.h', 'src/matrix.h',
                        'src/rotate.h', 'src/intersect.cu'])

mod = SourceModule(source, no_extern_c=True, arch='sm_13')
intersect = mod.get_function('intersect_triangle_mesh')
rotate = mod.get_function('rotate')
translate = mod.get_function('translate')

mesh = array2float3(pull_vertices_binary('models/tie_interceptor6.stl'))

size = width, height = 800, 600
screen = pygame.display.set_mode(size)

film_size = (0.035, 0.024)
focal_length = 0.05

# One point per pixel on the film, ordered x-major (all z values for the
# first x, then the next x, ...) so that reshaping the flat pixel buffer to
# (width, height) lines up with the grid.
film_x = np.linspace(-film_size[0]/2, film_size[0]/2, width)
film_z = np.linspace(-film_size[1]/2, film_size[1]/2, height)
grid = np.zeros((width*height, 3))
grid[:, 0] = np.repeat(film_x, height)
grid[:, 2] = np.tile(film_z, width)
grid += (0, focal_length, 0)
grid += (0, 300, 0)

# x holds the grid points; p holds the vector from each grid point to
# (0, 300, 0).
x = array2float3(grid)
p = array2float3(((0, 300, 0) - grid))

x_gpu = _to_device(x)
p_gpu = _to_device(p)
mesh_gpu = _to_device(mesh)

# Zero-initialised so that any pixel the kernel leaves unwritten reads back
# as 0 instead of uninitialised host memory.
pixel = np.zeros(width*height, dtype=np.int32)
pixel_gpu = _to_device(pixel)

block = (THREADS_PER_BLOCK, 1, 1)
mesh_grid = (_num_blocks(mesh.size), 1)
ray_grid = (_num_blocks(width*height), 1)

# Orient and position the mesh once before rendering.
rotate(np.int32(mesh.size), mesh_gpu, np.float32(-np.pi/2),
       gpuarray.vec.make_float3(1, 0, 0), block=block, grid=mesh_grid)

translate(np.int32(mesh.size), mesh_gpu, gpuarray.vec.make_float3(0, 30, 0),
          block=block, grid=mesh_grid)

# Per-frame rotation applied to both x and p: pi/50 about the z axis.
step_angle = np.float32(np.pi/50)
z_axis = gpuarray.vec.make_float3(0, 0, 1)

t0 = time.time()
for _ in range(N_FRAMES):
    rotate(np.int32(x.size), x_gpu, step_angle, z_axis,
           block=block, grid=ray_grid)

    rotate(np.int32(p.size), p_gpu, step_angle, z_axis,
           block=block, grid=ray_grid)

    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3),
              mesh_gpu, pixel_gpu, block=block, grid=ray_grid)

    cuda.Context.synchronize()

    cuda.memcpy_dtoh(pixel, pixel_gpu)
    pygame.surfarray.blit_array(screen, pixel.reshape(size))
    pygame.display.flip()

elapsed = time.time() - t0

# elapsed covers all N_FRAMES frames, so the photon count and the rate are
# both reported for the whole run rather than for a single frame.
total_photons = N_FRAMES * pixel.size

print('%i triangles, %i photons, %f sec; (%f photons/s)' %
      (mesh.size//3, total_photons, elapsed, total_photons/elapsed))

# raw_input only exists on Python 2; fall back to input elsewhere.
try:
    _prompt = raw_input
except NameError:
    _prompt = input

_prompt('press enter to exit')