1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
import time
from stl import *
import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray
def array2vector(arr, dtype=gpuarray.vec.float3):
    """Pack the columns of an (N, 3) array into a 1-D structured array.

    Parameters:
        arr: array whose ``shape`` is ``(N, 3)``; column 0 is copied to the
            'x' field, column 1 to 'y' and column 2 to 'z'.
        dtype: structured dtype providing 'x', 'y' and 'z' fields
            (defaults to ``gpuarray.vec.float3``).

    Returns:
        A newly allocated array of length N with the requested dtype.

    Raises:
        Exception: if `arr` is not two-dimensional or its last axis is not
            of length 3.
    """
    n_dims = len(arr.shape)
    if not (n_dims == 2 and arr.shape[-1] == 3):
        raise Exception('shape mismatch')

    packed = np.empty(arr.shape[0], dtype=dtype)
    for column, field in enumerate(('x', 'y', 'z')):
        packed[field] = arr[:, column]
    return packed
# Report which CUDA device pycuda.autoinit picked up.
print('device %s' % autoinit.device.name())

# The CUDA sources are concatenated, in this order, into a single string
# that is compiled as one module.  Each file is read inside a ``with``
# block so its handle is closed immediately instead of being left to the
# garbage collector.
source_parts = []
for source_path in ('src/linalg.h', 'src/matrix.h',
                    'src/rotate.h', 'src/intersect.cu'):
    with open(source_path) as source_file:
        source_parts.append(source_file.read())
source = ''.join(source_parts)

# NOTE(review): no_extern_c=True presumably disables PyCUDA's automatic
# extern "C" wrapping of the source, and arch='sm_13' pins the target
# architecture -- confirm both against the PyCUDA SourceModule docs.
mod = SourceModule(source, no_extern_c=True, arch='sm_13')

# Kernel entry points used by the rest of the script.
intersect = mod.get_function('intersect_triangle_mesh')
rotate = mod.get_function('rotate')
translate = mod.get_function('translate')
import pygame
# Window size in pixels; the ray grid below has one point per pixel.
size = width, height = 800, 600
screen = pygame.display.set_mode(size)

# Extent of the sampling plane along x and z, and its offset along y.
# NOTE(review): names suggest a 35mm x 24mm film with a 50mm focal length,
# i.e. metres -- confirm the unit convention used by the kernels.
film_size = (0.035, 0.024)
focal_length = 0.05

# Sample positions across the film: `width` samples along x and `height`
# samples along z.
film_x = np.linspace(-film_size[0]/2, film_size[0]/2, width)
film_z = np.linspace(-film_size[1]/2, film_size[1]/2, height)

# Build the (width*height, 3) grid without a Python-level double loop.
# Row order matches "for x: for z:" iteration: x varies slowest (repeat),
# z varies fastest (tile); the y column starts at zero.
grid = np.zeros((width*height, 3))
grid[:, 0] = np.repeat(film_x, height)
grid[:, 2] = np.tile(film_z, width)

# Shift the grid along y, first by the focal length and then by 300.  The
# two additions are kept separate so rounding matches the original sums.
grid += (0,focal_length,0)
grid += (0,300,0)

# x: the grid points themselves, packed as float3 and uploaded.
x = array2vector(grid)
x_gpu = cuda.to_device(x)

# p: vector from each grid point to the point (0, 300, 0).
# NOTE(review): x and p are presumably the ray origins and directions
# consumed by the intersect kernel -- confirm against src/intersect.cu.
p = array2vector(((0,300,0)-grid))
p_gpu = cuda.to_device(p)
# Read the model and pack it as float3 records.  The render loop reports
# mesh.size//3 as the triangle count, so every three consecutive records
# are treated as one triangle.
mesh3 = array2vector(read_stl('models/tie_interceptor6.stl'))

# Both transform kernels run over every record with the same launch
# configuration; cuda.InOut copies mesh3 to the device and back per call.
mesh_count = np.int32(mesh3.size)
mesh_launch = dict(block=(256,1,1), grid=(mesh3.size//256+1,1))

# NOTE(review): presumably rotates by -pi/2 about the x axis and then
# translates by 30 along y -- confirm the argument meaning in src/rotate.h.
rotate(mesh_count, cuda.InOut(mesh3), np.float32(-np.pi/2),
       gpuarray.vec.make_float3(1,0,0), **mesh_launch)
translate(mesh_count, cuda.InOut(mesh3),
          gpuarray.vec.make_float3(0,30,0), **mesh_launch)

# Re-pack as float4 for the texture.  Only x, y and z are copied; the
# fourth component is left as whatever np.empty allocated.
mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)
for field in ('x', 'y', 'z'):
    mesh[field] = mesh3[field]
mesh_gpu = cuda.to_device(mesh)

# Bind the uploaded buffer to the module's 'mesh' texture reference as
# 4-component float data.
mesh_tex = mod.get_texref('mesh')
mesh_tex.set_address(mesh_gpu, mesh.nbytes)
mesh_tex.set_format(cuda.array_format.FLOAT, 4)
# One int32 per pixel, flattened; the device copy is what the intersect
# kernel receives, and it is copied back into `pixel` after each frame.
pixel = np.empty(size, dtype=np.int32).flatten()
pixel_gpu = cuda.to_device(pixel)

block_size = 64

# Launch configuration shared by every per-ray kernel call below.
ray_launch = dict(block=(block_size,1,1),
                  grid=(width*height//block_size+1,1))

# Loop-invariant kernel arguments.
# NOTE(review): presumably an angle of pi/100 about the z axis per frame --
# confirm the argument meaning in src/rotate.h.
angle_step = np.float32(np.pi/100)
spin_axis = gpuarray.vec.make_float3(0,0,1)
triangle_count = mesh.size//3

# NOTE(review): the original indentation was lost; the loop is assumed to
# cover everything from the two rotate calls through display.flip().
for frame in range(100):
    # Apply the same rotation step to both ray arrays on the device.
    rotate(np.int32(x.size), x_gpu, angle_step, spin_axis, **ray_launch)
    rotate(np.int32(p.size), p_gpu, angle_step, spin_axis, **ray_launch)

    # Time the intersection kernel alone; synchronize so the measurement
    # includes the kernel's execution and not just its launch.
    started = time.time()
    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(triangle_count),
              pixel_gpu, texrefs=[mesh_tex], **ray_launch)
    cuda.Context.synchronize()
    seconds = time.time() - started

    print('%i triangles, %i photons, %f sec; (%f photons/s)' %
          (triangle_count, pixel.size, seconds, pixel.size/seconds))

    # Fetch the frame and show it in the pygame window.
    cuda.memcpy_dtoh(pixel, pixel_gpu)
    pygame.surfarray.blit_array(screen, pixel.reshape(size))
    pygame.display.flip()

# Keep the window open until the user confirms.
raw_input('press enter to exit')
|