1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
import os
import time
import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray
from stl import *
from geometry import *
from materials import *
from camera import *
from vector import *
import pygame
# Report the name of the CUDA device that pycuda.autoinit picked up.
# (Single-argument parenthesised form: same output under Python 2's print
# statement, and also valid as a Python 3 function call.)
print('device %s' % autoinit.device.name())

# The CUDA sources are expected in a 'src' directory next to this script.
source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src'

# Read the kernel source inside a context manager so the file handle is
# closed immediately rather than whenever the garbage collector gets to it.
with open(source_directory + '/kernel.cu') as source_file:
    source = source_file.read()

# Compile the kernel source.  The source directory is put on the include
# path; no_extern_c=True keeps pycuda from wrapping the source in an
# extern "C" block, and cache_dir=False turns off pycuda's compiler cache.
mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, cache_dir=False)

# Kernel entry points launched later in the script.
intersect_mesh = mod.get_function('intersect_mesh')
rotate = mod.get_function('rotate')
# Window dimensions in pixels; `size` is also handed to the Camera below.
size = width, height = 800, 600
screen = pygame.display.set_mode(size)

camera = Camera(size)
camera.position((0,300,50))

# Ray origins and directions as produced by the camera.
origin, direction = camera.get_rays()

# Scale every ray direction to unit length, in place, with one vectorised
# operation instead of a Python-level loop over the rows.
# NOTE(review): assumes `direction` is a 2-D array with one ray per row, as
# the per-row indexing this replaces implied -- confirm against
# Camera.get_rays().
direction /= np.sqrt((direction * direction).sum(axis=1))[:, np.newaxis]

# Convert to the vector layout used for the device copies and upload them.
origin, direction = make_vector(origin), make_vector(direction)
origin_gpu = cuda.to_device(origin)
direction_gpu = cuda.to_device(direction)
# Load the STL model and wrap it in a Solid, passing `vacuum` for both of
# the remaining Solid arguments.
solid = Solid(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum)

# Build a geometry that contains just this one solid.
geometry = Geometry()
geometry.add_solid(solid)
geometry.build()

# Mesh: collapse the leading axis so the array has 3 * mesh.shape[0] rows of
# 3 columns, convert the rows to float4 vectors, then copy to the device.
# NOTE(review): presumably one row per triangle vertex -- confirm against
# Geometry.build().
mesh = geometry.mesh
row_count = mesh.shape[0] * 3
mesh = make_vector(mesh.reshape(row_count, 3), dtype=gpuarray.vec.float4)

# Remaining arrays produced by geometry.build(), converted on the host
# before anything is uploaded.
lower_bound = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4)
upper_bound = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4)
child_map = geometry.child_map.astype(np.uint32)
child_len = geometry.child_len.astype(np.uint32)

# Passed to the intersect_mesh kernel as a 32-bit integer scalar.
first_leaf = np.int32(geometry.first_leaf)

# Device copies of the host arrays above.
mesh_gpu = cuda.to_device(mesh)
lower_bound_gpu = cuda.to_device(lower_bound)
upper_bound_gpu = cuda.to_device(upper_bound)
child_map_gpu = cuda.to_device(child_map)
child_len_gpu = cuda.to_device(child_len)
# Look up the texture references declared in the compiled kernel module.
mesh_tex = mod.get_texref('mesh')
lower_bound_tex = mod.get_texref('lower_bound_arr')
upper_bound_tex = mod.get_texref('upper_bound_arr')
child_map_tex = mod.get_texref('child_map_arr')
child_len_tex = mod.get_texref('child_len_arr')

# Texel layouts as (array format, number of channels).
_float_x4 = (cuda.array_format.FLOAT, 4)
_uint32_x1 = (cuda.array_format.UNSIGNED_INT32, 1)

# Bind each texture reference to its device allocation (sized from the
# matching host array) and then declare its texel layout.
for _texref, _device_mem, _host_array, _layout in (
        (mesh_tex, mesh_gpu, mesh, _float_x4),
        (lower_bound_tex, lower_bound_gpu, lower_bound, _float_x4),
        (upper_bound_tex, upper_bound_gpu, upper_bound, _float_x4),
        (child_map_tex, child_map_gpu, child_map, _uint32_x1),
        (child_len_tex, child_len_gpu, child_len, _uint32_x1)):
    _texref.set_address(_device_mem, _host_array.nbytes)
    _texref.set_format(*_layout)
# Host buffer the kernel output is copied back into (one int32 per pixel),
# plus its device counterpart.  np.empty leaves the contents uninitialised.
pixels = np.empty(width*height, dtype=np.int32)
pixels_gpu = cuda.to_device(pixels)

# Second int32-per-pixel buffer handed to intersect_mesh as its last argument.
states = np.empty(width*height, dtype=np.int32)
states_gpu = cuda.to_device(states)

block_size = 64

# Launch configuration shared by every kernel call below: 1-D blocks of
# block_size threads, and enough blocks to cover width*height threads.
launch_config = dict(block=(block_size,1,1), grid=(width*height//block_size+1,1))

# Kernel arguments that do not change between frames, built once up front
# instead of on every iteration.
origin_count = np.int32(origin.size)
direction_count = np.int32(direction.size)
rotation_step = np.float32(np.pi/100)
rotation_axis = gpuarray.vec.make_float3(0,0,1)
texture_refs = [mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex]

# 100 frames, each passing an angle of pi/100 and the axis (0, 0, 1) to the
# rotate kernel for both the origins and the directions.
for i in range(100):
    rotate(origin_count, origin_gpu, rotation_step, rotation_axis, **launch_config)
    rotate(direction_count, direction_gpu, rotation_step, rotation_axis, **launch_config)

    # Time the intersection kernel; synchronize so the launch has actually
    # finished before the clock is read again.
    t0 = time.time()
    intersect_mesh(origin_count, origin_gpu, direction_gpu, pixels_gpu, first_leaf, states_gpu, texrefs=texture_refs, **launch_config)
    cuda.Context.synchronize()
    elapsed = time.time() - t0

    # Single-argument parenthesised form: same output under Python 2's print
    # statement, and also valid as a Python 3 function call.
    print('%i triangles, %i photons, %f sec; (%f photons/sec)' %
          (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed))

    # Copy the frame back to the host and show it.
    cuda.memcpy_dtoh(pixels, pixels_gpu)
    pygame.surfarray.blit_array(screen, pixels.reshape(size))
    pygame.display.flip()

# Keep the window open until the user presses enter.
# (raw_input exists only on Python 2, which this script targets.)
raw_input('press enter to exit')
|