summaryrefslogtreecommitdiff
path: root/gpu.py
blob: 3d30387508303f5e2da3217e3fe3b9516c5a0b92 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import time
import numpy as np

from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray

def make_vector(arr, dtype=gpuarray.vec.float3):
    """Pack an (N, 3) array into a 1-D structured array of vector records.

    Columns 0, 1 and 2 of `arr` are copied into the 'x', 'y' and 'z' fields
    of the result, one record per row of `arr`.

    Args:
        arr: array whose shape is two-dimensional with exactly three columns.
        dtype: structured dtype providing 'x', 'y' and 'z' fields
            (defaults to gpuarray.vec.float3).

    Returns:
        A new array of length arr.shape[0] with dtype `dtype`.

    Raises:
        ValueError: if `arr` is not two-dimensional with three columns.
    """
    if len(arr.shape) != 2 or arr.shape[-1] != 3:
        # ValueError subclasses Exception, so existing handlers still match.
        raise ValueError('shape mismatch')

    # Zero-fill instead of np.empty: any field that is not assigned below
    # (e.g. a fourth component when a 4-component dtype is passed) would
    # otherwise carry arbitrary memory contents into the result.
    v = np.zeros(arr.shape[0], dtype)
    v['x'] = arr[:, 0]
    v['y'] = arr[:, 1]
    v['z'] = arr[:, 2]

    return v

# Report the name of the device exposed by pycuda's autoinit module.  The
# parenthesised form prints the same text under the Python 2 print statement
# and the Python 3 print function.
print('device %s' % autoinit.device.name())

# The CUDA sources live in the 'src' directory next to this file.
source_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'src')

# Read the kernel source through a context manager so the file handle is
# closed right away instead of being left to the garbage collector.
with open(os.path.join(source_directory, 'intersect.cu')) as source_file:
    source = source_file.read()

# Compile the kernels at import time; '-I' lets intersect.cu include other
# files from the same directory.
# NOTE(review): no_extern_c=True / cache_dir=False presumably disable the
# extern "C" wrapper and the compiler cache -- confirm against the pycuda
# SourceModule documentation.
module = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)

# Kernel entry points looked up by name in the compiled module.
cuda_intersect = module.get_function('intersect_mesh')
cuda_rotate = module.get_function('rotate')
cuda_translate = module.get_function('translate')

class GPU(object):
    """Keep a geometry's arrays on the device and launch `intersect_mesh`.

    `load_geometry` uploads the geometry arrays and binds them to the texture
    references of the compiled module; `call` launches the kernel with those
    texture references attached.  `load_geometry` must run before `call`,
    because `call` reads the texture attributes it creates.
    """

    def __init__(self):
        pass

    def _bind_texture(self, name, host_array, array_format, channels):
        """Upload `host_array` and bind it to the module texture `name`.

        Args:
            name: name of the texture reference in the compiled module.
            host_array: numpy array to copy to the device.
            array_format: a cuda.array_format value for the texture elements.
            channels: number of components per texture element.

        Returns:
            A (device allocation, texture reference) tuple.
        """
        device_mem = cuda.to_device(host_array)
        texref = module.get_texref(name)
        texref.set_address(device_mem, host_array.nbytes)
        texref.set_format(array_format, channels)
        return device_mem, texref

    def load_geometry(self, geometry):
        """Copy `geometry` to the device and bind it to the kernel textures.

        Reads `mesh`, `lower_bound`, `upper_bound`, `child_map`, `child_len`
        and `first_leaf` from `geometry`.  Host copies, device allocations
        (`*_gpu`) and texture references (`*_tex`) are stored on `self`; the
        device allocations are kept as attributes so they stay referenced
        while the textures are in use.
        """
        # Host-side copies, with the integer arrays converted to the 32-bit
        # unsigned type that matches the texture format set below.
        self.mesh = geometry.mesh
        self.lower_bound = geometry.lower_bound
        self.upper_bound = geometry.upper_bound
        self.child_map = geometry.child_map.astype(np.uint32)
        self.child_len = geometry.child_len.astype(np.uint32)
        self.first_leaf = np.int32(geometry.first_leaf)

        # Pack the coordinate arrays into 4-component vector records.
        # The mesh is flattened to one row of three values per record --
        # presumably mesh is (n_triangles, 3, 3); TODO confirm.
        self.mesh_vec = make_vector(self.mesh.reshape(self.mesh.shape[0]*3, 3), dtype=gpuarray.vec.float4)
        self.lower_bound_vec = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4)
        self.upper_bound_vec = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4)

        float_format = cuda.array_format.FLOAT
        uint_format = cuda.array_format.UNSIGNED_INT32

        # Upload each array and bind it to its texture reference.
        self.mesh_gpu, self.mesh_tex = self._bind_texture(
            'mesh', self.mesh_vec, float_format, 4)
        self.lower_bound_gpu, self.lower_bound_tex = self._bind_texture(
            'lower_bound_arr', self.lower_bound_vec, float_format, 4)
        self.upper_bound_gpu, self.upper_bound_tex = self._bind_texture(
            'upper_bound_arr', self.upper_bound_vec, float_format, 4)
        self.child_map_gpu, self.child_map_tex = self._bind_texture(
            'child_map_arr', self.child_map, uint_format, 1)
        self.child_len_gpu, self.child_len_tex = self._bind_texture(
            'child_len_arr', self.child_len, uint_format, 1)

        self.geometry = geometry

    def call(self, *args, **kwargs):
        """Launch the `intersect_mesh` kernel with the geometry textures.

        All positional and keyword arguments are forwarded to the kernel
        call; any caller-supplied `texrefs` keyword is replaced by the
        textures bound in `load_geometry`.

        Returns:
            Whatever the kernel call returns (for example the kernel time
            when pycuda's time_kernel=True is passed), so the result is no
            longer discarded.
        """
        kwargs['texrefs'] = [self.mesh_tex, self.lower_bound_tex, self.upper_bound_tex, self.child_map_tex, self.child_len_tex]
        return cuda_intersect(*args, **kwargs)