diff options
Diffstat (limited to 'gpu.py')
-rw-r--r-- | gpu.py | 91 |
1 files changed, 44 insertions, 47 deletions
@@ -1,13 +1,17 @@ import os -import time import numpy as np - from pycuda import autoinit from pycuda.compiler import SourceModule import pycuda.driver as cuda from pycuda import gpuarray +import layout + +float3 = gpuarray.vec.float3 +float4 = gpuarray.vec.float4 -def make_vector(arr, dtype=gpuarray.vec.float3): +source = open(layout.source + '/kernel.cu').read() + +def make_vector(arr, dtype=float3): if len(arr.shape) != 2 or arr.shape[-1] != 3: raise Exception('shape mismatch') @@ -18,59 +22,52 @@ def make_vector(arr, dtype=gpuarray.vec.float3): return v -print 'device %s' % autoinit.device.name() - -source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src' - -source = open(source_directory + '/intersect.cu').read() -module = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) - -cuda_intersect = module.get_function('intersect_mesh') -cuda_rotate = module.get_function('rotate') -cuda_translate = module.get_function('translate') - class GPU(object): + """ + Object to handle all of the texture allocation/referencing when loading + a geometry onto the GPU. + """ def __init__(self): - pass + print 'device %s' % autoinit.device.name() - def load_geometry(self, geometry): - self.mesh = geometry.mesh - self.lower_bound = geometry.lower_bound - self.upper_bound = geometry.upper_bound - self.child_map = geometry.child_map.astype(np.uint32) - self.child_len = geometry.child_len.astype(np.uint32) - self.first_leaf = np.int32(geometry.first_leaf) - - self.mesh_vec = make_vector(self.mesh.reshape(self.mesh.shape[0]*3,3), dtype=gpuarray.vec.float4) - self.lower_bound_vec = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4) - self.upper_bound_vec = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4) + self.module = SourceModule(source, options=['-I' + layout.source], + no_extern_c=True, cache_dir=False) - self.mesh_gpu = cuda.to_device(self.mesh_vec) - self.lower_bound_gpu = cuda.to_device(self.lower_bound_vec) - self.upper_bound_gpu = cuda.to_device(self.upper_bound_vec) - self.child_map_gpu = cuda.to_device(self.child_map) - self.child_len_gpu = cuda.to_device(self.child_len) + self.get_function = self.module.get_function - self.mesh_tex = module.get_texref('mesh') - self.lower_bound_tex = module.get_texref('lower_bound_arr') - self.upper_bound_tex = module.get_texref('upper_bound_arr') - self.child_map_tex = module.get_texref('child_map_arr') - self.child_len_tex = module.get_texref('child_len_arr') + def load_geometry(self, geometry): + """ + Load all the textures from `geometry` onto the GPU and return a list + of texture references. + """ + self.mesh_vec = make_vector(geometry.mesh.reshape(geometry.mesh.shape[0]*3,3), float4) + self.lower_bounds_vec = make_vector(geometry.lower_bounds, float4) + self.upper_bounds_vec = make_vector(geometry.upper_bounds, float4) + self.uchild_map = geometry.child_map.astype(np.uint32) + self.uchild_len = geometry.child_len.astype(np.uint32) + self.mesh_gpu = cuda.to_device(self.mesh_vec) + self.lower_bounds_gpu = cuda.to_device(self.lower_bounds_vec) + self.upper_bounds_gpu = cuda.to_device(self.upper_bounds_vec) + self.child_map_gpu = cuda.to_device(self.uchild_map) + self.child_len_gpu = cuda.to_device(self.uchild_len) + + self.mesh_tex = self.module.get_texref('mesh') + self.lower_bounds_tex = self.module.get_texref('lower_bounds') + self.upper_bounds_tex = self.module.get_texref('upper_bounds') + self.child_map_tex = self.module.get_texref('child_map_arr') + self.child_len_tex = self.module.get_texref('child_len_arr') + self.mesh_tex.set_address(self.mesh_gpu, self.mesh_vec.nbytes) - self.lower_bound_tex.set_address(self.lower_bound_gpu, self.lower_bound_vec.nbytes) - self.upper_bound_tex.set_address(self.upper_bound_gpu, self.upper_bound_vec.nbytes) - self.child_map_tex.set_address(self.child_map_gpu, self.child_map.nbytes) - self.child_len_tex.set_address(self.child_len_gpu, self.child_len.nbytes) + self.lower_bounds_tex.set_address(self.lower_bounds_gpu, self.lower_bounds_vec.nbytes) + self.upper_bounds_tex.set_address(self.upper_bounds_gpu, self.upper_bounds_vec.nbytes) + self.child_map_tex.set_address(self.child_map_gpu, self.uchild_map.nbytes) + self.child_len_tex.set_address(self.child_len_gpu, self.uchild_len.nbytes) self.mesh_tex.set_format(cuda.array_format.FLOAT, 4) - self.lower_bound_tex.set_format(cuda.array_format.FLOAT, 4) - self.upper_bound_tex.set_format(cuda.array_format.FLOAT, 4) + self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) + self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) self.child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) self.child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - self.geometry = geometry - - def call(self, *args, **kwargs): - kwargs['texrefs'] = [self.mesh_tex, self.lower_bound_tex, self.upper_bound_tex, self.child_map_tex, self.child_len_tex] - cuda_intersect(*args, **kwargs) + return [self.mesh_tex, self.lower_bounds_tex, self.upper_bounds_tex, self.child_map_tex, self.child_len_tex] |