import os
import time

import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray


def make_vector(arr, dtype=gpuarray.vec.float3):
    """Pack the rows of a two-dimensional, three-column array into a
    one-dimensional array of CUDA vector structs.

    Columns 0, 1 and 2 of `arr` are copied into the 'x', 'y' and 'z'
    fields of the result. Any other field of `dtype` (e.g. 'w' when
    `dtype` is float4) is left at zero.

    Args:
        arr: array-like with a `shape` of (N, 3).
        dtype: structured dtype with at least 'x', 'y' and 'z' fields.

    Returns:
        A numpy array of length N and type `dtype`.

    Raises:
        ValueError: if `arr` is not two-dimensional with three columns.
    """
    if len(arr.shape) != 2 or arr.shape[-1] != 3:
        raise ValueError('shape mismatch')

    # zeros rather than empty: fields that are not assigned below would
    # otherwise hold uninitialised memory that is later copied to the GPU.
    packed = np.zeros(arr.shape[0], dtype)
    packed['x'] = arr[:, 0]
    packed['y'] = arr[:, 1]
    packed['z'] = arr[:, 2]
    return packed


# Single-argument parenthesised form prints the same on Python 2 and 3.
print('device %s' % autoinit.device.name())

# The CUDA sources live in the 'src' directory next to this file.
source_directory = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), 'src')

with open(os.path.join(source_directory, 'intersect.cu')) as source_file:
    source = source_file.read()

module = SourceModule(source, options=['-I' + source_directory],
                      no_extern_c=True, cache_dir=False)

cuda_intersect = module.get_function('intersect_mesh')
cuda_rotate = module.get_function('rotate')
cuda_translate = module.get_function('translate')


def _bind_texture(name, host_array, array_format, channels):
    """Copy `host_array` to the device and bind it to the texture
    reference called `name` in the compiled module.

    Returns:
        (device_allocation, texture_reference)
    """
    device_alloc = cuda.to_device(host_array)
    texref = module.get_texref(name)
    texref.set_address(device_alloc, host_array.nbytes)
    texref.set_format(array_format, channels)
    return device_alloc, texref


class GPU(object):
    """Holds a geometry's device-side arrays and texture bindings and
    launches the 'intersect_mesh' kernel against them."""

    def __init__(self):
        pass

    def load_geometry(self, geometry):
        """Upload `geometry` to the device and bind it to the kernel's
        texture references.

        Reads `mesh`, `lower_bound`, `upper_bound`, `child_map`,
        `child_len` and `first_leaf` from `geometry`. Host copies, device
        allocations and texture references are all stored on `self`.
        """
        self.mesh = geometry.mesh
        self.lower_bound = geometry.lower_bound
        self.upper_bound = geometry.upper_bound
        self.child_map = geometry.child_map.astype(np.uint32)
        self.child_len = geometry.child_len.astype(np.uint32)
        self.first_leaf = np.int32(geometry.first_leaf)

        # The float textures are read as four-component vectors, so the
        # three-component data is packed into float4 records.
        float4 = gpuarray.vec.float4
        self.mesh_vec = make_vector(
            self.mesh.reshape(self.mesh.shape[0]*3, 3), dtype=float4)
        self.lower_bound_vec = make_vector(geometry.lower_bound, dtype=float4)
        self.upper_bound_vec = make_vector(geometry.upper_bound, dtype=float4)

        # The device allocations are kept as attributes so that they stay
        # referenced for as long as the textures are bound to them.
        self.mesh_gpu, self.mesh_tex = _bind_texture(
            'mesh', self.mesh_vec, cuda.array_format.FLOAT, 4)
        self.lower_bound_gpu, self.lower_bound_tex = _bind_texture(
            'lower_bound_arr', self.lower_bound_vec,
            cuda.array_format.FLOAT, 4)
        self.upper_bound_gpu, self.upper_bound_tex = _bind_texture(
            'upper_bound_arr', self.upper_bound_vec,
            cuda.array_format.FLOAT, 4)
        self.child_map_gpu, self.child_map_tex = _bind_texture(
            'child_map_arr', self.child_map,
            cuda.array_format.UNSIGNED_INT32, 1)
        self.child_len_gpu, self.child_len_tex = _bind_texture(
            'child_len_arr', self.child_len,
            cuda.array_format.UNSIGNED_INT32, 1)

        self.geometry = geometry

    def call(self, *args, **kwargs):
        """Launch the 'intersect_mesh' kernel with the geometry textures.

        All positional and keyword arguments are forwarded to the kernel
        call; any 'texrefs' keyword supplied by the caller is replaced by
        the texture references set up in load_geometry().

        Returns:
            Whatever the kernel call returns (PyCUDA returns the elapsed
            time when called with time_kernel=True, otherwise None).
        """
        kwargs['texrefs'] = [
            self.mesh_tex,
            self.lower_bound_tex,
            self.upper_bound_tex,
            self.child_map_tex,
            self.child_len_tex,
        ]
        return cuda_intersect(*args, **kwargs)