summaryrefslogtreecommitdiff
path: root/gpu.py
diff options
context:
space:
mode:
Diffstat (limited to 'gpu.py')
-rw-r--r--gpu.py91
1 files changed, 44 insertions, 47 deletions
diff --git a/gpu.py b/gpu.py
index 3d30387..6ab525d 100644
--- a/gpu.py
+++ b/gpu.py
@@ -1,13 +1,17 @@
import os
-import time
import numpy as np
-
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray
+import layout
+
+float3 = gpuarray.vec.float3
+float4 = gpuarray.vec.float4
-def make_vector(arr, dtype=gpuarray.vec.float3):
+source = open(layout.source + '/kernel.cu').read()
+
+def make_vector(arr, dtype=float3):
if len(arr.shape) != 2 or arr.shape[-1] != 3:
raise Exception('shape mismatch')
@@ -18,59 +22,52 @@ def make_vector(arr, dtype=gpuarray.vec.float3):
return v
-print 'device %s' % autoinit.device.name()
-
-source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src'
-
-source = open(source_directory + '/intersect.cu').read()
-module = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
-
-cuda_intersect = module.get_function('intersect_mesh')
-cuda_rotate = module.get_function('rotate')
-cuda_translate = module.get_function('translate')
-
class GPU(object):
+ """
+ Object to handle all of the texture allocation/referencing when loading
+ a geometry onto the GPU.
+ """
def __init__(self):
- pass
+ print 'device %s' % autoinit.device.name()
- def load_geometry(self, geometry):
- self.mesh = geometry.mesh
- self.lower_bound = geometry.lower_bound
- self.upper_bound = geometry.upper_bound
- self.child_map = geometry.child_map.astype(np.uint32)
- self.child_len = geometry.child_len.astype(np.uint32)
- self.first_leaf = np.int32(geometry.first_leaf)
-
- self.mesh_vec = make_vector(self.mesh.reshape(self.mesh.shape[0]*3,3), dtype=gpuarray.vec.float4)
- self.lower_bound_vec = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4)
- self.upper_bound_vec = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4)
+ self.module = SourceModule(source, options=['-I' + layout.source],
+ no_extern_c=True, cache_dir=False)
- self.mesh_gpu = cuda.to_device(self.mesh_vec)
- self.lower_bound_gpu = cuda.to_device(self.lower_bound_vec)
- self.upper_bound_gpu = cuda.to_device(self.upper_bound_vec)
- self.child_map_gpu = cuda.to_device(self.child_map)
- self.child_len_gpu = cuda.to_device(self.child_len)
+ self.get_function = self.module.get_function
- self.mesh_tex = module.get_texref('mesh')
- self.lower_bound_tex = module.get_texref('lower_bound_arr')
- self.upper_bound_tex = module.get_texref('upper_bound_arr')
- self.child_map_tex = module.get_texref('child_map_arr')
- self.child_len_tex = module.get_texref('child_len_arr')
+ def load_geometry(self, geometry):
+ """
+ Load all the textures from `geometry` onto the GPU and return a list
+ of texture references.
+ """
+ self.mesh_vec = make_vector(geometry.mesh.reshape(geometry.mesh.shape[0]*3,3), float4)
+ self.lower_bounds_vec = make_vector(geometry.lower_bounds, float4)
+ self.upper_bounds_vec = make_vector(geometry.upper_bounds, float4)
+ self.uchild_map = geometry.child_map.astype(np.uint32)
+ self.uchild_len = geometry.child_len.astype(np.uint32)
+ self.mesh_gpu = cuda.to_device(self.mesh_vec)
+ self.lower_bounds_gpu = cuda.to_device(self.lower_bounds_vec)
+ self.upper_bounds_gpu = cuda.to_device(self.upper_bounds_vec)
+ self.child_map_gpu = cuda.to_device(self.uchild_map)
+ self.child_len_gpu = cuda.to_device(self.uchild_len)
+
+ self.mesh_tex = self.module.get_texref('mesh')
+ self.lower_bounds_tex = self.module.get_texref('lower_bounds')
+ self.upper_bounds_tex = self.module.get_texref('upper_bounds')
+ self.child_map_tex = self.module.get_texref('child_map_arr')
+ self.child_len_tex = self.module.get_texref('child_len_arr')
+
self.mesh_tex.set_address(self.mesh_gpu, self.mesh_vec.nbytes)
- self.lower_bound_tex.set_address(self.lower_bound_gpu, self.lower_bound_vec.nbytes)
- self.upper_bound_tex.set_address(self.upper_bound_gpu, self.upper_bound_vec.nbytes)
- self.child_map_tex.set_address(self.child_map_gpu, self.child_map.nbytes)
- self.child_len_tex.set_address(self.child_len_gpu, self.child_len.nbytes)
+ self.lower_bounds_tex.set_address(self.lower_bounds_gpu, self.lower_bounds_vec.nbytes)
+ self.upper_bounds_tex.set_address(self.upper_bounds_gpu, self.upper_bounds_vec.nbytes)
+ self.child_map_tex.set_address(self.child_map_gpu, self.uchild_map.nbytes)
+ self.child_len_tex.set_address(self.child_len_gpu, self.uchild_len.nbytes)
self.mesh_tex.set_format(cuda.array_format.FLOAT, 4)
- self.lower_bound_tex.set_format(cuda.array_format.FLOAT, 4)
- self.upper_bound_tex.set_format(cuda.array_format.FLOAT, 4)
+ self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
+ self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
self.child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
self.child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
- self.geometry = geometry
-
- def call(self, *args, **kwargs):
- kwargs['texrefs'] = [self.mesh_tex, self.lower_bound_tex, self.upper_bound_tex, self.child_map_tex, self.child_len_tex]
- cuda_intersect(*args, **kwargs)
+ return [self.mesh_tex, self.lower_bounds_tex, self.upper_bounds_tex, self.child_map_tex, self.child_len_tex]