-rw-r--r--   chroma/gpu/geometry.py | 13
-rw-r--r--   chroma/gpu/tools.py    | 11

2 files changed, 21 insertions, 3 deletions
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 2620ef1..60dc489 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -136,7 +136,7 @@ class GPUGeometry(object):
                                       write_combined=True)
         self.vertices[:] = to_float3(geometry.mesh.vertices)
         self.triangles[:] = to_uint3(geometry.mesh.triangles)
-        
+
         self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
         self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
 
@@ -169,6 +169,17 @@ class GPUGeometry(object):
             logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index)
             self.extra_nodes[:] = geometry.bvh.nodes[split_index:]
 
+            # See if there is enough memory to put the vertices and/or triangles back on the GPU
+            gpu_free, gpu_total = cuda.mem_get_info()
+            if self.triangles.nbytes < (gpu_free - min_free_gpu_mem):
+                self.triangles = ga.to_gpu(self.triangles)
+                logger.info('Optimization: Sufficient memory to move triangles onto GPU')
+
+            gpu_free, gpu_total = cuda.mem_get_info()
+            if self.vertices.nbytes < (gpu_free - min_free_gpu_mem):
+                self.vertices = ga.to_gpu(self.vertices)
+                logger.info('Optimization: Sufficient memory to move vertices onto GPU')
+
         self.gpudata = make_gpu_struct(geometry_struct_size,
                                        [Mapped(self.vertices),
                                         Mapped(self.triangles),
diff --git a/chroma/gpu/tools.py b/chroma/gpu/tools.py
index 34b4166..98c7e6a 100644
--- a/chroma/gpu/tools.py
+++ b/chroma/gpu/tools.py
@@ -185,8 +185,15 @@ def format_array(name, array):
 
 def Mapped(array):
     '''Analog to pycuda.driver.InOut(), but indicates this array
-    is memory mapped to the device space and should not be copied.'''
-    return np.intp(array.base.get_device_pointer())
+    is memory mapped to the device space and should not be copied.
+
+    To simplify coding, Mapped() will pass anything with a gpudata
+    member, like a gpuarray, through unchanged.
+    '''
+    if hasattr(array, 'gpudata'):
+        return array
+    else:
+        return np.intp(array.base.get_device_pointer())
 
 def mapped_alloc(pagelocked_alloc_func, shape, dtype, write_combined):
     '''Returns a pagelocked host array mapped into the CUDA device
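
Note: the geometry.py hunk is a simple headroom check. After the BVH has been split between GPU and CPU memory, it asks the driver how much device memory is still free and promotes the triangle and vertex arrays from mapped host memory to GPU-resident gpuarrays when they fit below the reserve. A minimal standalone sketch of that pattern follows, assuming PyCUDA; the array shape and the min_free_gpu_mem margin are illustrative values, not taken from chroma's configuration.

    import numpy as np
    import pycuda.autoinit          # creates a CUDA context
    import pycuda.driver as cuda
    import pycuda.gpuarray as ga

    min_free_gpu_mem = 300e6        # illustrative reserve in bytes, not chroma's actual setting

    # Stand-in for a host-side array that would otherwise stay in mapped memory
    triangles = np.zeros((100000, 3), dtype=np.uint32)

    gpu_free, gpu_total = cuda.mem_get_info()
    if triangles.nbytes < (gpu_free - min_free_gpu_mem):
        # Enough headroom: replace the host copy with a device-resident gpuarray
        triangles = ga.to_gpu(triangles)
        print('moved triangles onto the GPU')
    else:
        print('keeping triangles in host memory')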
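The tools.py change is the matching half: self.triangles and self.vertices may now be either device-mapped pagelocked host arrays or GPUArrays, so Mapped() duck-types on the gpudata attribute and passes GPU-resident arrays through untouched. A short sketch of that behaviour, again assuming PyCUDA; the mapped-host branch is shown but not exercised, since it needs an allocation created with chroma's mapped_alloc.

    import numpy as np
    import pycuda.autoinit
    import pycuda.gpuarray as ga

    def Mapped(array):
        # GPUArrays expose .gpudata, so they already carry a device pointer
        if hasattr(array, 'gpudata'):
            return array
        # Otherwise assume a mapped pagelocked host array (chroma's mapped_alloc case)
        return np.intp(array.base.get_device_pointer())

    on_device = ga.to_gpu(np.zeros(16, dtype=np.float32))
    print(Mapped(on_device) is on_device)   # True: passed through unchanged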