 chroma/gpu/geometry.py | 13 ++++++++++++-
 chroma/gpu/tools.py    | 11 +++++++++--
 2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 2620ef1..60dc489 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -136,7 +136,7 @@ class GPUGeometry(object):
write_combined=True)
self.vertices[:] = to_float3(geometry.mesh.vertices)
self.triangles[:] = to_uint3(geometry.mesh.triangles)
-
+
self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
@@ -169,6 +169,17 @@ class GPUGeometry(object):
logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index)
self.extra_nodes[:] = geometry.bvh.nodes[split_index:]
+ # See if there is enough memory to put the vertices and/or triangles back on the GPU
+ gpu_free, gpu_total = cuda.mem_get_info()
+ if self.triangles.nbytes < (gpu_free - min_free_gpu_mem):
+ self.triangles = ga.to_gpu(self.triangles)
+ logger.info('Optimization: Sufficient memory to move triangles onto GPU')
+
+ gpu_free, gpu_total = cuda.mem_get_info()
+ if self.vertices.nbytes < (gpu_free - min_free_gpu_mem):
+ self.vertices = ga.to_gpu(self.vertices)
+ logger.info('Optimization: Sufficient memory to move vertices onto GPU')
+
self.gpudata = make_gpu_struct(geometry_struct_size,
[Mapped(self.vertices),
Mapped(self.triangles),
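
Note on the hunk above: before leaving the mesh in host-mapped memory, the new code asks whether the triangle and vertex arrays still fit on the device, by querying free device memory with pycuda's cuda.mem_get_info() and requiring that the copy leave min_free_gpu_mem bytes spare. Below is a minimal standalone sketch of that pattern; the array size and reserve are illustrative stand-ins, not chroma's actual values.

    # Sketch of the memory-headroom check used above (assumes a CUDA
    # context created by pycuda.autoinit; min_free_gpu_mem and host_array
    # are illustrative, not taken from chroma).
    import numpy as np
    import pycuda.autoinit
    import pycuda.driver as cuda
    import pycuda.gpuarray as ga

    min_free_gpu_mem = 1024**3                  # keep at least 1 GiB free
    host_array = np.zeros((1000000, 3), dtype=np.float32)

    # Promote the array to device memory only if the copy still leaves
    # the reserve untouched; otherwise keep using the host-side copy.
    gpu_free, gpu_total = cuda.mem_get_info()
    if host_array.nbytes < (gpu_free - min_free_gpu_mem):
        host_array = ga.to_gpu(host_array)
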
diff --git a/chroma/gpu/tools.py b/chroma/gpu/tools.py
index 34b4166..98c7e6a 100644
--- a/chroma/gpu/tools.py
+++ b/chroma/gpu/tools.py
@@ -185,8 +185,15 @@ def format_array(name, array):
def Mapped(array):
'''Analog to pycuda.driver.InOut(), but indicates this array
- is memory mapped to the device space and should not be copied.'''
- return np.intp(array.base.get_device_pointer())
+ is memory mapped to the device space and should not be copied.
+
+ To simplify coding, Mapped() will pass anything with a gpudata
+ member, like a gpuarray, through unchanged.
+ '''
+ if hasattr(array, 'gpudata'):
+ return array
+ else:
+ return np.intp(array.base.get_device_pointer())
def mapped_alloc(pagelocked_alloc_func, shape, dtype, write_combined):
'''Returns a pagelocked host array mapped into the CUDA device
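
Note on the Mapped() change above: it now accepts either a device-mapped pagelocked host array, for which it still returns the device pointer as an np.intp, or anything that already carries a gpudata member (such as the GPUArrays produced by the to_gpu() optimization in geometry.py), which it passes through unchanged. A hedged usage sketch follows, assuming pycuda.autoinit provides a context on a device that supports mapped pinned memory; the shapes, dtypes, and variable names are illustrative.

    # Sketch of the two paths through Mapped() after this change.
    import numpy as np
    import pycuda.autoinit
    import pycuda.driver as cuda
    import pycuda.gpuarray as ga
    from chroma.gpu.tools import Mapped

    # A pagelocked host array mapped into device space: Mapped() returns
    # its device pointer as an np.intp, ready to pack into a GPU struct.
    mapped_host = cuda.pagelocked_empty(
        16, np.float32, mem_flags=cuda.host_alloc_flags.DEVICEMAP)
    ptr = Mapped(mapped_host)                    # numpy.intp device pointer

    # A GPUArray already resident on the device (it has .gpudata), e.g.
    # the triangles/vertices after the to_gpu() optimization: Mapped()
    # passes it through unchanged.
    device_array = ga.to_gpu(np.zeros(16, dtype=np.float32))
    assert Mapped(device_array) is device_array
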