Put triangles and vertices back onto the GPU if there is sufficient room.

This is an optimization that is helpful for smaller detectors, even if giant water Cherenkov detectors cannot take advantage of it. Modified Mapped() helper function to pass through GPU arrays, which makes it much easier to flip between arrays on the CPU and the GPU.
author: Stan Seibert <stan@mtrr.org> 2012-05-24 11:33:19 -0400
committer: tlatorre <tlatorre@uchicago.edu> 2021-05-09 08:42:39 -0700
commit: 57424359aefd4221c64582573c8424c61fd7dc51 (patch)
tree: 448457af247bc24838fdb9ebc26c4f9ca9e0d809
parent: b62149c08f835068215b48110df876ae8d481969 (diff)
download: chroma-57424359aefd4221c64582573c8424c61fd7dc51.tar.gz
chroma-57424359aefd4221c64582573c8424c61fd7dc51.tar.bz2
chroma-57424359aefd4221c64582573c8424c61fd7dc51.zip
2 files changed, 21 insertions, 3 deletions
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 2620ef1..60dc489 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -136,7 +136,7 @@ class GPUGeometry(object):
                                       write_combined=True)
         self.vertices[:] = to_float3(geometry.mesh.vertices)
         self.triangles[:] = to_uint3(geometry.mesh.triangles)
-
+        
         self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
         self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
 
@@ -169,6 +169,17 @@ class GPUGeometry(object):
             logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index)
             self.extra_nodes[:] = geometry.bvh.nodes[split_index:]
 
+        # See if there is enough memory to put the vertices and/or triangles back on the GPU
+        gpu_free, gpu_total = cuda.mem_get_info()
+        if self.triangles.nbytes < (gpu_free - min_free_gpu_mem):
+            self.triangles = ga.to_gpu(self.triangles)
+            logger.info('Optimization: Sufficient memory to move triangles onto GPU')
+
+        gpu_free, gpu_total = cuda.mem_get_info()
+        if self.vertices.nbytes < (gpu_free - min_free_gpu_mem):
+            self.vertices = ga.to_gpu(self.vertices)
+            logger.info('Optimization: Sufficient memory to move vertices onto GPU')
+
         self.gpudata = make_gpu_struct(geometry_struct_size,
                                        [Mapped(self.vertices), 
                                         Mapped(self.triangles),
diff --git a/chroma/gpu/tools.py b/chroma/gpu/tools.py
index 34b4166..98c7e6a 100644
--- a/chroma/gpu/tools.py
+++ b/chroma/gpu/tools.py
@@ -185,8 +185,15 @@ def format_array(name, array):
 
 def Mapped(array):
     '''Analog to pycuda.driver.InOut(), but indicates this array
-    is memory mapped to the device space and should not be copied.'''
-    return np.intp(array.base.get_device_pointer())
+    is memory mapped to the device space and should not be copied.
+
+    To simplify coding, Mapped() will pass anything with a gpudata
+    member, like a gpuarray, through unchanged.
+    '''
+    if hasattr(array, 'gpudata'):
+        return array
+    else:
+        return np.intp(array.base.get_device_pointer())
 
 def mapped_alloc(pagelocked_alloc_func, shape, dtype, write_combined):
     '''Returns a pagelocked host array mapped into the CUDA device
author	Stan Seibert <stan@mtrr.org>	2012-05-24 11:33:19 -0400
committer	tlatorre <tlatorre@uchicago.edu>	2021-05-09 08:42:39 -0700
commit	57424359aefd4221c64582573c8424c61fd7dc51 (patch)
tree	448457af247bc24838fdb9ebc26c4f9ca9e0d809
parent	b62149c08f835068215b48110df876ae8d481969 (diff)
download	chroma-57424359aefd4221c64582573c8424c61fd7dc51.tar.gz chroma-57424359aefd4221c64582573c8424c61fd7dc51.tar.bz2 chroma-57424359aefd4221c64582573c8424c61fd7dc51.zip