From 57424359aefd4221c64582573c8424c61fd7dc51 Mon Sep 17 00:00:00 2001 From: Stan Seibert Date: Thu, 24 May 2012 11:33:19 -0400 Subject: Put triangles and vertices back onto the GPU if there is sufficient room. This is an optimization that is helpful for smaller detectors, even if giant water Cherenkov detectors cannot take advantage of it. Modified Mapped() helper function to pass through GPU arrays, which makes it much easier to flip between arrays on the CPU and the GPU. --- chroma/gpu/geometry.py | 13 ++++++++++++- chroma/gpu/tools.py | 11 +++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py index 2620ef1..60dc489 100644 --- a/chroma/gpu/geometry.py +++ b/chroma/gpu/geometry.py @@ -136,7 +136,7 @@ class GPUGeometry(object): write_combined=True) self.vertices[:] = to_float3(geometry.mesh.vertices) self.triangles[:] = to_uint3(geometry.mesh.triangles) - + self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin) self.world_scale = np.float32(geometry.bvh.world_coords.world_scale) @@ -169,6 +169,17 @@ class GPUGeometry(object): logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index) self.extra_nodes[:] = geometry.bvh.nodes[split_index:] + # See if there is enough memory to put the triangles and/or vertices back on the GPU + gpu_free, gpu_total = cuda.mem_get_info() + if self.triangles.nbytes < (gpu_free - min_free_gpu_mem): + self.triangles = ga.to_gpu(self.triangles) + logger.info('Optimization: Sufficient memory to move triangles onto GPU') + + gpu_free, gpu_total = cuda.mem_get_info() + if self.vertices.nbytes < (gpu_free - min_free_gpu_mem): + self.vertices = ga.to_gpu(self.vertices) + logger.info('Optimization: Sufficient memory to move vertices onto GPU') + self.gpudata = make_gpu_struct(geometry_struct_size, [Mapped(self.vertices), Mapped(self.triangles), diff --git a/chroma/gpu/tools.py b/chroma/gpu/tools.py index 34b4166..98c7e6a 100644 --- 
a/chroma/gpu/tools.py +++ b/chroma/gpu/tools.py @@ -185,8 +185,15 @@ def format_array(name, array): def Mapped(array): '''Analog to pycuda.driver.InOut(), but indicates this array - is memory mapped to the device space and should not be copied.''' - return np.intp(array.base.get_device_pointer()) + is memory mapped to the device space and should not be copied. + + To simplify coding, Mapped() will pass anything with a gpudata + member, like a gpuarray, through unchanged. + ''' + if hasattr(array, 'gpudata'): + return array + else: + return np.intp(array.base.get_device_pointer()) def mapped_alloc(pagelocked_alloc_func, shape, dtype, write_combined): '''Returns a pagelocked host array mapped into the CUDA device -- cgit