GPU geometry modification to permit the BVH node storage to be split

between GPU and CPU. This allows much more complex geometries to be run on CUDA devices with less memory. The GPUGeometry object now takes a min_free_gpu_mem parameter giving the minimum number of bytes that can be free on the GPU after the BVH is loaded. By default, this number is 300 MB. Cards with sufficient memory will have the entire BVH on card, but those without enough memory will have the BVH split such that the top of the hierarchy (the most frequently traversed) is on the GPU.
author: Stan Seibert <stan@mtrr.org> 2012-05-14 11:37:18 -0400
committer: tlatorre <tlatorre@uchicago.edu> 2021-05-09 08:42:39 -0700
commit: c83e502705ae1312c6030a4e9de2b4327e2d1dd7 (patch)
tree: 93237026aa0c70bdf97759a2fff6b4beddc4f5d2
parent: 6d515c3c8035c0899f7d27344c6ee545f3ce6f8a (diff)
download: chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.gz
chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.bz2
chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.zip
3 files changed, 34 insertions, 5 deletions
diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h
index b718655..4d21779 100644
--- a/chroma/cuda/geometry.h
+++ b/chroma/cuda/geometry.h
@@ -22,7 +22,12 @@ __device__ uint4 read_skip_l1(uint4 *ptr)
 __device__ Node
 get_node(Geometry *geometry, const unsigned int &i)
 {
-    uint4 node = geometry->nodes[i];
+    uint4 node; 
+    if (i < geometry->nprimary_nodes)
+	node = geometry->primary_nodes[i];
+    else
+	node = geometry->extra_nodes[i - geometry->nprimary_nodes];
+	
     Node node_struct;
 
     uint3 lower_int = make_uint3(node.x & 0xFFFF, node.y & 0xFFFF, node.z & 0xFFFF);
diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h
index 6da1a47..7e8bd4b 100644
--- a/chroma/cuda/geometry_types.h
+++ b/chroma/cuda/geometry_types.h
@@ -57,11 +57,13 @@ struct Geometry
     uint3 *triangles;
     unsigned int *material_codes;
     unsigned int *colors;
-    uint4 *nodes;
+    uint4 *primary_nodes;
+    uint4 *extra_nodes;
     Material **materials;
     Surface **surfaces;
     float3 world_origin;
     float world_scale;
+    int nprimary_nodes;
 };
 
 #endif
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 41d52a2..847030a 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -11,7 +11,7 @@ from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \
 from chroma.log import logger
 
 class GPUGeometry(object):
-    def __init__(self, geometry, wavelengths=None, print_usage=False):
+    def __init__(self, geometry, wavelengths=None, print_usage=False, min_free_gpu_mem=300e6):
         if wavelengths is None:
             wavelengths = standard_wavelengths
 
@@ -137,7 +137,6 @@ class GPUGeometry(object):
         self.vertices[:] = to_float3(geometry.mesh.vertices)
         self.triangles[:] = to_uint3(geometry.mesh.triangles)
 
-        self.nodes = ga.to_gpu(geometry.bvh.nodes)
         self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
         self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
 
@@ -149,15 +148,38 @@ class GPUGeometry(object):
         self.colors = ga.to_gpu(colors)
         self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32))
 
+        # Limit memory usage by splitting BVH into on and off-GPU parts
+        gpu_free, gpu_total = cuda.mem_get_info()
+        node_array_usage = geometry.bvh.nodes.nbytes
+
+        # Figure out how many elements we can fit on the GPU,
+        # but no fewer than 100 elements, and no more than the number of actual nodes
+        n_nodes = len(geometry.bvh.nodes)
+        split_index = min(
+            max(int((gpu_free - min_free_gpu_mem) / geometry.bvh.nodes.itemsize),100),
+            n_nodes
+            )
+        
+        self.nodes = ga.to_gpu(geometry.bvh.nodes[:split_index])
+        n_extra = max(1, (n_nodes - split_index)) # forbid zero size
+        self.extra_nodes = mapped_empty(shape=n_extra,
+                                        dtype=geometry.bvh.nodes.dtype,
+                                        write_combined=True)
+        if split_index < n_nodes:
+            logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index)
+            self.extra_nodes[:] = geometry.bvh.nodes[split_index:]
+
         self.gpudata = make_gpu_struct(geometry_struct_size,
                                        [Mapped(self.vertices), 
                                         Mapped(self.triangles),
                                         self.material_codes,
                                         self.colors, self.nodes,
+                                        Mapped(self.extra_nodes),
                                         self.material_pointer_array,
                                         self.surface_pointer_array,
                                         self.world_origin,
-                                        self.world_scale])
+                                        self.world_scale,
+                                        np.int32(len(self.nodes))])
 
         self.geometry = geometry
author	Stan Seibert <stan@mtrr.org>	2012-05-14 11:37:18 -0400
committer	tlatorre <tlatorre@uchicago.edu>	2021-05-09 08:42:39 -0700
commit	c83e502705ae1312c6030a4e9de2b4327e2d1dd7 (patch)
tree	93237026aa0c70bdf97759a2fff6b4beddc4f5d2
parent	6d515c3c8035c0899f7d27344c6ee545f3ce6f8a (diff)
download	chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.gz chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.bz2 chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.zip