summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Seibert <stan@mtrr.org>2012-05-14 11:37:18 -0400
committertlatorre <tlatorre@uchicago.edu>2021-05-09 08:42:39 -0700
commitc83e502705ae1312c6030a4e9de2b4327e2d1dd7 (patch)
tree93237026aa0c70bdf97759a2fff6b4beddc4f5d2
parent6d515c3c8035c0899f7d27344c6ee545f3ce6f8a (diff)
downloadchroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.gz
chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.bz2
chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.zip
GPU geometry modification to permit the BVH node storage to be split
between GPU and CPU. This allows much more complex geometries to be run on CUDA devices with less memory. The GPUGeometry object now takes a min_free_gpu_mem parameter giving the minimum number of bytes that can be free on the GPU after the BVH is loaded. By default, this number is 300 MB. Cards with sufficient memory will have the entire BVH on card, but those without enough memory will have the BVH split such that the top of the hierarchy (the most frequently traversed) is on the GPU.
-rw-r--r--chroma/cuda/geometry.h7
-rw-r--r--chroma/cuda/geometry_types.h4
-rw-r--r--chroma/gpu/geometry.py28
3 files changed, 34 insertions, 5 deletions
diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h
index b718655..4d21779 100644
--- a/chroma/cuda/geometry.h
+++ b/chroma/cuda/geometry.h
@@ -22,7 +22,12 @@ __device__ uint4 read_skip_l1(uint4 *ptr)
__device__ Node
get_node(Geometry *geometry, const unsigned int &i)
{
- uint4 node = geometry->nodes[i];
+ uint4 node;
+ if (i < geometry->nprimary_nodes)
+ node = geometry->primary_nodes[i];
+ else
+ node = geometry->extra_nodes[i - geometry->nprimary_nodes];
+
Node node_struct;
uint3 lower_int = make_uint3(node.x & 0xFFFF, node.y & 0xFFFF, node.z & 0xFFFF);
diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h
index 6da1a47..7e8bd4b 100644
--- a/chroma/cuda/geometry_types.h
+++ b/chroma/cuda/geometry_types.h
@@ -57,11 +57,13 @@ struct Geometry
uint3 *triangles;
unsigned int *material_codes;
unsigned int *colors;
- uint4 *nodes;
+ uint4 *primary_nodes;
+ uint4 *extra_nodes;
Material **materials;
Surface **surfaces;
float3 world_origin;
float world_scale;
+ int nprimary_nodes;
};
#endif
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 41d52a2..847030a 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -11,7 +11,7 @@ from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \
from chroma.log import logger
class GPUGeometry(object):
- def __init__(self, geometry, wavelengths=None, print_usage=False):
+ def __init__(self, geometry, wavelengths=None, print_usage=False, min_free_gpu_mem=300e6):
if wavelengths is None:
wavelengths = standard_wavelengths
@@ -137,7 +137,6 @@ class GPUGeometry(object):
self.vertices[:] = to_float3(geometry.mesh.vertices)
self.triangles[:] = to_uint3(geometry.mesh.triangles)
- self.nodes = ga.to_gpu(geometry.bvh.nodes)
self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
@@ -149,15 +148,38 @@ class GPUGeometry(object):
self.colors = ga.to_gpu(colors)
self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32))
+ # Limit memory usage by splitting BVH into on and off-GPU parts
+ gpu_free, gpu_total = cuda.mem_get_info()
+ node_array_usage = geometry.bvh.nodes.nbytes
+
+ # Figure out how many elements we can fit on the GPU,
+ # but no fewer than 100 elements, and no more than the number of actual nodes
+ n_nodes = len(geometry.bvh.nodes)
+ split_index = min(
+ max(int((gpu_free - min_free_gpu_mem) / geometry.bvh.nodes.itemsize),100),
+ n_nodes
+ )
+
+ self.nodes = ga.to_gpu(geometry.bvh.nodes[:split_index])
+ n_extra = max(1, (n_nodes - split_index)) # forbid zero size
+ self.extra_nodes = mapped_empty(shape=n_extra,
+ dtype=geometry.bvh.nodes.dtype,
+ write_combined=True)
+ if split_index < n_nodes:
+ logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index)
+ self.extra_nodes[:] = geometry.bvh.nodes[split_index:]
+
self.gpudata = make_gpu_struct(geometry_struct_size,
[Mapped(self.vertices),
Mapped(self.triangles),
self.material_codes,
self.colors, self.nodes,
+ Mapped(self.extra_nodes),
self.material_pointer_array,
self.surface_pointer_array,
self.world_origin,
- self.world_scale])
+ self.world_scale,
+ np.int32(len(self.nodes))])
self.geometry = geometry