summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--chroma/cuda/geometry.h7
-rw-r--r--chroma/cuda/geometry_types.h4
-rw-r--r--chroma/gpu/geometry.py28
3 files changed, 34 insertions, 5 deletions
diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h
index b718655..4d21779 100644
--- a/chroma/cuda/geometry.h
+++ b/chroma/cuda/geometry.h
@@ -22,7 +22,12 @@ __device__ uint4 read_skip_l1(uint4 *ptr)
__device__ Node
get_node(Geometry *geometry, const unsigned int &i)
{
- uint4 node = geometry->nodes[i];
+ uint4 node;
+ if (i < geometry->nprimary_nodes)
+ node = geometry->primary_nodes[i];
+ else
+ node = geometry->extra_nodes[i - geometry->nprimary_nodes];
+
Node node_struct;
uint3 lower_int = make_uint3(node.x & 0xFFFF, node.y & 0xFFFF, node.z & 0xFFFF);
diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h
index 6da1a47..7e8bd4b 100644
--- a/chroma/cuda/geometry_types.h
+++ b/chroma/cuda/geometry_types.h
@@ -57,11 +57,13 @@ struct Geometry
uint3 *triangles;
unsigned int *material_codes;
unsigned int *colors;
- uint4 *nodes;
+ uint4 *primary_nodes;
+ uint4 *extra_nodes;
Material **materials;
Surface **surfaces;
float3 world_origin;
float world_scale;
+ int nprimary_nodes;
};
#endif
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 41d52a2..847030a 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -11,7 +11,7 @@ from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \
from chroma.log import logger
class GPUGeometry(object):
- def __init__(self, geometry, wavelengths=None, print_usage=False):
+ def __init__(self, geometry, wavelengths=None, print_usage=False, min_free_gpu_mem=300e6):
if wavelengths is None:
wavelengths = standard_wavelengths
@@ -137,7 +137,6 @@ class GPUGeometry(object):
self.vertices[:] = to_float3(geometry.mesh.vertices)
self.triangles[:] = to_uint3(geometry.mesh.triangles)
- self.nodes = ga.to_gpu(geometry.bvh.nodes)
self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
@@ -149,15 +148,38 @@ class GPUGeometry(object):
self.colors = ga.to_gpu(colors)
self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32))
+ # Limit memory usage by splitting BVH into on and off-GPU parts
+ gpu_free, gpu_total = cuda.mem_get_info()
+ node_array_usage = geometry.bvh.nodes.nbytes
+
+ # Figure out how many elements we can fit on the GPU,
+ # but no fewer than 100 elements, and no more than the number of actual nodes
+ n_nodes = len(geometry.bvh.nodes)
+ split_index = min(
+ max(int((gpu_free - min_free_gpu_mem) / geometry.bvh.nodes.itemsize),100),
+ n_nodes
+ )
+
+ self.nodes = ga.to_gpu(geometry.bvh.nodes[:split_index])
+ n_extra = max(1, (n_nodes - split_index)) # forbid zero size
+ self.extra_nodes = mapped_empty(shape=n_extra,
+ dtype=geometry.bvh.nodes.dtype,
+ write_combined=True)
+ if split_index < n_nodes:
+ logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index)
+ self.extra_nodes[:] = geometry.bvh.nodes[split_index:]
+
self.gpudata = make_gpu_struct(geometry_struct_size,
[Mapped(self.vertices),
Mapped(self.triangles),
self.material_codes,
self.colors, self.nodes,
+ Mapped(self.extra_nodes),
self.material_pointer_array,
self.surface_pointer_array,
self.world_origin,
- self.world_scale])
+ self.world_scale,
+ np.int32(len(self.nodes))])
self.geometry = geometry