diff options
author | Stan Seibert <stan@mtrr.org> | 2012-05-14 11:37:18 -0400 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2021-05-09 08:42:39 -0700 |
commit | c83e502705ae1312c6030a4e9de2b4327e2d1dd7 (patch) | |
tree | 93237026aa0c70bdf97759a2fff6b4beddc4f5d2 | |
parent | 6d515c3c8035c0899f7d27344c6ee545f3ce6f8a (diff) | |
download | chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.gz chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.tar.bz2 chroma-c83e502705ae1312c6030a4e9de2b4327e2d1dd7.zip |
GPU geometry modification to permit the BVH node storage to be split
between GPU and CPU. This allows much more complex geometries to
be run on CUDA devices with less memory.
The GPUGeometry object now takes a min_free_gpu_mem parameter giving
the minimum number of bytes that can be free on the GPU after the BVH
is loaded. By default, this number is 300 MB. Cards with sufficient
memory will have the entire BVH on card, but those without enough
memory will have the BVH split such that the top of the hierarchy (the
most frequently traversed) is on the GPU.
-rw-r--r-- | chroma/cuda/geometry.h | 7 | ||||
-rw-r--r-- | chroma/cuda/geometry_types.h | 4 | ||||
-rw-r--r-- | chroma/gpu/geometry.py | 28 |
3 files changed, 34 insertions, 5 deletions
diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h index b718655..4d21779 100644 --- a/chroma/cuda/geometry.h +++ b/chroma/cuda/geometry.h @@ -22,7 +22,12 @@ __device__ uint4 read_skip_l1(uint4 *ptr) __device__ Node get_node(Geometry *geometry, const unsigned int &i) { - uint4 node = geometry->nodes[i]; + uint4 node; + if (i < geometry->nprimary_nodes) + node = geometry->primary_nodes[i]; + else + node = geometry->extra_nodes[i - geometry->nprimary_nodes]; + Node node_struct; uint3 lower_int = make_uint3(node.x & 0xFFFF, node.y & 0xFFFF, node.z & 0xFFFF); diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h index 6da1a47..7e8bd4b 100644 --- a/chroma/cuda/geometry_types.h +++ b/chroma/cuda/geometry_types.h @@ -57,11 +57,13 @@ struct Geometry uint3 *triangles; unsigned int *material_codes; unsigned int *colors; - uint4 *nodes; + uint4 *primary_nodes; + uint4 *extra_nodes; Material **materials; Surface **surfaces; float3 world_origin; float world_scale; + int nprimary_nodes; }; #endif diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py index 41d52a2..847030a 100644 --- a/chroma/gpu/geometry.py +++ b/chroma/gpu/geometry.py @@ -11,7 +11,7 @@ from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \ from chroma.log import logger class GPUGeometry(object): - def __init__(self, geometry, wavelengths=None, print_usage=False): + def __init__(self, geometry, wavelengths=None, print_usage=False, min_free_gpu_mem=300e6): if wavelengths is None: wavelengths = standard_wavelengths @@ -137,7 +137,6 @@ class GPUGeometry(object): self.vertices[:] = to_float3(geometry.mesh.vertices) self.triangles[:] = to_uint3(geometry.mesh.triangles) - self.nodes = ga.to_gpu(geometry.bvh.nodes) self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin) self.world_scale = np.float32(geometry.bvh.world_coords.world_scale) @@ -149,15 +148,38 @@ class GPUGeometry(object): self.colors = ga.to_gpu(colors) self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32)) + # Limit memory usage by splitting BVH into on and off-GPU parts + gpu_free, gpu_total = cuda.mem_get_info() + node_array_usage = geometry.bvh.nodes.nbytes + + # Figure out how many elements we can fit on the GPU, + # but no fewer than 100 elements, and no more than the number of actual nodes + n_nodes = len(geometry.bvh.nodes) + split_index = min( + max(int((gpu_free - min_free_gpu_mem) / geometry.bvh.nodes.itemsize),100), + n_nodes + ) + + self.nodes = ga.to_gpu(geometry.bvh.nodes[:split_index]) + n_extra = max(1, (n_nodes - split_index)) # forbid zero size + self.extra_nodes = mapped_empty(shape=n_extra, + dtype=geometry.bvh.nodes.dtype, + write_combined=True) + if split_index < n_nodes: + logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index) + self.extra_nodes[:] = geometry.bvh.nodes[split_index:] + self.gpudata = make_gpu_struct(geometry_struct_size, [Mapped(self.vertices), Mapped(self.triangles), self.material_codes, self.colors, self.nodes, + Mapped(self.extra_nodes), self.material_pointer_array, self.surface_pointer_array, self.world_origin, - self.world_scale]) + self.world_scale, + np.int32(len(self.nodes))]) self.geometry = geometry |