diff options
-rw-r--r-- | chroma/cuda/geometry.h | 7 | ||||
-rw-r--r-- | chroma/cuda/geometry_types.h | 4 | ||||
-rw-r--r-- | chroma/gpu/geometry.py | 28 |
3 files changed, 34 insertions, 5 deletions
diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h index b718655..4d21779 100644 --- a/chroma/cuda/geometry.h +++ b/chroma/cuda/geometry.h @@ -22,7 +22,12 @@ __device__ uint4 read_skip_l1(uint4 *ptr) __device__ Node get_node(Geometry *geometry, const unsigned int &i) { - uint4 node = geometry->nodes[i]; + uint4 node; + if (i < geometry->nprimary_nodes) + node = geometry->primary_nodes[i]; + else + node = geometry->extra_nodes[i - geometry->nprimary_nodes]; + Node node_struct; uint3 lower_int = make_uint3(node.x & 0xFFFF, node.y & 0xFFFF, node.z & 0xFFFF); diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h index 6da1a47..7e8bd4b 100644 --- a/chroma/cuda/geometry_types.h +++ b/chroma/cuda/geometry_types.h @@ -57,11 +57,13 @@ struct Geometry uint3 *triangles; unsigned int *material_codes; unsigned int *colors; - uint4 *nodes; + uint4 *primary_nodes; + uint4 *extra_nodes; Material **materials; Surface **surfaces; float3 world_origin; float world_scale; + int nprimary_nodes; }; #endif diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py index 41d52a2..847030a 100644 --- a/chroma/gpu/geometry.py +++ b/chroma/gpu/geometry.py @@ -11,7 +11,7 @@ from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \ from chroma.log import logger class GPUGeometry(object): - def __init__(self, geometry, wavelengths=None, print_usage=False): + def __init__(self, geometry, wavelengths=None, print_usage=False, min_free_gpu_mem=300e6): if wavelengths is None: wavelengths = standard_wavelengths @@ -137,7 +137,6 @@ class GPUGeometry(object): self.vertices[:] = to_float3(geometry.mesh.vertices) self.triangles[:] = to_uint3(geometry.mesh.triangles) - self.nodes = ga.to_gpu(geometry.bvh.nodes) self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin) self.world_scale = np.float32(geometry.bvh.world_coords.world_scale) @@ -149,15 +148,38 @@ class GPUGeometry(object): self.colors = ga.to_gpu(colors) self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32)) + # Limit memory usage by splitting BVH into on and off-GPU parts + gpu_free, gpu_total = cuda.mem_get_info() + node_array_usage = geometry.bvh.nodes.nbytes + + # Figure out how many elements we can fit on the GPU, + # but no fewer than 100 elements, and no more than the number of actual nodes + n_nodes = len(geometry.bvh.nodes) + split_index = min( + max(int((gpu_free - min_free_gpu_mem) / geometry.bvh.nodes.itemsize),100), + n_nodes + ) + + self.nodes = ga.to_gpu(geometry.bvh.nodes[:split_index]) + n_extra = max(1, (n_nodes - split_index)) # forbid zero size + self.extra_nodes = mapped_empty(shape=n_extra, + dtype=geometry.bvh.nodes.dtype, + write_combined=True) + if split_index < n_nodes: + logger.info('Splitting BVH between GPU and CPU memory at node %d' % split_index) + self.extra_nodes[:] = geometry.bvh.nodes[split_index:] + self.gpudata = make_gpu_struct(geometry_struct_size, [Mapped(self.vertices), Mapped(self.triangles), self.material_codes, self.colors, self.nodes, + Mapped(self.extra_nodes), self.material_pointer_array, self.surface_pointer_array, self.world_origin, - self.world_scale]) + self.world_scale, + np.int32(len(self.nodes))]) self.geometry = geometry |