diff options
Diffstat (limited to 'chroma/gpu/bvh.py')
-rw-r--r-- | chroma/gpu/bvh.py | 139 |
1 files changed, 139 insertions, 0 deletions
import numpy as np
import pycuda.driver as cuda
from pycuda import gpuarray as ga
from pycuda import characterize

from chroma.gpu.tools import get_cu_module, cuda_options, \
    chunk_iterator, to_uint3, to_float3, GPUFuncs, mapped_empty, Mapped

from chroma.bvh.bvh import WorldCoords


def round_up_to_multiple(x, multiple):
    '''Round the integer ``x`` up to the nearest multiple of ``multiple``.

    Returns ``x`` unchanged when it is already an exact multiple.
    '''
    remainder = x % multiple
    if remainder == 0:
        return x
    else:
        return x + multiple - remainder


def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1):
    '''Compute the leaf nodes surrounding a triangle mesh.

      ``mesh``: chroma.geometry.Mesh
        Triangles to box
      ``morton_bits``: int
        Number of bits to use per dimension when computing Morton code.
      ``round_to_multiple``: int
        Round the number of nodes created up to multiple of this number
        Extra nodes will be all zero.

    Returns (world_coords, nodes, morton_codes), where
      ``world_coords``: chroma.bvh.WorldCoords
        Defines the fixed point coordinate system
      ``nodes``: ndarray(shape=len(mesh.triangles), dtype=uint4)
        List of leaf nodes.  Child IDs will be set to triangle offsets.
      ``morton_codes``: ndarray(shape=len(mesh.triangles), dtype=np.uint64)
        Morton codes for each triangle, using ``morton_bits`` per axis.
        Must be <= 16 bits.
    '''
    # Load GPU functions
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    # Compute the fixed-point world coordinate system: origin at the
    # minimum vertex, scale chosen so the largest extent spans the
    # 16-bit fixed-point range (2**16 - 2 leaves headroom for rounding).
    world_origin = mesh.vertices.min(axis=0)
    world_scale = np.max((mesh.vertices.max(axis=0) - world_origin)) \
        / (2**16 - 2)
    world_coords = WorldCoords(world_origin=world_origin,
                               world_scale=world_scale)

    # Put triangles and vertices in mapped host memory so the GPU can
    # read them directly without an explicit device copy.
    triangles = mapped_empty(shape=len(mesh.triangles), dtype=ga.vec.uint3,
                             write_combined=True)
    triangles[:] = to_uint3(mesh.triangles)
    vertices = mapped_empty(shape=len(mesh.vertices), dtype=ga.vec.float3,
                            write_combined=True)
    vertices[:] = to_float3(mesh.vertices)

    # Output arrays; nodes is zero-padded up to the requested multiple.
    nodes = ga.zeros(shape=round_up_to_multiple(len(triangles),
                                                round_to_multiple),
                     dtype=ga.vec.uint4)
    morton_codes = ga.empty(shape=len(triangles), dtype=np.uint64)

    # Convert world coords to GPU-friendly types
    world_origin = ga.vec.make_float3(*world_origin)
    world_scale = np.float32(world_scale)

    # Launch the leaf-making kernel in chunks to stay under the grid
    # size limit.
    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(len(triangles), nthreads_per_block,
                           max_blocks=30000):
        bvh_funcs.make_leaves(np.uint32(first_index),
                              np.uint32(elements_this_iter),
                              Mapped(triangles), Mapped(vertices),
                              world_origin, world_scale,
                              nodes, morton_codes,
                              block=(nthreads_per_block, 1, 1),
                              grid=(nblocks_this_iter, 1))

    # The kernel produces 16-bit Morton components; shift down to the
    # requested precision.
    morton_codes_host = morton_codes.get() >> (16 - morton_bits)
    return world_coords, nodes.get(), morton_codes_host


def merge_nodes(nodes, degree):
    '''Merge ``nodes`` into parent nodes, ``degree`` children per parent.

    ``nodes``: ndarray(dtype=uint4)
      Packed BVH nodes to merge.
    ``degree``: int
      Branching factor; each parent bounds ``degree`` consecutive children.

    Returns an ndarray(dtype=uint4) of parent nodes, zero-padded up to a
    multiple of ``degree`` (except when only a single root parent remains).
    '''
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    # BUGFIX: use floor division.  Under Python 3, "/" returns a float,
    # which breaks round_up_to_multiple() and the ga.zeros shape below.
    nparent = len(nodes) // degree
    if nparent == 1:
        # A single root node needs no padding.
        nparent_pad = nparent
    else:
        nparent_pad = round_up_to_multiple(nparent, degree)
    parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)

    nthreads_per_block = 256
    for first_index, elements_this_iter, nblocks_this_iter in \
            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
        bvh_funcs.make_parents(np.uint32(first_index),
                               np.uint32(elements_this_iter),
                               np.uint32(degree),
                               parent_nodes,
                               cuda.In(nodes),
                               np.uint32(0),
                               block=(nthreads_per_block, 1, 1),
                               grid=(nblocks_this_iter, 1))

    return parent_nodes.get()


def concatenate_layers(layers):
    '''Concatenate BVH layers into one node array, offsetting child IDs.

    ``layers``: sequence of ndarray(dtype=uint4)
      BVH layers ordered root-first; the last layer holds the leaf nodes.

    Returns (nodes, layer_bounds) where ``nodes`` is the concatenated
    ndarray(dtype=uint4) and ``layer_bounds`` gives the start index of
    each layer within it (with a leading 0 and trailing total length).
    '''
    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                               include_source_directory=True)
    bvh_funcs = GPUFuncs(bvh_module)

    # Put 0 at beginning of list.
    # BUGFIX: materialize the lengths; under Python 3, map() returns an
    # iterator, which np.cumsum cannot consume.
    layer_bounds = np.insert(np.cumsum([len(layer) for layer in layers]),
                             0, 0)
    nodes = ga.empty(shape=int(layer_bounds[-1]), dtype=ga.vec.uint4)
    nthreads_per_block = 256

    for layer_start, layer_end, layer in zip(layer_bounds[:-1],
                                             layer_bounds[1:],
                                             layers):
        if layer_end == layer_bounds[-1]:
            # leaf nodes need no offset
            child_offset = 0
        else:
            # Children of this layer live in the next layer, which
            # starts at layer_end in the concatenated array.
            child_offset = layer_end

        for first_index, elements_this_iter, nblocks_this_iter in \
                chunk_iterator(layer_end - layer_start, nthreads_per_block,
                               max_blocks=10000):
            bvh_funcs.copy_and_offset(np.uint32(first_index),
                                      np.uint32(elements_this_iter),
                                      np.uint32(child_offset),
                                      cuda.In(layer),
                                      nodes[layer_start:],
                                      block=(nthreads_per_block, 1, 1),
                                      grid=(nblocks_this_iter, 1))

    return nodes.get(), layer_bounds