diff options
author | Stan Seibert <stan@mtrr.org> | 2012-02-15 15:32:23 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2021-05-09 08:42:38 -0700 |
commit | 65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7 (patch) | |
tree | 4f19e62dc9794a9504641ef44eb6d985ae00fd84 | |
parent | bd4a0d0cc59b80dca84471174b2928bbd4c1586e (diff) | |
download | chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.gz chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.bz2 chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.zip |
New BVH algorithm: Recursive Grid
This is an adaptation of the original Chroma BVH construction algorithm.
The generation stage is very slow, but can be fixed.
-rw-r--r-- | bin/chroma-bvh | 4 | ||||
-rw-r--r-- | chroma/bvh/bvh.py | 4 | ||||
-rw-r--r-- | chroma/bvh/grid.py | 70 | ||||
-rw-r--r-- | chroma/cuda/bvh.cu | 42 | ||||
-rw-r--r-- | chroma/cuda/geometry_types.h | 2 | ||||
-rw-r--r-- | chroma/gpu/bvh.py | 30 | ||||
-rw-r--r-- | chroma/loader.py | 4 |
7 files changed, 138 insertions, 18 deletions
diff --git a/bin/chroma-bvh b/bin/chroma-bvh index 1bc256e..e3e6a8c 100644 --- a/bin/chroma-bvh +++ b/bin/chroma-bvh @@ -9,7 +9,7 @@ import numpy as np from chroma.cache import Cache from chroma.loader import load_geometry_from_string from chroma.log import logger, logging -from chroma.bvh import make_simple_bvh +from chroma.bvh import make_recursive_grid_bvh from chroma.gpu import create_cuda_context from chroma.gpu.bvh import optimize_layer @@ -85,7 +85,7 @@ def create(cache, args): start = time.time() context = create_cuda_context() - bvh = make_simple_bvh(geometry.mesh, degree=degree) + bvh = make_recursive_grid_bvh(geometry.mesh, target_degree=degree) context.pop() logger.info('BVH generated in %1.1f seconds.' % (time.time() - start)) diff --git a/chroma/bvh/bvh.py b/chroma/bvh/bvh.py index 5579670..6575fd6 100644 --- a/chroma/bvh/bvh.py +++ b/chroma/bvh/bvh.py @@ -5,8 +5,8 @@ from pycuda.gpuarray import vec uint4 = vec.uint4 # pylint: disable-msg=C0103,E1101 -CHILD_BITS = 26 -NCHILD_MASK = np.uint32(0xFFFF << 26) +CHILD_BITS = 28 +NCHILD_MASK = np.uint32(0xFFFF << CHILD_BITS) def unpack_nodes(nodes): '''Creates a numpy record array with the contents of nodes diff --git a/chroma/bvh/grid.py b/chroma/bvh/grid.py index 5392976..9ac0306 100644 --- a/chroma/bvh/grid.py +++ b/chroma/bvh/grid.py @@ -1,13 +1,16 @@ -from chroma.bvh.bvh import BVH, node_areas -from chroma.gpu.bvh import create_leaf_nodes +import numpy as np -def make_recursive_grid_bvh(mesh, bits=11): - '''Returns a binary tree BVH created using a 'recursive grid' method. +from chroma.bvh.bvh import BVH, CHILD_BITS +from chroma.gpu.bvh import create_leaf_nodes, merge_nodes_detailed, concatenate_layers + +MAX_CHILD = 2**(32 - CHILD_BITS) - 1 + +@profile +def make_recursive_grid_bvh(mesh, target_degree=3): + '''Returns a BVH created using a 'recursive grid' method. This method is somewhat similar to the original Chroma BVH generator, with a few adjustments: - * It is a strict binary tree, with dummy nodes inserted when - only one child is required on an inner node. * Every triangle is boxed individually by a leaf node in the BVH * Triangles are not rearranged in Morton order since the leaf nodes are sorted instead. @@ -15,14 +18,59 @@ def make_recursive_grid_bvh(mesh, bits=11): speed things up. ''' - world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh, bits) + world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh) # rearrange in morton order argsort = morton_codes.argsort() leaf_nodes = leaf_nodes[argsort] - morton_codes[argsort] + morton_codes = morton_codes[argsort] + + # Create parent layers + layers = [leaf_nodes] + while len(layers[0]) > 1: + top_layer = layers[0] + + # Figure out how many bits to shift in morton code to achieve desired + # degree + nnodes = len(top_layer) + nunique = len(np.unique(morton_codes)) + while nnodes / float(nunique) < target_degree and nunique > 1: + morton_codes >>= 1 + nunique = len(np.unique(morton_codes)) + + # Determine the grouping of child nodes into parents + parent_morton_codes, first_child = np.unique(morton_codes, return_index=True) + first_child = first_child.astype(np.int32) + nchild = np.ediff1d(first_child, to_end=nnodes - first_child[-1]) + + # Expand groups that have too many children + print 'Expanding %d parent nodes' % (np.count_nonzero(nchild > MAX_CHILD)) + while nchild.max() > MAX_CHILD: + index = nchild.argmax() + new_first = np.arange(first_child[index], first_child[index]+nchild[index], MAX_CHILD) + first_child = np.concatenate([first_child[:index], + new_first, + first_child[index+1:]]) + parent_morton_codes = np.concatenate([parent_morton_codes[:index], + np.repeat(parent_morton_codes[index], len(new_first)), + parent_morton_codes[index+1:]]) + nchild = np.ediff1d(first_child, to_end=nnodes - first_child[-1]) + + if nunique > 1: # Yes, I'm pedantic + plural = 's' + else: + plural = '' + print 'Merging %d nodes to %d parent%s' % (nnodes, nunique, plural) + + assert (nchild > 0).all() + assert (nchild < 31).all() + + #print 'Max|Avg children: %d|%d' % (nchild.max(), nchild.mean()) - - print node_areas(leaf_nodes).sum() * world_coords.world_scale**2 + # Merge children + parents = merge_nodes_detailed(top_layer, first_child, nchild) + layers = [parents] + layers + morton_codes = parent_morton_codes - + nodes, layer_bounds = concatenate_layers(layers) + return BVH(world_coords, nodes, layer_bounds[:-1]) diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu index 8bc5f04..0c64157 100644 --- a/chroma/cuda/bvh.cu +++ b/chroma/cuda/bvh.cu @@ -262,6 +262,48 @@ extern "C" } __global__ void + make_parents_detailed(unsigned int first_node, + unsigned int elements_this_launch, + uint4 *child_nodes, + uint4 *parent_nodes, + int *first_children, + int *nchildren) + { + unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x; + if (thread_id >= elements_this_launch) + return; + + unsigned int parent_id = first_node + thread_id; + unsigned int first_child = first_children[parent_id]; + unsigned int nchild = nchildren[parent_id]; + + // Load first child + uint4 parent_node = child_nodes[first_child]; + uint3 lower = make_uint3(parent_node.x & 0xFFFF, parent_node.y & 0xFFFF, parent_node.z & 0xFFFF); + uint3 upper = make_uint3(parent_node.x >> 16, parent_node.y >> 16, parent_node.z >> 16); + + // Scan remaining children + for (unsigned int i=1; i < nchild; i++) { + uint4 child_node = child_nodes[first_child + i]; + + uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF); + uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16); + + lower = min(lower, child_lower); + upper = max(upper, child_upper); + } + + parent_node.w = (nchild << CHILD_BITS) + | first_child; + parent_node.x = upper.x << 16 | lower.x; + parent_node.y = upper.y << 16 | lower.y; + parent_node.z = upper.z << 16 | lower.z; + + parent_nodes[parent_id] = parent_node; + } + + + __global__ void make_parents(unsigned int first_node, unsigned int elements_this_launch, unsigned int n_children_per_node, diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h index b2b5947..59d6706 100644 --- a/chroma/cuda/geometry_types.h +++ b/chroma/cuda/geometry_types.h @@ -28,7 +28,7 @@ struct Triangle }; enum { INTERNAL_NODE, LEAF_NODE, PADDING_NODE }; -const unsigned int CHILD_BITS = 26; +const unsigned int CHILD_BITS = 28; const unsigned int NCHILD_MASK = (0xFFFFu << CHILD_BITS); struct Node diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py index f23329f..00e2e69 100644 --- a/chroma/gpu/bvh.py +++ b/chroma/gpu/bvh.py @@ -81,6 +81,36 @@ def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1): return world_coords, nodes.get(), morton_codes_host +def merge_nodes_detailed(nodes, first_child, nchild): + '''Merges nodes into len(first_child) parent nodes, using + the provided arrays to determine the index of the first + child of each parent, and how many children there are.''' + bvh_module = get_cu_module('bvh.cu', options=cuda_options, + include_source_directory=True) + bvh_funcs = GPUFuncs(bvh_module) + + gpu_nodes = ga.to_gpu(nodes) + gpu_first_child = ga.to_gpu(first_child.astype(np.int32)) + gpu_nchild = ga.to_gpu(nchild.astype(np.int32)) + + nparent = len(first_child) + gpu_parent_nodes = ga.empty(shape=nparent, dtype=ga.vec.uint4) + + nthreads_per_block = 256 + for first_index, elements_this_iter, nblocks_this_iter in \ + chunk_iterator(nparent, nthreads_per_block, max_blocks=10000): + + bvh_funcs.make_parents_detailed(np.uint32(first_index), + np.uint32(elements_this_iter), + gpu_nodes, + gpu_parent_nodes, + gpu_first_child, + gpu_nchild, + block=(nthreads_per_block,1,1), + grid=(nblocks_this_iter,1)) + + return gpu_parent_nodes.get() + def merge_nodes(nodes, degree, max_ratio=None): bvh_module = get_cu_module('bvh.cu', options=cuda_options, include_source_directory=True) diff --git a/chroma/loader.py b/chroma/loader.py index f65c310..7408d2f 100644 --- a/chroma/loader.py +++ b/chroma/loader.py @@ -4,7 +4,7 @@ import time from chroma.log import logger from chroma.cache import Cache -from chroma.bvh import make_simple_bvh +from chroma.bvh import make_simple_bvh, make_recursive_grid_bvh from chroma.geometry import Geometry, Solid, Mesh, vacuum from chroma.detector import Detector from chroma.stl import mesh_from_stl @@ -148,7 +148,7 @@ def load_bvh(geometry, bvh_name="default", start = time.time() context = create_cuda_context(cuda_device) - bvh = make_simple_bvh(geometry.mesh, degree=2) + bvh = make_recursive_grid_bvh(geometry.mesh, target_degree=2) context.pop() logger.info('BVH generated in %1.1f seconds.' % (time.time() - start)) |