New BVH algorithm: Recursive Grid

This is an adaptation of the original Chroma BVH construction algorithm. The generation stage is very slow, but can be fixed.
author: Stan Seibert <stan@mtrr.org> 2012-02-15 15:32:23 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2021-05-09 08:42:38 -0700
commit: 65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7 (patch)
tree: 4f19e62dc9794a9504641ef44eb6d985ae00fd84
parent: bd4a0d0cc59b80dca84471174b2928bbd4c1586e (diff)
download: chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.gz
chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.bz2
chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.zip
7 files changed, 138 insertions, 18 deletions
diff --git a/bin/chroma-bvh b/bin/chroma-bvh
index 1bc256e..e3e6a8c 100644
--- a/bin/chroma-bvh
+++ b/bin/chroma-bvh
@@ -9,7 +9,7 @@ import numpy as np
 from chroma.cache import Cache
 from chroma.loader import load_geometry_from_string
 from chroma.log import logger, logging
-from chroma.bvh import make_simple_bvh
+from chroma.bvh import make_recursive_grid_bvh
 from chroma.gpu import create_cuda_context
 from chroma.gpu.bvh import optimize_layer
 
@@ -85,7 +85,7 @@ def create(cache, args):
 
     start = time.time()
     context = create_cuda_context()
-    bvh = make_simple_bvh(geometry.mesh, degree=degree)
+    bvh = make_recursive_grid_bvh(geometry.mesh, target_degree=degree)
     context.pop()
     logger.info('BVH generated in %1.1f seconds.' % (time.time() - start))
 
diff --git a/chroma/bvh/bvh.py b/chroma/bvh/bvh.py
index 5579670..6575fd6 100644
--- a/chroma/bvh/bvh.py
+++ b/chroma/bvh/bvh.py
@@ -5,8 +5,8 @@ from pycuda.gpuarray import vec
 
 uint4 = vec.uint4 # pylint: disable-msg=C0103,E1101
 
-CHILD_BITS = 26
-NCHILD_MASK = np.uint32(0xFFFF << 26)
+CHILD_BITS = 28
+NCHILD_MASK = np.uint32(0xFFFF << CHILD_BITS)
 
 def unpack_nodes(nodes):
     '''Creates a numpy record array with the contents of nodes
diff --git a/chroma/bvh/grid.py b/chroma/bvh/grid.py
index 5392976..9ac0306 100644
--- a/chroma/bvh/grid.py
+++ b/chroma/bvh/grid.py
@@ -1,13 +1,16 @@
-from chroma.bvh.bvh import BVH, node_areas
-from chroma.gpu.bvh import create_leaf_nodes
+import numpy as np
 
-def make_recursive_grid_bvh(mesh, bits=11):
-    '''Returns a binary tree BVH created using a 'recursive grid' method.
+from chroma.bvh.bvh import BVH, CHILD_BITS
+from chroma.gpu.bvh import create_leaf_nodes, merge_nodes_detailed, concatenate_layers
+
+MAX_CHILD = 2**(32 - CHILD_BITS) - 1
+
+@profile
+def make_recursive_grid_bvh(mesh, target_degree=3):
+    '''Returns a BVH created using a 'recursive grid' method.
 
     This method is somewhat similar to the original Chroma BVH generator, 
     with a few adjustments:
-      * It is a strict binary tree, with dummy nodes inserted when
-        only one child is required on an inner node.
       * Every triangle is boxed individually by a leaf node in the BVH
       * Triangles are not rearranged in Morton order since the
         leaf nodes are sorted instead.
@@ -15,14 +18,59 @@ def make_recursive_grid_bvh(mesh, bits=11):
         speed things up.
 
     '''
-    world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh, bits)
+    world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh)
 
     # rearrange in morton order
     argsort = morton_codes.argsort()
     leaf_nodes = leaf_nodes[argsort]
-    morton_codes[argsort]
+    morton_codes = morton_codes[argsort]
+
+    # Create parent layers
+    layers = [leaf_nodes]
+    while len(layers[0]) > 1:        
+        top_layer = layers[0]
+
+        # Figure out how many bits to shift in morton code to achieve desired
+        # degree
+        nnodes = len(top_layer)
+        nunique = len(np.unique(morton_codes))
+        while nnodes / float(nunique) < target_degree and nunique > 1:
+            morton_codes >>= 1
+            nunique = len(np.unique(morton_codes))
+
+        # Determine the grouping of child nodes into parents
+        parent_morton_codes, first_child = np.unique(morton_codes, return_index=True)
+        first_child = first_child.astype(np.int32)
+        nchild = np.ediff1d(first_child, to_end=nnodes - first_child[-1])
+
+        # Expand groups that have too many children
+        print 'Expanding %d parent nodes' % (np.count_nonzero(nchild > MAX_CHILD))
+        while nchild.max() > MAX_CHILD:
+            index = nchild.argmax()
+            new_first = np.arange(first_child[index], first_child[index]+nchild[index], MAX_CHILD)
+            first_child = np.concatenate([first_child[:index],
+                                          new_first,
+                                          first_child[index+1:]])
+            parent_morton_codes = np.concatenate([parent_morton_codes[:index],
+                           np.repeat(parent_morton_codes[index], len(new_first)), 
+                                                  parent_morton_codes[index+1:]])
+            nchild = np.ediff1d(first_child, to_end=nnodes - first_child[-1])
+
+        if nunique > 1: # Yes, I'm pedantic
+            plural = 's'
+        else:
+            plural = ''
+        print 'Merging %d nodes to %d parent%s' % (nnodes, nunique, plural)
+
+        assert (nchild > 0).all()
+        assert (nchild < 31).all()
+        
+        #print 'Max|Avg children: %d|%d' % (nchild.max(), nchild.mean())
 
-    
-    print node_areas(leaf_nodes).sum() * world_coords.world_scale**2
+        # Merge children
+        parents = merge_nodes_detailed(top_layer, first_child, nchild)
+        layers = [parents] + layers
+        morton_codes = parent_morton_codes
 
-    
+    nodes, layer_bounds = concatenate_layers(layers)
+    return BVH(world_coords, nodes, layer_bounds[:-1])
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
index 8bc5f04..0c64157 100644
--- a/chroma/cuda/bvh.cu
+++ b/chroma/cuda/bvh.cu
@@ -262,6 +262,48 @@ extern "C"
   }
 
   __global__ void
+  make_parents_detailed(unsigned int first_node,
+			unsigned int elements_this_launch, 
+			uint4 *child_nodes,
+			uint4 *parent_nodes,
+			int *first_children,
+			int *nchildren)
+  {
+    unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+    if (thread_id >= elements_this_launch)
+      return;
+
+    unsigned int parent_id = first_node + thread_id;
+    unsigned int first_child = first_children[parent_id];
+    unsigned int nchild = nchildren[parent_id];
+
+    // Load first child
+    uint4 parent_node = child_nodes[first_child];
+    uint3 lower = make_uint3(parent_node.x & 0xFFFF, parent_node.y & 0xFFFF, parent_node.z & 0xFFFF);
+    uint3 upper = make_uint3(parent_node.x >> 16, parent_node.y >> 16, parent_node.z >> 16);
+    
+    // Scan remaining children
+    for (unsigned int i=1; i < nchild; i++) {
+      uint4 child_node = child_nodes[first_child + i];
+      
+      uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF);
+      uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16);
+
+      lower = min(lower, child_lower);
+      upper = max(upper, child_upper);
+    }
+
+    parent_node.w = (nchild << CHILD_BITS)
+      | first_child;
+    parent_node.x = upper.x << 16 | lower.x;
+    parent_node.y = upper.y << 16 | lower.y;
+    parent_node.z = upper.z << 16 | lower.z;
+
+    parent_nodes[parent_id] = parent_node;
+  }
+
+
+  __global__ void
   make_parents(unsigned int first_node,
 	       unsigned int elements_this_launch, 
 	       unsigned int n_children_per_node,
diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h
index b2b5947..59d6706 100644
--- a/chroma/cuda/geometry_types.h
+++ b/chroma/cuda/geometry_types.h
@@ -28,7 +28,7 @@ struct Triangle
 };
 
 enum { INTERNAL_NODE, LEAF_NODE, PADDING_NODE };
-const unsigned int CHILD_BITS = 26;
+const unsigned int CHILD_BITS = 28;
 const unsigned int NCHILD_MASK = (0xFFFFu << CHILD_BITS);
 
 struct Node
diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py
index f23329f..00e2e69 100644
--- a/chroma/gpu/bvh.py
+++ b/chroma/gpu/bvh.py
@@ -81,6 +81,36 @@ def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1):
     return world_coords, nodes.get(), morton_codes_host
 
 
+def merge_nodes_detailed(nodes, first_child, nchild):
+    '''Merges nodes into len(first_child) parent nodes, using
+    the provided arrays to determine the index of the first
+    child of each parent, and how many children there are.'''
+    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+                               include_source_directory=True)
+    bvh_funcs = GPUFuncs(bvh_module)
+
+    gpu_nodes = ga.to_gpu(nodes)
+    gpu_first_child = ga.to_gpu(first_child.astype(np.int32))
+    gpu_nchild = ga.to_gpu(nchild.astype(np.int32))
+
+    nparent = len(first_child)
+    gpu_parent_nodes = ga.empty(shape=nparent, dtype=ga.vec.uint4)
+
+    nthreads_per_block = 256
+    for first_index, elements_this_iter, nblocks_this_iter in \
+            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
+
+        bvh_funcs.make_parents_detailed(np.uint32(first_index),
+                                        np.uint32(elements_this_iter),
+                                        gpu_nodes,
+                                        gpu_parent_nodes,
+                                        gpu_first_child,
+                                        gpu_nchild,
+                                        block=(nthreads_per_block,1,1),
+                                        grid=(nblocks_this_iter,1))
+
+    return gpu_parent_nodes.get()
+
 def merge_nodes(nodes, degree, max_ratio=None):
     bvh_module = get_cu_module('bvh.cu', options=cuda_options,
                                include_source_directory=True)
diff --git a/chroma/loader.py b/chroma/loader.py
index f65c310..7408d2f 100644
--- a/chroma/loader.py
+++ b/chroma/loader.py
@@ -4,7 +4,7 @@ import time
 
 from chroma.log import logger
 from chroma.cache import Cache
-from chroma.bvh import make_simple_bvh
+from chroma.bvh import make_simple_bvh, make_recursive_grid_bvh
 from chroma.geometry import Geometry, Solid, Mesh, vacuum
 from chroma.detector import Detector
 from chroma.stl import mesh_from_stl
@@ -148,7 +148,7 @@ def load_bvh(geometry,  bvh_name="default",
         start = time.time()
 
         context = create_cuda_context(cuda_device)
-        bvh = make_simple_bvh(geometry.mesh, degree=2)
+        bvh = make_recursive_grid_bvh(geometry.mesh, target_degree=2)
         context.pop()
 
         logger.info('BVH generated in %1.1f seconds.' % (time.time() - start))
author	Stan Seibert <stan@mtrr.org>	2012-02-15 15:32:23 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2021-05-09 08:42:38 -0700
commit	65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7 (patch)
tree	4f19e62dc9794a9504641ef44eb6d985ae00fd84
parent	bd4a0d0cc59b80dca84471174b2928bbd4c1586e (diff)
download	chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.gz chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.bz2 chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.zip