summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Seibert <stan@mtrr.org>2012-02-15 15:32:23 -0500
committertlatorre <tlatorre@uchicago.edu>2021-05-09 08:42:38 -0700
commit65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7 (patch)
tree4f19e62dc9794a9504641ef44eb6d985ae00fd84
parentbd4a0d0cc59b80dca84471174b2928bbd4c1586e (diff)
downloadchroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.gz
chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.tar.bz2
chroma-65bf1df06c3a97f8346fb27b9c7dcc5f4d52e5f7.zip
New BVH algorithm: Recursive Grid
This is an adaptation of the original Chroma BVH construction algorithm. The generation stage is very slow, but can be fixed.
-rw-r--r--bin/chroma-bvh4
-rw-r--r--chroma/bvh/bvh.py4
-rw-r--r--chroma/bvh/grid.py70
-rw-r--r--chroma/cuda/bvh.cu42
-rw-r--r--chroma/cuda/geometry_types.h2
-rw-r--r--chroma/gpu/bvh.py30
-rw-r--r--chroma/loader.py4
7 files changed, 138 insertions, 18 deletions
diff --git a/bin/chroma-bvh b/bin/chroma-bvh
index 1bc256e..e3e6a8c 100644
--- a/bin/chroma-bvh
+++ b/bin/chroma-bvh
@@ -9,7 +9,7 @@ import numpy as np
from chroma.cache import Cache
from chroma.loader import load_geometry_from_string
from chroma.log import logger, logging
-from chroma.bvh import make_simple_bvh
+from chroma.bvh import make_recursive_grid_bvh
from chroma.gpu import create_cuda_context
from chroma.gpu.bvh import optimize_layer
@@ -85,7 +85,7 @@ def create(cache, args):
start = time.time()
context = create_cuda_context()
- bvh = make_simple_bvh(geometry.mesh, degree=degree)
+ bvh = make_recursive_grid_bvh(geometry.mesh, target_degree=degree)
context.pop()
logger.info('BVH generated in %1.1f seconds.' % (time.time() - start))
diff --git a/chroma/bvh/bvh.py b/chroma/bvh/bvh.py
index 5579670..6575fd6 100644
--- a/chroma/bvh/bvh.py
+++ b/chroma/bvh/bvh.py
@@ -5,8 +5,8 @@ from pycuda.gpuarray import vec
uint4 = vec.uint4 # pylint: disable-msg=C0103,E1101
-CHILD_BITS = 26
-NCHILD_MASK = np.uint32(0xFFFF << 26)
+CHILD_BITS = 28
+NCHILD_MASK = np.uint32(0xFFFF << CHILD_BITS)
def unpack_nodes(nodes):
'''Creates a numpy record array with the contents of nodes
diff --git a/chroma/bvh/grid.py b/chroma/bvh/grid.py
index 5392976..9ac0306 100644
--- a/chroma/bvh/grid.py
+++ b/chroma/bvh/grid.py
@@ -1,13 +1,16 @@
-from chroma.bvh.bvh import BVH, node_areas
-from chroma.gpu.bvh import create_leaf_nodes
+import numpy as np
-def make_recursive_grid_bvh(mesh, bits=11):
- '''Returns a binary tree BVH created using a 'recursive grid' method.
+from chroma.bvh.bvh import BVH, CHILD_BITS
+from chroma.gpu.bvh import create_leaf_nodes, merge_nodes_detailed, concatenate_layers
+
+MAX_CHILD = 2**(32 - CHILD_BITS) - 1
+
+@profile
+def make_recursive_grid_bvh(mesh, target_degree=3):
+ '''Returns a BVH created using a 'recursive grid' method.
This method is somewhat similar to the original Chroma BVH generator,
with a few adjustments:
- * It is a strict binary tree, with dummy nodes inserted when
- only one child is required on an inner node.
* Every triangle is boxed individually by a leaf node in the BVH
* Triangles are not rearranged in Morton order since the
leaf nodes are sorted instead.
@@ -15,14 +18,59 @@ def make_recursive_grid_bvh(mesh, bits=11):
speed things up.
'''
- world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh, bits)
+ world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh)
# rearrange in morton order
argsort = morton_codes.argsort()
leaf_nodes = leaf_nodes[argsort]
- morton_codes[argsort]
+ morton_codes = morton_codes[argsort]
+
+ # Create parent layers
+ layers = [leaf_nodes]
+ while len(layers[0]) > 1:
+ top_layer = layers[0]
+
+ # Figure out how many bits to shift in morton code to achieve desired
+ # degree
+ nnodes = len(top_layer)
+ nunique = len(np.unique(morton_codes))
+ while nnodes / float(nunique) < target_degree and nunique > 1:
+ morton_codes >>= 1
+ nunique = len(np.unique(morton_codes))
+
+ # Determine the grouping of child nodes into parents
+ parent_morton_codes, first_child = np.unique(morton_codes, return_index=True)
+ first_child = first_child.astype(np.int32)
+ nchild = np.ediff1d(first_child, to_end=nnodes - first_child[-1])
+
+ # Expand groups that have too many children
+ print 'Expanding %d parent nodes' % (np.count_nonzero(nchild > MAX_CHILD))
+ while nchild.max() > MAX_CHILD:
+ index = nchild.argmax()
+ new_first = np.arange(first_child[index], first_child[index]+nchild[index], MAX_CHILD)
+ first_child = np.concatenate([first_child[:index],
+ new_first,
+ first_child[index+1:]])
+ parent_morton_codes = np.concatenate([parent_morton_codes[:index],
+ np.repeat(parent_morton_codes[index], len(new_first)),
+ parent_morton_codes[index+1:]])
+ nchild = np.ediff1d(first_child, to_end=nnodes - first_child[-1])
+
+ if nunique > 1: # Yes, I'm pedantic
+ plural = 's'
+ else:
+ plural = ''
+ print 'Merging %d nodes to %d parent%s' % (nnodes, nunique, plural)
+
+ assert (nchild > 0).all()
+ assert (nchild < 31).all()
+
+ #print 'Max|Avg children: %d|%d' % (nchild.max(), nchild.mean())
-
- print node_areas(leaf_nodes).sum() * world_coords.world_scale**2
+ # Merge children
+ parents = merge_nodes_detailed(top_layer, first_child, nchild)
+ layers = [parents] + layers
+ morton_codes = parent_morton_codes
-
+ nodes, layer_bounds = concatenate_layers(layers)
+ return BVH(world_coords, nodes, layer_bounds[:-1])
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
index 8bc5f04..0c64157 100644
--- a/chroma/cuda/bvh.cu
+++ b/chroma/cuda/bvh.cu
@@ -262,6 +262,48 @@ extern "C"
}
__global__ void
+ make_parents_detailed(unsigned int first_node,
+ unsigned int elements_this_launch,
+ uint4 *child_nodes,
+ uint4 *parent_nodes,
+ int *first_children,
+ int *nchildren)
+ {
+ unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+ if (thread_id >= elements_this_launch)
+ return;
+
+ unsigned int parent_id = first_node + thread_id;
+ unsigned int first_child = first_children[parent_id];
+ unsigned int nchild = nchildren[parent_id];
+
+ // Load first child
+ uint4 parent_node = child_nodes[first_child];
+ uint3 lower = make_uint3(parent_node.x & 0xFFFF, parent_node.y & 0xFFFF, parent_node.z & 0xFFFF);
+ uint3 upper = make_uint3(parent_node.x >> 16, parent_node.y >> 16, parent_node.z >> 16);
+
+ // Scan remaining children
+ for (unsigned int i=1; i < nchild; i++) {
+ uint4 child_node = child_nodes[first_child + i];
+
+ uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF);
+ uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16);
+
+ lower = min(lower, child_lower);
+ upper = max(upper, child_upper);
+ }
+
+ parent_node.w = (nchild << CHILD_BITS)
+ | first_child;
+ parent_node.x = upper.x << 16 | lower.x;
+ parent_node.y = upper.y << 16 | lower.y;
+ parent_node.z = upper.z << 16 | lower.z;
+
+ parent_nodes[parent_id] = parent_node;
+ }
+
+
+ __global__ void
make_parents(unsigned int first_node,
unsigned int elements_this_launch,
unsigned int n_children_per_node,
diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h
index b2b5947..59d6706 100644
--- a/chroma/cuda/geometry_types.h
+++ b/chroma/cuda/geometry_types.h
@@ -28,7 +28,7 @@ struct Triangle
};
enum { INTERNAL_NODE, LEAF_NODE, PADDING_NODE };
-const unsigned int CHILD_BITS = 26;
+const unsigned int CHILD_BITS = 28;
const unsigned int NCHILD_MASK = (0xFFFFu << CHILD_BITS);
struct Node
diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py
index f23329f..00e2e69 100644
--- a/chroma/gpu/bvh.py
+++ b/chroma/gpu/bvh.py
@@ -81,6 +81,36 @@ def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1):
return world_coords, nodes.get(), morton_codes_host
+def merge_nodes_detailed(nodes, first_child, nchild):
+ '''Merges nodes into len(first_child) parent nodes, using
+ the provided arrays to determine the index of the first
+ child of each parent, and how many children there are.'''
+ bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+ include_source_directory=True)
+ bvh_funcs = GPUFuncs(bvh_module)
+
+ gpu_nodes = ga.to_gpu(nodes)
+ gpu_first_child = ga.to_gpu(first_child.astype(np.int32))
+ gpu_nchild = ga.to_gpu(nchild.astype(np.int32))
+
+ nparent = len(first_child)
+ gpu_parent_nodes = ga.empty(shape=nparent, dtype=ga.vec.uint4)
+
+ nthreads_per_block = 256
+ for first_index, elements_this_iter, nblocks_this_iter in \
+ chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
+
+ bvh_funcs.make_parents_detailed(np.uint32(first_index),
+ np.uint32(elements_this_iter),
+ gpu_nodes,
+ gpu_parent_nodes,
+ gpu_first_child,
+ gpu_nchild,
+ block=(nthreads_per_block,1,1),
+ grid=(nblocks_this_iter,1))
+
+ return gpu_parent_nodes.get()
+
def merge_nodes(nodes, degree, max_ratio=None):
bvh_module = get_cu_module('bvh.cu', options=cuda_options,
include_source_directory=True)
diff --git a/chroma/loader.py b/chroma/loader.py
index f65c310..7408d2f 100644
--- a/chroma/loader.py
+++ b/chroma/loader.py
@@ -4,7 +4,7 @@ import time
from chroma.log import logger
from chroma.cache import Cache
-from chroma.bvh import make_simple_bvh
+from chroma.bvh import make_simple_bvh, make_recursive_grid_bvh
from chroma.geometry import Geometry, Solid, Mesh, vacuum
from chroma.detector import Detector
from chroma.stl import mesh_from_stl
@@ -148,7 +148,7 @@ def load_bvh(geometry, bvh_name="default",
start = time.time()
context = create_cuda_context(cuda_device)
- bvh = make_simple_bvh(geometry.mesh, degree=2)
+ bvh = make_recursive_grid_bvh(geometry.mesh, target_degree=2)
context.pop()
logger.info('BVH generated in %1.1f seconds.' % (time.time() - start))