diff options
author | Stan Seibert <stan@mtrr.org> | 2012-02-15 11:11:47 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2021-05-09 08:42:38 -0700 |
commit | fa0d480ca71b59cac938dfd5ff8538a36f7a2d67 (patch) | |
tree | e402a3cdc5367ad31cc827ae31df43af15bea6dc | |
parent | 81bcdf9264935195988e171d1339d9a47df139f5 (diff) | |
download | chroma-fa0d480ca71b59cac938dfd5ff8538a36f7a2d67.tar.gz chroma-fa0d480ca71b59cac938dfd5ff8538a36f7a2d67.tar.bz2 chroma-fa0d480ca71b59cac938dfd5ff8538a36f7a2d67.zip |
Implementation of "node splitting" which places children into separate
parent nodes if combining them would result in a parent node that is
excessively large compared to the surface area of the children.
This doesn't help as much as you might imagine.
-rw-r--r-- | chroma/bvh/simple.py | 11 | ||||
-rw-r--r-- | chroma/gpu/bvh.py | 77 |
2 files changed, 75 insertions, 13 deletions
diff --git a/chroma/bvh/simple.py b/chroma/bvh/simple.py index 6d9d144..961c79c 100644 --- a/chroma/bvh/simple.py +++ b/chroma/bvh/simple.py @@ -1,5 +1,6 @@ from chroma.bvh.bvh import BVH -from chroma.gpu.bvh import create_leaf_nodes, merge_nodes, concatenate_layers +from chroma.gpu.bvh import create_leaf_nodes, merge_nodes, concatenate_layers, optimize_layer +import numpy as np def make_simple_bvh(mesh, degree): '''Returns a BVH tree created by simple grouping of Morton ordered nodes. @@ -16,10 +17,10 @@ def make_simple_bvh(mesh, degree): # Create parent layers layers = [leaf_nodes] while len(layers[0]) > 1: - parent = merge_nodes(layers[0], degree=degree) - if len(parent) > 1: - assert len(parent) % degree == 0 - layers = [parent] + layers + #top = optimize_layer(layers[0]) + top = layers[0] + parent = merge_nodes(top, degree=degree, max_ratio=2) + layers = [parent, top] + layers[1:] # How many nodes total? nodes, layer_bounds = concatenate_layers(layers) diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py index 937ff80..f23329f 100644 --- a/chroma/gpu/bvh.py +++ b/chroma/gpu/bvh.py @@ -6,7 +6,7 @@ from pycuda import characterize from chroma.gpu.tools import get_cu_module, cuda_options, \ chunk_iterator, to_uint3, to_float3, GPUFuncs, mapped_empty, Mapped -from chroma.bvh.bvh import WorldCoords +from chroma.bvh.bvh import WorldCoords, node_areas, NCHILD_MASK, CHILD_BITS def round_up_to_multiple(x, multiple): remainder = x % multiple @@ -57,7 +57,7 @@ def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1): # Call GPU to compute nodes nodes = ga.zeros(shape=round_up_to_multiple(len(triangles), - round_to_multiple), + 1),#round_to_multiple), dtype=ga.vec.uint4) morton_codes = ga.empty(shape=len(triangles), dtype=np.uint64) @@ -81,17 +81,20 @@ def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1): return world_coords, nodes.get(), morton_codes_host -def merge_nodes(nodes, degree): +def merge_nodes(nodes, degree, max_ratio=None): bvh_module = get_cu_module('bvh.cu', options=cuda_options, include_source_directory=True) bvh_funcs = GPUFuncs(bvh_module) nparent = len(nodes) / degree + if len(nodes) % degree != 0: + nparent += 1 + if nparent == 1: nparent_pad = nparent else: - nparent_pad = round_up_to_multiple(nparent, degree) - parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4) + nparent_pad = round_up_to_multiple(nparent, 1)#degree) + gpu_parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4) nthreads_per_block = 256 for first_index, elements_this_iter, nblocks_this_iter in \ @@ -99,14 +102,71 @@ def merge_nodes(nodes, degree): bvh_funcs.make_parents(np.uint32(first_index), np.uint32(elements_this_iter), np.uint32(degree), - parent_nodes, + gpu_parent_nodes, cuda.In(nodes), np.uint32(0), np.uint32(len(nodes)), block=(nthreads_per_block,1,1), grid=(nblocks_this_iter,1)) - return parent_nodes.get() + parent_nodes = gpu_parent_nodes.get() + + if max_ratio is not None: + areas = node_areas(parent_nodes) + child_areas = node_areas(nodes) + + excessive_area = np.zeros(shape=len(areas), dtype=bool) + for i, parent_area in enumerate(areas): + nchild = parent_nodes['w'][i] >> CHILD_BITS + child_index = parent_nodes['w'][i] & ~NCHILD_MASK + child_area = child_areas[child_index:child_index+nchild].sum() + #if parent_area > 1e9: + # print i, 'Children: %e, Parent: %e' % (child_area, parent_area) + if child_area/parent_area < 0.3: + excessive_area[i] = True + #print i, 'Children: %e, Parent: %e' % (child_area, parent_area) + + extra_slots = round_up_to_multiple((degree - 1) * np.count_nonzero(excessive_area), 1) + print 'Extra slots:', extra_slots + new_parent_nodes = np.zeros(shape=len(parent_nodes) + extra_slots, + dtype=parent_nodes.dtype) + new_parent_nodes[:len(parent_nodes)] = parent_nodes + + offset = 0 + for count, index in enumerate(np.argwhere(excessive_area)): + index = index[0] + offset + nchild = new_parent_nodes['w'][index] >> CHILD_BITS + child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK + new_parent_nodes[index] = nodes[child_index] + #new_parent_nodes['w'][index] = 1 << CHILD_BITS | child_index + tmp_nchild = new_parent_nodes['w'][index] >> CHILD_BITS + tmp_child_index = new_parent_nodes['w'][index] & ~NCHILD_MASK + new_parent_nodes['w'][index] = tmp_nchild << CHILD_BITS | (tmp_child_index + len(nodes)) + + if nchild == 1: + continue + + # slide everyone over + #print index, nchild, len(new_parent_nodes) + new_parent_nodes[index+nchild:] = new_parent_nodes[index+1:-nchild+1] + offset += nchild - 1 + for sibling in xrange(nchild - 1): + new_parent_index = index + 1 + sibling + new_parent_nodes[new_parent_index] = nodes[child_index + sibling + 1] + if new_parent_nodes['x'][new_parent_index] != 0: + tmp_nchild = new_parent_nodes['w'][new_parent_index] >> CHILD_BITS + tmp_child_index = new_parent_nodes['w'][new_parent_index] & ~NCHILD_MASK + new_parent_nodes['w'][new_parent_index] = tmp_nchild << CHILD_BITS | (tmp_child_index + len(nodes)) + + #new_parent_nodes['w'][new_parent_index] = 1 << CHILD_BITS | (child_index + sibling + 1) + + + #print 'intermediate: %e' % node_areas(new_parent_nodes).max() + print 'old: %e' % node_areas(parent_nodes).max() + print 'new: %e' % node_areas(new_parent_nodes).max() + parent_nodes = new_parent_nodes + + return parent_nodes def concatenate_layers(layers): bvh_module = get_cu_module('bvh.cu', options=cuda_options, @@ -136,7 +196,6 @@ def concatenate_layers(layers): nodes[layer_start:], block=(nthreads_per_block,1,1), grid=(nblocks_this_iter,1)) - return nodes.get(), layer_bounds def optimize_layer(orig_nodes): @@ -217,6 +276,8 @@ def optimize_layer(orig_nodes): swaps += 1 #print 'swap', test_index+1, better_i + assert 0 < better_i < len(nodes) + assert 0 < test_index + 1 < len(nodes) bvh_funcs.swap(np.uint32(test_index+1), np.uint32(better_i), nodes, block=(1,1,1), grid=(1,1)) cuda.Context.get_current().synchronize() |