summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- chroma/bvh/grid.py 3
-rw-r--r-- chroma/cuda/bvh.cu 15
-rw-r--r-- chroma/gpu/bvh.py 19
3 files changed, 36 insertions, 1 deletions
diff --git a/chroma/bvh/grid.py b/chroma/bvh/grid.py
index d23b0bb..d8e1f38 100644
--- a/chroma/bvh/grid.py
+++ b/chroma/bvh/grid.py
@@ -1,7 +1,7 @@
import numpy as np
from chroma.bvh.bvh import BVH, CHILD_BITS
-from chroma.gpu.bvh import create_leaf_nodes, merge_nodes_detailed, concatenate_layers
+from chroma.gpu.bvh import create_leaf_nodes, merge_nodes_detailed, concatenate_layers, collapse_chains
MAX_CHILD = 2**(32 - CHILD_BITS) - 1
@@ -87,4 +87,5 @@ def make_recursive_grid_bvh(mesh, target_degree=3):
morton_codes = parent_morton_codes
nodes, layer_bounds = concatenate_layers(layers)
+ nodes = collapse_chains(nodes, layer_bounds)
return BVH(world_coords, nodes, layer_bounds[:-1])
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
index 0c64157..882c22e 100644
--- a/chroma/cuda/bvh.cu
+++ b/chroma/cuda/bvh.cu
@@ -522,4 +522,19 @@ extern "C"
node[b_index] = temp4;
}
+  // Replace every single-child node in [start, end) with a copy of its child.
+  //
+  // The fourth word of a node packs the child count (read here as
+  // w >> CHILD_BITS) together with the index of its child (read here as
+  // w & ~NCHILD_MASK).  A node whose child count is exactly 1 is
+  // overwritten with the node stored at that child index; all other
+  // nodes are left unchanged.
+  //
+  // Each thread handles indices start + global thread id, advancing by
+  // the total number of launched threads, so any launch size covers the
+  // whole range.
+  __global__ void collapse_child(unsigned int start, unsigned int end,
+                                 uint4 *node)
+  {
+    const unsigned int global_id = blockDim.x * blockIdx.x + threadIdx.x;
+    const unsigned int total_threads = gridDim.x * blockDim.x;
+
+    for (unsigned int idx = start + global_id; idx < end; idx += total_threads) {
+      const unsigned int packed = node[idx].w;
+
+      // Only nodes with exactly one child are collapsed.
+      if ((packed >> CHILD_BITS) != 1)
+        continue;
+
+      const unsigned int only_child = packed & ~NCHILD_MASK;
+      node[idx] = node[only_child];
+    }
+  }
+
} // extern "C"
diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py
index 00e2e69..b138bb9 100644
--- a/chroma/gpu/bvh.py
+++ b/chroma/gpu/bvh.py
@@ -111,6 +111,25 @@ def merge_nodes_detailed(nodes, first_child, nchild):
return gpu_parent_nodes.get()
+def collapse_chains(nodes, layer_bounds):
+ bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+ include_source_directory=True)
+ bvh_funcs = GPUFuncs(bvh_module)
+
+ gpu_nodes = ga.to_gpu(nodes)
+
+ bounds = zip(layer_bounds[:-1], layer_bounds[1:])[:-1]
+ bounds.reverse()
+ nthreads_per_block = 256
+ for start, end in bounds:
+ bvh_funcs.collapse_child(np.uint32(start),
+ np.uint32(end),
+ gpu_nodes,
+ block=(nthreads_per_block,1,1),
+ grid=(120,1))
+ return gpu_nodes.get()
+
+
def merge_nodes(nodes, degree, max_ratio=None):
bvh_module = get_cu_module('bvh.cu', options=cuda_options,
include_source_directory=True)