author     Stan Seibert <stan@mtrr.org>        2012-01-18 23:10:21 -0500
committer  tlatorre <tlatorre@uchicago.edu>    2021-05-09 08:42:38 -0700
commit     14309ab8618a80c7f67c7d80d43bbb4779f0bb2f (patch)
tree       73bf89f3910eb39f92daf0793b81161be1c90b36
parent     4c212dec68cd154577299b825aff00c0ed765813 (diff)
Simple BVH generator using new infrastructure
-rw-r--r--  chroma/bvh/__init__.py    |   1
-rw-r--r--  chroma/bvh/bvh.py         |  23
-rw-r--r--  chroma/bvh/grid.py        |  15
-rw-r--r--  chroma/bvh/simple.py      |  31
-rw-r--r--  chroma/cuda/bvh.cu        |  75
-rw-r--r--  chroma/gpu/bvh.py         | 139
-rw-r--r--  chroma/gpu/tools.py       |  33
-rw-r--r--  test/test_bvh_simple.py   |  14
8 files changed, 308 insertions, 23 deletions
diff --git a/chroma/bvh/__init__.py b/chroma/bvh/__init__.py
index fa3d476..7ca6620 100644
--- a/chroma/bvh/__init__.py
+++ b/chroma/bvh/__init__.py
@@ -1,2 +1,3 @@
from chroma.bvh.bvh import *
from chroma.bvh.grid import make_recursive_grid_bvh
+from chroma.bvh.simple import make_simple_bvh
diff --git a/chroma/bvh/bvh.py b/chroma/bvh/bvh.py
index d13d285..f530b09 100644
--- a/chroma/bvh/bvh.py
+++ b/chroma/bvh/bvh.py
@@ -196,6 +196,19 @@ class BVH(object):
        '''Returns the number of nodes in this BVH'''
        return len(self.nodes)
+def node_area(nodes):
+    '''Returns the area of a list of nodes in fixed point units.'''
+    unpacked = unpack_nodes(nodes)
+    delta = np.empty(shape=len(nodes),
+                     dtype=[('x', float), ('y', float), ('z', float)])
+    for axis in ['x', 'y', 'z']:
+        delta[axis] = unpacked[axis+'hi'] - unpacked[axis+'lo']
+
+    half_area = delta['x']*delta['y'] + delta['y']*delta['z'] \
+        + delta['z']*delta['x']
+
+    return 2.0 * half_area.sum()
+
class BVHLayerSlice(object):
    '''A single layer in a bounding volume hierarchy represented as a slice
    in the node array of the parent.
@@ -219,12 +232,4 @@ class BVHLayerSlice(object):
    def area(self):
        '''Return the surface area of all the nodes in this layer in world
        units.'''
-        unpacked = unpack_nodes(self.nodes)
-        delta = np.empty(shape=len(self.nodes),
-                         dtype=[('x', float), ('y', float), ('z', float)])
-        for axis in ['x', 'y', 'z']:
-            delta[axis] = unpacked[axis+'hi'] - unpacked[axis+'lo']
-
-        half_area = delta['x']*delta['y'] + delta['y']*delta['z'] \
-            + delta['z']*delta['x']
-        return 2.0 * half_area.sum() * self.world_coords.world_scale**2
+        return node_area(self.nodes) * self.world_coords.world_scale**2
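node_area() sums box surface areas in fixed-point units; BVHLayerSlice.area then multiplies by world_scale**2 to convert to world units. For reference, a minimal numpy sketch of the same half-area formula applied to plain lower/upper corner arrays (hypothetical inputs, independent of the packed uint4 node layout):

import numpy as np

# Two axis-aligned boxes: a unit cube and a 2 x 3 x 4 box.
lo = np.array([[0, 0, 0], [0, 0, 0]], dtype=float)
hi = np.array([[1, 1, 1], [2, 3, 4]], dtype=float)

delta = hi - lo                           # per-box extents (dx, dy, dz)
half_area = (delta[:, 0]*delta[:, 1] +
             delta[:, 1]*delta[:, 2] +
             delta[:, 2]*delta[:, 0])
total_area = 2.0 * half_area.sum()        # 6 + 52 = 58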
diff --git a/chroma/bvh/grid.py b/chroma/bvh/grid.py
index 9fcf125..0b36a2f 100644
--- a/chroma/bvh/grid.py
+++ b/chroma/bvh/grid.py
@@ -1,6 +1,7 @@
-from chroma.bvh.bvh import BVH
+from chroma.bvh.bvh import BVH, node_area
+from chroma.gpu.bvh import create_leaf_nodes
-def make_recursive_grid_bvh(self, mesh, bits=11):
+def make_recursive_grid_bvh(mesh, bits=11):
    '''Returns a binary tree BVH created using a 'recursive grid' method.
    This method is somewhat similar to the original Chroma BVH generator,
@@ -14,6 +15,14 @@ def make_recursive_grid_bvh(mesh, bits=11):
    speed things up.
    '''
-    pass
+    world_coords, leaf_nodes, morton_codes = create_leaf_nodes(mesh, bits)
+    # rearrange in morton order
+    argsort = morton_codes.argsort()
+    leaf_nodes = leaf_nodes[argsort]
+    morton_codes = morton_codes[argsort]
+
+    print node_area(leaf_nodes) * world_coords.world_scale**2
+
+
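Sorting by Morton code is what gives the leaf list spatial locality: argsort of the codes yields a permutation, and every per-leaf array must be gathered through that same permutation (as done above for both leaf_nodes and morton_codes). A small sketch of the pattern with made-up codes:

import numpy as np

morton_codes = np.array([9, 2, 7, 4], dtype=np.uint64)
leaf_ids = np.arange(4)                   # stands in for the leaf node records

order = morton_codes.argsort()            # [1, 3, 2, 0]
leaf_ids = leaf_ids[order]                # [1, 3, 2, 0]
morton_codes = morton_codes[order]        # [2, 4, 7, 9], now nondecreasing

assert (np.diff(morton_codes.astype(np.int64)) >= 0).all()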
diff --git a/chroma/bvh/simple.py b/chroma/bvh/simple.py
new file mode 100644
index 0000000..6c0e02b
--- /dev/null
+++ b/chroma/bvh/simple.py
@@ -0,0 +1,31 @@
+from chroma.bvh.bvh import BVH, node_area
+from chroma.gpu.bvh import create_leaf_nodes, merge_nodes, concatenate_layers
+
+def make_simple_bvh(mesh, degree):
+    '''Returns a BVH tree created by simple grouping of Morton ordered nodes.
+    '''
+    world_coords, leaf_nodes, morton_codes = \
+        create_leaf_nodes(mesh, round_to_multiple=degree)
+
+    # rearrange in morton order
+    argsort = morton_codes.argsort()
+    leaf_nodes = leaf_nodes[argsort]
+    assert len(leaf_nodes) % degree == 0
+
+    # Create parent layers
+    layers = [leaf_nodes]
+    while len(layers[0]) > 1:
+        parent = merge_nodes(layers[0], degree=degree)
+        if len(parent) > 1:
+            assert len(parent) % degree == 0
+        layers = [parent] + layers
+
+    # How many nodes total?
+    nodes, layer_bounds = concatenate_layers(layers)
+
+    for i, (layer_start, layer_end) in enumerate(zip(layer_bounds[:-1],
+                                                     layer_bounds[1:])):
+        print i, node_area(nodes[layer_start:layer_end]) * world_coords.world_scale**2
+
+
+
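make_simple_bvh builds the tree bottom-up: each merge_nodes pass turns the current top layer into roughly len/degree parents, padded up to a multiple of degree so the next pass sees aligned groups, and the loop stops once only the root remains. A quick sketch of how the layer sizes shrink under that padding rule (layer_sizes is a hypothetical helper mirroring merge_nodes' arithmetic):

def layer_sizes(n_leaves, degree):
    '''Padded node count of each layer, root first, leaves last.'''
    def round_up(x, multiple):
        return x if x % multiple == 0 else x + multiple - x % multiple

    sizes = [round_up(n_leaves, degree)]   # leaves are padded the same way
    while sizes[0] > 1:
        nparent = sizes[0] // degree
        if nparent > 1:
            nparent = round_up(nparent, degree)
        sizes.insert(0, nparent)
    return sizes

print(layer_sizes(1000, 2))
# [1, 2, 4, 8, 16, 32, 64, 126, 250, 500, 1000]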
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
index 50e877a..a6888db 100644
--- a/chroma/cuda/bvh.cu
+++ b/chroma/cuda/bvh.cu
@@ -186,17 +186,6 @@ extern "C"
        spread3_16(q_centroid.x)
        | (spread3_16(q_centroid.y) << 1)
        | (spread3_16(q_centroid.z) << 2);
-
-    //unsigned long long morton =
-    //    spread3_16(triangle.x & 0xFFFF)
-    //    | (spread3_16(triangle.y & 0xFFFF) << 1)
-    //    | (spread3_16(triangle.z & 0xFFFF) << 2);
-
-
-    //unsigned long long morton =
-    //    ( ((unsigned long long) q_centroid.x) << 32 )
-    //    | ( ((unsigned long long) q_centroid.y) << 16 )
-    //    | ( ((unsigned long long) q_centroid.z) << 0 );
    // Write leaf and morton code
    uint4 leaf_node;
@@ -270,6 +259,70 @@ extern "C"
    nodes[parent_layer_offset + parent_id] = parent_node;
}
+__global__ void
+make_parents(unsigned int first_node,
+             unsigned int elements_this_launch,
+             unsigned int n_children_per_node,
+             uint4 *parent_nodes,
+             uint4 *child_nodes,
+             unsigned int child_id_offset)
+{
+    unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+    if (thread_id >= elements_this_launch)
+        return;
+
+    unsigned int parent_id = first_node + thread_id;
+    unsigned int first_child = parent_id * n_children_per_node;
+
+    // Load first child
+    uint4 parent_node = child_nodes[first_child];
+    uint3 lower = make_uint3(parent_node.x & 0xFFFF, parent_node.y & 0xFFFF, parent_node.z & 0xFFFF);
+    uint3 upper = make_uint3(parent_node.x >> 16, parent_node.y >> 16, parent_node.z >> 16);
+
+    // Scan remaining children
+    for (unsigned int i=1; i < n_children_per_node; i++) {
+        uint4 child_node = child_nodes[first_child + i];
+
+        if (child_node.x == 0)
+            break; // Hit first padding node in list of children
+
+        uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF);
+        uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16);
+
+        lower = min(lower, child_lower);
+        upper = max(upper, child_upper);
+    }
+
+    parent_node.w = first_child;
+    parent_node.x = upper.x << 16 | lower.x;
+    parent_node.y = upper.y << 16 | lower.y;
+    parent_node.z = upper.z << 16 | lower.z;
+
+    parent_nodes[parent_id] = parent_node;
+}
+
+__global__ void
+copy_and_offset(unsigned int first_node,
+                unsigned int elements_this_launch,
+                unsigned int child_id_offset,
+                uint4 *src_nodes,
+                uint4 *dest_nodes)
+
+{
+    unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+    if (thread_id >= elements_this_launch)
+        return;
+
+    unsigned int node_id = first_node + thread_id;
+    uint4 src_node = src_nodes[node_id];
+
+    unsigned int leaf_flag = src_node.w & 0x80000000;
+    unsigned int child_id = src_node.w & 0x7FFFFFFF;
+    src_node.w = leaf_flag | (child_id + child_id_offset);
+
+    dest_nodes[node_id] = src_node;
+}
+
__global__ void distance_to_prev(unsigned int first_node, unsigned int threads_this_round,
                                 uint4 *node, unsigned int *area)
{
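make_parents depends on the packed node layout used throughout bvh.cu: each uint4 stores the quantized lower corner in the low 16 bits of x/y/z, the upper corner in the high 16 bits, and the first-child index (plus leaf flag) in w, so merging children reduces to a per-axis min of lower corners and max of upper corners. A short Python sketch of that packing and merge for a single axis (pack/unpack are hypothetical helper names):

def pack(lower, upper):
    # one axis of a node: upper corner in the high 16 bits, lower in the low 16
    return (upper << 16) | lower

def unpack(word):
    return word & 0xFFFF, word >> 16

# Two children spanning [2, 9] and [5, 12] in fixed-point units
lo_a, hi_a = unpack(pack(2, 9))
lo_b, hi_b = unpack(pack(5, 12))

parent_word = pack(min(lo_a, lo_b), max(hi_a, hi_b))
assert unpack(parent_word) == (2, 12)     # parent box spans both children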
diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py
new file mode 100644
index 0000000..b9c920d
--- /dev/null
+++ b/chroma/gpu/bvh.py
@@ -0,0 +1,139 @@
+import numpy as np
+import pycuda.driver as cuda
+from pycuda import gpuarray as ga
+from pycuda import characterize
+
+from chroma.gpu.tools import get_cu_module, cuda_options, \
+    chunk_iterator, to_uint3, to_float3, GPUFuncs, mapped_empty, Mapped
+
+from chroma.bvh.bvh import WorldCoords
+
+def round_up_to_multiple(x, multiple):
+    remainder = x % multiple
+    if remainder == 0:
+        return x
+    else:
+        return x + multiple - remainder
+
+def create_leaf_nodes(mesh, morton_bits=16, round_to_multiple=1):
+    '''Compute the leaf nodes surrounding a triangle mesh.
+
+      ``mesh``: chroma.geometry.Mesh
+        Triangles to box
+      ``morton_bits``: int
+        Number of bits to use per dimension when computing Morton code.
+      ``round_to_multiple``: int
+        Round the number of nodes created up to multiple of this number
+        Extra nodes will be all zero.
+
+    Returns (world_coords, nodes, morton_codes), where
+      ``world_coords``: chroma.bvh.WorldCoords
+        Defines the fixed point coordinate system
+      ``nodes``: ndarray(shape=len(mesh.triangles), dtype=uint4)
+        List of leaf nodes. Child IDs will be set to triangle offsets.
+      ``morton_codes``: ndarray(shape=len(mesh.triangles), dtype=np.uint64)
+        Morton codes for each triangle, using ``morton_bits`` per axis.
+        Must be <= 16 bits.
+    '''
+    # Load GPU functions
+    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+                               include_source_directory=True)
+    bvh_funcs = GPUFuncs(bvh_module)
+
+    # compute world coordinates
+    world_origin = mesh.vertices.min(axis=0)
+    world_scale = np.max((mesh.vertices.max(axis=0) - world_origin)) \
+        / (2**16 - 2)
+    world_coords = WorldCoords(world_origin=world_origin,
+                               world_scale=world_scale)
+
+    # Put triangles and vertices in mapped host memory
+    triangles = mapped_empty(shape=len(mesh.triangles), dtype=ga.vec.uint3,
+                             write_combined=True)
+    triangles[:] = to_uint3(mesh.triangles)
+    vertices = mapped_empty(shape=len(mesh.vertices), dtype=ga.vec.float3,
+                            write_combined=True)
+    vertices[:] = to_float3(mesh.vertices)
+
+    # Call GPU to compute nodes
+    nodes = ga.zeros(shape=round_up_to_multiple(len(triangles),
+                                                round_to_multiple),
+                     dtype=ga.vec.uint4)
+    morton_codes = ga.empty(shape=len(triangles), dtype=np.uint64)
+
+    # Convert world coords to GPU-friendly types
+    world_origin = ga.vec.make_float3(*world_origin)
+    world_scale = np.float32(world_scale)
+
+    nthreads_per_block = 256
+    for first_index, elements_this_iter, nblocks_this_iter in \
+            chunk_iterator(len(triangles), nthreads_per_block,
+                           max_blocks=30000):
+        bvh_funcs.make_leaves(np.uint32(first_index),
+                              np.uint32(elements_this_iter),
+                              Mapped(triangles), Mapped(vertices),
+                              world_origin, world_scale,
+                              nodes, morton_codes,
+                              block=(nthreads_per_block,1,1),
+                              grid=(nblocks_this_iter,1))
+
+    morton_codes_host = morton_codes.get() >> (16 - morton_bits)
+    return world_coords, nodes.get(), morton_codes_host
+
+
+def merge_nodes(nodes, degree):
+    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+                               include_source_directory=True)
+    bvh_funcs = GPUFuncs(bvh_module)
+
+    nparent = len(nodes) / degree
+    if nparent == 1:
+        nparent_pad = nparent
+    else:
+        nparent_pad = round_up_to_multiple(nparent, degree)
+    parent_nodes = ga.zeros(shape=nparent_pad, dtype=ga.vec.uint4)
+
+    nthreads_per_block = 256
+    for first_index, elements_this_iter, nblocks_this_iter in \
+            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
+        bvh_funcs.make_parents(np.uint32(first_index),
+                               np.uint32(elements_this_iter),
+                               np.uint32(degree),
+                               parent_nodes,
+                               cuda.In(nodes),
+                               np.uint32(0),
+                               block=(nthreads_per_block,1,1),
+                               grid=(nblocks_this_iter,1))
+
+    return parent_nodes.get()
+
+def concatenate_layers(layers):
+    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+                               include_source_directory=True)
+    bvh_funcs = GPUFuncs(bvh_module)
+    # Put 0 at beginning of list
+    layer_bounds = np.insert(np.cumsum(map(len, layers)), 0, 0)
+    nodes = ga.empty(shape=int(layer_bounds[-1]), dtype=ga.vec.uint4)
+    nthreads_per_block = 256
+
+    for layer_start, layer_end, layer in zip(layer_bounds[:-1],
+                                             layer_bounds[1:],
+                                             layers):
+        if layer_end == layer_bounds[-1]:
+            # leaf nodes need no offset
+            child_offset = 0
+        else:
+            child_offset = layer_end
+
+        for first_index, elements_this_iter, nblocks_this_iter in \
+                chunk_iterator(layer_end-layer_start, nthreads_per_block,
+                               max_blocks=10000):
+            bvh_funcs.copy_and_offset(np.uint32(first_index),
+                                      np.uint32(elements_this_iter),
+                                      np.uint32(child_offset),
+                                      cuda.In(layer),
+                                      nodes[layer_start:],
+                                      block=(nthreads_per_block,1,1),
+                                      grid=(nblocks_this_iter,1))
+
+    return nodes.get(), layer_bounds
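concatenate_layers flattens the per-layer arrays into one node array, recording layer boundaries as a cumulative sum with a leading zero; copy_and_offset then shifts each parent's child index by the start of the next layer (where its children now live), while the last layer, the leaves, keeps its triangle offsets untouched. A short numpy sketch of just that bookkeeping, with made-up layer sizes:

import numpy as np

layer_lens = [1, 2, 4, 8]          # node counts, root layer first, leaves last
layer_bounds = np.insert(np.cumsum(layer_lens), 0, 0)
# [0, 1, 3, 7, 15]: layer i occupies nodes[layer_bounds[i]:layer_bounds[i+1]]

offsets = []
for start, end in zip(layer_bounds[:-1], layer_bounds[1:]):
    if end == layer_bounds[-1]:
        offsets.append(0)          # leaf layer: w already holds triangle offsets
    else:
        offsets.append(end)        # children live in the next layer, starting here

assert offsets == [1, 3, 7, 0]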
diff --git a/chroma/gpu/tools.py b/chroma/gpu/tools.py
index 707a45d..b151f80 100644
--- a/chroma/gpu/tools.py
+++ b/chroma/gpu/tools.py
@@ -181,3 +181,36 @@ def format_size(size):
def format_array(name, array):
    return '%-15s %6s %6s' % \
        (name, format_size(len(array)), format_size(array.nbytes))
+
+def Mapped(array):
+    '''Analog to pycuda.driver.InOut(), but indicates this array
+    is memory mapped to the device space and should not be copied.'''
+    return np.intp(array.base.get_device_pointer())
+
+def mapped_alloc(pagelocked_alloc_func, shape, dtype, write_combined):
+    '''Returns a pagelocked host array mapped into the CUDA device
+    address space, with a gpudata field set so it just works with CUDA
+    functions.'''
+    flags = cuda.host_alloc_flags.DEVICEMAP
+    if write_combined:
+        flags |= cuda.host_alloc_flags.WRITECOMBINED
+    array = pagelocked_alloc_func(shape=shape, dtype=dtype, mem_flags=flags)
+    return array
+
+def mapped_empty(shape, dtype, write_combined=False):
+    '''See mapped_alloc()'''
+    return mapped_alloc(cuda.pagelocked_empty, shape, dtype, write_combined)
+
+def mapped_empty_like(other, write_combined=False):
+    '''See mapped_alloc()'''
+    return mapped_alloc(cuda.pagelocked_empty, other.shape, other.dtype,
+                        write_combined)
+
+def mapped_zeros(shape, dtype, write_combined=False):
+    '''See mapped_alloc()'''
+    return mapped_alloc(cuda.pagelocked_zeros, shape, dtype, write_combined)
+
+def mapped_zeros_like(other, write_combined=False):
+    '''See mapped_alloc()'''
+    return mapped_alloc(cuda.pagelocked_zeros, other.shape, other.dtype,
+                        write_combined)
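The mapped_* helpers give zero-copy arrays: page-locked host memory that the GPU addresses directly over the bus, so large read-mostly inputs such as the triangle and vertex lists never need an explicit device copy, and Mapped() passes a kernel the device-space pointer of such an array. A minimal usage sketch, assuming the context allows host mapping (as in the test below); the double_it kernel is made up for illustration and is not part of chroma:

import numpy as np
import pycuda.autoinit
from pycuda.compiler import SourceModule
from chroma.gpu.tools import mapped_empty, Mapped

mod = SourceModule('''
__global__ void double_it(float *a, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        a[i] *= 2.0f;
}
''')
double_it = mod.get_function('double_it')

a = mapped_empty(shape=16, dtype=np.float32)     # pagelocked + device mapped
a[:] = np.arange(16, dtype=np.float32)

# No htod/dtoh copies: the kernel operates on the mapped host memory in place.
double_it(Mapped(a), np.int32(len(a)), block=(16, 1, 1), grid=(1, 1))
pycuda.autoinit.context.synchronize()

assert (a == 2*np.arange(16, dtype=np.float32)).all()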
diff --git a/test/test_bvh_simple.py b/test/test_bvh_simple.py
new file mode 100644
index 0000000..20c0df5
--- /dev/null
+++ b/test/test_bvh_simple.py
@@ -0,0 +1,14 @@
+import pycuda.autoinit
+import unittest
+from chroma.bvh import make_simple_bvh, BVH
+import chroma.models
+
+import numpy as np
+#from numpy.testing import assert_array_max_ulp, assert_array_equal, \
+#    assert_approx_equal
+
+def test_simple_bvh():
+    mesh = chroma.models.lionsolid()
+    bvh = make_simple_bvh(mesh, degree=2)
+    assert isinstance(bvh, BVH)
+