diff options
| -rw-r--r-- | bin/chroma-bvh | 3 | ||||
| -rw-r--r-- | chroma/bvh/bvh.py | 23 | ||||
| -rw-r--r-- | chroma/bvh/simple.py | 2 | ||||
| -rw-r--r-- | chroma/cuda/bvh.cu | 22 | ||||
| -rw-r--r-- | chroma/cuda/geometry.h | 8 | ||||
| -rw-r--r-- | chroma/cuda/geometry_types.h | 4 | ||||
| -rw-r--r-- | chroma/cuda/mesh.h | 6 | ||||
| -rw-r--r-- | chroma/cuda/render.cu | 6 | ||||
| -rw-r--r-- | chroma/gpu/bvh.py | 35 | ||||
| -rw-r--r-- | chroma/gpu/geometry.py | 3 | ||||
| -rw-r--r-- | test/test_bvh.py | 2 | 
11 files changed, 44 insertions, 70 deletions
| diff --git a/bin/chroma-bvh b/bin/chroma-bvh index d0ac7ad..1bc256e 100644 --- a/bin/chroma-bvh +++ b/bin/chroma-bvh @@ -11,7 +11,7 @@ from chroma.loader import load_geometry_from_string  from chroma.log import logger, logging  from chroma.bvh import make_simple_bvh  from chroma.gpu import create_cuda_context -from chroma.gpu.bvh import rebuild_tree, optimize_layer +from chroma.gpu.bvh import optimize_layer  def parse_bvh_id(id_str): @@ -157,7 +157,6 @@ def print_stat(geo_name, bvh_name, mesh_hash, bvh):      print '-' * 72      print 'World origin: (%f,%f,%f)' % tuple(bvh.world_coords.world_origin)      print 'World scale factor: %f' % bvh.world_coords.world_scale -    print 'Tree degree: %d' % bvh.degree      print 'Nodes: %d' % len(bvh)      print 'Layers:' diff --git a/chroma/bvh/bvh.py b/chroma/bvh/bvh.py index baf95d6..5579670 100644 --- a/chroma/bvh/bvh.py +++ b/chroma/bvh/bvh.py @@ -5,6 +5,9 @@ from pycuda.gpuarray import vec  uint4 = vec.uint4 # pylint: disable-msg=C0103,E1101 +CHILD_BITS = 26 +NCHILD_MASK = np.uint32(0xFFFF << 26) +  def unpack_nodes(nodes):      '''Creates a numpy record array with the contents of nodes      array unpacked into separate fields. @@ -15,19 +18,19 @@ def unpack_nodes(nodes):      Returns ndarray(shape=n, dtype=[('xlo', np.uint16), ('xhi', np.uint16),                                      ('ylo', np.uint16), ('yhi', np.uint16),                                      ('zlo', np.uint16), ('zhi', np.uint16), -                                    ('child', np.uint32), ('leaf', np.bool)]) +                                    ('child', np.uint32), ('nchild', np.uint16)])      '''      unpacked_dtype = np.dtype([('xlo', np.uint16), ('xhi', np.uint16),                                 ('ylo', np.uint16), ('yhi', np.uint16),                                 ('zlo', np.uint16), ('zhi', np.uint16), -                               ('child', np.uint32), ('leaf', np.bool)]) +                               ('child', np.uint32), ('nchild', np.uint16)])      unpacked = np.empty(shape=len(nodes), dtype=unpacked_dtype)      for axis in ['x', 'y', 'z']:          unpacked[axis+'lo'] = nodes[axis] & 0xFFFF          unpacked[axis+'hi'] = nodes[axis] >> 16 -    unpacked['child'] = nodes['w'] & 0x7FFFFFFF -    unpacked['leaf']  = nodes['w'] & 0x80000000 > 0 +    unpacked['child'] = nodes['w'] & ~NCHILD_MASK +    unpacked['nchild']  = nodes['w'] >> CHILD_BITS      return unpacked @@ -148,12 +151,9 @@ class BVH(object):      to manipulate the contents of the BVH node array directly.      ''' -    def __init__(self, degree, world_coords, nodes, layer_offsets): +    def __init__(self, world_coords, nodes, layer_offsets):          '''Create a BVH object with the given properties. -          ``degree``: int -              Number of child nodes per parent -                           ``world_coords``: chroma.bvh.WorldCoords                Transformation from fixed point to world coordinates. @@ -170,7 +170,6 @@ class BVH(object):                second entry must be 1, for the first child of the root node,                unless the root node is also a leaf node.          ''' -        self.degree = degree          self.world_coords = world_coords          self.nodes = nodes          self.layer_offsets = layer_offsets @@ -184,8 +183,7 @@ class BVH(object):          '''          layer_slice = slice(self.layer_bounds[layer_number],                              self.layer_bounds[layer_number+1]) -        return BVHLayerSlice(degree=self.degree, -                             world_coords=self.world_coords, +        return BVHLayerSlice(world_coords=self.world_coords,                               nodes=self.nodes[layer_slice])      def layer_count(self): @@ -224,8 +222,7 @@ class BVHLayerSlice(object):      except no ``layer_offsets`` list.      ''' -    def __init__(self, degree, world_coords, nodes): -        self.degree = degree +    def __init__(self, world_coords, nodes):          self.world_coords = world_coords          self.nodes = nodes diff --git a/chroma/bvh/simple.py b/chroma/bvh/simple.py index f87f6d8..6d9d144 100644 --- a/chroma/bvh/simple.py +++ b/chroma/bvh/simple.py @@ -24,7 +24,7 @@ def make_simple_bvh(mesh, degree):      # How many nodes total?      nodes, layer_bounds = concatenate_layers(layers) -    return BVH(degree, world_coords, nodes, layer_bounds[:-1]) +    return BVH(world_coords, nodes, layer_bounds[:-1]) diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu index 2ce0580..5b40bf7 100644 --- a/chroma/cuda/bvh.cu +++ b/chroma/cuda/bvh.cu @@ -1,6 +1,7 @@  //-*-c++-*-  #include <cuda.h> +#include "geometry_types.h"  #include "linalg.h"  #include "physical_constants.h" @@ -123,8 +124,6 @@ __device__ unsigned long long surface_half_area(const uint4 &node)    return x*y + y*z + z*x;  } -const unsigned int LEAF_BIT = (1U << 31); -  extern "C"  { @@ -192,7 +191,7 @@ extern "C"      leaf_node.x = q_lower.x | (q_upper.x << 16);      leaf_node.y = q_lower.y | (q_upper.y << 16);      leaf_node.z = q_lower.z | (q_upper.z << 16); -    leaf_node.w = triangle_id | LEAF_BIT; +    leaf_node.w = triangle_id;      leaf_nodes[triangle_id] = leaf_node;      morton_codes[triangle_id] = morton; @@ -238,12 +237,15 @@ extern "C"      // Scan remaining children +    unsigned int real_children = 1;      for (unsigned int i=1; i < n_children_per_node; i++) {        uint4 child_node = nodes[first_child + i];        if (child_node.x == 0)  	break;  // Hit first padding node in list of children +      real_children++; +        uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF);        uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16); @@ -251,7 +253,7 @@ extern "C"        upper = max(upper, child_upper);      } -    parent_node.w = first_child; +    parent_node.w = (real_children << CHILD_BITS) | first_child;      parent_node.x = upper.x << 16 | lower.x;      parent_node.y = upper.y << 16 | lower.y;      parent_node.z = upper.z << 16 | lower.z; @@ -284,12 +286,15 @@ extern "C"      uint3 upper = make_uint3(parent_node.x >> 16, parent_node.y >> 16, parent_node.z >> 16);      // Scan remaining children +    unsigned int real_children = 1;      for (unsigned int i=1; i < n_children_per_node; i++) {        uint4 child_node = child_nodes[first_child + i];        if (child_node.x == 0)  	break;  // Hit first padding node in list of children +      real_children++; +        uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF);        uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16); @@ -297,7 +302,8 @@ extern "C"        upper = max(upper, child_upper);      } -    parent_node.w = first_child + child_id_offset; +    parent_node.w = (real_children << CHILD_BITS) +      | (first_child + child_id_offset);      parent_node.x = upper.x << 16 | lower.x;      parent_node.y = upper.y << 16 | lower.y;      parent_node.z = upper.z << 16 | lower.z; @@ -320,9 +326,9 @@ extern "C"      unsigned int node_id = first_node + thread_id;      uint4 src_node = src_nodes[node_id]; -    unsigned int leaf_flag = src_node.w & 0x80000000; -    unsigned int child_id = src_node.w &  0x7FFFFFFF; -    src_node.w = leaf_flag | (child_id + child_id_offset); +    unsigned int nchild = src_node.w >> CHILD_BITS; +    unsigned int child_id = src_node.w &  ~NCHILD_MASK; +    src_node.w = (nchild << CHILD_BITS) | (child_id + child_id_offset);      dest_nodes[node_id] = src_node;        } diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h index 577da72..0735792 100644 --- a/chroma/cuda/geometry.h +++ b/chroma/cuda/geometry.h @@ -4,9 +4,6 @@  #include "geometry_types.h"  #include "linalg.h" -const unsigned int LEAF_BIT = (1U << 31); - -  __device__ float3   to_float3(const uint3 &a)  { @@ -39,8 +36,9 @@ get_node(Geometry *geometry, const unsigned int &i)      node_struct.lower = geometry->world_origin + to_float3(lower_int) * geometry->world_scale;      node_struct.upper = geometry->world_origin + to_float3(upper_int) * geometry->world_scale; -    node_struct.child = node.w & ~LEAF_BIT; // Mask off leaf bit -    node_struct.kind = node.w & LEAF_BIT ? LEAF_NODE : INTERNAL_NODE; +    node_struct.child = node.w & ~NCHILD_MASK; +    node_struct.nchild = node.w >> CHILD_BITS; +    node_struct.kind = node_struct.nchild == 0 ? LEAF_NODE : INTERNAL_NODE;      return node_struct;  } diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h index 89c53bc..b2b5947 100644 --- a/chroma/cuda/geometry_types.h +++ b/chroma/cuda/geometry_types.h @@ -28,12 +28,15 @@ struct Triangle  };  enum { INTERNAL_NODE, LEAF_NODE, PADDING_NODE }; +const unsigned int CHILD_BITS = 26; +const unsigned int NCHILD_MASK = (0xFFFFu << CHILD_BITS);  struct Node  {      float3 lower;      float3 upper;      unsigned int child; +    unsigned int nchild;      unsigned int kind;  }; @@ -48,7 +51,6 @@ struct Geometry      Surface **surfaces;      float3 world_origin;      float world_scale; -    unsigned int branch_degree;  };  #endif diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h index 70ffc23..c52a241 100644 --- a/chroma/cuda/mesh.h +++ b/chroma/cuda/mesh.h @@ -56,7 +56,9 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g,  	return -1;      unsigned int child_ptr_stack[STACK_SIZE]; +    unsigned int nchild_ptr_stack[STACK_SIZE];      child_ptr_stack[0] = root.child; +    nchild_ptr_stack[0] = root.nchild;      int curr = 0; @@ -65,9 +67,10 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g,      while (curr >= 0) {  	unsigned int first_child = child_ptr_stack[curr]; +	unsigned int nchild = nchild_ptr_stack[curr];  	curr--; -	for (unsigned int i=first_child; i < first_child + g->branch_degree; i++) { +	for (unsigned int i=first_child; i < first_child + nchild; i++) {  	    Node node = get_node(g, i);  	    count++; @@ -97,6 +100,7 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g,  		} else {  		    curr++;  		    child_ptr_stack[curr] = node.child; +		    nchild_ptr_stack[curr] = node.nchild;  		} // leaf or internal node?  	    } // hit node? diff --git a/chroma/cuda/render.cu b/chroma/cuda/render.cu index bbb7383..eda9028 100644 --- a/chroma/cuda/render.cu +++ b/chroma/cuda/render.cu @@ -70,7 +70,9 @@ render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g,      }      unsigned int child_ptr_stack[STACK_SIZE]; +    unsigned int nchild_ptr_stack[STACK_SIZE];      child_ptr_stack[0] = root.child; +    nchild_ptr_stack[0] = root.nchild;      int curr = 0; @@ -82,9 +84,10 @@ render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g,      while (curr >= 0) {  	unsigned int first_child = child_ptr_stack[curr]; +	unsigned int nchild = child_ptr_stack[curr];  	curr--; -	for (unsigned int i=first_child; i < first_child + g->branch_degree; i++) { +	for (unsigned int i=first_child; i < first_child + nchild; i++) {  	    Node node = get_node(g, i);  	    count++; @@ -128,6 +131,7 @@ render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g,  	      } else {  		curr++;  		child_ptr_stack[curr] = node.child; +		nchild_ptr_stack[curr] = node.nchild;  	      } // leaf or internal node?  	    } // hit node? diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py index 3a70fcc..937ff80 100644 --- a/chroma/gpu/bvh.py +++ b/chroma/gpu/bvh.py @@ -139,41 +139,6 @@ def concatenate_layers(layers):      return nodes.get(), layer_bounds -def rebuild_tree(bvh, start_layer): -    bvh_module = get_cu_module('bvh.cu', options=cuda_options, -                               include_source_directory=True) -    bvh_funcs = GPUFuncs(bvh_module) - -    layer_bounds = bvh.layer_bounds -    layer_ranges = zip(layer_bounds[:start_layer],  -                       layer_bounds[1:start_layer+1], -                       layer_bounds[2:start_layer+2]) -    layer_ranges.reverse() - -    gpu_nodes = ga.to_gpu(bvh.nodes) -    nthreads_per_block = 256 - -    for parent_start, parent_end, child_end in layer_ranges: -        nparent = parent_end - parent_start -        child_start = parent_end -        nchild = child_end - child_start -        parent_nodes = gpu_nodes[parent_start:] -        child_nodes = gpu_nodes[child_start:] - -        for first_index, elements_this_iter, nblocks_this_iter in \ -            chunk_iterator(nparent, nthreads_per_block, max_blocks=10000): -            bvh_funcs.make_parents(np.uint32(first_index), -                                   np.uint32(elements_this_iter), -                                   np.uint32(bvh.degree), -                                   parent_nodes, -                                   child_nodes, -                                   np.uint32(child_start), -                                   np.uint32(nchild), -                                   block=(nthreads_per_block,1,1), -                                   grid=(nblocks_this_iter,1)) -         -    return gpu_nodes.get() -  def optimize_layer(orig_nodes):      bvh_module = get_cu_module('bvh.cu', options=cuda_options,                                 include_source_directory=True) diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py index 77d33b2..f2156b0 100644 --- a/chroma/gpu/geometry.py +++ b/chroma/gpu/geometry.py @@ -134,8 +134,7 @@ class GPUGeometry(object):                                          self.material_pointer_array,                                          self.surface_pointer_array,                                          self.world_origin, -                                        self.world_scale, -                                        np.uint32(geometry.bvh.degree)]) +                                        self.world_scale])          self.geometry = geometry diff --git a/test/test_bvh.py b/test/test_bvh.py index b90b108..f27348b 100644 --- a/test/test_bvh.py +++ b/test/test_bvh.py @@ -83,7 +83,7 @@ def create_bvh():      nodes['w'][layer] = [ 0x00000001 ]      layer_offsets = list(layer_bounds[:-1]) # trim last entry -    bvh = BVH(degree=degree, world_coords=world_coords, +    bvh = BVH(world_coords=world_coords,                nodes=nodes, layer_offsets=layer_offsets)      return bvh | 
