BVH optimization to sort child nodes by area. Only has a small effect.

author: Stan Seibert <stan@mtrr.org> 2012-02-17 13:53:36 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2021-05-09 08:42:38 -0700
commit: aa14bc9f0947a77781c1234c38f422f18b2fe154 (patch)
tree: fe9502d45c3cb6d41dbf87f29cf72af82c7571f4
parent: 3cd18a51d4e73d9148fc721527880c8dbbd08871 (diff)
download: chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.gz
chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.bz2
chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.zip
4 files changed, 76 insertions, 4 deletions
diff --git a/bin/chroma-bvh b/bin/chroma-bvh
index e3e6a8c..d4287ad 100644
--- a/bin/chroma-bvh
+++ b/bin/chroma-bvh
@@ -11,7 +11,8 @@ from chroma.loader import load_geometry_from_string
 from chroma.log import logger, logging
 from chroma.bvh import make_recursive_grid_bvh
 from chroma.gpu import create_cuda_context
-from chroma.gpu.bvh import optimize_layer
+from chroma.gpu.geometry import GPUGeometry
+from chroma.gpu.bvh import optimize_layer, area_sort_nodes
 
 
 def parse_bvh_id(id_str):
@@ -28,6 +29,23 @@ def parse_bvh_id(id_str):
         bvh_name = parts[1]
     geo_name = parts[0]
     return geo_name, bvh_name
+
+
+def sort_node(cache, args):
+    geo_name, bvh_name = parse_bvh_id(args[0])
+    mesh_hash = cache.get_geometry_hash(geo_name)
+    geometry = cache.load_geometry(geo_name)
+    geometry.bvh = cache.load_bvh(mesh_hash, bvh_name)
+
+    print 'Sorting BVH nodes by area...'
+
+    context = create_cuda_context()
+    gpu_geometry = GPUGeometry(geometry)
+    geometry.bvh.nodes = area_sort_nodes(gpu_geometry, geometry.bvh.layer_bounds)
+
+    print 'Saving new BVH...'
+    context.pop()
+    cache.save_bvh(geometry.bvh, mesh_hash, bvh_name)
         
 
 def node_swap(cache, args):
@@ -181,6 +199,7 @@ commands = {
     'remove' : remove,
     'node_swap' : node_swap,
     'hist' : area_hist,
+    'sort' : sort_node,
     }
              
 
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
index 882c22e..2080ab7 100644
--- a/chroma/cuda/bvh.cu
+++ b/chroma/cuda/bvh.cu
@@ -2,8 +2,10 @@
 #include <cuda.h>
 
 #include "geometry_types.h"
+#include "geometry.h"
 #include "linalg.h"
 #include "physical_constants.h"
+#include "sorting.h"
 
 __device__ float3
 fminf(const float3 &a, const float3 &b)
@@ -537,4 +539,39 @@ extern "C"
      }
   }
 
+  __global__ void area_sort_child(unsigned int start, unsigned int end,
+				  Geometry *geometry)
+  {
+     unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+     unsigned int stride = gridDim.x * blockDim.x;
+
+     uint4 *node = geometry->nodes;
+
+     const int MAX_CHILD = 1 << (32 - CHILD_BITS);
+     float distance[MAX_CHILD];
+     uint4 children[MAX_CHILD];
+
+     for (unsigned int i=start+thread_id; i < end; i += stride) {
+       uint4 this_node = node[i];
+       unsigned int nchild = this_node.w >> CHILD_BITS;
+       unsigned int child_id = this_node.w &  ~NCHILD_MASK;
+
+       if (nchild <= 1)
+	 continue;
+
+       for (unsigned int i=0; i < nchild; i++) {
+	 children[i] = node[child_id + i];
+	 Node unpacked = get_node(geometry, child_id+i);
+	 float3 delta = unpacked.upper - unpacked.lower;
+	 distance[i] = -(delta.x * delta.y + delta.y * delta.z + delta.z * delta.x);
+       }
+
+       piksrt2(nchild, distance, children);
+
+       for (unsigned int i=0; i < nchild; i++)
+	 node[child_id + i] = children[i];
+     }
+  }
+
+
 } // extern "C"
diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h
index c52a241..d1381c3 100644
--- a/chroma/cuda/mesh.h
+++ b/chroma/cuda/mesh.h
@@ -112,9 +112,9 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g,
 
     } // while nodes on stack
 
-    //if (threadIdx.x == 0) {
-    //printf("node count: %d\n", count);
-    //printf("triangle count: %d\n", tri_count);
+    //if (blockIdx.x == 0 && threadIdx.x == 0) {
+    //  printf("node count: %d\n", count);
+    //  printf("triangle count: %d\n", tri_count);
     //}
 
     return triangle_index;
diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py
index b138bb9..5e75a1e 100644
--- a/chroma/gpu/bvh.py
+++ b/chroma/gpu/bvh.py
@@ -129,6 +129,22 @@ def collapse_chains(nodes, layer_bounds):
                                  grid=(120,1))
     return gpu_nodes.get()
 
+def area_sort_nodes(gpu_geometry, layer_bounds):
+    bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+                               include_source_directory=True)
+    bvh_funcs = GPUFuncs(bvh_module)
+
+    bounds = zip(layer_bounds[:-1], layer_bounds[1:])[:-1]
+    bounds.reverse()
+    nthreads_per_block = 256
+    for start, end in bounds:
+        bvh_funcs.area_sort_child(np.uint32(start),
+                                  np.uint32(end),
+                                  gpu_geometry,
+                                  block=(nthreads_per_block,1,1),
+                                  grid=(120,1))
+    return gpu_geometry.nodes.get()
+
 
 def merge_nodes(nodes, degree, max_ratio=None):
     bvh_module = get_cu_module('bvh.cu', options=cuda_options,
author	Stan Seibert <stan@mtrr.org>	2012-02-17 13:53:36 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2021-05-09 08:42:38 -0700
commit	aa14bc9f0947a77781c1234c38f422f18b2fe154 (patch)
tree	fe9502d45c3cb6d41dbf87f29cf72af82c7571f4
parent	3cd18a51d4e73d9148fc721527880c8dbbd08871 (diff)
download	chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.gz chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.bz2 chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.zip