summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Seibert <stan@mtrr.org>2012-02-17 13:53:36 -0500
committertlatorre <tlatorre@uchicago.edu>2021-05-09 08:42:38 -0700
commitaa14bc9f0947a77781c1234c38f422f18b2fe154 (patch)
treefe9502d45c3cb6d41dbf87f29cf72af82c7571f4
parent3cd18a51d4e73d9148fc721527880c8dbbd08871 (diff)
downloadchroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.gz
chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.bz2
chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.zip
BVH optimization to sort child nodes by area. Only has a small effect.
-rw-r--r--bin/chroma-bvh21
-rw-r--r--chroma/cuda/bvh.cu37
-rw-r--r--chroma/cuda/mesh.h6
-rw-r--r--chroma/gpu/bvh.py16
4 files changed, 76 insertions, 4 deletions
diff --git a/bin/chroma-bvh b/bin/chroma-bvh
index e3e6a8c..d4287ad 100644
--- a/bin/chroma-bvh
+++ b/bin/chroma-bvh
@@ -11,7 +11,8 @@ from chroma.loader import load_geometry_from_string
from chroma.log import logger, logging
from chroma.bvh import make_recursive_grid_bvh
from chroma.gpu import create_cuda_context
-from chroma.gpu.bvh import optimize_layer
+from chroma.gpu.geometry import GPUGeometry
+from chroma.gpu.bvh import optimize_layer, area_sort_nodes
def parse_bvh_id(id_str):
@@ -28,6 +29,23 @@ def parse_bvh_id(id_str):
bvh_name = parts[1]
geo_name = parts[0]
return geo_name, bvh_name
+
+
+def sort_node(cache, args):
+ geo_name, bvh_name = parse_bvh_id(args[0])
+ mesh_hash = cache.get_geometry_hash(geo_name)
+ geometry = cache.load_geometry(geo_name)
+ geometry.bvh = cache.load_bvh(mesh_hash, bvh_name)
+
+ print 'Sorting BVH nodes by area...'
+
+ context = create_cuda_context()
+ gpu_geometry = GPUGeometry(geometry)
+ geometry.bvh.nodes = area_sort_nodes(gpu_geometry, geometry.bvh.layer_bounds)
+
+ print 'Saving new BVH...'
+ context.pop()
+ cache.save_bvh(geometry.bvh, mesh_hash, bvh_name)
def node_swap(cache, args):
@@ -181,6 +199,7 @@ commands = {
'remove' : remove,
'node_swap' : node_swap,
'hist' : area_hist,
+ 'sort' : sort_node,
}
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
index 882c22e..2080ab7 100644
--- a/chroma/cuda/bvh.cu
+++ b/chroma/cuda/bvh.cu
@@ -2,8 +2,10 @@
#include <cuda.h>
#include "geometry_types.h"
+#include "geometry.h"
#include "linalg.h"
#include "physical_constants.h"
+#include "sorting.h"
__device__ float3
fminf(const float3 &a, const float3 &b)
@@ -537,4 +539,39 @@ extern "C"
}
}
+ __global__ void area_sort_child(unsigned int start, unsigned int end,
+ Geometry *geometry)
+ {
+ unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+ unsigned int stride = gridDim.x * blockDim.x;
+
+ uint4 *node = geometry->nodes;
+
+ const int MAX_CHILD = 1 << (32 - CHILD_BITS);
+ float distance[MAX_CHILD];
+ uint4 children[MAX_CHILD];
+
+ for (unsigned int i=start+thread_id; i < end; i += stride) {
+ uint4 this_node = node[i];
+ unsigned int nchild = this_node.w >> CHILD_BITS;
+ unsigned int child_id = this_node.w & ~NCHILD_MASK;
+
+ if (nchild <= 1)
+ continue;
+
+ for (unsigned int i=0; i < nchild; i++) {
+ children[i] = node[child_id + i];
+ Node unpacked = get_node(geometry, child_id+i);
+ float3 delta = unpacked.upper - unpacked.lower;
+ distance[i] = -(delta.x * delta.y + delta.y * delta.z + delta.z * delta.x);
+ }
+
+ piksrt2(nchild, distance, children);
+
+ for (unsigned int i=0; i < nchild; i++)
+ node[child_id + i] = children[i];
+ }
+ }
+
+
} // extern "C"
diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h
index c52a241..d1381c3 100644
--- a/chroma/cuda/mesh.h
+++ b/chroma/cuda/mesh.h
@@ -112,9 +112,9 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g,
} // while nodes on stack
- //if (threadIdx.x == 0) {
- //printf("node count: %d\n", count);
- //printf("triangle count: %d\n", tri_count);
+ //if (blockIdx.x == 0 && threadIdx.x == 0) {
+ // printf("node count: %d\n", count);
+ // printf("triangle count: %d\n", tri_count);
//}
return triangle_index;
diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py
index b138bb9..5e75a1e 100644
--- a/chroma/gpu/bvh.py
+++ b/chroma/gpu/bvh.py
@@ -129,6 +129,22 @@ def collapse_chains(nodes, layer_bounds):
grid=(120,1))
return gpu_nodes.get()
+def area_sort_nodes(gpu_geometry, layer_bounds):
+ bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+ include_source_directory=True)
+ bvh_funcs = GPUFuncs(bvh_module)
+
+ bounds = zip(layer_bounds[:-1], layer_bounds[1:])[:-1]
+ bounds.reverse()
+ nthreads_per_block = 256
+ for start, end in bounds:
+ bvh_funcs.area_sort_child(np.uint32(start),
+ np.uint32(end),
+ gpu_geometry,
+ block=(nthreads_per_block,1,1),
+ grid=(120,1))
+ return gpu_geometry.nodes.get()
+
def merge_nodes(nodes, degree, max_ratio=None):
bvh_module = get_cu_module('bvh.cu', options=cuda_options,