diff options
author | Stan Seibert <stan@mtrr.org> | 2012-02-17 13:53:36 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2021-05-09 08:42:38 -0700 |
commit | aa14bc9f0947a77781c1234c38f422f18b2fe154 (patch) | |
tree | fe9502d45c3cb6d41dbf87f29cf72af82c7571f4 | |
parent | 3cd18a51d4e73d9148fc721527880c8dbbd08871 (diff) | |
download | chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.gz chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.tar.bz2 chroma-aa14bc9f0947a77781c1234c38f422f18b2fe154.zip |
BVH optimization to sort child nodes by area. Only has a small effect.
-rw-r--r-- | bin/chroma-bvh | 21 | ||||
-rw-r--r-- | chroma/cuda/bvh.cu | 37 | ||||
-rw-r--r-- | chroma/cuda/mesh.h | 6 | ||||
-rw-r--r-- | chroma/gpu/bvh.py | 16 |
4 files changed, 76 insertions, 4 deletions
diff --git a/bin/chroma-bvh b/bin/chroma-bvh index e3e6a8c..d4287ad 100644 --- a/bin/chroma-bvh +++ b/bin/chroma-bvh @@ -11,7 +11,8 @@ from chroma.loader import load_geometry_from_string from chroma.log import logger, logging from chroma.bvh import make_recursive_grid_bvh from chroma.gpu import create_cuda_context -from chroma.gpu.bvh import optimize_layer +from chroma.gpu.geometry import GPUGeometry +from chroma.gpu.bvh import optimize_layer, area_sort_nodes def parse_bvh_id(id_str): @@ -28,6 +29,23 @@ def parse_bvh_id(id_str): bvh_name = parts[1] geo_name = parts[0] return geo_name, bvh_name + + +def sort_node(cache, args): + geo_name, bvh_name = parse_bvh_id(args[0]) + mesh_hash = cache.get_geometry_hash(geo_name) + geometry = cache.load_geometry(geo_name) + geometry.bvh = cache.load_bvh(mesh_hash, bvh_name) + + print 'Sorting BVH nodes by area...' + + context = create_cuda_context() + gpu_geometry = GPUGeometry(geometry) + geometry.bvh.nodes = area_sort_nodes(gpu_geometry, geometry.bvh.layer_bounds) + + print 'Saving new BVH...' + context.pop() + cache.save_bvh(geometry.bvh, mesh_hash, bvh_name) def node_swap(cache, args): @@ -181,6 +199,7 @@ commands = { 'remove' : remove, 'node_swap' : node_swap, 'hist' : area_hist, + 'sort' : sort_node, } diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu index 882c22e..2080ab7 100644 --- a/chroma/cuda/bvh.cu +++ b/chroma/cuda/bvh.cu @@ -2,8 +2,10 @@ #include <cuda.h> #include "geometry_types.h" +#include "geometry.h" #include "linalg.h" #include "physical_constants.h" +#include "sorting.h" __device__ float3 fminf(const float3 &a, const float3 &b) @@ -537,4 +539,39 @@ extern "C" } } + __global__ void area_sort_child(unsigned int start, unsigned int end, + Geometry *geometry) + { + unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x; + unsigned int stride = gridDim.x * blockDim.x; + + uint4 *node = geometry->nodes; + + const int MAX_CHILD = 1 << (32 - CHILD_BITS); + float distance[MAX_CHILD]; + uint4 children[MAX_CHILD]; + + for (unsigned int i=start+thread_id; i < end; i += stride) { + uint4 this_node = node[i]; + unsigned int nchild = this_node.w >> CHILD_BITS; + unsigned int child_id = this_node.w & ~NCHILD_MASK; + + if (nchild <= 1) + continue; + + for (unsigned int i=0; i < nchild; i++) { + children[i] = node[child_id + i]; + Node unpacked = get_node(geometry, child_id+i); + float3 delta = unpacked.upper - unpacked.lower; + distance[i] = -(delta.x * delta.y + delta.y * delta.z + delta.z * delta.x); + } + + piksrt2(nchild, distance, children); + + for (unsigned int i=0; i < nchild; i++) + node[child_id + i] = children[i]; + } + } + + } // extern "C" diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h index c52a241..d1381c3 100644 --- a/chroma/cuda/mesh.h +++ b/chroma/cuda/mesh.h @@ -112,9 +112,9 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g, } // while nodes on stack - //if (threadIdx.x == 0) { - //printf("node count: %d\n", count); - //printf("triangle count: %d\n", tri_count); + //if (blockIdx.x == 0 && threadIdx.x == 0) { + // printf("node count: %d\n", count); + // printf("triangle count: %d\n", tri_count); //} return triangle_index; diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py index b138bb9..5e75a1e 100644 --- a/chroma/gpu/bvh.py +++ b/chroma/gpu/bvh.py @@ -129,6 +129,22 @@ def collapse_chains(nodes, layer_bounds): grid=(120,1)) return gpu_nodes.get() +def area_sort_nodes(gpu_geometry, layer_bounds): + bvh_module = get_cu_module('bvh.cu', options=cuda_options, + include_source_directory=True) + bvh_funcs = GPUFuncs(bvh_module) + + bounds = zip(layer_bounds[:-1], layer_bounds[1:])[:-1] + bounds.reverse() + nthreads_per_block = 256 + for start, end in bounds: + bvh_funcs.area_sort_child(np.uint32(start), + np.uint32(end), + gpu_geometry, + block=(nthreads_per_block,1,1), + grid=(120,1)) + return gpu_geometry.nodes.get() + def merge_nodes(nodes, degree, max_ratio=None): bvh_module = get_cu_module('bvh.cu', options=cuda_options, |