diff options
Diffstat (limited to 'chroma')
| -rw-r--r-- | chroma/cuda/bvh.cu | 37 | ||||
| -rw-r--r-- | chroma/cuda/mesh.h | 6 | ||||
| -rw-r--r-- | chroma/gpu/bvh.py | 16 |
3 files changed, 56 insertions, 3 deletions
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu index 882c22e..2080ab7 100644 --- a/chroma/cuda/bvh.cu +++ b/chroma/cuda/bvh.cu @@ -2,8 +2,10 @@ #include <cuda.h> #include "geometry_types.h" +#include "geometry.h" #include "linalg.h" #include "physical_constants.h" +#include "sorting.h" __device__ float3 fminf(const float3 &a, const float3 &b) @@ -537,4 +539,39 @@ extern "C" } } + __global__ void area_sort_child(unsigned int start, unsigned int end, + Geometry *geometry) + { + unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x; + unsigned int stride = gridDim.x * blockDim.x; + + uint4 *node = geometry->nodes; + + const int MAX_CHILD = 1 << (32 - CHILD_BITS); + float distance[MAX_CHILD]; + uint4 children[MAX_CHILD]; + + for (unsigned int i=start+thread_id; i < end; i += stride) { + uint4 this_node = node[i]; + unsigned int nchild = this_node.w >> CHILD_BITS; + unsigned int child_id = this_node.w & ~NCHILD_MASK; + + if (nchild <= 1) + continue; + + for (unsigned int i=0; i < nchild; i++) { + children[i] = node[child_id + i]; + Node unpacked = get_node(geometry, child_id+i); + float3 delta = unpacked.upper - unpacked.lower; + distance[i] = -(delta.x * delta.y + delta.y * delta.z + delta.z * delta.x); + } + + piksrt2(nchild, distance, children); + + for (unsigned int i=0; i < nchild; i++) + node[child_id + i] = children[i]; + } + } + + } // extern "C" diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h index c52a241..d1381c3 100644 --- a/chroma/cuda/mesh.h +++ b/chroma/cuda/mesh.h @@ -112,9 +112,9 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g, } // while nodes on stack - //if (threadIdx.x == 0) { - //printf("node count: %d\n", count); - //printf("triangle count: %d\n", tri_count); + //if (blockIdx.x == 0 && threadIdx.x == 0) { + // printf("node count: %d\n", count); + // printf("triangle count: %d\n", tri_count); //} return triangle_index; diff --git a/chroma/gpu/bvh.py b/chroma/gpu/bvh.py index b138bb9..5e75a1e 100644 --- a/chroma/gpu/bvh.py +++ b/chroma/gpu/bvh.py @@ -129,6 +129,22 @@ def collapse_chains(nodes, layer_bounds): grid=(120,1)) return gpu_nodes.get() +def area_sort_nodes(gpu_geometry, layer_bounds): + bvh_module = get_cu_module('bvh.cu', options=cuda_options, + include_source_directory=True) + bvh_funcs = GPUFuncs(bvh_module) + + bounds = zip(layer_bounds[:-1], layer_bounds[1:])[:-1] + bounds.reverse() + nthreads_per_block = 256 + for start, end in bounds: + bvh_funcs.area_sort_child(np.uint32(start), + np.uint32(end), + gpu_geometry, + block=(nthreads_per_block,1,1), + grid=(120,1)) + return gpu_geometry.nodes.get() + def merge_nodes(nodes, degree, max_ratio=None): bvh_module = get_cu_module('bvh.cu', options=cuda_options, |
