diff options
-rw-r--r-- | chroma/cuda/bvh.cu | 8 | ||||
-rw-r--r-- | chroma/cuda/geometry.h | 23 |
2 files changed, 21 insertions, 10 deletions
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu index 053eb2b..6ee8404 100644 --- a/chroma/cuda/bvh.cu +++ b/chroma/cuda/bvh.cu @@ -548,14 +548,12 @@ extern "C" unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x; unsigned int stride = gridDim.x * blockDim.x; - uint4 *node = geometry->nodes; - const int MAX_CHILD = 1 << (32 - CHILD_BITS); float distance[MAX_CHILD]; uint4 children[MAX_CHILD]; for (unsigned int i=start+thread_id; i < end; i += stride) { - uint4 this_node = node[i]; + uint4 this_node = get_packed_node(geometry, i); unsigned int nchild = this_node.w >> CHILD_BITS; unsigned int child_id = this_node.w & ~NCHILD_MASK; @@ -563,7 +561,7 @@ extern "C" continue; for (unsigned int i=0; i < nchild; i++) { - children[i] = node[child_id + i]; + children[i] = get_packed_node(geometry, child_id+i); Node unpacked = get_node(geometry, child_id+i); float3 delta = unpacked.upper - unpacked.lower; distance[i] = -(delta.x * delta.y + delta.y * delta.z + delta.z * delta.x); @@ -572,7 +570,7 @@ extern "C" piksrt2(nchild, distance, children); for (unsigned int i=0; i < nchild; i++) - node[child_id + i] = children[i]; + put_packed_node(geometry, child_id + i, children[i]); } } diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h index 4d21779..6a930e7 100644 --- a/chroma/cuda/geometry.h +++ b/chroma/cuda/geometry.h @@ -19,14 +19,27 @@ __device__ uint4 read_skip_l1(uint4 *ptr) return val; } -__device__ Node -get_node(Geometry *geometry, const unsigned int &i) +__device__ uint4 +get_packed_node(Geometry *geometry, const unsigned int &i) +{ + if (i < geometry->nprimary_nodes) + return geometry->primary_nodes[i]; + else + return geometry->extra_nodes[i - geometry->nprimary_nodes]; +} +__device__ void +put_packed_node(Geometry *geometry, const unsigned int &i, const uint4 &node) { - uint4 node; if (i < geometry->nprimary_nodes) - node = geometry->primary_nodes[i]; + geometry->primary_nodes[i] = node; else - node = geometry->extra_nodes[i - geometry->nprimary_nodes]; + geometry->extra_nodes[i - geometry->nprimary_nodes] = node; +} + +__device__ Node +get_node(Geometry *geometry, const unsigned int &i) +{ + uint4 node = get_packed_node(geometry, i); Node node_struct; |