summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--chroma/cuda/bvh.cu183
-rw-r--r--chroma/cuda/geometry.h69
-rw-r--r--chroma/cuda/geometry_types.h55
-rw-r--r--chroma/cuda/mesh.h140
-rw-r--r--chroma/gpu/geometry.py171
5 files changed, 483 insertions, 135 deletions
diff --git a/chroma/cuda/bvh.cu b/chroma/cuda/bvh.cu
new file mode 100644
index 0000000..8543649
--- /dev/null
+++ b/chroma/cuda/bvh.cu
@@ -0,0 +1,183 @@
+//-*-c++-*-
+#include <cuda.h>
+
+#include "linalg.h"
+
+__device__ float3
+fminf(const float3 &a, const float3 &b)
+{
+ return make_float3(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z));
+}
+
+__device__ float3
+fmaxf(const float3 &a, const float3 &b)
+{
+ return make_float3(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z));
+}
+
+__device__ uint3
+min(const uint3 &a, const uint3 &b)
+{
+ return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
+}
+
+__device__ uint3
+max(const uint3 &a, const uint3 &b)
+{
+ return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
+}
+
+
+__device__ uint3
+operator+ (const uint3 &a, const unsigned int &b)
+{
+ return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+
+// spread out the first 16 bits in x to occupy every 3rd slot in the return value
+__device__ unsigned long long spread3_16(unsigned int input)
+{
+ // method from http://stackoverflow.com/a/4838734
+ unsigned long long x = input;
+ x = (x | (x << 16)) & 0x00000000FF0000FFul;
+ x = (x | (x << 8)) & 0x000000F00F00F00Ful;
+ x = (x | (x << 4)) & 0x00000C30C30C30C3ul;
+ x = (x | (x << 2)) & 0X0000249249249249ul;
+
+ return x;
+}
+
+__device__ unsigned int quantize(float v, float world_origin, float world_scale)
+{
+ // truncate!
+ return (unsigned int) ((v - world_origin) / world_scale);
+}
+
+__device__ uint3 quantize3(float3 v, float3 world_origin, float3 world_scale)
+{
+ return make_uint3(quantize(v.x, world_origin.x, world_scale.x),
+ quantize(v.y, world_origin.y, world_scale.y),
+ quantize(v.z, world_origin.z, world_scale.z));
+}
+
+const unsigned int LEAF_BIT = (1U << 31);
+
+extern "C"
+{
+
+ __global__ void
+ make_leaves(unsigned int first_triangle,
+ unsigned int ntriangles, uint3 *triangles, float3 *vertices,
+ float3 world_origin, float3 world_scale,
+ uint4 *leaf_nodes, unsigned long long *morton_codes)
+
+ {
+ unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+ if (thread_id >= ntriangles)
+ return;
+
+ unsigned int triangle_id = first_triangle + thread_id;
+
+ // Find bounding corners and centroid
+ uint3 triangle = triangles[triangle_id];
+ float3 lower = vertices[triangle.x];
+ float3 centroid = lower;
+ float3 upper = lower;
+ float3 temp_vertex;
+
+ temp_vertex = vertices[triangle.y];
+ lower = fminf(lower, temp_vertex);
+ upper = fmaxf(upper, temp_vertex);
+ centroid += temp_vertex;
+
+ temp_vertex = vertices[triangle.z];
+ lower = fminf(lower, temp_vertex);
+ upper = fmaxf(upper, temp_vertex);
+ centroid += temp_vertex;
+
+ centroid /= 3.0f;
+
+ // Quantize bounding corners and centroid
+ uint3 q_lower = quantize3(lower, world_origin, world_scale);
+ uint3 q_upper = quantize3(upper, world_origin, world_scale) + 1;
+ uint3 q_centroid = quantize3(centroid, world_origin, world_scale);
+
+ // Compute Morton code from quantized centroid
+ unsigned long long morton =
+ spread3_16(q_centroid.x)
+ | (spread3_16(q_centroid.y) << 1)
+ | (spread3_16(q_centroid.z) << 2);
+
+ // Write leaf and morton code
+ uint4 leaf_node;
+ leaf_node.x = q_lower.x | (q_upper.x << 16);
+ leaf_node.y = q_lower.y | (q_upper.y << 16);
+ leaf_node.z = q_lower.z | (q_upper.z << 16);
+ leaf_node.w = triangle_id | LEAF_BIT;
+
+ leaf_nodes[triangle_id] = leaf_node;
+ morton_codes[triangle_id] = morton;
+ }
+
+ __global__ void
+ reorder_leaves(unsigned int first_triangle,
+ unsigned int ntriangles,
+ uint4 *leaf_nodes_in,
+ uint4 *leaf_nodes_out,
+ unsigned int *remap_order)
+
+ {
+ unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+ if (thread_id >= ntriangles)
+ return;
+
+ unsigned int dest_id = first_triangle + thread_id;
+ unsigned int source_id = remap_order[dest_id];
+
+ leaf_nodes_out[dest_id] = leaf_nodes_in[source_id];
+ }
+
+ __global__ void
+ build_layer(unsigned int first_node,
+ unsigned int n_parent_nodes,
+ unsigned int n_children_per_node,
+ uint4 *nodes,
+ unsigned int parent_layer_offset,
+ unsigned int child_layer_offset)
+ {
+ unsigned int thread_id = blockDim.x * blockIdx.x + threadIdx.x;
+ if (thread_id >= n_parent_nodes)
+ return;
+
+ unsigned int parent_id = first_node + thread_id;
+ unsigned int first_child = child_layer_offset + parent_id * n_children_per_node;
+
+ // Load first child
+ uint4 parent_node = nodes[first_child];
+ uint3 lower = make_uint3(parent_node.x & 0xFFFF, parent_node.y & 0xFFFF, parent_node.z & 0xFFFF);
+ uint3 upper = make_uint3(parent_node.x >> 16, parent_node.y >> 16, parent_node.z >> 16);
+
+
+ // Scan remaining children
+ for (unsigned int i=1; i < n_children_per_node; i++) {
+ uint4 child_node = nodes[first_child + i];
+
+ if (child_node.x == 0)
+ break; // Hit first padding node in list of children
+
+ uint3 child_lower = make_uint3(child_node.x & 0xFFFF, child_node.y & 0xFFFF, child_node.z & 0xFFFF);
+ uint3 child_upper = make_uint3(child_node.x >> 16, child_node.y >> 16, child_node.z >> 16);
+
+ lower = min(lower, child_lower);
+ upper = max(upper, child_upper);
+ }
+
+ parent_node.w = first_child;
+ parent_node.x = upper.x << 16 | lower.x;
+ parent_node.y = upper.y << 16 | lower.y;
+ parent_node.z = upper.z << 16 | lower.z;
+
+ nodes[parent_layer_offset + parent_id] = parent_node;
+ }
+
+} // extern "C"
diff --git a/chroma/cuda/geometry.h b/chroma/cuda/geometry.h
index 2b5eacb..86ced6c 100644
--- a/chroma/cuda/geometry.h
+++ b/chroma/cuda/geometry.h
@@ -1,47 +1,40 @@
#ifndef __GEOMETRY_H__
#define __GEOMETRY_H__
-struct Material
-{
- float *refractive_index;
- float *absorption_length;
- float *scattering_length;
- unsigned int n;
- float step;
- float wavelength0;
-};
-
-struct Surface
-{
- float *detect;
- float *absorb;
- float *reflect_diffuse;
- float *reflect_specular;
- unsigned int n;
- float step;
- float wavelength0;
-};
-
-struct Triangle
+#include "geometry_types.h"
+#include "linalg.h"
+
+const unsigned int LEAF_BIT = (1U << 31);
+
+
+__device__ float3
+to_float3(const uint3 &a)
{
- float3 v0, v1, v2;
-};
+ return make_float3(a.x, a.y, a.z);
+}
-struct Geometry
+__device__ Node
+get_node(Geometry *geometry, const unsigned int &i)
{
- float3 *vertices;
- uint3 *triangles;
- unsigned int *material_codes;
- unsigned int *colors;
- float3 *lower_bounds;
- float3 *upper_bounds;
- unsigned int *node_map;
- unsigned int *node_map_end;
- Material **materials;
- Surface **surfaces;
- unsigned int start_node;
- unsigned int first_node;
-};
+ uint4 node = geometry->nodes[i];
+ Node node_struct;
+
+ if (node.x == 0) {
+ node_struct.kind = PADDING_NODE;
+ return node_struct;
+ }
+
+ uint3 lower_int = make_uint3(node.x & 0xFFFF, node.y & 0xFFFF, node.z & 0xFFFF);
+ uint3 upper_int = make_uint3(node.x >> 16, node.y >> 16, node.z >> 16);
+
+
+ node_struct.lower = geometry->world_origin + to_float3(lower_int) * geometry->world_scale;
+ node_struct.upper = geometry->world_origin + to_float3(upper_int) * geometry->world_scale;
+ node_struct.child = node.w & ~LEAF_BIT; // Mask off leaf bit
+ node_struct.kind = node.w & LEAF_BIT ? LEAF_NODE : INTERNAL_NODE;
+
+ return node_struct;
+}
__device__ Triangle
get_triangle(Geometry *geometry, const unsigned int &i)
diff --git a/chroma/cuda/geometry_types.h b/chroma/cuda/geometry_types.h
new file mode 100644
index 0000000..31ff3c6
--- /dev/null
+++ b/chroma/cuda/geometry_types.h
@@ -0,0 +1,55 @@
+#ifndef __GEOMETRY_TYPES_H__
+#define __GEOMETRY_TYPES_H__
+
+struct Material
+{
+ float *refractive_index;
+ float *absorption_length;
+ float *scattering_length;
+ unsigned int n;
+ float step;
+ float wavelength0;
+};
+
+struct Surface
+{
+ float *detect;
+ float *absorb;
+ float *reflect_diffuse;
+ float *reflect_specular;
+ unsigned int n;
+ float step;
+ float wavelength0;
+};
+
+struct Triangle
+{
+ float3 v0, v1, v2;
+};
+
+enum { INTERNAL_NODE, LEAF_NODE, PADDING_NODE };
+
+struct Node
+{
+ float3 lower;
+ float3 upper;
+ unsigned int child;
+ unsigned int kind;
+};
+
+struct Geometry
+{
+ float3 *vertices;
+ uint3 *triangles;
+ unsigned int *material_codes;
+ unsigned int *colors;
+ uint4 *nodes;
+ Material **materials;
+ Surface **surfaces;
+ float3 world_origin;
+ float _dummy1; // for alignment
+ float3 world_scale;
+ unsigned int branch_degree;
+};
+
+#endif
diff --git a/chroma/cuda/mesh.h b/chroma/cuda/mesh.h
index d60d801..de15156 100644
--- a/chroma/cuda/mesh.h
+++ b/chroma/cuda/mesh.h
@@ -6,7 +6,7 @@
#include "stdio.h"
-#define STACK_SIZE 100
+#define STACK_SIZE 1000
/* Tests the intersection between a ray and a node in the bounding volume
hierarchy. If the ray intersects the bounding volume and `min_distance`
@@ -14,15 +14,11 @@
less than `min_distance`, return true, else return false. */
__device__ bool
intersect_node(const float3 &origin, const float3 &direction,
- Geometry *g, int i, float min_distance=-1.0f)
+ Geometry *g, const Node &node, const float min_distance=-1.0f)
{
- /* assigning these to local variables is faster for some reason */
- float3 lower_bound = g->lower_bounds[i];
- float3 upper_bound = g->upper_bounds[i];
-
float distance_to_box;
- if (intersect_box(origin, direction, lower_bound, upper_bound,
+ if (intersect_box(origin, direction, node.lower, node.upper,
distance_to_box)) {
if (min_distance < 0.0f)
return true;
@@ -35,7 +31,6 @@ intersect_node(const float3 &origin, const float3 &direction,
else {
return false;
}
-
}
/* Finds the intersection between a ray and `geometry`. If the ray does
@@ -52,73 +47,74 @@ intersect_mesh(const float3 &origin, const float3& direction, Geometry *g,
float distance;
min_distance = -1.0f;
- if (!intersect_node(origin, direction, g, g->start_node, min_distance))
+ Node root = get_node(g, 0);
+
+ if (!intersect_node(origin, direction, g, root, min_distance))
return -1;
- unsigned int stack[STACK_SIZE];
-
- unsigned int *head = &stack[0];
- unsigned int *node = &stack[1];
- unsigned int *tail = &stack[STACK_SIZE-1];
- *node = g->start_node;
-
- unsigned int i;
-
- do
- {
- unsigned int first_child = g->node_map[*node];
- unsigned int stop = g->node_map_end[*node];
-
- while (*node >= g->first_node && stop == first_child+1) {
- *node = first_child;
- first_child = g->node_map[*node];
- stop = g->node_map_end[*node];
- }
-
- if (*node >= g->first_node) {
- for (i=first_child; i < stop; i++) {
- if (intersect_node(origin, direction, g, i, min_distance)) {
- *node = i;
- node++;
- }
- }
+ //min_distance = 100.0f;
+ //return 1;
+
+ unsigned int child_ptr_stack[STACK_SIZE];
+ child_ptr_stack[0] = root.child;
+
+ const unsigned int *head = &child_ptr_stack[0];
+ unsigned int *curr = &child_ptr_stack[0];
+ const unsigned int *tail = &child_ptr_stack[STACK_SIZE-1];
+
+ unsigned int count = 0;
+ unsigned int tri_count = 0;
+
+ while (curr >= head) {
+ unsigned int first_child = *curr;
+ curr--;
+
+ for (unsigned int i=first_child; i < first_child + g->branch_degree; i++) {
+ Node node = get_node(g, i);
+ count++;
+
+ if (node.kind == PADDING_NODE)
+ break; // this node and rest of children are padding
+
+ if (intersect_node(origin, direction, g, node, min_distance)) {
+
+ if (node.kind == LEAF_NODE) {
+ // This node wraps a triangle
+
+ if (node.child != last_hit_triangle) {
+ // Can't hit same triangle twice in a row
+ tri_count++;
+ Triangle t = get_triangle(g, node.child);
+ if (intersect_triangle(origin, direction, t, distance)) {
+
+ if (triangle_index == -1 || distance < min_distance) {
+ triangle_index = node.child;
+ min_distance = distance;
+ } // if hit triangle is closer than previous hits
+
+ } // if hit triangle
+
+ } // if not hitting same triangle as last step
+
+ } else {
+ curr++;
+ *curr = node.child;
+ } // leaf or internal node?
+ } // hit node?
- node--;
- }
- else {
- // node is a leaf
- for (i=first_child; i < stop; i++) {
- if (last_hit_triangle == i)
- continue;
-
- Triangle t = get_triangle(g, i);
-
- if (intersect_triangle(origin, direction, t, distance)) {
- if (triangle_index == -1) {
- triangle_index = i;
- min_distance = distance;
- continue;
- }
-
- if (distance < min_distance) {
- triangle_index = i;
- min_distance = distance;
- }
- }
- } // triangle loop
-
- node--;
-
- } // node is a leaf
-
- if (node > tail) {
- printf("warning: intersect_mesh() aborted; node > tail\n");
- break;
- }
-
- } // while loop
- while (node != head);
-
+ if (curr > tail) {
+ printf("warning: intersect_mesh() aborted; node > tail\n");
+ break;
+ }
+ } // loop over children, starting with first_child
+
+ } // while nodes on stack
+
+ if (threadIdx.x == 0) {
+ printf("node count: %d\n", count);
+ printf("triangle count: %d\n", tri_count);
+ }
+
return triangle_index;
}
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 081563f..6682f8f 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -6,11 +6,35 @@ from pycuda import characterize
from chroma.geometry import standard_wavelengths
from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \
chunk_iterator, format_array, format_size, to_uint3, to_float3, \
- make_gpu_struct
+ make_gpu_struct, GPUFuncs
+
from chroma.log import logger
+def round_up_to_multiple(x, multiple):
+ remainder = x % multiple
+ if remainder == 0:
+ return x
+ else:
+ return x + multiple - remainder
+
+def compute_layer_configuration(n, branch_degree):
+ if n == 1:
+ # Special case for root
+ return [ (1, 1) ]
+ else:
+ layer_conf = [ (n, round_up_to_multiple(n, branch_degree)) ]
+
+ while layer_conf[0][1] > 1:
+ nparent = int(np.ceil( float(layer_conf[0][1]) / branch_degree ))
+ if nparent == 1:
+ layer_conf = [ (1, 1) ] + layer_conf
+ else:
+ layer_conf = [ (nparent, round_up_to_multiple(nparent, branch_degree)) ] + layer_conf
+
+ return layer_conf
+
class GPUGeometry(object):
- def __init__(self, geometry, wavelengths=None, print_usage=False):
+ def __init__(self, geometry, wavelengths=None, print_usage=False, branch_degree=32):
if wavelengths is None:
wavelengths = standard_wavelengths
@@ -19,7 +43,7 @@ class GPUGeometry(object):
except ValueError:
raise ValueError('wavelengths must be equally spaced apart.')
- geometry_source = get_cu_source('geometry.h')
+ geometry_source = get_cu_source('geometry_types.h')
material_struct_size = characterize.sizeof('Material', geometry_source)
surface_struct_size = characterize.sizeof('Surface', geometry_source)
geometry_struct_size = characterize.sizeof('Geometry', geometry_source)
@@ -104,32 +128,50 @@ class GPUGeometry(object):
self.surface_pointer_array = \
make_gpu_struct(8*len(self.surface_ptrs), self.surface_ptrs)
- self.vertices = ga.to_gpu(to_float3(geometry.mesh.vertices))
- self.triangles = ga.to_gpu(to_uint3(geometry.mesh.triangles))
+ self.pagelocked_vertices = cuda.pagelocked_empty(shape=len(geometry.mesh.vertices),
+ dtype=ga.vec.float3,
+ mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED)
+ self.pagelocked_triangles = cuda.pagelocked_empty(shape=len(geometry.mesh.triangles),
+ dtype=ga.vec.uint3,
+ mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED)
+ self.pagelocked_vertices[:] = to_float3(geometry.mesh.vertices)
+ self.pagelocked_triangles[:] = to_uint3(geometry.mesh.triangles)
+ self.vertices = np.intp(self.pagelocked_vertices.base.get_device_pointer())
+ self.triangles = np.intp(self.pagelocked_triangles.base.get_device_pointer())
+
+
+ self.branch_degree = branch_degree
+ print 'bvh', cuda.mem_get_info()
+ self.world_origin, self.world_scale, self.nodes = self.make_bvh(geometry.mesh.vertices,
+ self.vertices,
+ len(geometry.mesh.triangles),
+ self.triangles,
+ self.branch_degree)
+ print 'bvh after'
material_codes = (((geometry.material1_index & 0xff) << 24) |
((geometry.material2_index & 0xff) << 16) |
((geometry.surface_index & 0xff) << 8)).astype(np.uint32)
-
- self.material_codes = ga.to_gpu(material_codes)
-
- self.lower_bounds = ga.to_gpu(to_float3(geometry.lower_bounds))
- self.upper_bounds = ga.to_gpu(to_float3(geometry.upper_bounds))
- self.colors = ga.to_gpu(geometry.colors.astype(np.uint32))
- self.node_map = ga.to_gpu(geometry.node_map.astype(np.uint32))
- self.node_map_end = ga.to_gpu(geometry.node_map_end.astype(np.uint32))
+ self.pagelocked_material_codes = cuda.pagelocked_empty_like(material_codes, mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED)
+ self.pagelocked_material_codes[:] = material_codes
+ self.material_codes = np.intp(self.pagelocked_material_codes.base.get_device_pointer())
+
+ colors = geometry.colors.astype(np.uint32)
+ self.pagelocked_colors = cuda.pagelocked_empty_like(colors, mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED)
+ self.pagelocked_colors[:] = colors
+ self.colors = np.intp(self.pagelocked_colors.base.get_device_pointer())
self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32))
self.gpudata = make_gpu_struct(geometry_struct_size,
[self.vertices, self.triangles,
self.material_codes,
- self.colors, self.lower_bounds,
- self.upper_bounds, self.node_map,
- self.node_map_end,
+ self.colors, self.nodes,
self.material_pointer_array,
self.surface_pointer_array,
- np.uint32(geometry.start_node),
- np.uint32(geometry.first_node)])
+ self.world_origin,
+ np.float32(0.0), # dummy1
+ self.world_scale,
+ np.uint32(self.branch_degree)])
self.geometry = geometry
@@ -137,17 +179,96 @@ class GPUGeometry(object):
self.print_device_usage()
logger.info(self.device_usage_str())
+ def make_bvh(self, vertices, gpu_vertices, ntriangles, gpu_triangles, branch_degree):
+ assert branch_degree > 1
+ bvh_module = get_cu_module('bvh.cu', options=cuda_options,
+ include_source_directory=True)
+ bvh_funcs = GPUFuncs(bvh_module)
+
+ world_min = vertices.min(axis=0)
+ # Full scale at 2**16 - 2 in order to ensure there is dynamic range to round
+ # up by one count after quantization
+ world_scale = (vertices.max(axis=0) - world_min) / (2**16 - 2)
+
+ world_origin = ga.vec.make_float3(*world_min)
+ world_scale = ga.vec.make_float3(*world_scale)
+
+ layer_conf = compute_layer_configuration(ntriangles, branch_degree)
+ layer_offsets = list(np.cumsum([npad for n, npad in layer_conf]))
+
+ # Last entry is number of nodes, trim off and add zero to get offset of each layer
+ n_nodes = int(layer_offsets[-1])
+ layer_offsets = [0] + layer_offsets[:-1]
+
+ leaf_nodes = ga.empty(shape=ntriangles, dtype=ga.vec.uint4)
+ morton_codes = ga.empty(shape=ntriangles, dtype=np.uint64)
+
+ # Step 1: Make leaves
+ nthreads_per_block=256
+ for first_index, elements_this_iter, nblocks_this_iter in \
+ chunk_iterator(ntriangles, nthreads_per_block, max_blocks=10000):
+ bvh_funcs.make_leaves(np.uint32(first_index),
+ np.uint32(elements_this_iter),
+ gpu_triangles, gpu_vertices,
+ world_origin, world_scale,
+ leaf_nodes, morton_codes,
+ block=(nthreads_per_block,1,1),
+ grid=(nblocks_this_iter,1))
+
+ # argsort on the CPU because I'm too lazy to do it on the GPU
+ argsort = morton_codes.get().argsort().astype(np.uint32)
+ del morton_codes
+ #local_leaf_nodes = leaf_nodes.get()[argsort]
+ #
+ remap_order = ga.to_gpu(argsort)
+ #m = morton_codes.get()
+ #m.sort()
+ #print m
+ #assert False
+ # Step 2: sort leaf nodes into full node list
+ print cuda.mem_get_info(), leaf_nodes.nbytes
+ nodes = ga.zeros(shape=n_nodes, dtype=ga.vec.uint4)
+ #cuda.memcpy_htod(int(nodes.gpudata)+int(layer_offsets[-1]), local_leaf_nodes)
+
+ for first_index, elements_this_iter, nblocks_this_iter in \
+ chunk_iterator(ntriangles, nthreads_per_block, max_blocks=10000):
+ bvh_funcs.reorder_leaves(np.uint32(first_index),
+ np.uint32(elements_this_iter),
+ leaf_nodes, nodes[layer_offsets[-1]:], remap_order,
+ block=(nthreads_per_block,1,1),
+ grid=(nblocks_this_iter,1))
+
+ del leaf_nodes
+ del remap_order
+
+ # Step 3: Create parent layers in reverse order
+ layer_parameters = zip(layer_offsets[:-1], layer_offsets[1:], layer_conf)
+ layer_parameters.reverse()
+
+ for parent_offset, child_offset, (nparent, nparent_pad) in layer_parameters:
+ for first_index, elements_this_iter, nblocks_this_iter in \
+ chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
+ print parent_offset, first_index, elements_this_iter, nblocks_this_iter
+ bvh_funcs.build_layer(np.uint32(first_index),
+ np.uint32(elements_this_iter),
+ np.uint32(branch_degree),
+ nodes,
+ np.uint32(parent_offset),
+ np.uint32(child_offset),
+ block=(nthreads_per_block,1,1),
+ grid=(nblocks_this_iter,1))
+
+ return world_origin, world_scale, nodes
+
+
def device_usage_str(self):
'''Returns a formatted string displaying the memory usage.'''
s = 'device usage:\n'
s += '-'*10 + '\n'
- s += format_array('vertices', self.vertices) + '\n'
- s += format_array('triangles', self.triangles) + '\n'
- s += format_array('lower_bounds', self.lower_bounds) + '\n'
- s += format_array('upper_bounds', self.upper_bounds) + '\n'
- s += format_array('node_map', self.node_map) + '\n'
- s += format_array('node_map_end', self.node_map_end) + '\n'
- s += '%-15s %6s %6s' % ('total', '', format_size(self.vertices.nbytes + self.triangles.nbytes + self.lower_bounds.nbytes + self.upper_bounds.nbytes + self.node_map.nbytes + self.node_map_end.nbytes)) + '\n'
+ #s += format_array('vertices', self.vertices) + '\n'
+ #s += format_array('triangles', self.triangles) + '\n'
+ s += format_array('nodes', self.nodes) + '\n'
+ s += '%-15s %6s %6s' % ('total', '', format_size(self.nodes.nbytes)) + '\n'
s += '-'*10 + '\n'
free, total = cuda.mem_get_info()
s += '%-15s %6s %6s' % ('device total', '', format_size(total)) + '\n'