summary | refs | log | tree | commit | diff
path: root/chroma/gpu
diff options
context:
space:
mode:
author: Stan Seibert <stan@mtrr.org> 2012-01-19 21:36:19 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2021-05-09 08:42:38 -0700
commit: bf46d6b9f42330b42f4634ea6ac0e2b318942786 (patch)
tree: 5b18635fcc5228c8326ab5cd43a35d38575c1215 /chroma/gpu
parent: 3aa00b69bf01f6b2a2f920642f8faa6a52bbb1c4 (diff)
download: chroma-bf46d6b9f42330b42f4634ea6ac0e2b318942786.tar.gz
chroma-bf46d6b9f42330b42f4634ea6ac0e2b318942786.tar.bz2
chroma-bf46d6b9f42330b42f4634ea6ac0e2b318942786.zip
Implement a new loader function that turns a string into a Geometry,
searching through files, named geometries in the cache, and geometry creation functions. The loader function is also responsible for fetching or creating a BVH to go with the geometry. This commit also removes some code that has been replaced by the new system. Other bits will come back in future commits.
Diffstat (limited to 'chroma/gpu')
-rw-r--r-- chroma/gpu/geometry.py 257
1 files changed, 17 insertions, 240 deletions
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py
index 6cb991c..77d33b2 100644
--- a/chroma/gpu/geometry.py
+++ b/chroma/gpu/geometry.py
@@ -6,228 +6,12 @@ from pycuda import characterize
from chroma.geometry import standard_wavelengths
from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \
chunk_iterator, format_array, format_size, to_uint3, to_float3, \
- make_gpu_struct, GPUFuncs
+ make_gpu_struct, GPUFuncs, mapped_empty, Mapped
from chroma.log import logger
-def round_up_to_multiple(x, multiple):
- remainder = x % multiple
- if remainder == 0:
- return x
- else:
- return x + multiple - remainder
-
-def compute_layer_configuration(n, branch_degree):
- if n == 1:
- # Special case for root
- return [ (1, 1) ]
- else:
- layer_conf = [ (n, round_up_to_multiple(n, branch_degree)) ]
-
- while layer_conf[0][1] > 1:
- nparent = int(np.ceil( float(layer_conf[0][1]) / branch_degree ))
- if nparent == 1:
- layer_conf = [ (1, 1) ] + layer_conf
- else:
- layer_conf = [ (nparent, round_up_to_multiple(nparent, branch_degree)) ] + layer_conf
-
- return layer_conf
-
-def optimize_bvh_layer(layer, bvh_funcs):
- n = len(layer)
- areas = ga.empty(shape=n, dtype=np.uint32)
- union_areas = ga.empty(shape=n, dtype=np.uint32)
- nthreads_per_block = 128
- min_areas = ga.empty(shape=int(np.ceil(n/float(nthreads_per_block))), dtype=np.uint32)
- min_index = ga.empty_like(min_areas)
-
- update = 50000
-
- skip_size = 1
- flag = cuda.pagelocked_empty(shape=skip_size, dtype=np.uint32, mem_flags=cuda.host_alloc_flags.DEVICEMAP)
- flag_gpu = np.intp(flag.base.get_device_pointer())
- print 'starting optimization'
-
- i = 0
- skips = 0
- while i < (n/2 - 1):
- # How are we doing?
- if i % update == 0:
- for first_index, elements_this_iter, nblocks_this_iter in \
- chunk_iterator(n-1, nthreads_per_block, max_blocks=10000):
-
- bvh_funcs.distance_to_prev(np.uint32(first_index + 1),
- np.uint32(elements_this_iter),
- layer,
- union_areas,
- block=(nthreads_per_block,1,1),
- grid=(nblocks_this_iter,1))
-
- union_areas_host = union_areas.get()[1::2]
- print 'Area of parent layer: %1.12e' % union_areas_host.astype(float).sum()
- print 'Area of parent layer so far (%d): %1.12e' % (i*2, union_areas_host.astype(float)[:i].\
-sum())
- print 'Skips:', skips
-
- test_index = i * 2
-
- blocks = 0
- look_forward = min(8192*400, n - test_index - 2)
- skip_this_round = min(skip_size, n - test_index - 1)
- flag[:] = 0
- for first_index, elements_this_iter, nblocks_this_iter in \
- chunk_iterator(look_forward, nthreads_per_block, max_blocks=10000):
- bvh_funcs.min_distance_to(np.uint32(first_index + test_index + 2),
- np.uint32(elements_this_iter),
- np.uint32(test_index),
- layer,
- np.uint32(blocks),
- min_areas,
- min_index,
- flag_gpu,
- block=(nthreads_per_block,1,1),
- grid=(nblocks_this_iter, skip_this_round))
- blocks += nblocks_this_iter
- cuda.Context.get_current().synchronize()
-
- if flag[0] == 0:
- flag_nonzero = flag.nonzero()[0]
- if len(flag_nonzero) == 0:
- no_swap_required = skip_size
- else:
- no_swap_required = flag_nonzero[0]
- i += no_swap_required
- skips += no_swap_required
- continue
-
- areas_host = min_areas[:blocks].get()
- min_index_host = min_index[:blocks].get()
- best_block = areas_host.argmin()
- better_i = min_index_host[best_block]
-
- if i % update == 0:
- print 'swapping %d and %d' % (test_index + 1, better_i)
-
- bvh_funcs.swap(np.uint32(test_index+1), np.uint32(better_i),
- layer, block=(1,1,1), grid=(1,1))
- i += 1
-
- for first_index, elements_this_iter, nblocks_this_iter in \
- chunk_iterator(n-1, nthreads_per_block, max_blocks=10000):
-
- bvh_funcs.distance_to_prev(np.uint32(first_index + 1),
- np.uint32(elements_this_iter),
- layer,
- union_areas,
- block=(nthreads_per_block,1,1),
- grid=(nblocks_this_iter,1))
-
- union_areas_host = union_areas.get()[1::2]
- print 'Final area of parent layer: %1.12e' % union_areas_host.sum()
- print 'Skips:', skips
-
-def make_bvh(vertices, gpu_vertices, ntriangles, gpu_triangles, branch_degree):
- assert branch_degree > 1
- bvh_module = get_cu_module('bvh.cu', options=cuda_options,
- include_source_directory=True)
- bvh_funcs = GPUFuncs(bvh_module)
-
- world_min = vertices.min(axis=0)
- # Full scale at 2**16 - 2 in order to ensure there is dynamic range to round
- # up by one count after quantization
- world_scale = np.max((vertices.max(axis=0) - world_min)) / (2**16 - 2)
-
- world_origin = ga.vec.make_float3(*world_min)
- world_scale = np.float32(world_scale)
-
- layer_conf = compute_layer_configuration(ntriangles, branch_degree)
- layer_offsets = list(np.cumsum([npad for n, npad in layer_conf]))
-
- # Last entry is number of nodes, trim off and add zero to get offset of each layer
- n_nodes = int(layer_offsets[-1])
- layer_offsets = [0] + layer_offsets[:-1]
-
- leaf_nodes = ga.empty(shape=ntriangles, dtype=ga.vec.uint4)
- morton_codes = ga.empty(shape=ntriangles, dtype=np.uint64)
-
- # Step 1: Make leaves
- nthreads_per_block=256
- for first_index, elements_this_iter, nblocks_this_iter in \
- chunk_iterator(ntriangles, nthreads_per_block, max_blocks=10000):
- bvh_funcs.make_leaves(np.uint32(first_index),
- np.uint32(elements_this_iter),
- gpu_triangles, gpu_vertices,
- world_origin, world_scale,
- leaf_nodes, morton_codes,
- block=(nthreads_per_block,1,1),
- grid=(nblocks_this_iter,1))
-
- # argsort on the CPU because I'm too lazy to do it on the GPU
- argsort = morton_codes.get().argsort().astype(np.uint32)
- del morton_codes
- local_leaf_nodes = leaf_nodes.get()[argsort]
- del leaf_nodes
- #del remap_order
- #
- #remap_order = ga.to_gpu(argsort)
- #m = morton_codes.get()
- #m.sort()
- #print m
- #assert False
- # Step 2: sort leaf nodes into full node list
- #print cuda.mem_get_info(), leaf_nodes.nbytes
- nodes = ga.zeros(shape=n_nodes, dtype=ga.vec.uint4)
- areas = ga.zeros(shape=n_nodes, dtype=np.uint32)
- cuda.memcpy_htod(int(nodes.gpudata)+int(layer_offsets[-1]), local_leaf_nodes)
-
- #for first_index, elements_this_iter, nblocks_this_iter in \
- # chunk_iterator(ntriangles, nthreads_per_block, max_blocks=10000):
- # bvh_funcs.reorder_leaves(np.uint32(first_index),
- # np.uint32(elements_this_iter),
- # leaf_nodes, nodes[layer_offsets[-1]:], remap_order,
- # block=(nthreads_per_block,1,1),
- # grid=(nblocks_this_iter,1))
-
-
- # Step 3: Create parent layers in reverse order
- layer_parameters = zip(layer_offsets[:-1], layer_offsets[1:], layer_conf)
- layer_parameters.reverse()
-
- i = len(layer_parameters)
- for parent_offset, child_offset, (nparent, nparent_pad) in layer_parameters:
- #if i < 30:
- # optimize_bvh_layer(nodes[child_offset:child_offset+nparent*branch_degree],
- # bvh_funcs)
-
- for first_index, elements_this_iter, nblocks_this_iter in \
- chunk_iterator(nparent * branch_degree, nthreads_per_block,
- max_blocks=10000):
- bvh_funcs.node_area(np.uint32(first_index+child_offset),
- np.uint32(elements_this_iter),
- nodes,
- areas,
- block=(nthreads_per_block,1,1),
- grid=(nblocks_this_iter,1))
-
- print 'area', i, nparent * branch_degree, '%e' % areas[child_offset:child_offset+nparent*branch_degree].get().astype(float).sum()
-
- for first_index, elements_this_iter, nblocks_this_iter in \
- chunk_iterator(nparent, nthreads_per_block, max_blocks=10000):
- bvh_funcs.build_layer(np.uint32(first_index),
- np.uint32(elements_this_iter),
- np.uint32(branch_degree),
- nodes,
- np.uint32(parent_offset),
- np.uint32(child_offset),
- block=(nthreads_per_block,1,1),
- grid=(nblocks_this_iter,1))
-
- i -= 1
-
- return world_origin, world_scale, nodes
-
class GPUGeometry(object):
- def __init__(self, geometry, wavelengths=None, print_usage=False, branch_degree=2):
+ def __init__(self, geometry, wavelengths=None, print_usage=False):
if wavelengths is None:
wavelengths = standard_wavelengths
@@ -321,26 +105,18 @@ class GPUGeometry(object):
self.surface_pointer_array = \
make_gpu_struct(8*len(self.surface_ptrs), self.surface_ptrs)
- self.pagelocked_vertices = cuda.pagelocked_empty(shape=len(geometry.mesh.vertices),
- dtype=ga.vec.float3,
- mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED)
- self.pagelocked_triangles = cuda.pagelocked_empty(shape=len(geometry.mesh.triangles),
- dtype=ga.vec.uint3,
- mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED)
- self.pagelocked_vertices[:] = to_float3(geometry.mesh.vertices)
- self.pagelocked_triangles[:] = to_uint3(geometry.mesh.triangles)
- self.vertices = np.intp(self.pagelocked_vertices.base.get_device_pointer())
- self.triangles = np.intp(self.pagelocked_triangles.base.get_device_pointer())
-
-
- self.branch_degree = branch_degree
- print 'bvh', cuda.mem_get_info()
- self.world_origin, self.world_scale, self.nodes = make_bvh(geometry.mesh.vertices,
- self.vertices,
- len(geometry.mesh.triangles),
- self.triangles,
- self.branch_degree)
- print 'bvh after', cuda.mem_get_info()
+ self.vertices = mapped_empty(shape=len(geometry.mesh.vertices),
+ dtype=ga.vec.float3,
+ write_combined=True)
+ self.triangles = mapped_empty(shape=len(geometry.mesh.triangles),
+ dtype=ga.vec.uint3,
+ write_combined=True)
+ self.vertices[:] = to_float3(geometry.mesh.vertices)
+ self.triangles[:] = to_uint3(geometry.mesh.triangles)
+
+ self.nodes = ga.to_gpu(geometry.bvh.nodes)
+ self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin)
+ self.world_scale = np.float32(geometry.bvh.world_coords.world_scale)
material_codes = (((geometry.material1_index & 0xff) << 24) |
((geometry.material2_index & 0xff) << 16) |
@@ -351,14 +127,15 @@ class GPUGeometry(object):
self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32))
self.gpudata = make_gpu_struct(geometry_struct_size,
- [self.vertices, self.triangles,
+ [Mapped(self.vertices),
+ Mapped(self.triangles),
self.material_codes,
self.colors, self.nodes,
self.material_pointer_array,
self.surface_pointer_array,
self.world_origin,
self.world_scale,
- np.uint32(self.branch_degree)])
+ np.uint32(geometry.bvh.degree)])
self.geometry = geometry