diff options
| author | Stan Seibert <stan@mtrr.org> | 2012-01-19 21:36:19 -0500 |
|---|---|---|
| committer | tlatorre <tlatorre@uchicago.edu> | 2021-05-09 08:42:38 -0700 |
| commit | bf46d6b9f42330b42f4634ea6ac0e2b318942786 (patch) | |
| tree | 5b18635fcc5228c8326ab5cd43a35d38575c1215 /chroma/gpu | |
| parent | 3aa00b69bf01f6b2a2f920642f8faa6a52bbb1c4 (diff) | |
| download | chroma-bf46d6b9f42330b42f4634ea6ac0e2b318942786.tar.gz chroma-bf46d6b9f42330b42f4634ea6ac0e2b318942786.tar.bz2 chroma-bf46d6b9f42330b42f4634ea6ac0e2b318942786.zip | |
Implement new loader function that turns a string into a Geometry,
searching through files, named geometries in the cache, and geometry
creation functions.
The loader function also is responsible for fetching or creating a BVH
to go with the geometry.
This commit also removes some code that has been replaced by the new
system. Other bits will come back in future commits.
Diffstat (limited to 'chroma/gpu')
| -rw-r--r-- | chroma/gpu/geometry.py | 257 |
1 files changed, 17 insertions, 240 deletions
diff --git a/chroma/gpu/geometry.py b/chroma/gpu/geometry.py index 6cb991c..77d33b2 100644 --- a/chroma/gpu/geometry.py +++ b/chroma/gpu/geometry.py @@ -6,228 +6,12 @@ from pycuda import characterize from chroma.geometry import standard_wavelengths from chroma.gpu.tools import get_cu_module, get_cu_source, cuda_options, \ chunk_iterator, format_array, format_size, to_uint3, to_float3, \ - make_gpu_struct, GPUFuncs + make_gpu_struct, GPUFuncs, mapped_empty, Mapped from chroma.log import logger -def round_up_to_multiple(x, multiple): - remainder = x % multiple - if remainder == 0: - return x - else: - return x + multiple - remainder - -def compute_layer_configuration(n, branch_degree): - if n == 1: - # Special case for root - return [ (1, 1) ] - else: - layer_conf = [ (n, round_up_to_multiple(n, branch_degree)) ] - - while layer_conf[0][1] > 1: - nparent = int(np.ceil( float(layer_conf[0][1]) / branch_degree )) - if nparent == 1: - layer_conf = [ (1, 1) ] + layer_conf - else: - layer_conf = [ (nparent, round_up_to_multiple(nparent, branch_degree)) ] + layer_conf - - return layer_conf - -def optimize_bvh_layer(layer, bvh_funcs): - n = len(layer) - areas = ga.empty(shape=n, dtype=np.uint32) - union_areas = ga.empty(shape=n, dtype=np.uint32) - nthreads_per_block = 128 - min_areas = ga.empty(shape=int(np.ceil(n/float(nthreads_per_block))), dtype=np.uint32) - min_index = ga.empty_like(min_areas) - - update = 50000 - - skip_size = 1 - flag = cuda.pagelocked_empty(shape=skip_size, dtype=np.uint32, mem_flags=cuda.host_alloc_flags.DEVICEMAP) - flag_gpu = np.intp(flag.base.get_device_pointer()) - print 'starting optimization' - - i = 0 - skips = 0 - while i < (n/2 - 1): - # How are we doing? - if i % update == 0: - for first_index, elements_this_iter, nblocks_this_iter in \ - chunk_iterator(n-1, nthreads_per_block, max_blocks=10000): - - bvh_funcs.distance_to_prev(np.uint32(first_index + 1), - np.uint32(elements_this_iter), - layer, - union_areas, - block=(nthreads_per_block,1,1), - grid=(nblocks_this_iter,1)) - - union_areas_host = union_areas.get()[1::2] - print 'Area of parent layer: %1.12e' % union_areas_host.astype(float).sum() - print 'Area of parent layer so far (%d): %1.12e' % (i*2, union_areas_host.astype(float)[:i].\ -sum()) - print 'Skips:', skips - - test_index = i * 2 - - blocks = 0 - look_forward = min(8192*400, n - test_index - 2) - skip_this_round = min(skip_size, n - test_index - 1) - flag[:] = 0 - for first_index, elements_this_iter, nblocks_this_iter in \ - chunk_iterator(look_forward, nthreads_per_block, max_blocks=10000): - bvh_funcs.min_distance_to(np.uint32(first_index + test_index + 2), - np.uint32(elements_this_iter), - np.uint32(test_index), - layer, - np.uint32(blocks), - min_areas, - min_index, - flag_gpu, - block=(nthreads_per_block,1,1), - grid=(nblocks_this_iter, skip_this_round)) - blocks += nblocks_this_iter - cuda.Context.get_current().synchronize() - - if flag[0] == 0: - flag_nonzero = flag.nonzero()[0] - if len(flag_nonzero) == 0: - no_swap_required = skip_size - else: - no_swap_required = flag_nonzero[0] - i += no_swap_required - skips += no_swap_required - continue - - areas_host = min_areas[:blocks].get() - min_index_host = min_index[:blocks].get() - best_block = areas_host.argmin() - better_i = min_index_host[best_block] - - if i % update == 0: - print 'swapping %d and %d' % (test_index + 1, better_i) - - bvh_funcs.swap(np.uint32(test_index+1), np.uint32(better_i), - layer, block=(1,1,1), grid=(1,1)) - i += 1 - - for first_index, elements_this_iter, nblocks_this_iter in \ - chunk_iterator(n-1, nthreads_per_block, max_blocks=10000): - - bvh_funcs.distance_to_prev(np.uint32(first_index + 1), - np.uint32(elements_this_iter), - layer, - union_areas, - block=(nthreads_per_block,1,1), - grid=(nblocks_this_iter,1)) - - union_areas_host = union_areas.get()[1::2] - print 'Final area of parent layer: %1.12e' % union_areas_host.sum() - print 'Skips:', skips - -def make_bvh(vertices, gpu_vertices, ntriangles, gpu_triangles, branch_degree): - assert branch_degree > 1 - bvh_module = get_cu_module('bvh.cu', options=cuda_options, - include_source_directory=True) - bvh_funcs = GPUFuncs(bvh_module) - - world_min = vertices.min(axis=0) - # Full scale at 2**16 - 2 in order to ensure there is dynamic range to round - # up by one count after quantization - world_scale = np.max((vertices.max(axis=0) - world_min)) / (2**16 - 2) - - world_origin = ga.vec.make_float3(*world_min) - world_scale = np.float32(world_scale) - - layer_conf = compute_layer_configuration(ntriangles, branch_degree) - layer_offsets = list(np.cumsum([npad for n, npad in layer_conf])) - - # Last entry is number of nodes, trim off and add zero to get offset of each layer - n_nodes = int(layer_offsets[-1]) - layer_offsets = [0] + layer_offsets[:-1] - - leaf_nodes = ga.empty(shape=ntriangles, dtype=ga.vec.uint4) - morton_codes = ga.empty(shape=ntriangles, dtype=np.uint64) - - # Step 1: Make leaves - nthreads_per_block=256 - for first_index, elements_this_iter, nblocks_this_iter in \ - chunk_iterator(ntriangles, nthreads_per_block, max_blocks=10000): - bvh_funcs.make_leaves(np.uint32(first_index), - np.uint32(elements_this_iter), - gpu_triangles, gpu_vertices, - world_origin, world_scale, - leaf_nodes, morton_codes, - block=(nthreads_per_block,1,1), - grid=(nblocks_this_iter,1)) - - # argsort on the CPU because I'm too lazy to do it on the GPU - argsort = morton_codes.get().argsort().astype(np.uint32) - del morton_codes - local_leaf_nodes = leaf_nodes.get()[argsort] - del leaf_nodes - #del remap_order - # - #remap_order = ga.to_gpu(argsort) - #m = morton_codes.get() - #m.sort() - #print m - #assert False - # Step 2: sort leaf nodes into full node list - #print cuda.mem_get_info(), leaf_nodes.nbytes - nodes = ga.zeros(shape=n_nodes, dtype=ga.vec.uint4) - areas = ga.zeros(shape=n_nodes, dtype=np.uint32) - cuda.memcpy_htod(int(nodes.gpudata)+int(layer_offsets[-1]), local_leaf_nodes) - - #for first_index, elements_this_iter, nblocks_this_iter in \ - # chunk_iterator(ntriangles, nthreads_per_block, max_blocks=10000): - # bvh_funcs.reorder_leaves(np.uint32(first_index), - # np.uint32(elements_this_iter), - # leaf_nodes, nodes[layer_offsets[-1]:], remap_order, - # block=(nthreads_per_block,1,1), - # grid=(nblocks_this_iter,1)) - - - # Step 3: Create parent layers in reverse order - layer_parameters = zip(layer_offsets[:-1], layer_offsets[1:], layer_conf) - layer_parameters.reverse() - - i = len(layer_parameters) - for parent_offset, child_offset, (nparent, nparent_pad) in layer_parameters: - #if i < 30: - # optimize_bvh_layer(nodes[child_offset:child_offset+nparent*branch_degree], - # bvh_funcs) - - for first_index, elements_this_iter, nblocks_this_iter in \ - chunk_iterator(nparent * branch_degree, nthreads_per_block, - max_blocks=10000): - bvh_funcs.node_area(np.uint32(first_index+child_offset), - np.uint32(elements_this_iter), - nodes, - areas, - block=(nthreads_per_block,1,1), - grid=(nblocks_this_iter,1)) - - print 'area', i, nparent * branch_degree, '%e' % areas[child_offset:child_offset+nparent*branch_degree].get().astype(float).sum() - - for first_index, elements_this_iter, nblocks_this_iter in \ - chunk_iterator(nparent, nthreads_per_block, max_blocks=10000): - bvh_funcs.build_layer(np.uint32(first_index), - np.uint32(elements_this_iter), - np.uint32(branch_degree), - nodes, - np.uint32(parent_offset), - np.uint32(child_offset), - block=(nthreads_per_block,1,1), - grid=(nblocks_this_iter,1)) - - i -= 1 - - return world_origin, world_scale, nodes - class GPUGeometry(object): - def __init__(self, geometry, wavelengths=None, print_usage=False, branch_degree=2): + def __init__(self, geometry, wavelengths=None, print_usage=False): if wavelengths is None: wavelengths = standard_wavelengths @@ -321,26 +105,18 @@ class GPUGeometry(object): self.surface_pointer_array = \ make_gpu_struct(8*len(self.surface_ptrs), self.surface_ptrs) - self.pagelocked_vertices = cuda.pagelocked_empty(shape=len(geometry.mesh.vertices), - dtype=ga.vec.float3, - mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED) - self.pagelocked_triangles = cuda.pagelocked_empty(shape=len(geometry.mesh.triangles), - dtype=ga.vec.uint3, - mem_flags=cuda.host_alloc_flags.DEVICEMAP | cuda.host_alloc_flags.WRITECOMBINED) - self.pagelocked_vertices[:] = to_float3(geometry.mesh.vertices) - self.pagelocked_triangles[:] = to_uint3(geometry.mesh.triangles) - self.vertices = np.intp(self.pagelocked_vertices.base.get_device_pointer()) - self.triangles = np.intp(self.pagelocked_triangles.base.get_device_pointer()) - - - self.branch_degree = branch_degree - print 'bvh', cuda.mem_get_info() - self.world_origin, self.world_scale, self.nodes = make_bvh(geometry.mesh.vertices, - self.vertices, - len(geometry.mesh.triangles), - self.triangles, - self.branch_degree) - print 'bvh after', cuda.mem_get_info() + self.vertices = mapped_empty(shape=len(geometry.mesh.vertices), + dtype=ga.vec.float3, + write_combined=True) + self.triangles = mapped_empty(shape=len(geometry.mesh.triangles), + dtype=ga.vec.uint3, + write_combined=True) + self.vertices[:] = to_float3(geometry.mesh.vertices) + self.triangles[:] = to_uint3(geometry.mesh.triangles) + + self.nodes = ga.to_gpu(geometry.bvh.nodes) + self.world_origin = ga.vec.make_float3(*geometry.bvh.world_coords.world_origin) + self.world_scale = np.float32(geometry.bvh.world_coords.world_scale) material_codes = (((geometry.material1_index & 0xff) << 24) | ((geometry.material2_index & 0xff) << 16) | @@ -351,14 +127,15 @@ class GPUGeometry(object): self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32)) self.gpudata = make_gpu_struct(geometry_struct_size, - [self.vertices, self.triangles, + [Mapped(self.vertices), + Mapped(self.triangles), self.material_codes, self.colors, self.nodes, self.material_pointer_array, self.surface_pointer_array, self.world_origin, self.world_scale, - np.uint32(self.branch_degree)]) + np.uint32(geometry.bvh.degree)]) self.geometry = geometry |
