diff options
-rw-r--r-- | geometry.py | 237 | ||||
-rw-r--r-- | likelihood.py | 49 | ||||
-rw-r--r-- | src/kernel.cu | 40 | ||||
-rwxr-xr-x | view.py | 14 |
4 files changed, 146 insertions, 194 deletions
diff --git a/geometry.py b/geometry.py index e3748e6..f50fe89 100644 --- a/geometry.py +++ b/geometry.py @@ -1,89 +1,21 @@ import numpy as np -from itertools import chain +import numpy.ma as ma import pycuda.driver as cuda from pycuda import gpuarray -def compress(data, selectors): - """ - Make an iterator that filters elements from `data` returning only those - that have a corresponding element in `selectors` that evaluates to True. - Stops when either the `data` or `selectors` iterables has been - exhausted. - - From Python 2.7 itertools. - """ - return (d for d, s in zip(data, selectors) if s) - -def get_bounds(mesh): - """Returns the lower/upper bounds for `mesh`.""" - lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), \ - np.min(mesh[:,:,2])]) - upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), \ - np.max(mesh[:,:,2])]) - return lower_bound, upper_bound - -class Leaf(object): - """ - Leaf object represents the last layer in the bounding volume hierarchy - which points to a subset of the triangle mesh. - - Args: - - mesh: array, - A subset of the triangle mesh. - - mesh_idx: int, - The index of the first triangle in the global mesh. - - zvlaue: int, *optional* - The zvalue associated with this leaf. - - .. note:: - The `mesh` passed in the constructor is not actually stored in the - leaf. It is simply used to construct the leaf's bounding box. - """ - def __init__(self, mesh, mesh_idx, zvalue=None): - self.zvalue = zvalue - self.mesh_idx = mesh_idx - - self.lower_bound, self.upper_bound = get_bounds(mesh) - - self.size = mesh.shape[0] - -class Node(object): - """ - Node object represents a node in the bounding volume hierarchy which - contains a list of child nodes. - - Args: - - children: list, - List of child nodes/leafs. - - zvalue: int, *optional* - The zvalue associated with this node. - """ - def __init__(self, children, zvalue=None): - self.zvalue = zvalue - - lower_pts = np.array([child.lower_bound for child in children]) - upper_pts = np.array([child.upper_bound for child in children]) - - self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])]) - self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])]) - - self.size = len(children) - - self.children = children - -def interleave(arr): +def interleave(arr, bits): """ Interleave the bits of quantized three-dimensional points in space. Example - >>> interleave(np.identity(3, dtpe=np.int)) + >>> interleave(np.identity(3, dtype=np.int)) array([4, 2, 1], dtype=uint64) """ if len(arr.shape) != 2 or arr.shape[1] != 3: raise Exception('shape mismatch') - z = np.zeros(arr.shape[0], dtype=np.uint64) - for i in range(arr.dtype.itemsize*8): + z = np.zeros(arr.shape[0], dtype=np.uint32) + for i in range(bits): z |= (arr[:,2] & 1 << i) << (2*i) | \ (arr[:,1] & 1 << i) << (2*i+1) | \ (arr[:,0] & 1 << i) << (2*i+2) @@ -95,16 +27,26 @@ def morton_order(mesh, bits): bits of the quantized center coordinates of each triangle. Each coordinate axis is quantized into 2**bits bins. """ - lower_bound, upper_bound = get_bounds(mesh) + + lower_bound = np.array([np.min(mesh[:,:,0]), + np.min(mesh[:,:,1]), + np.min(mesh[:,:,2])]) + + upper_bound = np.array([np.max(mesh[:,:,0]), + np.max(mesh[:,:,1]), + np.max(mesh[:,:,2])]) + + if bits <= 0 or bits > 12: + raise Exception('number of bits must be in the range (0,12].') max_value = 2**bits - 1 - + def quantize(x): - return np.uint64((x-lower_bound)*max_value/(upper_bound-lower_bound)) + return np.uint32((x-lower_bound)*max_value/(upper_bound-lower_bound)) mean_positions = quantize(np.mean(mesh, axis=1)) - return interleave(mean_positions) + return interleave(mean_positions, bits) class Solid(object): """ @@ -169,7 +111,7 @@ class Geometry(object): if solid.surface2 not in self.surfaces: self.surfaces.append(solid.surface1) - def build(self, bits=3): + def build(self, bits=8): """Build the bounding volume hierarchy of the geometry.""" mesh = np.concatenate([solid.mesh for solid in self.solids]) @@ -190,74 +132,91 @@ class Geometry(object): np.concatenate([np.tile(self.surfaces.index(solid.surface2), \ len(solid)) for solid in self.solids]) - # array of zvalue for each triangle in mesh - zvalues = morton_order(mesh, bits) + zvalues_mesh = morton_order(mesh, bits) + reorder = np.argsort(zvalues_mesh) + zvalues_mesh = zvalues_mesh[reorder] - # mesh indices in order of increasing zvalue - order = np.array(zip(*sorted(zip(zvalues, range(zvalues.size))))[-1]) + if (np.diff(zvalues_mesh) < 0).any(): + raise Exception('zvalues_mesh out of order') - # reorder all arrays ordered by triangle index - self.zvalues = zvalues[order] - self.mesh = mesh[order] - self.solid_index = solid_index[order] - self.material1_index = material1_index[order] - self.material2_index = material2_index[order] - self.surface1_index = surface1_index[order] - self.surface2_index = surface2_index[order] + self.mesh = mesh[reorder] + self.solid_index = solid_index[reorder] + self.material1_index = material1_index[reorder] + self.material2_index = material2_index[reorder] + self.surface1_index = surface1_index[reorder] + self.surface2_index = surface2_index[reorder] - leafs = [] - for z in sorted(set(self.zvalues)): - mask = (self.zvalues == z) - leafs.append(Leaf(self.mesh[mask], np.where(mask)[0][0], z)) + unique_zvalues = np.unique(zvalues_mesh) + zvalues = np.empty(unique_zvalues.size, dtype=np.uint64) - layers = [] - layers.append(leafs) + self.lower_bounds = np.empty((unique_zvalues.size,3), dtype=np.float32) + self.upper_bounds = np.empty((unique_zvalues.size,3), dtype=np.float32) + self.node_map = np.empty(unique_zvalues.size, dtype=np.uint32) + self.node_length = np.empty(unique_zvalues.size, dtype=np.uint32) - while True: - bit_shifted_zvalues = \ - np.array([node.zvalue for node in layers[-1]]) >> 1 + for i, z in enumerate(unique_zvalues): + i1 = np.searchsorted(zvalues_mesh, z) + i2 = np.searchsorted(zvalues_mesh, z, side='right') - nodes = [] - for z in sorted(set(bit_shifted_zvalues)): - mask = (bit_shifted_zvalues == z) - nodes.append(Node(list(compress(layers[-1], mask)), z)) + self.lower_bounds[i] = [np.min(self.mesh[i1:i2,:,0]), + np.min(self.mesh[i1:i2,:,1]), + np.min(self.mesh[i1:i2,:,2])] + + self.upper_bounds[i] = [np.max(self.mesh[i1:i2,:,0]), + np.max(self.mesh[i1:i2,:,1]), + np.max(self.mesh[i1:i2,:,2])] + + self.node_map[i] = i1 + self.node_length[i] = i2-i1 - layers.append(nodes) + zvalues[i] = z - if len(nodes) == 1: - break + self.first_node = unique_zvalues.size - layers.reverse() + begin_last_layer = 0 + + while True: + bit_shifted_zvalues = zvalues >> 1 + unique_bit_shifted_zvalues = np.unique(bit_shifted_zvalues) + zvalues = np.empty(unique_bit_shifted_zvalues.size, dtype=np.uint64) - nodes = [] - for layer in layers: - for node in layer: - nodes.append(node) + self.lower_bounds.resize(\ + (self.lower_bounds.shape[0]+unique_bit_shifted_zvalues.size,3)) + self.upper_bounds.resize(\ + (self.upper_bounds.shape[0]+unique_bit_shifted_zvalues.size,3)) + self.node_map.resize(\ + self.node_map.size+unique_bit_shifted_zvalues.size) + self.node_length.resize(\ + self.node_length.size+unique_bit_shifted_zvalues.size) - self.lower_bounds = np.empty((len(nodes),3)) - self.upper_bounds = np.empty((len(nodes),3)) - self.child_map = np.empty(len(nodes), dtype=np.uint32) - self.child_len = np.empty(len(nodes), dtype=np.uint32) - self.first_leaf = None + for i, z in enumerate(unique_bit_shifted_zvalues): + i1 = np.searchsorted(bit_shifted_zvalues, z) + \ + begin_last_layer + i2 = np.searchsorted(bit_shifted_zvalues, z, side='right') + \ + begin_last_layer - for i, node in enumerate(nodes): - self.lower_bounds[i] = node.lower_bound - self.upper_bounds[i] = node.upper_bound - - self.child_len[i] = node.size + zvalues[i] = z - if isinstance(node, Node): - for j, child in enumerate(nodes): - if child is node.children[0]: - self.child_map[i] = j - break + i += begin_last_layer + bit_shifted_zvalues.size - if isinstance(node, Leaf): - self.child_map[i] = node.mesh_idx + self.lower_bounds[i] = \ + [np.min(self.lower_bounds[i1:i2,0]), + np.min(self.lower_bounds[i1:i2,1]), + np.min(self.lower_bounds[i1:i2,2])] - if self.first_leaf is None: - self.first_leaf = i + self.upper_bounds[i] = \ + [np.max(self.upper_bounds[i1:i2,0]), + np.max(self.upper_bounds[i1:i2,1]), + np.max(self.upper_bounds[i1:i2,2])] + + self.node_map[i] = i1 + self.node_length[i] = i2-i1 + + begin_last_layer += bit_shifted_zvalues.size + + if unique_bit_shifted_zvalues.size == 1: + break def load(self, module): """ @@ -283,25 +242,25 @@ class Geometry(object): self.mesh_gpu = cuda.to_device(mesh) self.lower_bounds_gpu = cuda.to_device(lower_bounds) self.upper_bounds_gpu = cuda.to_device(upper_bounds) - self.child_map_gpu = cuda.to_device(self.child_map) - self.child_len_gpu = cuda.to_device(self.child_len) + self.node_map_gpu = cuda.to_device(self.node_map) + self.node_length_gpu = cuda.to_device(self.node_length) mesh_tex = module.get_texref('mesh') lower_bounds_tex = module.get_texref('lower_bounds') upper_bounds_tex = module.get_texref('upper_bounds') - child_map_tex = module.get_texref('child_map_arr') - child_len_tex = module.get_texref('child_len_arr') + node_map_tex = module.get_texref('node_map') + node_length_tex = module.get_texref('node_length') mesh_tex.set_address(self.mesh_gpu, mesh.nbytes) lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes) upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes) - child_map_tex.set_address(self.child_map_gpu, self.child_map.nbytes) - child_len_tex.set_address(self.child_len_gpu, self.child_len.nbytes) + node_map_tex.set_address(self.node_map_gpu, self.node_map.nbytes) + node_length_tex.set_address(self.node_length_gpu, self.node_length.nbytes) mesh_tex.set_format(cuda.array_format.FLOAT, 4) lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) - child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - return [mesh_tex, lower_bounds_tex, upper_bounds_tex, child_map_tex, child_len_tex] + return [mesh_tex, lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex] diff --git a/likelihood.py b/likelihood.py index ae712d8..b31f8fa 100644 --- a/likelihood.py +++ b/likelihood.py @@ -1,32 +1,26 @@ import sys +import time import numpy as np - from pycuda import autoinit from pycuda.compiler import SourceModule from pycuda import gpuarray import pycuda.driver as cuda - from uncertainties import ufloat, umath - from detectors import LBNE - from histogram import Histogram - from photon import uniform_sphere - import layout print 'device %s' % autoinit.device.name() source = open(layout.source + '/kernel.cu').read() module = SourceModule(source, options=['-I' + layout.source], no_extern_c=True, cache_dir=False) - -geometry = LBNE() -geometry.build(bits=6) -texrefs = geometry.load(module) - propagate = module.get_function('propagate') +lbne = LBNE() +lbne.build(bits=9) +texrefs = lbne.load(module) + nblocks = 64 def generate_event(z=0.0, nphotons=1000): @@ -43,14 +37,15 @@ def generate_event(z=0.0, nphotons=1000): dest = np.empty(nphotons, dtype=np.int32) dest_gpu = cuda.to_device(dest) - propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1)) + propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(lbne.node_map.size-1), np.int32(lbne.first_node), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1), texrefs=texrefs) + cuda.Context.synchronize() cuda.memcpy_dtoh(dest, dest_gpu) triangles = dest[(dest != -1)] - event_bincount = np.zeros(len(geometry.solids)) - gpu_bincount = np.bincount(geometry.solid_index[triangles]) + event_bincount = np.zeros(len(lbne.solids)) + gpu_bincount = np.bincount(lbne.solid_index[triangles]) event_bincount[:gpu_bincount.size] = gpu_bincount return event_bincount @@ -59,9 +54,9 @@ def likelihood(event_bincount, z=0.0, nphotons=1000, neval=1000): origins = np.tile(gpuarray.vec.make_float3(0,0,z), (nphotons,1)) origins_gpu = cuda.to_device(origins) - bincount = np.zeros((neval, len(geometry.solids))) + bincount = np.zeros((neval, len(lbne.solids))) for i in range(neval): - print '\reval %i' % i, + print '\revent: %i' % (i+1), sys.stdout.flush() directions = uniform_sphere(nphotons) @@ -74,36 +69,28 @@ def likelihood(event_bincount, z=0.0, nphotons=1000, neval=1000): dest = np.empty(nphotons, dtype=np.int32) dest_gpu = cuda.to_device(dest) - propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1)) + propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(lbne.node_map.size-1), np.int32(lbne.first_node), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1)) + cuda.Context.synchronize() cuda.memcpy_dtoh(dest, dest_gpu) triangles = dest[(dest != -1)] - gpu_bincount = np.bincount(geometry.solid_index[triangles]) + gpu_bincount = np.bincount(lbne.solid_index[triangles]) bincount[i][:gpu_bincount.size] = gpu_bincount print - histograms = [] - for i in range(len(geometry.solids)): + log_likelihood = ufloat((0,0)) + for i in range(len(lbne.solids)): h = Histogram(100, (-0.5, 99.5)) h.fill(bincount[:,i]) h.normalize() - histograms.append(h) - log_likelihood = ufloat((0,0)) - for i, h in enumerate(histograms): - probability = ufloat((h.eval(event_bincount[i]), h.eval_err(event_bincount[i]))) + probability = h.ueval(event_bincount[i]) if probability.nominal_value == 0.0: - return None + probability = ufloat((0.5/h.nentries, 0.5/h.nentries)) log_likelihood += umath.log(probability) return -log_likelihood - -if __name__ == '__main__': - event_bincount = generate_event() - - for z in np.arange(-2.5,2.5,0.1): - print 'z, likelihood = (%f, %s)' % (z, likelihood(event_bincount,z)) diff --git a/src/kernel.cu b/src/kernel.cu index 0d5e54b..f14ed69 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -7,7 +7,7 @@ #include "rotate.h" #include "intersect.h" -#define STACK_SIZE 100 +#define STACK_SIZE 500 /* flattened triangle mesh */ texture<float4, 1, cudaReadModeElementType> mesh; @@ -17,8 +17,8 @@ texture<float4, 1, cudaReadModeElementType> upper_bounds; texture<float4, 1, cudaReadModeElementType> lower_bounds; /* map to child nodes/triangles and the number of child nodes/triangles */ -texture<uint, 1, cudaReadModeElementType> child_map_arr; -texture<uint, 1, cudaReadModeElementType> child_len_arr; +texture<unsigned int, 1, cudaReadModeElementType> node_map; +texture<unsigned int, 1, cudaReadModeElementType> node_length; __device__ float3 make_float3(const float4 &a) { @@ -40,14 +40,14 @@ __device__ bool intersect_node(const float3 &origin, const float3 &direction, co direction `direction` and the global mesh texture. If the ray intersects the texture return the index of the triangle which the ray intersected, else return -1. */ -__device__ int intersect_mesh(const float3 &origin, const float3& direction, const int first_leaf) +__device__ int intersect_mesh(const float3 &origin, const float3& direction, const int start_node, const int first_node) { int triangle_idx = -1; float distance; float min_distance; - if (!intersect_node(origin, direction, 0)) + if (!intersect_node(origin, direction, start_node)) return -1; int stack[STACK_SIZE]; @@ -55,20 +55,20 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con int *head = &stack[0]; int *node = &stack[1]; int *tail = &stack[STACK_SIZE-1]; - *node = 0; + *node = start_node; int i; do { - int child_map = tex1Dfetch(child_map_arr, *node); - int child_len = tex1Dfetch(child_len_arr, *node); + int index = tex1Dfetch(node_map, *node); + int length = tex1Dfetch(node_length, *node); - if (*node < first_leaf) + if (*node >= first_node) { - for (i=0; i < child_len; i++) - if (intersect_node(origin, direction, child_map+i)) - *node++ = child_map+i; + for (i=0; i < length; i++) + if (intersect_node(origin, direction, index+i)) + *node++ = index+i; if (node == head) break; @@ -77,9 +77,9 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con } else // node is a leaf { - for (i=0; i < child_len; i++) + for (i=0; i < length; i++) { - int mesh_idx = 3*(child_map + i); + int mesh_idx = 3*(index + i); float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx)); float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1)); @@ -89,14 +89,14 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con { if (triangle_idx == -1) { - triangle_idx = child_map + i; + triangle_idx = index + i; min_distance = distance; continue; } if (distance < min_distance) { - triangle_idx = child_map + i; + triangle_idx = index + i; min_distance = distance; } } @@ -164,7 +164,7 @@ __global__ void rotate(int max_idx, float3 *points, float phi, float3 axis) set the pixel associated with the ray to a 32 bit color whose brightness is determined by the cosine of the angle between the ray and the normal of the triangle it intersected, else set the pixel to 0. */ -__global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *pixels) +__global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int start_node, int first_node, int *pixels) { int idx = blockIdx.x*blockDim.x + threadIdx.x; @@ -175,7 +175,7 @@ __global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int float3 direction = *(directions+idx); direction /= norm(direction); - int intersection_idx = intersect_mesh(origin, direction, first_leaf); + int intersection_idx = intersect_mesh(origin, direction, start_node, first_node); if (intersection_idx == -1) { @@ -198,7 +198,7 @@ __global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int intersects the mesh set the hit_solid array value associated with the photon to the triangle index of the triangle the photon intersected, else set the hit_solid array value to -1. */ -__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *hit_triangles) +__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int start_node, int first_node, int *hit_triangles) { int idx = blockIdx.x*blockDim.x + threadIdx.x; @@ -209,7 +209,7 @@ __global__ void propagate(int max_idx, float3 *origins, float3 *directions, int float3 direction = *(directions+idx); direction /= norm(direction); - *(hit_triangles+idx) = intersect_mesh(origin, direction, first_leaf); + *(hit_triangles+idx) = intersect_mesh(origin, direction, start_node, first_node); } // propagate @@ -22,7 +22,14 @@ def view(geometry, name=''): - rotate: click and drag the mouse - move: shift+click and drag the mouse """ - lower_bound, upper_bound = get_bounds(geometry.mesh) + + lower_bound = np.array([np.min(geometry.mesh[:,:,0]), + np.min(geometry.mesh[:,:,1]), + np.min(geometry.mesh[:,:,2])]) + + upper_bound = np.array([np.max(geometry.mesh[:,:,0]), + np.max(geometry.mesh[:,:,1]), + np.max(geometry.mesh[:,:,2])]) scale = np.linalg.norm(upper_bound-lower_bound) @@ -74,8 +81,7 @@ def view(geometry, name=''): def render(): """Render the mesh and display to screen.""" - cuda_raytrace(np.int32(pixels.size), origins_gpu, directions_gpu, - np.int32(geometry.first_leaf), pixels_gpu, **gpu_kwargs) + cuda_raytrace(np.int32(pixels.size), origins_gpu, directions_gpu, np.int32(geometry.node_map.size-1), np.int32(geometry.first_node), pixels_gpu, texrefs=texrefs, **gpu_kwargs) cuda.memcpy_dtoh(pixels, pixels_gpu) pygame.surfarray.blit_array(screen, pixels.reshape(size)) @@ -172,7 +178,7 @@ if __name__ == '__main__': parser = optparse.OptionParser('%prog filename.stl') parser.add_option('-b', '--bits', type='int', dest='bits', - help='bits for z-ordering space axes', default=6) + help='bits for z-ordering space axes', default=8) options, args = parser.parse_args() if len(args) < 1: |