summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--geometry.py237
-rw-r--r--likelihood.py49
-rw-r--r--src/kernel.cu40
-rwxr-xr-xview.py14
4 files changed, 146 insertions, 194 deletions
diff --git a/geometry.py b/geometry.py
index e3748e6..f50fe89 100644
--- a/geometry.py
+++ b/geometry.py
@@ -1,89 +1,21 @@
import numpy as np
-from itertools import chain
+import numpy.ma as ma
import pycuda.driver as cuda
from pycuda import gpuarray
-def compress(data, selectors):
- """
- Make an iterator that filters elements from `data` returning only those
- that have a corresponding element in `selectors` that evaluates to True.
- Stops when either the `data` or `selectors` iterables has been
- exhausted.
-
- From Python 2.7 itertools.
- """
- return (d for d, s in zip(data, selectors) if s)
-
-def get_bounds(mesh):
- """Returns the lower/upper bounds for `mesh`."""
- lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), \
- np.min(mesh[:,:,2])])
- upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), \
- np.max(mesh[:,:,2])])
- return lower_bound, upper_bound
-
-class Leaf(object):
- """
- Leaf object represents the last layer in the bounding volume hierarchy
- which points to a subset of the triangle mesh.
-
- Args:
- - mesh: array,
- A subset of the triangle mesh.
- - mesh_idx: int,
- The index of the first triangle in the global mesh.
- - zvlaue: int, *optional*
- The zvalue associated with this leaf.
-
- .. note::
- The `mesh` passed in the constructor is not actually stored in the
- leaf. It is simply used to construct the leaf's bounding box.
- """
- def __init__(self, mesh, mesh_idx, zvalue=None):
- self.zvalue = zvalue
- self.mesh_idx = mesh_idx
-
- self.lower_bound, self.upper_bound = get_bounds(mesh)
-
- self.size = mesh.shape[0]
-
-class Node(object):
- """
- Node object represents a node in the bounding volume hierarchy which
- contains a list of child nodes.
-
- Args:
- - children: list,
- List of child nodes/leafs.
- - zvalue: int, *optional*
- The zvalue associated with this node.
- """
- def __init__(self, children, zvalue=None):
- self.zvalue = zvalue
-
- lower_pts = np.array([child.lower_bound for child in children])
- upper_pts = np.array([child.upper_bound for child in children])
-
- self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])])
- self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])])
-
- self.size = len(children)
-
- self.children = children
-
-def interleave(arr):
+def interleave(arr, bits):
"""
Interleave the bits of quantized three-dimensional points in space.
Example
- >>> interleave(np.identity(3, dtpe=np.int))
+ >>> interleave(np.identity(3, dtype=np.int))
array([4, 2, 1], dtype=uint64)
"""
if len(arr.shape) != 2 or arr.shape[1] != 3:
raise Exception('shape mismatch')
- z = np.zeros(arr.shape[0], dtype=np.uint64)
- for i in range(arr.dtype.itemsize*8):
+ z = np.zeros(arr.shape[0], dtype=np.uint32)
+ for i in range(bits):
z |= (arr[:,2] & 1 << i) << (2*i) | \
(arr[:,1] & 1 << i) << (2*i+1) | \
(arr[:,0] & 1 << i) << (2*i+2)
@@ -95,16 +27,26 @@ def morton_order(mesh, bits):
bits of the quantized center coordinates of each triangle. Each coordinate
axis is quantized into 2**bits bins.
"""
- lower_bound, upper_bound = get_bounds(mesh)
+
+ lower_bound = np.array([np.min(mesh[:,:,0]),
+ np.min(mesh[:,:,1]),
+ np.min(mesh[:,:,2])])
+
+ upper_bound = np.array([np.max(mesh[:,:,0]),
+ np.max(mesh[:,:,1]),
+ np.max(mesh[:,:,2])])
+
+ if bits <= 0 or bits > 12:
+ raise Exception('number of bits must be in the range (0,12].')
max_value = 2**bits - 1
-
+
def quantize(x):
- return np.uint64((x-lower_bound)*max_value/(upper_bound-lower_bound))
+ return np.uint32((x-lower_bound)*max_value/(upper_bound-lower_bound))
mean_positions = quantize(np.mean(mesh, axis=1))
- return interleave(mean_positions)
+ return interleave(mean_positions, bits)
class Solid(object):
"""
@@ -169,7 +111,7 @@ class Geometry(object):
if solid.surface2 not in self.surfaces:
self.surfaces.append(solid.surface1)
- def build(self, bits=3):
+ def build(self, bits=8):
"""Build the bounding volume hierarchy of the geometry."""
mesh = np.concatenate([solid.mesh for solid in self.solids])
@@ -190,74 +132,91 @@ class Geometry(object):
np.concatenate([np.tile(self.surfaces.index(solid.surface2), \
len(solid)) for solid in self.solids])
- # array of zvalue for each triangle in mesh
- zvalues = morton_order(mesh, bits)
+ zvalues_mesh = morton_order(mesh, bits)
+ reorder = np.argsort(zvalues_mesh)
+ zvalues_mesh = zvalues_mesh[reorder]
- # mesh indices in order of increasing zvalue
- order = np.array(zip(*sorted(zip(zvalues, range(zvalues.size))))[-1])
+ if (np.diff(zvalues_mesh) < 0).any():
+ raise Exception('zvalues_mesh out of order')
- # reorder all arrays ordered by triangle index
- self.zvalues = zvalues[order]
- self.mesh = mesh[order]
- self.solid_index = solid_index[order]
- self.material1_index = material1_index[order]
- self.material2_index = material2_index[order]
- self.surface1_index = surface1_index[order]
- self.surface2_index = surface2_index[order]
+ self.mesh = mesh[reorder]
+ self.solid_index = solid_index[reorder]
+ self.material1_index = material1_index[reorder]
+ self.material2_index = material2_index[reorder]
+ self.surface1_index = surface1_index[reorder]
+ self.surface2_index = surface2_index[reorder]
- leafs = []
- for z in sorted(set(self.zvalues)):
- mask = (self.zvalues == z)
- leafs.append(Leaf(self.mesh[mask], np.where(mask)[0][0], z))
+ unique_zvalues = np.unique(zvalues_mesh)
+ zvalues = np.empty(unique_zvalues.size, dtype=np.uint64)
- layers = []
- layers.append(leafs)
+ self.lower_bounds = np.empty((unique_zvalues.size,3), dtype=np.float32)
+ self.upper_bounds = np.empty((unique_zvalues.size,3), dtype=np.float32)
+ self.node_map = np.empty(unique_zvalues.size, dtype=np.uint32)
+ self.node_length = np.empty(unique_zvalues.size, dtype=np.uint32)
- while True:
- bit_shifted_zvalues = \
- np.array([node.zvalue for node in layers[-1]]) >> 1
+ for i, z in enumerate(unique_zvalues):
+ i1 = np.searchsorted(zvalues_mesh, z)
+ i2 = np.searchsorted(zvalues_mesh, z, side='right')
- nodes = []
- for z in sorted(set(bit_shifted_zvalues)):
- mask = (bit_shifted_zvalues == z)
- nodes.append(Node(list(compress(layers[-1], mask)), z))
+ self.lower_bounds[i] = [np.min(self.mesh[i1:i2,:,0]),
+ np.min(self.mesh[i1:i2,:,1]),
+ np.min(self.mesh[i1:i2,:,2])]
+
+ self.upper_bounds[i] = [np.max(self.mesh[i1:i2,:,0]),
+ np.max(self.mesh[i1:i2,:,1]),
+ np.max(self.mesh[i1:i2,:,2])]
+
+ self.node_map[i] = i1
+ self.node_length[i] = i2-i1
- layers.append(nodes)
+ zvalues[i] = z
- if len(nodes) == 1:
- break
+ self.first_node = unique_zvalues.size
- layers.reverse()
+ begin_last_layer = 0
+
+ while True:
+ bit_shifted_zvalues = zvalues >> 1
+ unique_bit_shifted_zvalues = np.unique(bit_shifted_zvalues)
+ zvalues = np.empty(unique_bit_shifted_zvalues.size, dtype=np.uint64)
- nodes = []
- for layer in layers:
- for node in layer:
- nodes.append(node)
+ self.lower_bounds.resize(\
+ (self.lower_bounds.shape[0]+unique_bit_shifted_zvalues.size,3))
+ self.upper_bounds.resize(\
+ (self.upper_bounds.shape[0]+unique_bit_shifted_zvalues.size,3))
+ self.node_map.resize(\
+ self.node_map.size+unique_bit_shifted_zvalues.size)
+ self.node_length.resize(\
+ self.node_length.size+unique_bit_shifted_zvalues.size)
- self.lower_bounds = np.empty((len(nodes),3))
- self.upper_bounds = np.empty((len(nodes),3))
- self.child_map = np.empty(len(nodes), dtype=np.uint32)
- self.child_len = np.empty(len(nodes), dtype=np.uint32)
- self.first_leaf = None
+ for i, z in enumerate(unique_bit_shifted_zvalues):
+ i1 = np.searchsorted(bit_shifted_zvalues, z) + \
+ begin_last_layer
+ i2 = np.searchsorted(bit_shifted_zvalues, z, side='right') + \
+ begin_last_layer
- for i, node in enumerate(nodes):
- self.lower_bounds[i] = node.lower_bound
- self.upper_bounds[i] = node.upper_bound
-
- self.child_len[i] = node.size
+ zvalues[i] = z
- if isinstance(node, Node):
- for j, child in enumerate(nodes):
- if child is node.children[0]:
- self.child_map[i] = j
- break
+ i += begin_last_layer + bit_shifted_zvalues.size
- if isinstance(node, Leaf):
- self.child_map[i] = node.mesh_idx
+ self.lower_bounds[i] = \
+ [np.min(self.lower_bounds[i1:i2,0]),
+ np.min(self.lower_bounds[i1:i2,1]),
+ np.min(self.lower_bounds[i1:i2,2])]
- if self.first_leaf is None:
- self.first_leaf = i
+ self.upper_bounds[i] = \
+ [np.max(self.upper_bounds[i1:i2,0]),
+ np.max(self.upper_bounds[i1:i2,1]),
+ np.max(self.upper_bounds[i1:i2,2])]
+
+ self.node_map[i] = i1
+ self.node_length[i] = i2-i1
+
+ begin_last_layer += bit_shifted_zvalues.size
+
+ if unique_bit_shifted_zvalues.size == 1:
+ break
def load(self, module):
"""
@@ -283,25 +242,25 @@ class Geometry(object):
self.mesh_gpu = cuda.to_device(mesh)
self.lower_bounds_gpu = cuda.to_device(lower_bounds)
self.upper_bounds_gpu = cuda.to_device(upper_bounds)
- self.child_map_gpu = cuda.to_device(self.child_map)
- self.child_len_gpu = cuda.to_device(self.child_len)
+ self.node_map_gpu = cuda.to_device(self.node_map)
+ self.node_length_gpu = cuda.to_device(self.node_length)
mesh_tex = module.get_texref('mesh')
lower_bounds_tex = module.get_texref('lower_bounds')
upper_bounds_tex = module.get_texref('upper_bounds')
- child_map_tex = module.get_texref('child_map_arr')
- child_len_tex = module.get_texref('child_len_arr')
+ node_map_tex = module.get_texref('node_map')
+ node_length_tex = module.get_texref('node_length')
mesh_tex.set_address(self.mesh_gpu, mesh.nbytes)
lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes)
upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes)
- child_map_tex.set_address(self.child_map_gpu, self.child_map.nbytes)
- child_len_tex.set_address(self.child_len_gpu, self.child_len.nbytes)
+ node_map_tex.set_address(self.node_map_gpu, self.node_map.nbytes)
+ node_length_tex.set_address(self.node_length_gpu, self.node_length.nbytes)
mesh_tex.set_format(cuda.array_format.FLOAT, 4)
lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
- child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
- child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+ node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+ node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
- return [mesh_tex, lower_bounds_tex, upper_bounds_tex, child_map_tex, child_len_tex]
+ return [mesh_tex, lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex]
diff --git a/likelihood.py b/likelihood.py
index ae712d8..b31f8fa 100644
--- a/likelihood.py
+++ b/likelihood.py
@@ -1,32 +1,26 @@
import sys
+import time
import numpy as np
-
from pycuda import autoinit
from pycuda.compiler import SourceModule
from pycuda import gpuarray
import pycuda.driver as cuda
-
from uncertainties import ufloat, umath
-
from detectors import LBNE
-
from histogram import Histogram
-
from photon import uniform_sphere
-
import layout
print 'device %s' % autoinit.device.name()
source = open(layout.source + '/kernel.cu').read()
module = SourceModule(source, options=['-I' + layout.source], no_extern_c=True, cache_dir=False)
-
-geometry = LBNE()
-geometry.build(bits=6)
-texrefs = geometry.load(module)
-
propagate = module.get_function('propagate')
+lbne = LBNE()
+lbne.build(bits=9)
+texrefs = lbne.load(module)
+
nblocks = 64
def generate_event(z=0.0, nphotons=1000):
@@ -43,14 +37,15 @@ def generate_event(z=0.0, nphotons=1000):
dest = np.empty(nphotons, dtype=np.int32)
dest_gpu = cuda.to_device(dest)
- propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1))
+ propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(lbne.node_map.size-1), np.int32(lbne.first_node), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1), texrefs=texrefs)
+ cuda.Context.synchronize()
cuda.memcpy_dtoh(dest, dest_gpu)
triangles = dest[(dest != -1)]
- event_bincount = np.zeros(len(geometry.solids))
- gpu_bincount = np.bincount(geometry.solid_index[triangles])
+ event_bincount = np.zeros(len(lbne.solids))
+ gpu_bincount = np.bincount(lbne.solid_index[triangles])
event_bincount[:gpu_bincount.size] = gpu_bincount
return event_bincount
@@ -59,9 +54,9 @@ def likelihood(event_bincount, z=0.0, nphotons=1000, neval=1000):
origins = np.tile(gpuarray.vec.make_float3(0,0,z), (nphotons,1))
origins_gpu = cuda.to_device(origins)
- bincount = np.zeros((neval, len(geometry.solids)))
+ bincount = np.zeros((neval, len(lbne.solids)))
for i in range(neval):
- print '\reval %i' % i,
+ print '\revent: %i' % (i+1),
sys.stdout.flush()
directions = uniform_sphere(nphotons)
@@ -74,36 +69,28 @@ def likelihood(event_bincount, z=0.0, nphotons=1000, neval=1000):
dest = np.empty(nphotons, dtype=np.int32)
dest_gpu = cuda.to_device(dest)
- propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1))
+ propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(lbne.node_map.size-1), np.int32(lbne.first_node), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1))
+ cuda.Context.synchronize()
cuda.memcpy_dtoh(dest, dest_gpu)
triangles = dest[(dest != -1)]
- gpu_bincount = np.bincount(geometry.solid_index[triangles])
+ gpu_bincount = np.bincount(lbne.solid_index[triangles])
bincount[i][:gpu_bincount.size] = gpu_bincount
print
- histograms = []
- for i in range(len(geometry.solids)):
+ log_likelihood = ufloat((0,0))
+ for i in range(len(lbne.solids)):
h = Histogram(100, (-0.5, 99.5))
h.fill(bincount[:,i])
h.normalize()
- histograms.append(h)
- log_likelihood = ufloat((0,0))
- for i, h in enumerate(histograms):
- probability = ufloat((h.eval(event_bincount[i]), h.eval_err(event_bincount[i])))
+ probability = h.ueval(event_bincount[i])
if probability.nominal_value == 0.0:
- return None
+ probability = ufloat((0.5/h.nentries, 0.5/h.nentries))
log_likelihood += umath.log(probability)
return -log_likelihood
-
-if __name__ == '__main__':
- event_bincount = generate_event()
-
- for z in np.arange(-2.5,2.5,0.1):
- print 'z, likelihood = (%f, %s)' % (z, likelihood(event_bincount,z))
diff --git a/src/kernel.cu b/src/kernel.cu
index 0d5e54b..f14ed69 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -7,7 +7,7 @@
#include "rotate.h"
#include "intersect.h"
-#define STACK_SIZE 100
+#define STACK_SIZE 500
/* flattened triangle mesh */
texture<float4, 1, cudaReadModeElementType> mesh;
@@ -17,8 +17,8 @@ texture<float4, 1, cudaReadModeElementType> upper_bounds;
texture<float4, 1, cudaReadModeElementType> lower_bounds;
/* map to child nodes/triangles and the number of child nodes/triangles */
-texture<uint, 1, cudaReadModeElementType> child_map_arr;
-texture<uint, 1, cudaReadModeElementType> child_len_arr;
+texture<unsigned int, 1, cudaReadModeElementType> node_map;
+texture<unsigned int, 1, cudaReadModeElementType> node_length;
__device__ float3 make_float3(const float4 &a)
{
@@ -40,14 +40,14 @@ __device__ bool intersect_node(const float3 &origin, const float3 &direction, co
direction `direction` and the global mesh texture. If the ray intersects
the texture return the index of the triangle which the ray intersected,
else return -1. */
-__device__ int intersect_mesh(const float3 &origin, const float3& direction, const int first_leaf)
+__device__ int intersect_mesh(const float3 &origin, const float3& direction, const int start_node, const int first_node)
{
int triangle_idx = -1;
float distance;
float min_distance;
- if (!intersect_node(origin, direction, 0))
+ if (!intersect_node(origin, direction, start_node))
return -1;
int stack[STACK_SIZE];
@@ -55,20 +55,20 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con
int *head = &stack[0];
int *node = &stack[1];
int *tail = &stack[STACK_SIZE-1];
- *node = 0;
+ *node = start_node;
int i;
do
{
- int child_map = tex1Dfetch(child_map_arr, *node);
- int child_len = tex1Dfetch(child_len_arr, *node);
+ int index = tex1Dfetch(node_map, *node);
+ int length = tex1Dfetch(node_length, *node);
- if (*node < first_leaf)
+ if (*node >= first_node)
{
- for (i=0; i < child_len; i++)
- if (intersect_node(origin, direction, child_map+i))
- *node++ = child_map+i;
+ for (i=0; i < length; i++)
+ if (intersect_node(origin, direction, index+i))
+ *node++ = index+i;
if (node == head)
break;
@@ -77,9 +77,9 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con
}
else // node is a leaf
{
- for (i=0; i < child_len; i++)
+ for (i=0; i < length; i++)
{
- int mesh_idx = 3*(child_map + i);
+ int mesh_idx = 3*(index + i);
float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx));
float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1));
@@ -89,14 +89,14 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con
{
if (triangle_idx == -1)
{
- triangle_idx = child_map + i;
+ triangle_idx = index + i;
min_distance = distance;
continue;
}
if (distance < min_distance)
{
- triangle_idx = child_map + i;
+ triangle_idx = index + i;
min_distance = distance;
}
}
@@ -164,7 +164,7 @@ __global__ void rotate(int max_idx, float3 *points, float phi, float3 axis)
set the pixel associated with the ray to a 32 bit color whose brightness is
determined by the cosine of the angle between the ray and the normal of the
triangle it intersected, else set the pixel to 0. */
-__global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *pixels)
+__global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int start_node, int first_node, int *pixels)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -175,7 +175,7 @@ __global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int
float3 direction = *(directions+idx);
direction /= norm(direction);
- int intersection_idx = intersect_mesh(origin, direction, first_leaf);
+ int intersection_idx = intersect_mesh(origin, direction, start_node, first_node);
if (intersection_idx == -1)
{
@@ -198,7 +198,7 @@ __global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int
intersects the mesh set the hit_solid array value associated with the
photon to the triangle index of the triangle the photon intersected, else
set the hit_solid array value to -1. */
-__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *hit_triangles)
+__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int start_node, int first_node, int *hit_triangles)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -209,7 +209,7 @@ __global__ void propagate(int max_idx, float3 *origins, float3 *directions, int
float3 direction = *(directions+idx);
direction /= norm(direction);
- *(hit_triangles+idx) = intersect_mesh(origin, direction, first_leaf);
+ *(hit_triangles+idx) = intersect_mesh(origin, direction, start_node, first_node);
} // propagate
diff --git a/view.py b/view.py
index 7e9e815..cbfb6d5 100755
--- a/view.py
+++ b/view.py
@@ -22,7 +22,14 @@ def view(geometry, name=''):
- rotate: click and drag the mouse
- move: shift+click and drag the mouse
"""
- lower_bound, upper_bound = get_bounds(geometry.mesh)
+
+ lower_bound = np.array([np.min(geometry.mesh[:,:,0]),
+ np.min(geometry.mesh[:,:,1]),
+ np.min(geometry.mesh[:,:,2])])
+
+ upper_bound = np.array([np.max(geometry.mesh[:,:,0]),
+ np.max(geometry.mesh[:,:,1]),
+ np.max(geometry.mesh[:,:,2])])
scale = np.linalg.norm(upper_bound-lower_bound)
@@ -74,8 +81,7 @@ def view(geometry, name=''):
def render():
"""Render the mesh and display to screen."""
- cuda_raytrace(np.int32(pixels.size), origins_gpu, directions_gpu,
- np.int32(geometry.first_leaf), pixels_gpu, **gpu_kwargs)
+ cuda_raytrace(np.int32(pixels.size), origins_gpu, directions_gpu, np.int32(geometry.node_map.size-1), np.int32(geometry.first_node), pixels_gpu, texrefs=texrefs, **gpu_kwargs)
cuda.memcpy_dtoh(pixels, pixels_gpu)
pygame.surfarray.blit_array(screen, pixels.reshape(size))
@@ -172,7 +178,7 @@ if __name__ == '__main__':
parser = optparse.OptionParser('%prog filename.stl')
parser.add_option('-b', '--bits', type='int', dest='bits',
- help='bits for z-ordering space axes', default=6)
+ help='bits for z-ordering space axes', default=8)
options, args = parser.parse_args()
if len(args) < 1: