diff options
| author | Anthony LaTorre <telatorre@gmail.com> | 2011-05-13 01:03:42 -0400 | 
|---|---|---|
| committer | Anthony LaTorre <telatorre@gmail.com> | 2011-05-13 01:03:42 -0400 | 
| commit | 519acb39bdb1df9869bb17bcc710108ac8c02983 (patch) | |
| tree | bc4fd6f165df09feb7fdc0b166d38df144ebc9a2 | |
| parent | 6996620497d0e6382df8e1cb0d07f6746ac3b0f3 (diff) | |
| download | chroma-519acb39bdb1df9869bb17bcc710108ac8c02983.tar.gz chroma-519acb39bdb1df9869bb17bcc710108ac8c02983.tar.bz2 chroma-519acb39bdb1df9869bb17bcc710108ac8c02983.zip | |
added a bounding volume hierarchy
| -rw-r--r-- | build.py | 102 | ||||
| -rw-r--r-- | src/intersect.cu | 222 | ||||
| -rw-r--r-- | src/linalg.h | 5 | ||||
| -rw-r--r-- | test.py | 88 | ||||
| -rw-r--r-- | zcurve.py | 25 | 
5 files changed, 381 insertions, 61 deletions
| diff --git a/build.py b/build.py new file mode 100644 index 0000000..8c05c5d --- /dev/null +++ b/build.py @@ -0,0 +1,102 @@ +import numpy as np + +class Leaf(object): +    def __init__(self, meshasdf, mesh_idx): +        self.xmin, self.ymin, self.zmin = meshasdf[0] +        self.xmax, self.ymax, self.zmax = meshasdf[0] + +        for x, y, z in meshasdf[1:]: +            if x < self.xmin: +                self.xmin = x +            if x > self.xmax: +                self.xmax = x +            if y < self.ymin: +                self.ymin = y +            if y > self.ymax: +                self.ymax = y +            if z < self.zmin: +                self.zmin = z +            if z > self.zmax: +                self.zmax = z + +        self.mesh_idx = mesh_idx +        self.size = meshasdf.size//3 + +class Node(object): +    def __init__(self, children): +        self.xmin = children[0].xmin +        self.xmax = children[0].xmax +        self.ymin = children[0].ymin +        self.ymax = children[0].ymax +        self.zmin = children[0].zmin +        self.zmax = children[0].zmax + +        for child in children[1:]: +            if child.xmin < self.xmin: +                self.xmin = child.xmin +            if child.xmax > self.xmax: +                self.xmax = child.xmax +            if child.ymin < self.ymin: +                self.ymin = child.ymin +            if child.ymax > self.ymax: +                self.ymax = child.ymax +            if child.zmin < self.zmin: +                self.zmin = child.zmin +            if child.zmax > self.zmax: +                self.zmax = child.zmax + +        self.children = children +        self.size = len(children) + +class Graph(object): +    def __init__(self, mesh, triangles_per_leaf=2, children_per_node=4): +        leafs = [] +        for i in range(0, mesh.size, triangles_per_leaf*3): +            leafs.append(Leaf(mesh[i:i+triangles_per_leaf*3], i)) + +        layers = [] +        layers.append(leafs) + +        while len(layers[-1]) > 1: +            nodes = [] +            for i in range(0, len(layers[-1]), children_per_node): +                nodes.append(Node(layers[-1][i:i+children_per_node])) + +            layers.append(nodes) + +        # leaves last +        layers.reverse() + +        nodes = [] +        for layer in layers: +            for node in layer: +                nodes.append(node) + +        lower = [] +        upper = [] +        start = [] +        count = [] # number of child nodes or triangles +        first_leaf = -1 + +        for i, node in enumerate(nodes): +            lower.append([node.xmin, node.ymin, node.zmin]) +            upper.append([node.xmax, node.ymax, node.zmax]) +            count.append(node.size) + +            if isinstance(node, Node): +                for j, child in enumerate(nodes): +                    if child is node.children[0]: +                        start.append(j) +                        break + +            if isinstance(node, Leaf): +                start.append(node.mesh_idx) + +                if first_leaf == -1: +                    first_leaf = i + +        self.lower = np.array(lower) +        self.upper = np.array(upper) +        self.start = np.array(start) +        self.count = np.array(count) +        self.first_leaf = first_leaf diff --git a/src/intersect.cu b/src/intersect.cu index c78350e..8c2bf45 100644 --- a/src/intersect.cu +++ b/src/intersect.cu @@ -1,17 +1,33 @@  //-*-c-*- +#include <math_constants.h> + +#include "linalg.h" +#include "matrix.h" +#include "rotate.h"  texture<float4, 1, cudaReadModeElementType> mesh; -__device__ bool intersect_triangle(const float3 &x, const float3 &p, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection) +texture<float4, 1, cudaReadModeElementType> upper_bound_arr; +texture<float4, 1, cudaReadModeElementType> lower_bound_arr; + +texture<uint, 1, cudaReadModeElementType> child_map_arr; +texture<uint, 1, cudaReadModeElementType> child_len_arr; + +__device__ float3 make_float3(const float4 &a)  { -	Matrix m = make_matrix(v1-v0, v2-v0, -p); +	return make_float3(a.x, a.y, a.z); +} + +__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection) +{ +	Matrix m = make_matrix(v1-v0, v2-v0, -direction);  	float determinant = det(m);  	if (determinant == 0.0)  		return false; -	float3 b = x-v0; +	float3 b = origin-v0;  	float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + (m.a02*m.a21 - m.a01*m.a22)*b.y + (m.a01*m.a12 - m.a02*m.a11)*b.z)/determinant; @@ -28,103 +44,213 @@ __device__ bool intersect_triangle(const float3 &x, const float3 &p, const float  	if (u3 < 0.0 || (1-u1-u2) < 0.0)  		return false; -	intersection = x + p*u3; +	intersection = origin + direction*u3;  	return true;  } -__device__ int get_color(const float3 &p, const float3 &v0, const float3& v1, const float3 &v2) +__device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2)  { -	float3 normal = cross(v1-v0,v2-v0); +	int rgb = floorf(255*dot(normalize(cross(v1-v0,v2-v0)),-direction)); + +	return rgb << 16 | rgb << 8 | rgb; +} + + +__device__ bool intersect_box(const float3 &origin, const float3 &direction, const float3 &lower_bound, const float3 &upper_bound) +{ +	float kmin, kmax, kymin, kymax, kzmin, kzmax; + +	if (direction.x >= 0.0) +	{ +		kmin = (lower_bound.x - origin.x)/direction.x; +		kmax = (upper_bound.x - origin.x)/direction.x; +	} +	else +	{ +		kmin = (upper_bound.x - origin.x)/direction.x; +		kmax = (lower_bound.x - origin.x)/direction.x; +	} + +	if (kmax < kmin) +		return false; + +	if (direction.y >= 0.0) +	{ +		kymin = (lower_bound.y - origin.y)/direction.y; +		kymax = (upper_bound.y - origin.y)/direction.y; +	} +	else +	{ +		kymin = (upper_bound.y - origin.y)/direction.y; +		kymax = (lower_bound.y - origin.y)/direction.y; +	} + +	if (kymax < kymin) +		return false; + +	if (kymin > kmin) +		kmin = kymin; + +	if (kymax < kmax) +		kmax = kymax; + +	if (kmin > kmax) +		return false; + +	if (direction.z >= 0.0) +	{ +		kzmin = (lower_bound.z - origin.z)/direction.z; +		kzmax = (upper_bound.z - origin.z)/direction.z; +	} +	else +	{ +		kzmin = (upper_bound.z - origin.z)/direction.z; +		kzmax = (lower_bound.z - origin.z)/direction.z; +	} -	float scale; -	scale = dot(normal,-p)/(norm(normal)*norm(p)); -	if (scale < 0.0) -		scale = dot(-normal,-p)/(norm(normal)*norm(p)); +	if (kzmax < kzmin) +		return false; -	int rgb = floorf(255*scale); +	if (kzmin > kmin) +		kmin = kzmin; -	return rgb*65536 + rgb*256 + rgb; +	if (kzmax < kmax) +		kmax = kzmax; + +	if (kmin > kmax) +		return false; + +	if (kmax < 0.0) +		return false; + +	return true;  } -__device__ float3 make_float3(const float4 &a) +__device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i)  { -	return make_float3(a.x, a.y, a.z); +	float3 lower_bound = make_float3(tex1Dfetch(lower_bound_arr, i)); +	float3 upper_bound = make_float3(tex1Dfetch(upper_bound_arr, i)); + +	return intersect_box(origin, direction, lower_bound, upper_bound);  }  extern "C"  { -__global__ void translate(int max_idx, float3 *x, float3 v) +__global__ void translate(int max_idx, float3 *pt, float3 v)  {  	int idx = blockIdx.x*blockDim.x + threadIdx.x;  	if (idx > max_idx)  		return; -	x[idx] += v; +	pt[idx] += v;  } -__global__ void rotate(int max_idx, float3 *x, float phi, float3 axis) +__global__ void rotate(int max_idx, float3 *pt, float phi, float3 axis)  {  	int idx = blockIdx.x*blockDim.x + threadIdx.x;  	if (idx > max_idx)  		return; -	x[idx] = rotate(x[idx], phi, axis); +	pt[idx] = rotate(pt[idx], phi, axis);  } -__global__ void intersect_triangle_mesh(int max_idx, float3 *xarr, float3 *parr, int n, int *pixelarr) +__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf)  {  	int idx = blockIdx.x*blockDim.x + threadIdx.x;  	if (idx > max_idx)  		return; -	float3 x = xarr[idx]; -	float3 p = parr[idx]; -	int *pixel = pixelarr+idx; +	float3 origin = origin_arr[idx]; +	float3 direction = direction_arr[idx]; + +	int *pixel = pixel_arr+idx;  	bool hit = false;  	float distance, min_distance;  	float3 intersection, min_intersection; +	 +	if (!intersect_node(origin, direction, 0)) +	{ +		*pixel = 0; +		return; +	} + +	int stack[100]; + +	int *head = &stack[0]; +	int *node = &stack[1]; +	*node = 0;  	int i; -	for (i=0; i < n; i++) + +	do  	{ -		float3 v0 = make_float3(tex1Dfetch(mesh, 3*i)); -		float3 v1 = make_float3(tex1Dfetch(mesh, 3*i+1)); -		float3 v2 = make_float3(tex1Dfetch(mesh, 3*i+2)); -			 -		if (intersect_triangle(x, p, v0, v1, v2, intersection)) -		{ -			if (!hit) -			{ -				*pixel = get_color(p, v0, v1, v2); +		int child_map = tex1Dfetch(child_map_arr, *node); +		int child_len = tex1Dfetch(child_len_arr, *node); -				min_distance = norm(intersection-x); -				min_intersection = intersection; -				hit = true; -				continue; -			} +		if (*node < first_leaf) +		{ +			for (i=0; i < child_len; i++) +				if (intersect_node(origin, direction, child_map+i)) +					*node++ = child_map+i; -			distance = norm(intersection-x); +			if (node == head) +				break; +			 +			node--; -			if (distance < min_distance) +		} +		else // node is a leaf +		{ +			for (i=0; i < child_len; i++)  			{ -				*pixel = get_color(p, v0, v1, v2); +				int mesh_idx = child_map + 3*i; + +				float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx)); +				float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1)); +				float3 v2 = make_float3(tex1Dfetch(mesh, mesh_idx+2)); +				 +				if (intersect_triangle(origin, direction, v0, v1, v2, intersection)) +				{ +					if (!hit) +					{ +						*pixel = get_color(direction, v0, v1, v2); +						 +						min_distance = norm(intersection-origin); +						min_intersection = intersection; +						hit = true; +						continue; +					} +					 +					distance = norm(intersection-origin); +					 +					if (distance < min_distance) +					{ +						*pixel = get_color(direction, v0, v1, v2); +						 +						min_distance = distance; +						min_intersection = intersection; +					} +				} + +			} // triangle loop + +			node--; + +		} // node is a leaf + +	} // while loop +	while (node != head); -				min_distance = distance; -				min_intersection = intersection; -			} -		} -	} -	  	if (!hit)  		*pixel = 0; -	 -} + +} // intersect mesh  } // extern "c" diff --git a/src/linalg.h b/src/linalg.h index 593aa9d..fe6b1b2 100644 --- a/src/linalg.h +++ b/src/linalg.h @@ -113,4 +113,9 @@ __device__ __host__ float norm(const float3 &a)  	return sqrtf(dot(a,a));  } +__device__ __host__ float3 normalize(const float3 &a) +{ +	return a/norm(a); +} +  #endif @@ -5,6 +5,7 @@ from pycuda import autoinit  from pycuda.compiler import SourceModule  import pycuda.driver as cuda  from pycuda import gpuarray +from string import Template  def array2vector(arr, dtype=gpuarray.vec.float3):      if len(arr.shape) != 2 or arr.shape[-1] != 3: @@ -19,18 +20,16 @@ def array2vector(arr, dtype=gpuarray.vec.float3):  print 'device %s' % autoinit.device.name() -source = open('src/linalg.h').read() + open('src/matrix.h').read() + \ -    open('src/rotate.h').read() + open('src/intersect.cu').read() +source = open('src/intersect.cu').read() +mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13') -mod = SourceModule(source, no_extern_c=True, arch='sm_13') - -intersect = mod.get_function('intersect_triangle_mesh')  rotate = mod.get_function('rotate')  translate = mod.get_function('translate') +intersect_mesh = mod.get_function('intersect_mesh')  import pygame  size = width, height = 800, 600 -screen = pygame.display.set_mode(size) +screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF))  film_size = (0.035, 0.024)  focal_length = 0.05 @@ -46,15 +45,65 @@ grid += (0,300,0)  x = array2vector(grid)  x_gpu = cuda.to_device(x) -p = array2vector(((0,300,0)-grid)) +p = (0,300,0)-grid + +for i in range(p.shape[0]): +    p[i] /= np.linalg.norm(p[i]) + +p = array2vector(p)  p_gpu = cuda.to_device(p) -mesh3 = array2vector(read_stl('models/tie_interceptor6.stl')) + +from zcurve import * + +mesh = read_stl('models/tie_interceptor6.stl') +mesh = mesh.reshape(mesh.shape[0]//3,3,3) +mesh = morton_order(mesh) +mesh = mesh.reshape(mesh.shape[0]*3, 3) + +mesh3 = array2vector(mesh) + +from build import Graph +  rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1))  translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) +graph = Graph(mesh3) + +lower = array2vector(graph.lower, dtype=gpuarray.vec.float4) +upper = array2vector(graph.upper, dtype=gpuarray.vec.float4) +start = graph.start.astype(np.uint32) +count = graph.count.astype(np.uint32) +stack = np.zeros(lower.size, dtype=np.int32) + +lower_gpu = cuda.to_device(lower) +upper_gpu = cuda.to_device(upper) + +lower_tex = mod.get_texref('lower_bound_arr') +upper_tex = mod.get_texref('upper_bound_arr') + +lower_tex.set_address(lower_gpu, lower.nbytes) +upper_tex.set_address(upper_gpu, upper.nbytes) + +lower_tex.set_format(cuda.array_format.FLOAT, 4) +upper_tex.set_format(cuda.array_format.FLOAT, 4) + +start_gpu = cuda.to_device(start) +count_gpu = cuda.to_device(count) +stack_gpu = cuda.mem_alloc(stack.nbytes) +cuda.memcpy_htod(stack_gpu, stack) + +child_map_tex = mod.get_texref('child_map_arr') +child_len_tex = mod.get_texref('child_len_arr') + +child_map_tex.set_address(start_gpu, start.nbytes) +child_len_tex.set_address(count_gpu, count.nbytes) + +child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) +child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) +  mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4)  mesh['x'] = mesh3['x']  mesh['y'] = mesh3['y'] @@ -66,7 +115,14 @@ mesh_tex.set_address(mesh_gpu, mesh.nbytes)  mesh_tex.set_format(cuda.array_format.FLOAT, 4)  pixel = np.empty(size, dtype=np.int32).flatten() -pixel_gpu = cuda.to_device(pixel) + +pixel_gpu = cuda.mem_alloc(pixel.nbytes) +cuda.memcpy_htod(pixel_gpu, pixel) + +speed = [] +elapsed = [] + +t0total = time.time()  block_size = 64  for i in range(100): @@ -75,16 +131,22 @@ for i in range(100):      rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))      t0 = time.time() -    intersect(np.int32(x.size), x_gpu, p_gpu, np.int32(mesh.size//3), pixel_gpu, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex]) +    intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex])      cuda.Context.synchronize() -    elapsed = time.time() - t0 +    elapsed.append(time.time() - t0) -    print '%i triangles, %i photons, %f sec; (%f photons/s)' % \ -        (mesh.size//3, pixel.size, elapsed, pixel.size/elapsed) +    print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \ +        (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1]) + +    speed.append(pixel.size/elapsed[-1])      cuda.memcpy_dtoh(pixel, pixel_gpu)      pygame.surfarray.blit_array(screen, pixel.reshape(size))      pygame.display.flip() +print 'average time = %f sec' % np.mean(elapsed) +print 'average speed = %f photons/sec' % np.mean(speed) +print 'total time = %f sec' % (time.time() - t0total) +  raw_input('press enter to exit') diff --git a/zcurve.py b/zcurve.py new file mode 100644 index 0000000..5ef37f3 --- /dev/null +++ b/zcurve.py @@ -0,0 +1,25 @@ +import numpy as np +from itertools import chain + +def interleave(*args): +    return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2) + +def morton_order(mesh, dtype=np.uint8): +    vertices = mesh.reshape(mesh.shape[0]*3, 3) +     +    upper_bound = np.max(vertices, axis=0) +    lower_bound = np.min(vertices, axis=0) + +    quantize = lambda x: dtype((x-lower_bound)*np.iinfo(dtype).max/(upper_bound-lower_bound)) + +    zvalue = [] +    for triangle in mesh: +        center = np.mean(np.vstack((triangle[0], triangle[1], triangle[2])), axis=0) +        zvalue.append(interleave(*quantize(center))) + +    ordered_mesh = np.empty(mesh.shape) + +    for i, idx in enumerate(zip(*sorted(zip(zvalue, range(len(zvalue)))))[-1]): +        ordered_mesh[i] = mesh[idx] + +    return ordered_mesh | 
