summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bvh.py47
-rw-r--r--camera.py2
-rw-r--r--geometry.py112
-rw-r--r--src/kernel.cu31
-rw-r--r--src/matrix.h4
-rw-r--r--src/rotate.h3
-rw-r--r--test.py16
-rw-r--r--tests/linalg_test.cu2
-rw-r--r--tests/linalg_test.py8
-rw-r--r--tests/matrix_test.cu2
-rw-r--r--tests/matrix_test.py9
-rw-r--r--tests/rotate_test.cu2
-rw-r--r--tests/rotate_test.py10
13 files changed, 128 insertions, 120 deletions
diff --git a/bvh.py b/bvh.py
deleted file mode 100644
index d1e4c1a..0000000
--- a/bvh.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import numpy as np
-from itertools import chain
-
-def compress(data, selectors):
- return (d for d, s in zip(data, selectors) if s)
-
-class Leaf(object):
- def __init__(self, mesh, mesh_idx, zvalue=None):
- self.mesh_idx = mesh_idx
- self.size = mesh.shape[0]
- self.zvalue = zvalue
- self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
- self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
- self.center = (self.lower_bound + self.upper_bound)/2.0
-
-class Node(object):
- def __init__(self, children, zvalue=None):
- self.size = len(children)
- self.zvalue = zvalue
-
- lower_pts = np.array([child.lower_bound for child in children])
- upper_pts = np.array([child.upper_bound for child in children])
-
- self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])])
- self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])])
-
- self.children = children
-
- self.center = (self.lower_bound + self.upper_bound)/2.0
-
-def interleave(*args):
- return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2)
-
-def morton_order(mesh, bits=8):
- lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
- upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
-
- def quantize(x):
- return np.uint64((x-lower_bound)*bits/(upper_bound-lower_bound))
-
- zvalues = np.empty(mesh.shape[0], dtype=np.uint64)
- for i, triangle in enumerate(mesh):
- zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0)))
-
- order = np.array(zip(*sorted(zip(zvalues, range(len(zvalues)))))[-1])
-
- return order, zvalues
diff --git a/camera.py b/camera.py
index dc3e32e..f00cc56 100644
--- a/camera.py
+++ b/camera.py
@@ -18,5 +18,5 @@ class Camera(object):
self.grid += position
self.focal_point += position
- def get_pixels(self):
+ def get_rays(self):
return self.grid, self.focal_point-self.grid
diff --git a/geometry.py b/geometry.py
index d365141..a65dbc4 100644
--- a/geometry.py
+++ b/geometry.py
@@ -1,63 +1,101 @@
import numpy as np
-from bvh import *
+from itertools import chain
-class Geometry(object):
- """
- Geometry object.
- """
+def compress(data, selectors):
+ return (d for d, s in zip(data, selectors) if s)
- def __init__(self):
- self.materials = []
- self.mesh = np.empty(0)
- self.inside = np.empty(0)
- self.outside = np.empty(0)
+class Leaf(object):
+ def __init__(self, mesh, mesh_idx, zvalue=None):
+ self.zvalue = zvalue
+ self.mesh_idx = mesh_idx
+
+ self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
+ self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
+
+ self.size = mesh.shape[0]
+
+class Node(object):
+ def __init__(self, children, zvalue=None):
+ self.zvalue = zvalue
+
+ lower_pts = np.array([child.lower_bound for child in children])
+ upper_pts = np.array([child.upper_bound for child in children])
+
+ self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])])
+ self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])])
+
+ self.size = len(children)
+
+ self.children = children
- def add_mesh(self, mesh, inside, outside):
- """
- Add a triangle mesh.
+def interleave(*args):
+ return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2)
- Args:
- - mesh: array of shape (n,3,3)
- Array of vertices for a closed mesh with n triangles.
+def morton_order(mesh, bits=3):
+ lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])])
+ upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])])
- - inside: Material
- Material inside the mesh.
+ max_value = 2**bits - 1
+
+ def quantize(x):
+ return np.uint64((x-lower_bound)*max_value/(upper_bound-lower_bound))
- - outside: Material
- Material outside the mesh.
- """
+ zvalues = np.empty(mesh.shape[0], dtype=np.uint64)
+ for i, triangle in enumerate(mesh):
+ zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0)))
+ return zvalues
+
+class Solid(object):
+ def __init__(self, mesh, inside, outside):
if len(mesh.shape) != 3 or mesh.shape[1] != 3 or mesh.shape[2] != 3:
raise Exception('shape mismatch')
- if inside not in self.materials:
- self.materials.append(inside)
- if outside not in self.materials:
- self.materials.append(outside)
+ self.mesh = mesh
+ self.inside = inside
+ self.outside = outside
+
+ def __len__(self):
+ return self.mesh.shape[0]
+
+class Geometry(object):
+ """
+ Geometry object.
+ """
+
+ def __init__(self):
+ self.solids = []
+ self.materials = []
+
+ def add_solid(self, solid):
+ self.solids.append(solid)
- self.inside.resize(self.inside.size+mesh.shape[0])
- self.inside[-mesh.shape[0]:] = np.tile(self.materials.index(inside), mesh.shape[0])
+ if solid.inside not in self.materials:
+ self.materials.append(solid.inside)
- self.outside.resize(self.outside.size+mesh.shape[0])
- self.outside[-mesh.shape[0]:] = np.tile(self.materials.index(outside), mesh.shape[0])
+ if solid.outside not in self.materials:
+ self.materials.append(solid.outside)
- self.mesh.resize((self.mesh.shape[0]+mesh.shape[0],3,3))
- self.mesh[-mesh.shape[0]:] = mesh
+ def build(self, bits=3):
+ self.mesh = np.concatenate([solid.mesh for solid in self.solids])
+ self.solid_index = np.concatenate([np.tile(self.solids.index(solid), len(solid)) for solid in self.solids])
+ self.inside_index = np.concatenate([np.tile(self.materials.index(solid.outside), len(solid)) for solid in self.solids])
+ self.outside_index = np.concatenate([np.tile(self.materials.index(solid.outside), len(solid)) for solid in self.solids])
- def build(self, bits=16):
- order, zvalues = morton_order(self.mesh, bits)
+ zvalues = morton_order(self.mesh, bits)
+ order = np.array(zip(*sorted(zip(zvalues, range(zvalues.size))))[-1])
zvalues = zvalues[order]
self.mesh = self.mesh[order]
- self.inside = self.inside[order]
- self.outside = self.outside[order]
+ self.solid_index = self.solid_index[order]
+ self.inside_index = self.inside_index[order]
+ self.outside_index = self.outside_index[order]
leafs = []
-
for z in sorted(set(zvalues)):
mask = (zvalues == z)
- leafs.append(Leaf(self.mesh[mask], 3*np.where(mask)[0][0], z))
+ leafs.append(Leaf(self.mesh[mask], np.where(mask)[0][0], z))
layers = []
layers.append(leafs)
diff --git a/src/kernel.cu b/src/kernel.cu
index 02c52cf..d8f2300 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -7,8 +7,6 @@
texture<float4, 1, cudaReadModeElementType> mesh;
-
-
texture<float4, 1, cudaReadModeElementType> upper_bound_arr;
texture<float4, 1, cudaReadModeElementType> lower_bound_arr;
@@ -20,7 +18,7 @@ __device__ float3 make_float3(const float4 &a)
return make_float3(a.x, a.y, a.z);
}
-__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection)
+__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float &distance)
{
Matrix m = make_matrix(v1-v0, v2-v0, -direction);
@@ -46,7 +44,7 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction
if (u3 < 0.0f || (1-u1-u2) < 0.0f)
return false;
- intersection = origin + direction*u3;
+ distance = norm(direction*u3);
return true;
}
@@ -54,13 +52,12 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction
__device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2)
{
float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction);
+
if (scale < 0.0f)
scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction);
-
int rgb = floorf(scale);
-
return rgb << 16 | rgb << 8 | rgb;
}
@@ -166,7 +163,7 @@ __global__ void rotate(int max_idx, float3 *pt, float phi, float3 axis)
pt[idx] = rotate(pt[idx], phi, axis);
}
-__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf)
+__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf, int *state_arr)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
@@ -177,11 +174,12 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio
float3 direction = direction_arr[idx];
int *pixel = pixel_arr+idx;
+ int *state = state_arr+idx;
bool hit = false;
- float distance, min_distance;
- float3 intersection, min_intersection;
+ float distance;
+ float min_distance;
if (!intersect_node(origin, direction, 0))
{
@@ -226,32 +224,31 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio
{
for (i=0; i < child_len; i++)
{
- int mesh_idx = child_map + 3*i;
+ int mesh_idx = 3*(child_map + i);
float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx));
float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1));
float3 v2 = make_float3(tex1Dfetch(mesh, mesh_idx+2));
- if (intersect_triangle(origin, direction, v0, v1, v2, intersection))
+ if (intersect_triangle(origin, direction, v0, v1, v2, distance))
{
if (!hit)
{
*pixel = get_color(direction, v0, v1, v2);
-
- min_distance = norm(intersection-origin);
- min_intersection = intersection;
+ *state = mesh_idx;
+
+ min_distance = distance;
+
hit = true;
continue;
}
- distance = norm(intersection-origin);
-
if (distance < min_distance)
{
*pixel = get_color(direction, v0, v1, v2);
+ *state = mesh_idx;
min_distance = distance;
- min_intersection = intersection;
}
}
diff --git a/src/matrix.h b/src/matrix.h
index 5c72344..a3e480b 100644
--- a/src/matrix.h
+++ b/src/matrix.h
@@ -197,9 +197,7 @@ __device__ __host__ Matrix operator/ (const float &c, const Matrix &m)
__device__ __host__ float det(const Matrix &m)
{
- return m.a00*(m.a11*m.a22 - m.a12*m.a21) - \
- m.a10*(m.a01*m.a22 - m.a02*m.a21) + \
- m.a20*(m.a01*m.a12 - m.a02*m.a11);
+ return m.a00*(m.a11*m.a22 - m.a12*m.a21) - m.a10*(m.a01*m.a22 - m.a02*m.a21) + m.a20*(m.a01*m.a12 - m.a02*m.a11);
}
__device__ __host__ Matrix inv(const Matrix &m)
diff --git a/src/rotate.h b/src/rotate.h
index fec76a8..7fc6f7e 100644
--- a/src/rotate.h
+++ b/src/rotate.h
@@ -1,6 +1,9 @@
#ifndef __ROTATE_H__
#define __ROTATE_H__
+#include "linalg.h"
+#include "matrix.h"
+
__device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1};
__device__ __host__ Matrix make_rotation_matrix(float phi, const float3 &n)
diff --git a/test.py b/test.py
index 2aef412..a484032 100644
--- a/test.py
+++ b/test.py
@@ -20,16 +20,16 @@ print 'device %s' % autoinit.device.name()
source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src'
source = open(source_directory + '/kernel.cu').read()
-mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13')
+mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, cache_dir=False)
intersect_mesh = mod.get_function('intersect_mesh')
rotate = mod.get_function('rotate')
size = width, height = 800, 600
screen = pygame.display.set_mode(size)
camera = Camera(size)
-camera.position((0,300,0))
+camera.position((0,300,50))
-origin, direction = camera.get_pixels()
+origin, direction = camera.get_rays()
for i in range(direction.shape[0]):
direction[i] /= np.linalg.norm(direction[i])
@@ -39,9 +39,10 @@ origin, direction = make_vector(origin), make_vector(direction)
origin_gpu = cuda.to_device(origin)
direction_gpu = cuda.to_device(direction)
+solid = Solid(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum)
geometry = Geometry()
-geometry.add_mesh(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum)
-geometry.build(bits=16)
+geometry.add_solid(solid)
+geometry.build()
mesh = geometry.mesh
mesh = mesh.reshape(mesh.shape[0]*3,3)
@@ -79,13 +80,16 @@ child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
pixels = np.empty(width*height, dtype=np.int32)
pixels_gpu = cuda.to_device(pixels)
+states = np.empty(width*height, dtype=np.int32)
+states_gpu = cuda.to_device(states)
+
block_size = 64
for i in range(100):
rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1))
t0 = time.time()
- intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex])
+ intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, states_gpu, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex])
cuda.Context.synchronize()
elapsed = time.time() - t0
diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu
index b61488f..4e9c983 100644
--- a/tests/linalg_test.cu
+++ b/tests/linalg_test.cu
@@ -1,5 +1,7 @@
//-*-c-*-
+#include "linalg.h"
+
extern "C"
{
diff --git a/tests/linalg_test.py b/tests/linalg_test.py
index 44c4b52..31688d9 100644
--- a/tests/linalg_test.py
+++ b/tests/linalg_test.py
@@ -1,3 +1,4 @@
+import os
import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
@@ -8,9 +9,12 @@ float3 = gpuarray.vec.float3
print 'device %s' % autoinit.device.name()
-source = open('../linalg.h').read() + open('linalg_test.cu').read()
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
-mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+source = open(current_directory + '/linalg_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
float3add = mod.get_function('float3add')
float3addequal = mod.get_function('float3addequal')
diff --git a/tests/matrix_test.cu b/tests/matrix_test.cu
index 99e13dc..d64cb34 100644
--- a/tests/matrix_test.cu
+++ b/tests/matrix_test.cu
@@ -1,5 +1,7 @@
//-*-c-*-
+#include "matrix.h"
+
__device__ Matrix array2matrix(float *a)
{
return make_matrix(a[0], a[1], a[2],
diff --git a/tests/matrix_test.py b/tests/matrix_test.py
index bc4115e..c843025 100644
--- a/tests/matrix_test.py
+++ b/tests/matrix_test.py
@@ -1,3 +1,4 @@
+import os
import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
@@ -8,10 +9,12 @@ float3 = gpuarray.vec.float3
print 'device %s' % autoinit.device.name()
-source = open('../matrix.h').read() + open('../linalg.h').read() + \
- open('matrix_test.cu').read()
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
-mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+source = open(current_directory + '/matrix_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
det = mod.get_function('det')
inv = mod.get_function('inv')
diff --git a/tests/rotate_test.cu b/tests/rotate_test.cu
index 96e4d75..6cafc12 100644
--- a/tests/rotate_test.cu
+++ b/tests/rotate_test.cu
@@ -1,6 +1,6 @@
//-*-c-*-
-
+#include "rotate.h"
extern "C"
{
diff --git a/tests/rotate_test.py b/tests/rotate_test.py
index 8c2cfcb..92eff84 100644
--- a/tests/rotate_test.py
+++ b/tests/rotate_test.py
@@ -1,3 +1,4 @@
+import os
import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
@@ -16,9 +17,12 @@ def rotate(x, phi, n):
print 'device %s' % autoinit.device.name()
-source = open('../linalg.h').read() + open('../matrix.h').read() + \
- open('../rotate.h').read() + open('rotate_test.cu').read()
-mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/rotate_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
rotate_gpu = mod.get_function('rotate')