diff options
-rw-r--r-- | bvh.py | 47 | ||||
-rw-r--r-- | camera.py | 2 | ||||
-rw-r--r-- | geometry.py | 112 | ||||
-rw-r--r-- | src/kernel.cu | 31 | ||||
-rw-r--r-- | src/matrix.h | 4 | ||||
-rw-r--r-- | src/rotate.h | 3 | ||||
-rw-r--r-- | test.py | 16 | ||||
-rw-r--r-- | tests/linalg_test.cu | 2 | ||||
-rw-r--r-- | tests/linalg_test.py | 8 | ||||
-rw-r--r-- | tests/matrix_test.cu | 2 | ||||
-rw-r--r-- | tests/matrix_test.py | 9 | ||||
-rw-r--r-- | tests/rotate_test.cu | 2 | ||||
-rw-r--r-- | tests/rotate_test.py | 10 |
13 files changed, 128 insertions, 120 deletions
@@ -1,47 +0,0 @@ -import numpy as np -from itertools import chain - -def compress(data, selectors): - return (d for d, s in zip(data, selectors) if s) - -class Leaf(object): - def __init__(self, mesh, mesh_idx, zvalue=None): - self.mesh_idx = mesh_idx - self.size = mesh.shape[0] - self.zvalue = zvalue - self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])]) - self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])]) - self.center = (self.lower_bound + self.upper_bound)/2.0 - -class Node(object): - def __init__(self, children, zvalue=None): - self.size = len(children) - self.zvalue = zvalue - - lower_pts = np.array([child.lower_bound for child in children]) - upper_pts = np.array([child.upper_bound for child in children]) - - self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])]) - self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])]) - - self.children = children - - self.center = (self.lower_bound + self.upper_bound)/2.0 - -def interleave(*args): - return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2) - -def morton_order(mesh, bits=8): - lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])]) - upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])]) - - def quantize(x): - return np.uint64((x-lower_bound)*bits/(upper_bound-lower_bound)) - - zvalues = np.empty(mesh.shape[0], dtype=np.uint64) - for i, triangle in enumerate(mesh): - zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0))) - - order = np.array(zip(*sorted(zip(zvalues, range(len(zvalues)))))[-1]) - - return order, zvalues @@ -18,5 +18,5 @@ class Camera(object): self.grid += position self.focal_point += position - def get_pixels(self): + def get_rays(self): return self.grid, self.focal_point-self.grid diff --git a/geometry.py b/geometry.py index d365141..a65dbc4 100644 --- a/geometry.py +++ b/geometry.py @@ -1,63 +1,101 @@ import numpy as np -from bvh import * +from itertools import chain -class Geometry(object): - """ - Geometry object. - """ +def compress(data, selectors): + return (d for d, s in zip(data, selectors) if s) - def __init__(self): - self.materials = [] - self.mesh = np.empty(0) - self.inside = np.empty(0) - self.outside = np.empty(0) +class Leaf(object): + def __init__(self, mesh, mesh_idx, zvalue=None): + self.zvalue = zvalue + self.mesh_idx = mesh_idx + + self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])]) + self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])]) + + self.size = mesh.shape[0] + +class Node(object): + def __init__(self, children, zvalue=None): + self.zvalue = zvalue + + lower_pts = np.array([child.lower_bound for child in children]) + upper_pts = np.array([child.upper_bound for child in children]) + + self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])]) + self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])]) + + self.size = len(children) + + self.children = children - def add_mesh(self, mesh, inside, outside): - """ - Add a triangle mesh. +def interleave(*args): + return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2) - Args: - - mesh: array of shape (n,3,3) - Array of vertices for a closed mesh with n triangles. +def morton_order(mesh, bits=3): + lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])]) + upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])]) - - inside: Material - Material inside the mesh. + max_value = 2**bits - 1 + + def quantize(x): + return np.uint64((x-lower_bound)*max_value/(upper_bound-lower_bound)) - - outside: Material - Material outside the mesh. - """ + zvalues = np.empty(mesh.shape[0], dtype=np.uint64) + for i, triangle in enumerate(mesh): + zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0))) + return zvalues + +class Solid(object): + def __init__(self, mesh, inside, outside): if len(mesh.shape) != 3 or mesh.shape[1] != 3 or mesh.shape[2] != 3: raise Exception('shape mismatch') - if inside not in self.materials: - self.materials.append(inside) - if outside not in self.materials: - self.materials.append(outside) + self.mesh = mesh + self.inside = inside + self.outside = outside + + def __len__(self): + return self.mesh.shape[0] + +class Geometry(object): + """ + Geometry object. + """ + + def __init__(self): + self.solids = [] + self.materials = [] + + def add_solid(self, solid): + self.solids.append(solid) - self.inside.resize(self.inside.size+mesh.shape[0]) - self.inside[-mesh.shape[0]:] = np.tile(self.materials.index(inside), mesh.shape[0]) + if solid.inside not in self.materials: + self.materials.append(solid.inside) - self.outside.resize(self.outside.size+mesh.shape[0]) - self.outside[-mesh.shape[0]:] = np.tile(self.materials.index(outside), mesh.shape[0]) + if solid.outside not in self.materials: + self.materials.append(solid.outside) - self.mesh.resize((self.mesh.shape[0]+mesh.shape[0],3,3)) - self.mesh[-mesh.shape[0]:] = mesh + def build(self, bits=3): + self.mesh = np.concatenate([solid.mesh for solid in self.solids]) + self.solid_index = np.concatenate([np.tile(self.solids.index(solid), len(solid)) for solid in self.solids]) + self.inside_index = np.concatenate([np.tile(self.materials.index(solid.outside), len(solid)) for solid in self.solids]) + self.outside_index = np.concatenate([np.tile(self.materials.index(solid.outside), len(solid)) for solid in self.solids]) - def build(self, bits=16): - order, zvalues = morton_order(self.mesh, bits) + zvalues = morton_order(self.mesh, bits) + order = np.array(zip(*sorted(zip(zvalues, range(zvalues.size))))[-1]) zvalues = zvalues[order] self.mesh = self.mesh[order] - self.inside = self.inside[order] - self.outside = self.outside[order] + self.solid_index = self.solid_index[order] + self.inside_index = self.inside_index[order] + self.outside_index = self.outside_index[order] leafs = [] - for z in sorted(set(zvalues)): mask = (zvalues == z) - leafs.append(Leaf(self.mesh[mask], 3*np.where(mask)[0][0], z)) + leafs.append(Leaf(self.mesh[mask], np.where(mask)[0][0], z)) layers = [] layers.append(leafs) diff --git a/src/kernel.cu b/src/kernel.cu index 02c52cf..d8f2300 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -7,8 +7,6 @@ texture<float4, 1, cudaReadModeElementType> mesh; - - texture<float4, 1, cudaReadModeElementType> upper_bound_arr; texture<float4, 1, cudaReadModeElementType> lower_bound_arr; @@ -20,7 +18,7 @@ __device__ float3 make_float3(const float4 &a) return make_float3(a.x, a.y, a.z); } -__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float3 &intersection) +__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float &distance) { Matrix m = make_matrix(v1-v0, v2-v0, -direction); @@ -46,7 +44,7 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction if (u3 < 0.0f || (1-u1-u2) < 0.0f) return false; - intersection = origin + direction*u3; + distance = norm(direction*u3); return true; } @@ -54,13 +52,12 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction __device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2) { float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction); + if (scale < 0.0f) scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction); - int rgb = floorf(scale); - return rgb << 16 | rgb << 8 | rgb; } @@ -166,7 +163,7 @@ __global__ void rotate(int max_idx, float3 *pt, float phi, float3 axis) pt[idx] = rotate(pt[idx], phi, axis); } -__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf) +__global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *direction_arr, int *pixel_arr, int first_leaf, int *state_arr) { int idx = blockIdx.x*blockDim.x + threadIdx.x; @@ -177,11 +174,12 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio float3 direction = direction_arr[idx]; int *pixel = pixel_arr+idx; + int *state = state_arr+idx; bool hit = false; - float distance, min_distance; - float3 intersection, min_intersection; + float distance; + float min_distance; if (!intersect_node(origin, direction, 0)) { @@ -226,32 +224,31 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio { for (i=0; i < child_len; i++) { - int mesh_idx = child_map + 3*i; + int mesh_idx = 3*(child_map + i); float3 v0 = make_float3(tex1Dfetch(mesh, mesh_idx)); float3 v1 = make_float3(tex1Dfetch(mesh, mesh_idx+1)); float3 v2 = make_float3(tex1Dfetch(mesh, mesh_idx+2)); - if (intersect_triangle(origin, direction, v0, v1, v2, intersection)) + if (intersect_triangle(origin, direction, v0, v1, v2, distance)) { if (!hit) { *pixel = get_color(direction, v0, v1, v2); - - min_distance = norm(intersection-origin); - min_intersection = intersection; + *state = mesh_idx; + + min_distance = distance; + hit = true; continue; } - distance = norm(intersection-origin); - if (distance < min_distance) { *pixel = get_color(direction, v0, v1, v2); + *state = mesh_idx; min_distance = distance; - min_intersection = intersection; } } diff --git a/src/matrix.h b/src/matrix.h index 5c72344..a3e480b 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -197,9 +197,7 @@ __device__ __host__ Matrix operator/ (const float &c, const Matrix &m) __device__ __host__ float det(const Matrix &m) { - return m.a00*(m.a11*m.a22 - m.a12*m.a21) - \ - m.a10*(m.a01*m.a22 - m.a02*m.a21) + \ - m.a20*(m.a01*m.a12 - m.a02*m.a11); + return m.a00*(m.a11*m.a22 - m.a12*m.a21) - m.a10*(m.a01*m.a22 - m.a02*m.a21) + m.a20*(m.a01*m.a12 - m.a02*m.a11); } __device__ __host__ Matrix inv(const Matrix &m) diff --git a/src/rotate.h b/src/rotate.h index fec76a8..7fc6f7e 100644 --- a/src/rotate.h +++ b/src/rotate.h @@ -1,6 +1,9 @@ #ifndef __ROTATE_H__ #define __ROTATE_H__ +#include "linalg.h" +#include "matrix.h" + __device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1}; __device__ __host__ Matrix make_rotation_matrix(float phi, const float3 &n) @@ -20,16 +20,16 @@ print 'device %s' % autoinit.device.name() source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src' source = open(source_directory + '/kernel.cu').read() -mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13') +mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, cache_dir=False) intersect_mesh = mod.get_function('intersect_mesh') rotate = mod.get_function('rotate') size = width, height = 800, 600 screen = pygame.display.set_mode(size) camera = Camera(size) -camera.position((0,300,0)) +camera.position((0,300,50)) -origin, direction = camera.get_pixels() +origin, direction = camera.get_rays() for i in range(direction.shape[0]): direction[i] /= np.linalg.norm(direction[i]) @@ -39,9 +39,10 @@ origin, direction = make_vector(origin), make_vector(direction) origin_gpu = cuda.to_device(origin) direction_gpu = cuda.to_device(direction) +solid = Solid(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum) geometry = Geometry() -geometry.add_mesh(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum) -geometry.build(bits=16) +geometry.add_solid(solid) +geometry.build() mesh = geometry.mesh mesh = mesh.reshape(mesh.shape[0]*3,3) @@ -79,13 +80,16 @@ child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) pixels = np.empty(width*height, dtype=np.int32) pixels_gpu = cuda.to_device(pixels) +states = np.empty(width*height, dtype=np.int32) +states_gpu = cuda.to_device(states) + block_size = 64 for i in range(100): rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) t0 = time.time() - intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex]) + intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, states_gpu, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex]) cuda.Context.synchronize() elapsed = time.time() - t0 diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu index b61488f..4e9c983 100644 --- a/tests/linalg_test.cu +++ b/tests/linalg_test.cu @@ -1,5 +1,7 @@ //-*-c-*- +#include "linalg.h" + extern "C" { diff --git a/tests/linalg_test.py b/tests/linalg_test.py index 44c4b52..31688d9 100644 --- a/tests/linalg_test.py +++ b/tests/linalg_test.py @@ -1,3 +1,4 @@ +import os import numpy as np from pycuda import autoinit from pycuda.compiler import SourceModule @@ -8,9 +9,12 @@ float3 = gpuarray.vec.float3 print 'device %s' % autoinit.device.name() -source = open('../linalg.h').read() + open('linalg_test.cu').read() +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' -mod = SourceModule(source, no_extern_c=True, arch='sm_13') +source = open(current_directory + '/linalg_test.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) float3add = mod.get_function('float3add') float3addequal = mod.get_function('float3addequal') diff --git a/tests/matrix_test.cu b/tests/matrix_test.cu index 99e13dc..d64cb34 100644 --- a/tests/matrix_test.cu +++ b/tests/matrix_test.cu @@ -1,5 +1,7 @@ //-*-c-*- +#include "matrix.h" + __device__ Matrix array2matrix(float *a) { return make_matrix(a[0], a[1], a[2], diff --git a/tests/matrix_test.py b/tests/matrix_test.py index bc4115e..c843025 100644 --- a/tests/matrix_test.py +++ b/tests/matrix_test.py @@ -1,3 +1,4 @@ +import os import numpy as np from pycuda import autoinit from pycuda.compiler import SourceModule @@ -8,10 +9,12 @@ float3 = gpuarray.vec.float3 print 'device %s' % autoinit.device.name() -source = open('../matrix.h').read() + open('../linalg.h').read() + \ - open('matrix_test.cu').read() +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' -mod = SourceModule(source, no_extern_c=True, arch='sm_13') +source = open(current_directory + '/matrix_test.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) det = mod.get_function('det') inv = mod.get_function('inv') diff --git a/tests/rotate_test.cu b/tests/rotate_test.cu index 96e4d75..6cafc12 100644 --- a/tests/rotate_test.cu +++ b/tests/rotate_test.cu @@ -1,6 +1,6 @@ //-*-c-*- - +#include "rotate.h" extern "C" { diff --git a/tests/rotate_test.py b/tests/rotate_test.py index 8c2cfcb..92eff84 100644 --- a/tests/rotate_test.py +++ b/tests/rotate_test.py @@ -1,3 +1,4 @@ +import os import numpy as np from pycuda import autoinit from pycuda.compiler import SourceModule @@ -16,9 +17,12 @@ def rotate(x, phi, n): print 'device %s' % autoinit.device.name() -source = open('../linalg.h').read() + open('../matrix.h').read() + \ - open('../rotate.h').read() + open('rotate_test.cu').read() -mod = SourceModule(source, no_extern_c=True, arch='sm_13') +current_directory = os.path.split(os.path.realpath(__file__))[0] +source_directory = current_directory + '/../src' + +source = open(current_directory + '/rotate_test.cu').read() + +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) rotate_gpu = mod.get_function('rotate') |