diff options
-rw-r--r-- | build.py | 102 | ||||
-rw-r--r-- | bvh.py | 47 | ||||
-rw-r--r-- | camera.py | 22 | ||||
-rw-r--r-- | geometry.py | 110 | ||||
-rw-r--r-- | materials.py | 12 | ||||
-rw-r--r-- | src/kernel.cu (renamed from src/intersect.cu) | 18 | ||||
-rw-r--r-- | stl.py | 4 | ||||
-rw-r--r-- | test.py | 167 | ||||
-rw-r--r-- | test_lbne.py | 164 | ||||
-rw-r--r-- | vector.py | 13 | ||||
-rw-r--r-- | zcurve.py | 25 |
11 files changed, 280 insertions, 404 deletions
diff --git a/build.py b/build.py deleted file mode 100644 index 8c05c5d..0000000 --- a/build.py +++ /dev/null @@ -1,102 +0,0 @@ -import numpy as np - -class Leaf(object): - def __init__(self, meshasdf, mesh_idx): - self.xmin, self.ymin, self.zmin = meshasdf[0] - self.xmax, self.ymax, self.zmax = meshasdf[0] - - for x, y, z in meshasdf[1:]: - if x < self.xmin: - self.xmin = x - if x > self.xmax: - self.xmax = x - if y < self.ymin: - self.ymin = y - if y > self.ymax: - self.ymax = y - if z < self.zmin: - self.zmin = z - if z > self.zmax: - self.zmax = z - - self.mesh_idx = mesh_idx - self.size = meshasdf.size//3 - -class Node(object): - def __init__(self, children): - self.xmin = children[0].xmin - self.xmax = children[0].xmax - self.ymin = children[0].ymin - self.ymax = children[0].ymax - self.zmin = children[0].zmin - self.zmax = children[0].zmax - - for child in children[1:]: - if child.xmin < self.xmin: - self.xmin = child.xmin - if child.xmax > self.xmax: - self.xmax = child.xmax - if child.ymin < self.ymin: - self.ymin = child.ymin - if child.ymax > self.ymax: - self.ymax = child.ymax - if child.zmin < self.zmin: - self.zmin = child.zmin - if child.zmax > self.zmax: - self.zmax = child.zmax - - self.children = children - self.size = len(children) - -class Graph(object): - def __init__(self, mesh, triangles_per_leaf=2, children_per_node=4): - leafs = [] - for i in range(0, mesh.size, triangles_per_leaf*3): - leafs.append(Leaf(mesh[i:i+triangles_per_leaf*3], i)) - - layers = [] - layers.append(leafs) - - while len(layers[-1]) > 1: - nodes = [] - for i in range(0, len(layers[-1]), children_per_node): - nodes.append(Node(layers[-1][i:i+children_per_node])) - - layers.append(nodes) - - # leaves last - layers.reverse() - - nodes = [] - for layer in layers: - for node in layer: - nodes.append(node) - - lower = [] - upper = [] - start = [] - count = [] # number of child nodes or triangles - first_leaf = -1 - - for i, node in enumerate(nodes): - lower.append([node.xmin, node.ymin, node.zmin]) - upper.append([node.xmax, node.ymax, node.zmax]) - count.append(node.size) - - if isinstance(node, Node): - for j, child in enumerate(nodes): - if child is node.children[0]: - start.append(j) - break - - if isinstance(node, Leaf): - start.append(node.mesh_idx) - - if first_leaf == -1: - first_leaf = i - - self.lower = np.array(lower) - self.upper = np.array(upper) - self.start = np.array(start) - self.count = np.array(count) - self.first_leaf = first_leaf @@ -0,0 +1,47 @@ +import numpy as np +from itertools import chain + +def compress(data, selectors): + return (d for d, s in zip(data, selectors) if s) + +class Leaf(object): + def __init__(self, mesh, mesh_idx, zvalue=None): + self.mesh_idx = mesh_idx + self.size = mesh.shape[0] + self.zvalue = zvalue + self.lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])]) + self.upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])]) + self.center = (self.lower_bound + self.upper_bound)/2.0 + +class Node(object): + def __init__(self, children, zvalue=None): + self.size = len(children) + self.zvalue = zvalue + + lower_pts = np.array([child.lower_bound for child in children]) + upper_pts = np.array([child.upper_bound for child in children]) + + self.lower_bound = np.array([np.min(lower_pts[:,0]), np.min(lower_pts[:,1]), np.min(lower_pts[:,2])]) + self.upper_bound = np.array([np.max(upper_pts[:,0]), np.max(upper_pts[:,1]), np.max(upper_pts[:,2])]) + + self.children = children + + self.center = (self.lower_bound + self.upper_bound)/2.0 + +def interleave(*args): + return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2) + +def morton_order(mesh, bits=8): + lower_bound = np.array([np.min(mesh[:,:,0]), np.min(mesh[:,:,1]), np.min(mesh[:,:,2])]) + upper_bound = np.array([np.max(mesh[:,:,0]), np.max(mesh[:,:,1]), np.max(mesh[:,:,2])]) + + def quantize(x): + return np.uint64((x-lower_bound)*bits/(upper_bound-lower_bound)) + + zvalues = np.empty(mesh.shape[0], dtype=np.uint64) + for i, triangle in enumerate(mesh): + zvalues[i] = interleave(*quantize(np.mean(triangle, axis=0))) + + order = np.array(zip(*sorted(zip(zvalues, range(len(zvalues)))))[-1]) + + return order, zvalues diff --git a/camera.py b/camera.py new file mode 100644 index 0000000..dc3e32e --- /dev/null +++ b/camera.py @@ -0,0 +1,22 @@ +import numpy as np + +class Camera(object): + def __init__(self, size = (800, 600), film_size = (0.035, 0.024), focal_length=0.05): + width, height = size + + grid = [] + for i, x in enumerate(np.linspace(-film_size[0]/2, film_size[0]/2, width)): + for j, z in enumerate(np.linspace(-film_size[1]/2, film_size[1]/2, height)): + grid.append((x,0,z)) + + self.grid = np.array(grid) + self.grid += (0,focal_length,0) + + self.focal_point = np.zeros(3) + + def position(self, position): + self.grid += position + self.focal_point += position + + def get_pixels(self): + return self.grid, self.focal_point-self.grid diff --git a/geometry.py b/geometry.py new file mode 100644 index 0000000..d365141 --- /dev/null +++ b/geometry.py @@ -0,0 +1,110 @@ +import numpy as np +from bvh import * + +class Geometry(object): + """ + Geometry object. + """ + + def __init__(self): + self.materials = [] + self.mesh = np.empty(0) + self.inside = np.empty(0) + self.outside = np.empty(0) + + def add_mesh(self, mesh, inside, outside): + """ + Add a triangle mesh. + + Args: + - mesh: array of shape (n,3,3) + Array of vertices for a closed mesh with n triangles. + + - inside: Material + Material inside the mesh. + + - outside: Material + Material outside the mesh. + """ + + if len(mesh.shape) != 3 or mesh.shape[1] != 3 or mesh.shape[2] != 3: + raise Exception('shape mismatch') + + if inside not in self.materials: + self.materials.append(inside) + if outside not in self.materials: + self.materials.append(outside) + + self.inside.resize(self.inside.size+mesh.shape[0]) + self.inside[-mesh.shape[0]:] = np.tile(self.materials.index(inside), mesh.shape[0]) + + self.outside.resize(self.outside.size+mesh.shape[0]) + self.outside[-mesh.shape[0]:] = np.tile(self.materials.index(outside), mesh.shape[0]) + + self.mesh.resize((self.mesh.shape[0]+mesh.shape[0],3,3)) + self.mesh[-mesh.shape[0]:] = mesh + + def build(self, bits=16): + order, zvalues = morton_order(self.mesh, bits) + + + zvalues = zvalues[order] + self.mesh = self.mesh[order] + self.inside = self.inside[order] + self.outside = self.outside[order] + + leafs = [] + + for z in sorted(set(zvalues)): + mask = (zvalues == z) + leafs.append(Leaf(self.mesh[mask], 3*np.where(mask)[0][0], z)) + + layers = [] + layers.append(leafs) + + while True: + zvalues = np.array([node.zvalue for node in layers[-1]]) + + bit_shifted_zvalues = zvalues >> 1 + + nodes = [] + for z in sorted(set(bit_shifted_zvalues)): + mask = (bit_shifted_zvalues == z) + nodes.append(Node(list(compress(layers[-1], mask)), z)) + + layers.append(nodes) + + if len(nodes) == 1: + break + + layers.reverse() + + nodes = [] + for layer in layers: + for node in layer: + nodes.append(node) + + + self.lower_bound = np.empty((len(nodes),3)) + self.upper_bound = np.empty((len(nodes),3)) + self.child_map = np.empty(len(nodes)) + self.child_len = np.empty(len(nodes)) + self.first_leaf = -1 + + for i, node in enumerate(nodes): + self.lower_bound[i] = node.lower_bound + self.upper_bound[i] = node.upper_bound + + self.child_len[i] = node.size + + if isinstance(node, Node): + for j, child in enumerate(nodes): + if child is node.children[0]: + self.child_map[i] = j + break + + if isinstance(node, Leaf): + self.child_map[i] = node.mesh_idx + + if self.first_leaf == -1: + self.first_leaf = i diff --git a/materials.py b/materials.py new file mode 100644 index 0000000..9639a52 --- /dev/null +++ b/materials.py @@ -0,0 +1,12 @@ +class Material(object): + def __init__(self, name='none'): + self.name = name + + self.refractive_index = None + self.absorption_length = None + self.scattering_length = None + +air = Material('air') +h2o = Material('h2o') +glass = Material('glass') +vacuum = Material('vacuum') diff --git a/src/intersect.cu b/src/kernel.cu index 73b4483..02c52cf 100644 --- a/src/intersect.cu +++ b/src/kernel.cu @@ -7,6 +7,8 @@ texture<float4, 1, cudaReadModeElementType> mesh; + + texture<float4, 1, cudaReadModeElementType> upper_bound_arr; texture<float4, 1, cudaReadModeElementType> lower_bound_arr; @@ -51,7 +53,13 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction __device__ int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2) { - int rgb = floorf(255*dot(normalize(cross(v1-v0,v2-v0)),-direction)); + float scale = 255*dot(normalize(cross(v1-v0,v2-v0)),-direction); + if (scale < 0.0f) + scale = 255*dot(-normalize(cross(v1-v0,v2-v0)),-direction); + + + int rgb = floorf(scale); + return rgb << 16 | rgb << 8 | rgb; } @@ -189,6 +197,8 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio int i; + bool show_leafs = false; + do { int child_map = tex1Dfetch(child_map_arr, *node); @@ -206,6 +216,12 @@ __global__ void intersect_mesh(int max_idx, float3 *origin_arr, float3 *directio node--; } + else if (show_leafs) + { + hit = true; + *pixel = 255; + node--; + } else // node is a leaf { for (i=0; i < child_len; i++) @@ -23,7 +23,7 @@ def read_ascii_stl(filename): vertex.append([float(s) for s in line.strip().split()[1:]]) f.close() - return np.array(vertex) + return np.array(vertex).reshape(len(vertex)//3,3,3) def read_binary_stl(filename): f = open(filename) @@ -39,4 +39,4 @@ def read_binary_stl(filename): f.read(2) f.close() - return np.array(vertex) + return np.array(vertex).reshape(len(vertex)//3,3,3) @@ -1,152 +1,99 @@ +import os import time -from stl import * import numpy as np + from pycuda import autoinit from pycuda.compiler import SourceModule import pycuda.driver as cuda from pycuda import gpuarray -from string import Template - -def array2vector(arr, dtype=gpuarray.vec.float3): - if len(arr.shape) != 2 or arr.shape[-1] != 3: - raise Exception('shape mismatch') - x = np.empty(arr.shape[0], dtype=dtype) - x['x'] = arr[:,0] - x['y'] = arr[:,1] - x['z'] = arr[:,2] +from stl import * +from geometry import * +from materials import * +from camera import * +from vector import * - return x +import pygame print 'device %s' % autoinit.device.name() -source = open('src/intersect.cu').read() -mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13') +source_directory = os.path.split(os.path.realpath(__file__))[0] + '/src' -rotate = mod.get_function('rotate') -translate = mod.get_function('translate') +source = open(source_directory + '/kernel.cu').read() +mod = SourceModule(source, options=['-I ' + source_directory], no_extern_c=True, arch='sm_13') intersect_mesh = mod.get_function('intersect_mesh') +rotate = mod.get_function('rotate') -import pygame size = width, height = 800, 600 -screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF)) - -film_size = (0.035, 0.024) -focal_length = 0.05 - -grid = [] -for x in np.linspace(-film_size[0]/2, film_size[0]/2, width): - for z in np.linspace(-film_size[1]/2, film_size[1]/2, height): - grid.append((x,0,z)) -grid = np.array(grid) -grid += (0,focal_length,0) -grid += (0,300,0) - -x = array2vector(grid) -x_gpu = cuda.to_device(x) - -p = (0,300,0)-grid - -for i in range(p.shape[0]): - p[i] /= np.linalg.norm(p[i]) - -p = array2vector(p) -p_gpu = cuda.to_device(p) - - -from zcurve import * - -mesh = read_stl('models/tie_interceptor6.stl') -mesh = mesh.reshape(mesh.shape[0]//3,3,3) -mesh = morton_order(mesh) -mesh = mesh.reshape(mesh.shape[0]*3, 3) - -mesh3 = array2vector(mesh) - -from build import Graph - +screen = pygame.display.set_mode(size) +camera = Camera(size) +camera.position((0,300,0)) -rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) +origin, direction = camera.get_pixels() -translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) +for i in range(direction.shape[0]): + direction[i] /= np.linalg.norm(direction[i]) -graph = Graph(mesh3) +origin, direction = make_vector(origin), make_vector(direction) -lower = array2vector(graph.lower, dtype=gpuarray.vec.float4) -upper = array2vector(graph.upper, dtype=gpuarray.vec.float4) -start = graph.start.astype(np.uint32) -count = graph.count.astype(np.uint32) -stack = np.zeros(lower.size, dtype=np.int32) +origin_gpu = cuda.to_device(origin) +direction_gpu = cuda.to_device(direction) -lower_gpu = cuda.to_device(lower) -upper_gpu = cuda.to_device(upper) +geometry = Geometry() +geometry.add_mesh(read_stl('models/tie_interceptor6.stl'), vacuum, vacuum) +geometry.build(bits=16) -lower_tex = mod.get_texref('lower_bound_arr') -upper_tex = mod.get_texref('upper_bound_arr') +mesh = geometry.mesh +mesh = mesh.reshape(mesh.shape[0]*3,3) +mesh = make_vector(mesh, dtype=gpuarray.vec.float4) +lower_bound = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4) +upper_bound = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4) +child_map = geometry.child_map.astype(np.uint32) +child_len = geometry.child_len.astype(np.uint32) +first_leaf = np.int32(geometry.first_leaf) -lower_tex.set_address(lower_gpu, lower.nbytes) -upper_tex.set_address(upper_gpu, upper.nbytes) - -lower_tex.set_format(cuda.array_format.FLOAT, 4) -upper_tex.set_format(cuda.array_format.FLOAT, 4) - -start_gpu = cuda.to_device(start) -count_gpu = cuda.to_device(count) -stack_gpu = cuda.mem_alloc(stack.nbytes) -cuda.memcpy_htod(stack_gpu, stack) +mesh_gpu = cuda.to_device(mesh) +lower_bound_gpu = cuda.to_device(lower_bound) +upper_bound_gpu = cuda.to_device(upper_bound) +child_map_gpu = cuda.to_device(child_map) +child_len_gpu = cuda.to_device(child_len) +mesh_tex = mod.get_texref('mesh') +lower_bound_tex = mod.get_texref('lower_bound_arr') +upper_bound_tex = mod.get_texref('upper_bound_arr') child_map_tex = mod.get_texref('child_map_arr') child_len_tex = mod.get_texref('child_len_arr') -child_map_tex.set_address(start_gpu, start.nbytes) -child_len_tex.set_address(count_gpu, count.nbytes) +mesh_tex.set_address(mesh_gpu, mesh.nbytes) +lower_bound_tex.set_address(lower_bound_gpu, lower_bound.nbytes) +upper_bound_tex.set_address(upper_bound_gpu, upper_bound.nbytes) +child_map_tex.set_address(child_map_gpu, child_map.nbytes) +child_len_tex.set_address(child_len_gpu, child_len.nbytes) +mesh_tex.set_format(cuda.array_format.FLOAT, 4) +lower_bound_tex.set_format(cuda.array_format.FLOAT, 4) +upper_bound_tex.set_format(cuda.array_format.FLOAT, 4) child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) -mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4) -mesh['x'] = mesh3['x'] -mesh['y'] = mesh3['y'] -mesh['z'] = mesh3['z'] - -mesh_gpu = cuda.to_device(mesh) -mesh_tex = mod.get_texref('mesh') -mesh_tex.set_address(mesh_gpu, mesh.nbytes) -mesh_tex.set_format(cuda.array_format.FLOAT, 4) - -pixel = np.empty(size, dtype=np.int32).flatten() - -pixel_gpu = cuda.mem_alloc(pixel.nbytes) -cuda.memcpy_htod(pixel_gpu, pixel) - -speed = [] -elapsed = [] - -t0total = time.time() +pixels = np.empty(width*height, dtype=np.int32) +pixels_gpu = cuda.to_device(pixels) block_size = 64 for i in range(100): - rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) - - rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) + rotate(np.int32(origin.size), origin_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) + rotate(np.int32(direction.size), direction_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) t0 = time.time() - intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex]) + intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu, pixels_gpu, first_leaf, block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, lower_bound_tex, upper_bound_tex, child_map_tex, child_len_tex]) cuda.Context.synchronize() - - elapsed.append(time.time() - t0) + elapsed = time.time() - t0 print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \ - (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1]) + (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed) - speed.append(pixel.size/elapsed[-1]) - - cuda.memcpy_dtoh(pixel, pixel_gpu) - pygame.surfarray.blit_array(screen, pixel.reshape(size)) + cuda.memcpy_dtoh(pixels, pixels_gpu) + pygame.surfarray.blit_array(screen, pixels.reshape(size)) pygame.display.flip() -print 'average time = %f sec' % np.mean(elapsed) -print 'average speed = %f photons/sec' % np.mean(speed) -print 'total time = %f sec' % (time.time() - t0total) - raw_input('press enter to exit') diff --git a/test_lbne.py b/test_lbne.py deleted file mode 100644 index 9fe27ef..0000000 --- a/test_lbne.py +++ /dev/null @@ -1,164 +0,0 @@ -import time -from stl import * -import numpy as np -from pycuda import autoinit -from pycuda.compiler import SourceModule -import pycuda.driver as cuda -from pycuda import gpuarray -from string import Template - -def array2vector(arr, dtype=gpuarray.vec.float3): - if len(arr.shape) != 2 or arr.shape[-1] != 3: - raise Exception('shape mismatch') - - x = np.empty(arr.shape[0], dtype=dtype) - x['x'] = arr[:,0] - x['y'] = arr[:,1] - x['z'] = arr[:,2] - - return x - -print 'device %s' % autoinit.device.name() - -source = open('src/intersect.cu').read() -mod = SourceModule(source, options=['-I /home/tlatorre/projects/chroma/src'], no_extern_c=True, arch='sm_13') - -rotate = mod.get_function('rotate') -translate = mod.get_function('translate') -intersect_mesh = mod.get_function('intersect_mesh') - -import pygame -size = width, height = 800, 600 -screen = pygame.display.set_mode(size, (pygame.NOFRAME | pygame.DOUBLEBUF)) - -film_size = (0.035, 0.024) -focal_length = 0.05 - -grid = [] -for x in np.linspace(-film_size[0]/2, film_size[0]/2, width): - for z in np.linspace(-film_size[1]/2, film_size[1]/2, height): - grid.append((x,0,z)) -grid = np.array(grid) -grid += (0,focal_length,0) -grid += (0,200,30) - -x = array2vector(grid) -x_gpu = cuda.to_device(x) - -p = (0,200,30)-grid - -for i in range(p.shape[0]): - p[i] /= np.linalg.norm(p[i]) - -p = array2vector(p) -p_gpu = cuda.to_device(p) - - -from zcurve import * - -#mesh = read_stl('models/tie_interceptor6.stl') -#mesh = read_stl('models/sphere.stl') -#mesh = read_stl('models/IV.stl') - -#from lbne import build_lbne -#mesh = build_lbne() - -mesh = read_stl('models/lbne_sphere_only.stl') - -mesh = mesh.reshape(mesh.shape[0]//3,3,3) -mesh = morton_order(mesh) -mesh = mesh.reshape(mesh.shape[0]*3, 3) - -mesh3 = array2vector(mesh) - -from build import Graph - - -#rotate(np.int32(mesh3.size), cuda.InOut(mesh3), np.float32(-np.pi/2), gpuarray.vec.make_float3(1,0,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) - -#translate(np.int32(mesh3.size), cuda.InOut(mesh3), gpuarray.vec.make_float3(0,30,0), block=(256,1,1), grid=(mesh3.size//256+1,1)) - -graph = Graph(mesh3) - -lower = array2vector(graph.lower, dtype=gpuarray.vec.float4) -upper = array2vector(graph.upper, dtype=gpuarray.vec.float4) -start = graph.start.astype(np.uint32) -count = graph.count.astype(np.uint32) -stack = np.zeros(lower.size, dtype=np.int32) - -lower_gpu = cuda.to_device(lower) -upper_gpu = cuda.to_device(upper) - -lower_tex = mod.get_texref('lower_bound_arr') -upper_tex = mod.get_texref('upper_bound_arr') - -lower_tex.set_address(lower_gpu, lower.nbytes) -upper_tex.set_address(upper_gpu, upper.nbytes) - -lower_tex.set_format(cuda.array_format.FLOAT, 4) -upper_tex.set_format(cuda.array_format.FLOAT, 4) - -start_gpu = cuda.to_device(start) -count_gpu = cuda.to_device(count) -stack_gpu = cuda.mem_alloc(stack.nbytes) -cuda.memcpy_htod(stack_gpu, stack) - -child_map_tex = mod.get_texref('child_map_arr') -child_len_tex = mod.get_texref('child_len_arr') - -child_map_tex.set_address(start_gpu, start.nbytes) -child_len_tex.set_address(count_gpu, count.nbytes) - -child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) -child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - -mesh = np.empty(mesh3.size, dtype=gpuarray.vec.float4) -mesh['x'] = mesh3['x'] -mesh['y'] = mesh3['y'] -mesh['z'] = mesh3['z'] - -mesh_gpu = cuda.to_device(mesh) -mesh_tex = mod.get_texref('mesh') -mesh_tex.set_address(mesh_gpu, mesh.nbytes) -mesh_tex.set_format(cuda.array_format.FLOAT, 4) - -pixel = np.empty(size, dtype=np.int32).flatten() - -pixel_gpu = cuda.mem_alloc(pixel.nbytes) -cuda.memcpy_htod(pixel_gpu, pixel) - -speed = [] -elapsed = [] - -t0total = time.time() - -block_size = 64 -for i in range(1000): - rotate(np.int32(x.size), x_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) - - rotate(np.int32(p.size), p_gpu, np.float32(np.pi/100), gpuarray.vec.make_float3(0,0,1), block=(block_size,1,1), grid=(width*height//block_size+1,1)) - - translate(np.int32(x.size), x_gpu, - gpuarray.vec.make_float3(-np.sin(i*np.pi/100),-np.cos(i*np.pi/100),0), - block=(block_size,1,1), grid=(width*height//block_size+1,1)) - - t0 = time.time() - intersect_mesh(np.int32(x.size), x_gpu, p_gpu, pixel_gpu, np.int32(graph.first_leaf), block=(block_size,1,1), grid=(width*height//block_size+1,1), texrefs=[mesh_tex, upper_tex, lower_tex, child_map_tex, child_len_tex]) - cuda.Context.synchronize() - - elapsed.append(time.time() - t0) - - print '%i triangles, %i photons, %f sec; (%f photons/sec)' % \ - (mesh.size//3, pixel.size, elapsed[-1], pixel.size/elapsed[-1]) - - speed.append(pixel.size/elapsed[-1]) - - cuda.memcpy_dtoh(pixel, pixel_gpu) - pygame.surfarray.blit_array(screen, pixel.reshape(size)) - pygame.display.flip() - -print 'average time = %f sec' % np.mean(elapsed) -print 'average speed = %f photons/sec' % np.mean(speed) -print 'total time = %f sec' % (time.time() - t0total) - -raw_input('press enter to exit') diff --git a/vector.py b/vector.py new file mode 100644 index 0000000..7b36e07 --- /dev/null +++ b/vector.py @@ -0,0 +1,13 @@ +import numpy as np +from pycuda import gpuarray + +def make_vector(arr, dtype=gpuarray.vec.float3): + if len(arr.shape) != 2 or arr.shape[-1] != 3: + raise Exception('shape mismatch') + + x = np.empty(arr.shape[0], dtype) + x['x'] = arr[:,0] + x['y'] = arr[:,1] + x['z'] = arr[:,2] + + return x diff --git a/zcurve.py b/zcurve.py deleted file mode 100644 index 5ef37f3..0000000 --- a/zcurve.py +++ /dev/null @@ -1,25 +0,0 @@ -import numpy as np -from itertools import chain - -def interleave(*args): - return int("".join(chain(*zip(*[bin(x)[2:].zfill(x.nbytes*8) for x in args]))), 2) - -def morton_order(mesh, dtype=np.uint8): - vertices = mesh.reshape(mesh.shape[0]*3, 3) - - upper_bound = np.max(vertices, axis=0) - lower_bound = np.min(vertices, axis=0) - - quantize = lambda x: dtype((x-lower_bound)*np.iinfo(dtype).max/(upper_bound-lower_bound)) - - zvalue = [] - for triangle in mesh: - center = np.mean(np.vstack((triangle[0], triangle[1], triangle[2])), axis=0) - zvalue.append(interleave(*quantize(center))) - - ordered_mesh = np.empty(mesh.shape) - - for i, idx in enumerate(zip(*sorted(zip(zvalue, range(len(zvalue)))))[-1]): - ordered_mesh[i] = mesh[idx] - - return ordered_mesh |