diff options
-rw-r--r-- | geometry.py | 27 | ||||
-rw-r--r-- | mesh.py | 4 | ||||
-rw-r--r-- | solid.py | 4 | ||||
-rw-r--r-- | src/kernel.cu | 11 |
4 files changed, 30 insertions, 16 deletions
diff --git a/geometry.py b/geometry.py index 938001e..8af8d49 100644 --- a/geometry.py +++ b/geometry.py @@ -58,14 +58,23 @@ class Geometry(object): self.solids.append(solid) def build(self, bits=8): - vertices = [] - triangles = [] + offsets = [ (0,0) ] for solid in self.solids: - triangles.extend(solid.mesh.triangles + len(vertices)) - vertices.extend(np.inner(solid.mesh.vertices, solid.rotation) + \ - solid.displacement) + offsets.append( (offsets[-1][0] + len(solid.mesh.vertices), + offsets[-1][1] + len(solid.mesh.triangles)) ) + vertices = np.zeros(shape=(offsets[-1][0], 3), dtype=np.float32) + triangles = np.zeros(shape=(offsets[-1][1],3), dtype=np.int32) + + for solid, (vertex_offset, triangle_offset) in zip(self.solids, offsets[:-1]): + triangles[triangle_offset:triangle_offset+len(solid.mesh.triangles),:] = \ + solid.mesh.triangles + vertex_offset + vertices[vertex_offset:vertex_offset + len(solid.mesh.vertices),:] = \ + np.inner(solid.mesh.vertices, solid.rotation) + solid.displacement + self.mesh = Mesh(vertices, triangles) + del vertices + del triangles zvalues_mesh = morton_order(self.mesh[:], bits) reorder = np.argsort(zvalues_mesh) @@ -284,25 +293,25 @@ class Geometry(object): self.node_map_gpu = cuda.to_device(self.node_map) self.node_length_gpu = cuda.to_device(self.node_length) + set_pointer = module.get_function('set_pointer') + set_pointer(self.triangles_gpu, block=(1,1,1), grid=(1,1)) + vertices_tex = module.get_texref('vertices') - triangles_tex = module.get_texref('triangles') lower_bounds_tex = module.get_texref('lower_bounds') upper_bounds_tex = module.get_texref('upper_bounds') node_map_tex = module.get_texref('node_map') node_length_tex = module.get_texref('node_length') vertices_tex.set_address(self.vertices_gpu, vertices.nbytes) - triangles_tex.set_address(self.triangles_gpu, triangles.nbytes) lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes) upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes) node_map_tex.set_address(self.node_map_gpu, self.node_map.nbytes) node_length_tex.set_address(self.node_length_gpu, self.node_length.nbytes) vertices_tex.set_format(cuda.array_format.FLOAT, 4) - triangles_tex.set_format(cuda.array_format.UNSIGNED_INT32, 4) lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - return [vertices_tex, triangles_tex, lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex] + return [vertices_tex, lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex] @@ -4,8 +4,8 @@ import struct class Mesh(object): def __init__(self, vertices, triangles): - vertices = np.asarray(vertices, dtype=float) - triangles = np.asarray(triangles, dtype=int) + vertices = np.asarray(vertices, dtype=np.float32) + triangles = np.asarray(triangles, dtype=np.int32) if len(vertices.shape) != 2 or vertices.shape[1] != 3: raise ValueError('shape mismatch') @@ -10,9 +10,9 @@ class Solid(object): if rotation.shape != (3,3): raise ValueError('shape mismatch') - self.rotation = rotation + self.rotation = rotation.astype(np.float32) - displacement = np.asarray(displacement) + displacement = np.asarray(displacement, dtype=np.float32) if displacement.shape != (3,): raise ValueError('shape mismatch') diff --git a/src/kernel.cu b/src/kernel.cu index 4d85f6e..796de54 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -12,7 +12,7 @@ /* flattened triangle mesh */ texture<float4, 1, cudaReadModeElementType> vertices; -texture<uint4, 1, cudaReadModeElementType> triangles; +__device__ uint4 *triangles; /* material/surface index lookup for each triangle */ texture<int, 1, cudaReadModeElementType> material1_index; @@ -87,7 +87,7 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con { for (i=0; i < length; i++) { - uint4 triangle_data = tex1Dfetch(triangles, index+i); + uint4 triangle_data = triangles[index+i]; float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x)); float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y)); @@ -125,6 +125,11 @@ __device__ curandState rng_states[256*512]; extern "C" { +__global__ void set_pointer(uint4 *triangle_ptr) +{ + triangles = triangle_ptr; +} + /* Initialize random number states */ __global__ void init_rng(unsigned long long seed, unsigned long long offset) { @@ -191,7 +196,7 @@ __global__ void ray_trace(int nthreads, float3 *origins, float3 *directions, int } else { - uint4 triangle_data = tex1Dfetch(triangles, intersection_idx); + uint4 triangle_data = triangles[intersection_idx]; float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x)); float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y)); |