diff options
-rw-r--r-- | geometry.py | 16 | ||||
-rw-r--r-- | src/kernel.cu | 23 |
2 files changed, 23 insertions, 16 deletions
diff --git a/geometry.py b/geometry.py index 04be460..111006f 100644 --- a/geometry.py +++ b/geometry.py @@ -399,25 +399,31 @@ class Geometry(object): self.node_map_gpu = cuda.to_device(self.node_map) self.node_length_gpu = cuda.to_device(self.node_length) + print 'Device usage:' + print 'vertices:', vertices.nbytes + print 'triangles:', triangles.nbytes + print 'lower_bounds:', lower_bounds.nbytes + print 'upper_bounds:', upper_bounds.nbytes + print 'node_map:', self.node_map.nbytes + print 'node_length:', self.node_length.nbytes + set_pointer = module.get_function('set_pointer') - set_pointer(self.triangles_gpu, block=(1,1,1), grid=(1,1)) + set_pointer(self.triangles_gpu, self.vertices_gpu, + block=(1,1,1), grid=(1,1)) - vertices_tex = module.get_texref('vertices') lower_bounds_tex = module.get_texref('lower_bounds') upper_bounds_tex = module.get_texref('upper_bounds') node_map_tex = module.get_texref('node_map') node_length_tex = module.get_texref('node_length') - vertices_tex.set_address(self.vertices_gpu, vertices.nbytes) lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes) upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes) node_map_tex.set_address(self.node_map_gpu, self.node_map.nbytes) node_length_tex.set_address(self.node_length_gpu, self.node_length.nbytes) - vertices_tex.set_format(cuda.array_format.FLOAT, 4) lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - return [vertices_tex, lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex] + return [lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex] diff --git a/src/kernel.cu b/src/kernel.cu index 405f06c..2d579be 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -22,7 +22,7 @@ enum }; /* flattened triangle mesh */ -texture<float4, 1, cudaReadModeElementType> vertices; +__device__ float4 *vertices; __device__ uint4 *triangles; /* lower/upper bounds for the bounding box associated with each node/leaf */ @@ -110,9 +110,9 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con uint4 triangle_data = triangles[index+i]; - float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x)); - float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y)); - float3 v2 = make_float3(tex1Dfetch(vertices, triangle_data.z)); + float3 v0 = make_float3(vertices[triangle_data.x]); + float3 v1 = make_float3(vertices[triangle_data.y]); + float3 v2 = make_float3(vertices[triangle_data.z]); if (intersect_triangle(origin, direction, v0, v1, v2, distance)) { @@ -146,9 +146,10 @@ __device__ curandState rng_states[100000]; extern "C" { -__global__ void set_pointer(uint4 *triangle_ptr) + __global__ void set_pointer(uint4 *triangle_ptr, float4 *vertex_ptr) { triangles = triangle_ptr; + vertices = vertex_ptr; } /* Initialize random number states */ @@ -213,9 +214,9 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i { uint4 triangle_data = triangles[triangle_index]; - float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x)); - float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y)); - float3 v2 = make_float3(tex1Dfetch(vertices, triangle_data.z)); + float3 v0 = make_float3(vertices[triangle_data.x]); + float3 v1 = make_float3(vertices[triangle_data.y]); + float3 v2 = make_float3(vertices[triangle_data.z]); pixels[id] = get_color(direction, v0, v1, v2, triangle_data.w); } @@ -262,9 +263,9 @@ __global__ void propagate(int nthreads, float3 *positions, float3 *directions, f uint4 triangle_data = triangles[last_hit_triangle]; - float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x)); - float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y)); - float3 v2 = make_float3(tex1Dfetch(vertices, triangle_data.z)); + float3 v0 = make_float3(vertices[triangle_data.x]); + float3 v1 = make_float3(vertices[triangle_data.y]); + float3 v2 = make_float3(vertices[triangle_data.z]); int material_in_index = convert(0xFF & (triangle_data.w >> 24)); int material_out_index = convert(0xFF & (triangle_data.w >> 16)); |