summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- geometry.py 16
-rw-r--r-- src/kernel.cu 23
2 files changed, 23 insertions, 16 deletions
diff --git a/geometry.py b/geometry.py
index 04be460..111006f 100644
--- a/geometry.py
+++ b/geometry.py
@@ -399,25 +399,31 @@ class Geometry(object):
self.node_map_gpu = cuda.to_device(self.node_map)
self.node_length_gpu = cuda.to_device(self.node_length)
+ print 'Device usage:'
+ print 'vertices:', vertices.nbytes
+ print 'triangles:', triangles.nbytes
+ print 'lower_bounds:', lower_bounds.nbytes
+ print 'upper_bounds:', upper_bounds.nbytes
+ print 'node_map:', self.node_map.nbytes
+ print 'node_length:', self.node_length.nbytes
+
set_pointer = module.get_function('set_pointer')
- set_pointer(self.triangles_gpu, block=(1,1,1), grid=(1,1))
+ set_pointer(self.triangles_gpu, self.vertices_gpu,
+ block=(1,1,1), grid=(1,1))
- vertices_tex = module.get_texref('vertices')
lower_bounds_tex = module.get_texref('lower_bounds')
upper_bounds_tex = module.get_texref('upper_bounds')
node_map_tex = module.get_texref('node_map')
node_length_tex = module.get_texref('node_length')
- vertices_tex.set_address(self.vertices_gpu, vertices.nbytes)
lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes)
upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes)
node_map_tex.set_address(self.node_map_gpu, self.node_map.nbytes)
node_length_tex.set_address(self.node_length_gpu, self.node_length.nbytes)
- vertices_tex.set_format(cuda.array_format.FLOAT, 4)
lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
- return [vertices_tex, lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex]
+ return [lower_bounds_tex, upper_bounds_tex, node_map_tex, node_length_tex]
diff --git a/src/kernel.cu b/src/kernel.cu
index 405f06c..2d579be 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -22,7 +22,7 @@ enum
};
/* flattened triangle mesh */
-texture<float4, 1, cudaReadModeElementType> vertices;
+__device__ float4 *vertices;
__device__ uint4 *triangles;
/* lower/upper bounds for the bounding box associated with each node/leaf */
@@ -110,9 +110,9 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con
uint4 triangle_data = triangles[index+i];
- float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x));
- float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y));
- float3 v2 = make_float3(tex1Dfetch(vertices, triangle_data.z));
+ float3 v0 = make_float3(vertices[triangle_data.x]);
+ float3 v1 = make_float3(vertices[triangle_data.y]);
+ float3 v2 = make_float3(vertices[triangle_data.z]);
if (intersect_triangle(origin, direction, v0, v1, v2, distance))
{
@@ -146,9 +146,10 @@ __device__ curandState rng_states[100000];
extern "C"
{
-__global__ void set_pointer(uint4 *triangle_ptr)
+ __global__ void set_pointer(uint4 *triangle_ptr, float4 *vertex_ptr)
{
triangles = triangle_ptr;
+ vertices = vertex_ptr;
}
/* Initialize random number states */
@@ -213,9 +214,9 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
{
uint4 triangle_data = triangles[triangle_index];
- float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x));
- float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y));
- float3 v2 = make_float3(tex1Dfetch(vertices, triangle_data.z));
+ float3 v0 = make_float3(vertices[triangle_data.x]);
+ float3 v1 = make_float3(vertices[triangle_data.y]);
+ float3 v2 = make_float3(vertices[triangle_data.z]);
pixels[id] = get_color(direction, v0, v1, v2, triangle_data.w);
}
@@ -262,9 +263,9 @@ __global__ void propagate(int nthreads, float3 *positions, float3 *directions, f
uint4 triangle_data = triangles[last_hit_triangle];
- float3 v0 = make_float3(tex1Dfetch(vertices, triangle_data.x));
- float3 v1 = make_float3(tex1Dfetch(vertices, triangle_data.y));
- float3 v2 = make_float3(tex1Dfetch(vertices, triangle_data.z));
+ float3 v0 = make_float3(vertices[triangle_data.x]);
+ float3 v1 = make_float3(vertices[triangle_data.y]);
+ float3 v2 = make_float3(vertices[triangle_data.z]);
int material_in_index = convert(0xFF & (triangle_data.w >> 24));
int material_out_index = convert(0xFF & (triangle_data.w >> 16));