summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gpu.py 30
-rw-r--r-- src/mesh.h 12
2 files changed, 19 insertions, 23 deletions
diff --git a/gpu.py b/gpu.py
index e6b6856..ffa17a6 100644
--- a/gpu.py
+++ b/gpu.py
@@ -143,37 +143,31 @@ class GPU(object):
triangles['w'] = ((geometry.material1_index & 0xff) << 24) | ((geometry.material2_index & 0xff) << 16) | ((geometry.surface_index & 0xff) << 8)
self.triangles_gpu = gpuarray.to_gpu(triangles)
- lower_bounds_float4 = np.empty(geometry.lower_bounds.shape[0], dtype=gpuarray.vec.float4)
- lower_bounds_float4['x'] = geometry.lower_bounds[:,0]
- lower_bounds_float4['y'] = geometry.lower_bounds[:,1]
- lower_bounds_float4['z'] = geometry.lower_bounds[:,2]
- self.lower_bounds_gpu = gpuarray.to_gpu(lower_bounds_float4)
-
- upper_bounds_float4 = np.empty(geometry.upper_bounds.shape[0], dtype=gpuarray.vec.float4)
- upper_bounds_float4['x'] = geometry.upper_bounds[:,0]
- upper_bounds_float4['y'] = geometry.upper_bounds[:,1]
- upper_bounds_float4['z'] = geometry.upper_bounds[:,2]
- self.upper_bounds_gpu = gpuarray.to_gpu(upper_bounds_float4)
+ lower_bounds_float3 = np.empty(geometry.lower_bounds.shape[0], dtype=gpuarray.vec.float3)
+ lower_bounds_float3['x'] = geometry.lower_bounds[:,0]
+ lower_bounds_float3['y'] = geometry.lower_bounds[:,1]
+ lower_bounds_float3['z'] = geometry.lower_bounds[:,2]
+ self.lower_bounds_gpu = gpuarray.to_gpu(lower_bounds_float3)
+
+ upper_bounds_float3 = np.empty(geometry.upper_bounds.shape[0], dtype=gpuarray.vec.float3)
+ upper_bounds_float3['x'] = geometry.upper_bounds[:,0]
+ upper_bounds_float3['y'] = geometry.upper_bounds[:,1]
+ upper_bounds_float3['z'] = geometry.upper_bounds[:,2]
+ self.upper_bounds_gpu = gpuarray.to_gpu(upper_bounds_float3)
self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32))
self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32))
self.node_map_end_gpu = gpuarray.to_gpu(geometry.node_map_end.astype(np.uint32))
self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32))
- self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
+ self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), self.lower_bounds_gpu, self.upper_bounds_gpu, block=(1,1,1), grid=(1,1))
- self.lower_bounds_tex = self.module.get_texref('lower_bounds')
- self.upper_bounds_tex = self.module.get_texref('upper_bounds')
self.node_map_tex = self.module.get_texref('node_map')
self.node_map_end_tex = self.module.get_texref('node_map_end')
- self.lower_bounds_tex.set_address(self.lower_bounds_gpu.gpudata, self.lower_bounds_gpu.nbytes)
- self.upper_bounds_tex.set_address(self.upper_bounds_gpu.gpudata, self.upper_bounds_gpu.nbytes)
self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes)
self.node_map_end_tex.set_address(self.node_map_end_gpu.gpudata, self.node_map_end_gpu.nbytes)
- self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
- self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
self.node_map_end_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
diff --git a/src/mesh.h b/src/mesh.h
index b4714c4..bb30bef 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -13,8 +13,8 @@ __device__ unsigned int g_start_node;
__device__ unsigned int g_first_node;
/* lower/upper bounds for the bounding box associated with each node/leaf */
-texture<float4, 1, cudaReadModeElementType> upper_bounds;
-texture<float4, 1, cudaReadModeElementType> lower_bounds;
+__device__ float3 *g_lower_bounds;
+__device__ float3 *g_upper_bounds;
/* map to child node/triangle indices */
texture<unsigned int, 1, cudaReadModeElementType> node_map;
@@ -38,8 +38,8 @@ __device__ int convert(int c)
intersects the bounding box return true, else return false. */
__device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i)
{
- float3 lower_bound = make_float3(tex1Dfetch(lower_bounds, i));
- float3 upper_bound = make_float3(tex1Dfetch(upper_bounds, i));
+ float3 lower_bound = g_lower_bounds[i];
+ float3 upper_bound = g_upper_bounds[i];
return intersect_box(origin, direction, lower_bound, upper_bound);
}
@@ -134,13 +134,15 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, flo
extern "C"
{
-__global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node)
+ __global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node, float3 *lower_bounds, float3 *upper_bounds)
{
g_triangles = triangles;
g_vertices = vertices;
g_colors = colors;
g_start_node = start_node;
g_first_node = first_node;
+ g_lower_bounds = lower_bounds;
+ g_upper_bounds = upper_bounds;
}
__global__ void set_colors(unsigned int *colors)