diff options
author | Stan Seibert <stan@mtrr.org> | 2011-08-11 11:49:54 -0400 |
---|---|---|
committer | Stan Seibert <stan@mtrr.org> | 2011-08-11 11:49:54 -0400 |
commit | 6310592deb66d75f473c63fe286187ca41dae0f6 (patch) | |
tree | 67d60718b7ee21a8a7008a955c24afe0330c5cc8 | |
parent | 56afb978b2416ee9a14ecacdf41ab996d1747b66 (diff) | |
download | chroma-6310592deb66d75f473c63fe286187ca41dae0f6.tar.gz chroma-6310592deb66d75f473c63fe286187ca41dae0f6.tar.bz2 chroma-6310592deb66d75f473c63fe286187ca41dae0f6.zip |
Switch from texture to float3 array for upper and lower bounds. 10% speed boost!
-rw-r--r-- | gpu.py | 30 | ||||
-rw-r--r-- | src/mesh.h | 12 |
2 files changed, 19 insertions, 23 deletions
```diff
@@ -143,37 +143,31 @@ class GPU(object):
         triangles['w'] = ((geometry.material1_index & 0xff) << 24) | ((geometry.material2_index & 0xff) << 16) | ((geometry.surface_index & 0xff) << 8)
         self.triangles_gpu = gpuarray.to_gpu(triangles)
 
-        lower_bounds_float4 = np.empty(geometry.lower_bounds.shape[0], dtype=gpuarray.vec.float4)
-        lower_bounds_float4['x'] = geometry.lower_bounds[:,0]
-        lower_bounds_float4['y'] = geometry.lower_bounds[:,1]
-        lower_bounds_float4['z'] = geometry.lower_bounds[:,2]
-        self.lower_bounds_gpu = gpuarray.to_gpu(lower_bounds_float4)
-
-        upper_bounds_float4 = np.empty(geometry.upper_bounds.shape[0], dtype=gpuarray.vec.float4)
-        upper_bounds_float4['x'] = geometry.upper_bounds[:,0]
-        upper_bounds_float4['y'] = geometry.upper_bounds[:,1]
-        upper_bounds_float4['z'] = geometry.upper_bounds[:,2]
-        self.upper_bounds_gpu = gpuarray.to_gpu(upper_bounds_float4)
+        lower_bounds_float3 = np.empty(geometry.lower_bounds.shape[0], dtype=gpuarray.vec.float3)
+        lower_bounds_float3['x'] = geometry.lower_bounds[:,0]
+        lower_bounds_float3['y'] = geometry.lower_bounds[:,1]
+        lower_bounds_float3['z'] = geometry.lower_bounds[:,2]
+        self.lower_bounds_gpu = gpuarray.to_gpu(lower_bounds_float3)
+
+        upper_bounds_float3 = np.empty(geometry.upper_bounds.shape[0], dtype=gpuarray.vec.float3)
+        upper_bounds_float3['x'] = geometry.upper_bounds[:,0]
+        upper_bounds_float3['y'] = geometry.upper_bounds[:,1]
+        upper_bounds_float3['z'] = geometry.upper_bounds[:,2]
+        self.upper_bounds_gpu = gpuarray.to_gpu(upper_bounds_float3)
 
         self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32))
         self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32))
         self.node_map_end_gpu = gpuarray.to_gpu(geometry.node_map_end.astype(np.uint32))
         self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32))
 
-        self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
+        self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), self.lower_bounds_gpu, self.upper_bounds_gpu, block=(1,1,1), grid=(1,1))
 
-        self.lower_bounds_tex = self.module.get_texref('lower_bounds')
-        self.upper_bounds_tex = self.module.get_texref('upper_bounds')
         self.node_map_tex = self.module.get_texref('node_map')
         self.node_map_end_tex = self.module.get_texref('node_map_end')
 
-        self.lower_bounds_tex.set_address(self.lower_bounds_gpu.gpudata, self.lower_bounds_gpu.nbytes)
-        self.upper_bounds_tex.set_address(self.upper_bounds_gpu.gpudata, self.upper_bounds_gpu.nbytes)
         self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes)
         self.node_map_end_tex.set_address(self.node_map_end_gpu.gpudata, self.node_map_end_gpu.nbytes)
 
-        self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
-        self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
         self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
         self.node_map_end_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
 
@@ -13,8 +13,8 @@ __device__ unsigned int g_start_node;
 __device__ unsigned int g_first_node;
 
 /* lower/upper bounds for the bounding box associated with each node/leaf */
-texture<float4, 1, cudaReadModeElementType> upper_bounds;
-texture<float4, 1, cudaReadModeElementType> lower_bounds;
+__device__ float3 *g_lower_bounds;
+__device__ float3 *g_upper_bounds;
 
 /* map to child node/triangle indices */
 texture<unsigned int, 1, cudaReadModeElementType> node_map;
@@ -38,8 +38,8 @@ __device__ int convert(int c)
    intersects the bounding box return true, else return false. */
 __device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i)
 {
-    float3 lower_bound = make_float3(tex1Dfetch(lower_bounds, i));
-    float3 upper_bound = make_float3(tex1Dfetch(upper_bounds, i));
+    float3 lower_bound = g_lower_bounds[i];
+    float3 upper_bound = g_upper_bounds[i];
 
     return intersect_box(origin, direction, lower_bound, upper_bound);
 }
@@ -134,13 +134,15 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, flo
 extern "C"
 {
 
-__global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node)
+ __global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node, float3 *lower_bounds, float3 *upper_bounds)
 {
     g_triangles = triangles;
     g_vertices = vertices;
     g_colors = colors;
     g_start_node = start_node;
     g_first_node = first_node;
+    g_lower_bounds = lower_bounds;
+    g_upper_bounds = upper_bounds;
 }
 
 __global__ void set_colors(unsigned int *colors)
```