From 2d7220415ec99a80a794f6c642d6e14de8481945 Mon Sep 17 00:00:00 2001
From: Stan Seibert
Date: Sun, 14 Aug 2011 21:05:07 -0400
Subject: Rewrite the color_solid function in gpu.GPU (and associated CUDA code) to make it 100x faster.

Instead of having each CUDA thread loop over the full triangle list, we
give each thread a single triangle and ask it to look up the hit status
and color for that triangle. The hit array and color array are small
enough (approx 30,000 entries) to fit into the cache, so this goes much
faster. Now the event viewer is quite snappy!
---
 src/mesh.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/mesh.h b/src/mesh.h
index a2bd207..f466470 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -167,21 +167,20 @@ __global__ void set_colors(unsigned int *colors)
     g_colors = colors;
 }
 
-__global__ void color_solids(int nthreads, unsigned int ntriangles, int *solid_id_map, int *solid_ids, unsigned int *solid_colors)
+__global__ void color_solids(int first_triangle, int nthreads,
+                             int *solid_id_map,
+                             bool *solid_hit,
+                             unsigned int *solid_colors)
 {
     int id = blockIdx.x*blockDim.x + threadIdx.x;
 
     if (id >= nthreads)
         return;
 
-    int solid_id = solid_ids[id];
-    unsigned int color = solid_colors[id];
-
-    for (int i=0; i < ntriangles; i++)
-    {
-        if (solid_id_map[i] == solid_id)
-            g_colors[i] = color;
-    }
+    int triangle_id = first_triangle + id;
+    int solid_id = solid_id_map[triangle_id];
+    if (solid_hit[solid_id])
+        g_colors[triangle_id] = solid_colors[solid_id];
 }
 
 } // extern "c"
-- 
cgit