From 2d7220415ec99a80a794f6c642d6e14de8481945 Mon Sep 17 00:00:00 2001
From: Stan Seibert <stan@mtrr.org>
Date: Sun, 14 Aug 2011 21:05:07 -0400
Subject: Rewrite the color_solid function in gpu.GPU (and associated CUDA
 code) to make it 100x faster.  Instead of having each CUDA thread loop over
 the full triangle list, we give each thread a single triangle and ask it to
 look up the hit status and color for that triangle.  The hit array and color
 array are small enough (approx 30,000 entries) to fit into the cache, so this
 goes much faster.

Now the event viewer is quite snappy!
---
 src/mesh.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'src')

diff --git a/src/mesh.h b/src/mesh.h
index a2bd207..f466470 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -167,21 +167,20 @@ __global__ void set_colors(unsigned int *colors)
 	g_colors = colors;
 }
 
-__global__ void color_solids(int nthreads, unsigned int ntriangles, int *solid_id_map, int *solid_ids, unsigned int *solid_colors)
+__global__ void color_solids(int first_triangle, int nthreads, 
+			     int *solid_id_map, 
+			     bool *solid_hit, 
+			     unsigned int *solid_colors)
 {
 	int id = blockIdx.x*blockDim.x + threadIdx.x;
 
 	if (id >= nthreads)
 		return;
 
-	int solid_id = solid_ids[id];
-	unsigned int color = solid_colors[id];
-
-	for (int i=0; i < ntriangles; i++)
-	{
-		if (solid_id_map[i] == solid_id)
-			g_colors[i] = color;
-	}
+	int triangle_id = first_triangle + id;
+	int solid_id = solid_id_map[triangle_id];
+	if (solid_hit[solid_id])
+		g_colors[triangle_id] = solid_colors[solid_id];
 }
 
 } // extern "c"
-- 
cgit