Rewrite the color_solids function in gpu.GPU (and associated CUDA code)

to make it 100x faster. Instead of having each CUDA thread loop over the full triangle list, we give each thread a single triangle and have it look up the hit status and color of the solid that triangle belongs to. The hit array and color array are small enough (approx. 30,000 entries) to fit into the cache, so this goes much faster. Now the event viewer is quite snappy!
author: Stan Seibert <stan@mtrr.org> 2011-08-14 21:05:07 -0400
committer: Stan Seibert <stan@mtrr.org> 2011-08-14 21:05:07 -0400
commit: 2d7220415ec99a80a794f6c642d6e14de8481945 (patch)
tree: 9ad4d2a847efffdd9e34562f56fe63ff3e25a119
parent: 9d61786d403a340d5ceb88b1c565cf0362c00580 (diff)
download: chroma-2d7220415ec99a80a794f6c642d6e14de8481945.tar.gz
chroma-2d7220415ec99a80a794f6c642d6e14de8481945.tar.bz2
chroma-2d7220415ec99a80a794f6c642d6e14de8481945.zip
3 files changed, 32 insertions, 21 deletions
diff --git a/camera.py b/camera.py
index 0045ae1..c270472 100755
--- a/camera.py
+++ b/camera.py
@@ -427,20 +427,25 @@ class EventViewer(Camera):
         self.f = ROOT.TFile(filename)
         self.T = self.f.Get('T')
         self.T.GetEntry(0)
+        self.nsolids = geometry.solid_id.max() + 1
 
     def color_hit_pmts(self):
         self.gpu.reset_colors()
 
-        solid_ids = np.empty(len(self.T.ev.channel), np.uint32)
-        t = np.empty(len(self.T.ev.channel), np.float32)
-        q = np.empty(len(self.T.ev.channel), np.float32)
+        hit = np.empty(self.nsolids, np.int32)
+        t = np.empty(self.nsolids, np.float32)
+        q = np.empty(self.nsolids, np.float32)
 
-        for i, channel in enumerate(self.T.ev.channel):
-            solid_ids[i] = channel.channel_id
-            t[i] = channel.t
-            q[i] = channel.q
+        # self.nsolids has a weird data type that PyROOT doesn't understand
+        self.T.ev.get_channels(int(self.nsolids), hit, t, q)
 
-        self.gpu.color_solids(solid_ids, map_to_color(t, range=(t.min(),t.mean())))
+        # PyROOT prints warnings when we try to pass a bool array directly
+        # so we convert afterward
+        hit = hit.astype(np.bool)
+
+        # Important: Compute range only with HIT channels
+        solid_colors = map_to_color(t, range=(t[hit].min(),t[hit].mean()))
+        self.gpu.color_solids(hit, solid_colors)
         self.update()
 
     def process_event(self, event):
diff --git a/gpu.py b/gpu.py
index 5ad195d..7e45c3d 100644
--- a/gpu.py
+++ b/gpu.py
@@ -207,13 +207,20 @@ class GPU(object):
         self.print_device_usage()
 
     def reset_colors(self):
-        self.colors_gpu.set(self.geometry.colors.astype(np.uint32))
+        self.colors_gpu.set_async(self.geometry.colors.astype(np.uint32))
 
-    def color_solids(self, solid_ids, colors):
-        solid_ids_gpu = gpuarray.to_gpu(np.array(solid_ids, dtype=np.int32))
+    def color_solids(self, solid_hit, colors):
+        solid_hit_gpu = gpuarray.to_gpu(np.array(solid_hit, dtype=np.bool))
         solid_colors_gpu = gpuarray.to_gpu(np.array(colors, dtype=np.uint32))
 
-        self.geo_funcs.color_solids(np.int32(solid_ids_gpu.size), np.uint32(self.triangles_gpu.size), self.solid_id_map_gpu, solid_ids_gpu, solid_colors_gpu, block=(self.nthread_per_block,1,1), grid=(solid_ids_gpu.size//self.nthread_per_block+1,1))
+        for first_triangle, triangles_this_round, blocks in chunk_iterator(self.triangles_gpu.size, self.nthread_per_block, self.max_blocks):
+            self.geo_funcs.color_solids(np.int32(first_triangle),
+                                        np.int32(triangles_this_round),
+                                        self.solid_id_map_gpu, 
+                                        solid_hit_gpu, 
+                                        solid_colors_gpu, 
+                                        block=(self.nthread_per_block,1,1), 
+                                        grid=(blocks,1))
         self.context.synchronize()
 
     def setup_propagate(self, seed=1):
diff --git a/src/mesh.h b/src/mesh.h
index a2bd207..f466470 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -167,21 +167,20 @@ __global__ void set_colors(unsigned int *colors)
 	g_colors = colors;
 }
 
-__global__ void color_solids(int nthreads, unsigned int ntriangles, int *solid_id_map, int *solid_ids, unsigned int *solid_colors)
+__global__ void color_solids(int first_triangle, int nthreads, 
+			     int *solid_id_map, 
+			     bool *solid_hit, 
+			     unsigned int *solid_colors)
 {
 	int id = blockIdx.x*blockDim.x + threadIdx.x;
 
 	if (id >= nthreads)
 		return;
 
-	int solid_id = solid_ids[id];
-	unsigned int color = solid_colors[id];
-
-	for (int i=0; i < ntriangles; i++)
-	{
-		if (solid_id_map[i] == solid_id)
-			g_colors[i] = color;
-	}
+	int triangle_id = first_triangle + id;
+	int solid_id = solid_id_map[triangle_id];
+	if (solid_hit[solid_id])
+		g_colors[triangle_id] = solid_colors[solid_id];
 }
 
 } // extern "c"
author	Stan Seibert <stan@mtrr.org>	2011-08-14 21:05:07 -0400
committer	Stan Seibert <stan@mtrr.org>	2011-08-14 21:05:07 -0400
commit	2d7220415ec99a80a794f6c642d6e14de8481945 (patch)
tree	9ad4d2a847efffdd9e34562f56fe63ff3e25a119
parent	9d61786d403a340d5ceb88b1c565cf0362c00580 (diff)
download	chroma-2d7220415ec99a80a794f6c642d6e14de8481945.tar.gz chroma-2d7220415ec99a80a794f6c642d6e14de8481945.tar.bz2 chroma-2d7220415ec99a80a794f6c642d6e14de8481945.zip