From 2d7220415ec99a80a794f6c642d6e14de8481945 Mon Sep 17 00:00:00 2001 From: Stan Seibert Date: Sun, 14 Aug 2011 21:05:07 -0400 Subject: Rewrite the color_solids function in gpu.GPU (and associated CUDA code) to make it 100x faster. Instead of having each CUDA thread loop over the full triangle list, we give each thread a single triangle and ask it to look up the hit status and color for that triangle. The hit array and color array are small enough (approx 30,000 entries) to fit into the cache, so this goes much faster. Now the event viewer is quite snappy! --- camera.py | 21 +++++++++++++-------- gpu.py | 15 +++++++++++---- src/mesh.h | 17 ++++++++--------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/camera.py b/camera.py index 0045ae1..c270472 100755 --- a/camera.py +++ b/camera.py @@ -427,20 +427,25 @@ class EventViewer(Camera): self.f = ROOT.TFile(filename) self.T = self.f.Get('T') self.T.GetEntry(0) + self.nsolids = geometry.solid_id.max() + 1 def color_hit_pmts(self): self.gpu.reset_colors() - solid_ids = np.empty(len(self.T.ev.channel), np.uint32) - t = np.empty(len(self.T.ev.channel), np.float32) - q = np.empty(len(self.T.ev.channel), np.float32) + hit = np.empty(self.nsolids, np.int32) + t = np.empty(self.nsolids, np.float32) + q = np.empty(self.nsolids, np.float32) - for i, channel in enumerate(self.T.ev.channel): - solid_ids[i] = channel.channel_id - t[i] = channel.t - q[i] = channel.q + # self.nsolids has a weird data type that PyROOT doesn't understand + self.T.ev.get_channels(int(self.nsolids), hit, t, q) - self.gpu.color_solids(solid_ids, map_to_color(t, range=(t.min(),t.mean()))) + # PyROOT prints warnings when we try to pass a bool array directly + # so we convert afterward + hit = hit.astype(np.bool) + + # Important: Compute range only with HIT channels + solid_colors = map_to_color(t, range=(t[hit].min(),t[hit].mean())) + self.gpu.color_solids(hit, solid_colors) self.update() def process_event(self, event): diff --git a/gpu.py 
b/gpu.py index 5ad195d..7e45c3d 100644 --- a/gpu.py +++ b/gpu.py @@ -207,13 +207,20 @@ class GPU(object): self.print_device_usage() def reset_colors(self): - self.colors_gpu.set(self.geometry.colors.astype(np.uint32)) + self.colors_gpu.set_async(self.geometry.colors.astype(np.uint32)) - def color_solids(self, solid_ids, colors): - solid_ids_gpu = gpuarray.to_gpu(np.array(solid_ids, dtype=np.int32)) + def color_solids(self, solid_hit, colors): + solid_hit_gpu = gpuarray.to_gpu(np.array(solid_hit, dtype=np.bool)) solid_colors_gpu = gpuarray.to_gpu(np.array(colors, dtype=np.uint32)) - self.geo_funcs.color_solids(np.int32(solid_ids_gpu.size), np.uint32(self.triangles_gpu.size), self.solid_id_map_gpu, solid_ids_gpu, solid_colors_gpu, block=(self.nthread_per_block,1,1), grid=(solid_ids_gpu.size//self.nthread_per_block+1,1)) + for first_triangle, triangles_this_round, blocks in chunk_iterator(self.triangles_gpu.size, self.nthread_per_block, self.max_blocks): + self.geo_funcs.color_solids(np.int32(first_triangle), + np.int32(triangles_this_round), + self.solid_id_map_gpu, + solid_hit_gpu, + solid_colors_gpu, + block=(self.nthread_per_block,1,1), + grid=(blocks,1)) self.context.synchronize() def setup_propagate(self, seed=1): diff --git a/src/mesh.h b/src/mesh.h index a2bd207..f466470 100644 --- a/src/mesh.h +++ b/src/mesh.h @@ -167,21 +167,20 @@ __global__ void set_colors(unsigned int *colors) g_colors = colors; } -__global__ void color_solids(int nthreads, unsigned int ntriangles, int *solid_id_map, int *solid_ids, unsigned int *solid_colors) +__global__ void color_solids(int first_triangle, int nthreads, + int *solid_id_map, + bool *solid_hit, + unsigned int *solid_colors) { int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; - int solid_id = solid_ids[id]; - unsigned int color = solid_colors[id]; - - for (int i=0; i < ntriangles; i++) - { - if (solid_id_map[i] == solid_id) - g_colors[i] = color; - } + int triangle_id = first_triangle + id; + int 
solid_id = solid_id_map[triangle_id]; + if (solid_hit[solid_id]) + g_colors[triangle_id] = solid_colors[solid_id]; } } // extern "c" -- cgit