From 55cec145d72baf58639aad7773e23e28c9ca8676 Mon Sep 17 00:00:00 2001 From: Anthony LaTorre Date: Sat, 10 Sep 2011 20:40:05 -0400 Subject: store geometry struct in shared memory. this increases photon propagation speed from 3.3M -> 3.45!. --- src/mesh.h | 9 +++++++++ src/propagate.cu | 18 ++++++++++++++---- src/render.cu | 9 +++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/mesh.h b/src/mesh.h index 734ab92..73dff71 100644 --- a/src/mesh.h +++ b/src/mesh.h @@ -122,11 +122,20 @@ __global__ void distance_to_mesh(int nthreads, float3 *_origin, float3 *_direction, Geometry *g, float *_distance) { + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; + g = &sg; + float3 origin = _origin[id]; float3 direction = _direction[id]; direction /= norm(direction); diff --git a/src/propagate.cu b/src/propagate.cu index ecc32c0..5e98c9f 100644 --- a/src/propagate.cu +++ b/src/propagate.cu @@ -5,6 +5,8 @@ #include "geometry.h" #include "photon.h" +#include "stdio.h" + extern "C" { @@ -45,7 +47,6 @@ photon_duplicate(int first_photon, int nthreads, } } - __global__ void propagate(int first_photon, int nthreads, unsigned int *input_queue, unsigned int *output_queue, curandState *rng_states, @@ -53,13 +54,22 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, float *wavelengths, float3 *polarizations, float *times, unsigned int *histories, int *last_hit_triangles, int max_steps, - Geometry *geometry) + Geometry *g) { + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; + g = &sg; + curandState rng = rng_states[id]; int photon_id = input_queue[first_photon + id]; @@ -92,7 +102,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, break; } - fill_state(s, p, geometry); + fill_state(s, p, g); if (p.last_hit_triangle == -1) break; @@ -106,7 +116,7 @@ propagate(int first_photon, int nthreads, unsigned int *input_queue, continue; if (s.surface_index != -1) { - command = propagate_at_surface(p, s, rng, geometry); + command = propagate_at_surface(p, s, rng, g); if (command == BREAK) break; diff --git a/src/render.cu b/src/render.cu index 005950f..d4ed443 100644 --- a/src/render.cu +++ b/src/render.cu @@ -39,11 +39,20 @@ render(int nthreads, float3 *_origin, float3 *_direction, Geometry *g, unsigned int alpha_depth, unsigned int *pixels, float *_dx, unsigned int *dxlen, float4 *_color) { + __shared__ Geometry sg; + + if (threadIdx.x == 0) + sg = *g; + + __syncthreads(); + int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; + g = &sg; + float3 origin = _origin[id]; float3 direction = _direction[id]; unsigned int n = dxlen[id]; -- cgit