diff options
-rwxr-xr-x | render.py | 34 | ||||
-rw-r--r-- | src/kernel.cu | 157 | ||||
-rw-r--r-- | src/photon.h | 6 |
3 files changed, 131 insertions, 66 deletions
@@ -17,6 +17,8 @@ def render(viewable, size=(800,600), name='', bits=8, make_movie=False): lower_bound, upper_bound = geometry.mesh.get_bounds() + source_position = [0, 0, upper_bound[2]+1.0] + scale = np.linalg.norm(upper_bound-lower_bound) print 'device %s' % autoinit.device.name() @@ -39,32 +41,32 @@ def render(viewable, size=(800,600), name='', bits=8, make_movie=False): diagonal = np.linalg.norm(upper_bound-lower_bound) - source_position = [0, 0, upper_bound[2]+1.0] - source_positions_gpu = gpuarray.empty(len(geometry.mesh.triangles), dtype=gpuarray.vec.float3) source_positions_gpu.fill(gpuarray.vec.make_float3(*source_position)) source_directions = np.mean(geometry.mesh[:], axis=1) - source_position source_directions_gpu = gpuarray.to_gpu(source_directions.astype(np.float32).view(gpuarray.vec.float3)) - rgb_lookup_gpu = gpuarray.zeros(source_positions_gpu.size, dtype=gpuarray.vec.float3) + rgb_lookup1_gpu = gpuarray.zeros(source_positions_gpu.size, dtype=gpuarray.vec.float3) + rgb_lookup2_gpu = gpuarray.zeros(source_positions_gpu.size, dtype=gpuarray.vec.float3) max_steps = 10 - rgb_runs = 1000 + rgb_runs = 100 print 'building rgb lookup.' - cuda_build_rgb_lookup(np.int32(source_positions_gpu.size), rng_states_gpu, source_positions_gpu, source_directions_gpu, rgb_lookup_gpu, np.int32(rgb_runs), np.int32(max_steps), block=(64,1,1), grid=(source_positions_gpu.size//64+1,1)) + cuda_build_rgb_lookup(np.int32(source_positions_gpu.size), rng_states_gpu, source_positions_gpu, source_directions_gpu, rgb_lookup1_gpu, rgb_lookup2_gpu, np.int32(rgb_runs), np.int32(max_steps), block=(64,1,1), grid=(source_positions_gpu.size//64+1,1)) cuda.Context.synchronize() print 'done.' - rgb_lookup = rgb_lookup_gpu.get() - rgb_lookup['x'] /= rgb_runs - rgb_lookup['y'] /= rgb_runs - rgb_lookup['z'] /= rgb_runs - rgb_lookup['x'][rgb_lookup['x'] > 1.0] = 1.0 - rgb_lookup['y'][rgb_lookup['y'] > 1.0] = 1.0 - rgb_lookup['z'][rgb_lookup['z'] > 1.0] = 1.0 - rgb_lookup_gpu = cuda.to_device(rgb_lookup) + rgb_lookup1 = rgb_lookup1_gpu.get().view(np.float32) + rgb_lookup1 /= rgb_runs + rgb_lookup1[rgb_lookup1 > 1.0] = 1.0 + rgb_lookup1_gpu.set(rgb_lookup1.view(gpuarray.vec.float3)) + + rgb_lookup2 = rgb_lookup2_gpu.get().view(np.float32) + rgb_lookup2 /= rgb_runs + rgb_lookup2[rgb_lookup2 > 1.0] = 1.0 + rgb_lookup2_gpu.set(rgb_lookup2.view(gpuarray.vec.float3)) camera = Camera(size) @@ -80,7 +82,7 @@ def render(viewable, size=(800,600), name='', bits=8, make_movie=False): directions_gpu = gpuarray.to_gpu(directions.astype(np.float32).view(gpuarray.vec.float3)) pixels_gpu = gpuarray.zeros(width*height, dtype=np.int32) - nruns = 5 + nruns = 1 screen = pygame.display.set_mode(size) pygame.display.set_caption(name) @@ -97,11 +99,11 @@ def render(viewable, size=(800,600), name='', bits=8, make_movie=False): global image_index t0 = time.time() - cuda_render(np.int32(pixels_gpu.size), rng_states_gpu, origins_gpu, directions_gpu, rgb_lookup_gpu, np.int32(nruns), pixels_gpu, np.int32(max_steps), block=(nblocks,1,1), grid=(pixels_gpu.size//nblocks+1,1)) + cuda_render(np.int32(pixels_gpu.size), rng_states_gpu, origins_gpu, directions_gpu, rgb_lookup1_gpu, rgb_lookup2_gpu, np.int32(nruns), pixels_gpu, np.int32(max_steps), block=(nblocks,1,1), grid=(pixels_gpu.size//nblocks+1,1)) cuda.Context.synchronize() elapsed = time.time() - t0 - print 'elapsed %f sec' % elapsed + #print 'elapsed %f sec' % elapsed pygame.surfarray.blit_array(screen, pixels_gpu.get().reshape(size)) pygame.display.flip() diff --git a/src/kernel.cu b/src/kernel.cu index 73800a3..2cd9347 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -12,20 +12,14 @@ #define BLUE_WAVELENGTH 465 #define GREEN_WAVELENGTH 545 -__device__ void myAtomicAdd(float *addr, float data) +__device__ void fAtomicAdd(float *addr, float data) { while (data) data = atomicExch(addr, data+atomicExch(addr, 0.0f)); } -__device__ int to_diffuse(Photon p, int max_steps) +__device__ void to_diffuse(Photon &p, State &s, const int &max_steps) { - // note that p is NOT passed by reference; this is intentional - - p.last_hit_triangle = -1; - - State s; - int steps = 0; while (steps < max_steps) { @@ -51,7 +45,7 @@ __device__ int to_diffuse(Photon p, int max_steps) command = propagate_at_surface(p, s); if (p.history & REFLECT_DIFFUSE) - return p.last_hit_triangle; + break; if (command == BREAK) break; @@ -64,7 +58,6 @@ __device__ int to_diffuse(Photon p, int max_steps) } // while (steps < max_steps) - return -1; } // to_diffuse extern "C" @@ -93,25 +86,25 @@ __global__ void rotate(int nthreads, float3 *points, float phi, float3 axis) points[id] = rotate(points[id], phi, axis); } -__global__ void build_rgb_lookup(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float3 *rgb_lookup, int runs, int max_steps) +__global__ void build_rgb_lookup(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float3 *rgb_lookup1, float3 *rgb_lookup2, int runs, int max_steps) { int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; - Photon p; - p.rng = rng_states[id]; - p.position = positions[id]; - p.direction = directions[id]; - p.direction /= norm(p.direction); - p.polarization = uniform_sphere(&p.rng); - p.time = 0.0f; - p.history = 0x0; + Photon seed; + seed.rng = rng_states[id]; + seed.position = positions[id]; + seed.direction = directions[id]; + seed.direction /= norm(seed.direction); + seed.polarization = uniform_sphere(&seed.rng); + seed.time = 0.0f; + seed.history = 0x0; float distance; - int hit_triangle = intersect_mesh(p.position, p.direction, distance); + int hit_triangle = intersect_mesh(seed.position, seed.direction, distance); if (hit_triangle != id) return; @@ -124,79 +117,143 @@ __global__ void build_rgb_lookup(int nthreads, curandState *rng_states, float3 * float3 v1 = g_vertices[triangle_data.y]; float3 v2 = g_vertices[triangle_data.z]; - float cos_theta = dot(normalize(cross(v1-v0, v2-v1)), -p.direction); + float cos_theta = dot(normalize(cross(v1-v0, v2-v1)), -seed.direction); if (cos_theta < 0.0f) - cos_theta = dot(-normalize(cross(v1-v0, v2-v1)), -p.direction); + cos_theta = dot(-normalize(cross(v1-v0, v2-v1)), -seed.direction); + + Photon p; + State s; for (int i=0; i < runs; i++) { + p = seed; p.wavelength = RED_WAVELENGTH; - hit_triangle = to_diffuse(p, max_steps); + to_diffuse(p, s, max_steps); - if (hit_triangle != -1) - myAtomicAdd(&rgb_lookup[hit_triangle].x, cos_theta); + if (p.history & REFLECT_DIFFUSE) + { + if (s.inside_to_outside) + { + fAtomicAdd(&rgb_lookup1[p.last_hit_triangle].x, cos_theta); + } + else + { + fAtomicAdd(&rgb_lookup2[p.last_hit_triangle].x, cos_theta); + } + } + p = seed; p.wavelength = BLUE_WAVELENGTH; - hit_triangle = to_diffuse(p, max_steps); + to_diffuse(p, s, max_steps); - if (hit_triangle != -1) - myAtomicAdd(&rgb_lookup[hit_triangle].y, cos_theta); + if (p.history & REFLECT_DIFFUSE) + { + if (s.inside_to_outside) + { + fAtomicAdd(&rgb_lookup1[p.last_hit_triangle].y, cos_theta); + } + else + { + fAtomicAdd(&rgb_lookup2[p.last_hit_triangle].y, cos_theta); + } + } + p = seed; p.wavelength = GREEN_WAVELENGTH; - hit_triangle = to_diffuse(p, max_steps); + to_diffuse(p, s, max_steps); - if (hit_triangle != -1) - myAtomicAdd(&rgb_lookup[hit_triangle].z, cos_theta); + if (p.history & REFLECT_DIFFUSE) + { + if (s.inside_to_outside) + { + fAtomicAdd(&rgb_lookup1[p.last_hit_triangle].z, cos_theta); + } + else + { + fAtomicAdd(&rgb_lookup2[p.last_hit_triangle].z, cos_theta); + } + } } } // build_rgb_lookup -__global__ void render(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float3 *rgb_lookup, int runs, int *pixels, int max_steps) +__global__ void render(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float3 *rgb_lookup1, float3 *rgb_lookup2, int runs, int *pixels, int max_steps) { int id = blockIdx.x*blockDim.x + threadIdx.x; if (id >= nthreads) return; - Photon p; - p.rng = rng_states[id]; - p.position = positions[id]; - p.direction = directions[id]; - p.direction /= norm(p.direction); - p.polarization = uniform_sphere(&p.rng); - p.time = 0.0f; - p.history = 0x0; + Photon seed; + seed.rng = rng_states[id]; + seed.position = positions[id]; + seed.direction = directions[id]; + seed.direction /= norm(seed.direction); + seed.polarization = uniform_sphere(&seed.rng); + seed.time = 0.0f; + seed.history = 0x0; float3 rgb = make_float3(0.0, 0.0, 0.0); - int hit_triangle; + Photon p; + State s; for (int i=0; i < runs; i++) { + p = seed; p.wavelength = RED_WAVELENGTH; - hit_triangle = to_diffuse(p, max_steps); + to_diffuse(p, s, max_steps); - if (hit_triangle != -1) - rgb.x += rgb_lookup[hit_triangle].x; + if (p.history & REFLECT_DIFFUSE) + { + if (s.inside_to_outside) + { + rgb.x += rgb_lookup1[p.last_hit_triangle].x; + } + else + { + rgb.x += rgb_lookup2[p.last_hit_triangle].x; + } + } + p = seed; p.wavelength = BLUE_WAVELENGTH; - hit_triangle = to_diffuse(p, max_steps); + to_diffuse(p, s, max_steps); - if (hit_triangle != -1) - rgb.y += rgb_lookup[hit_triangle].y; + if (p.history & REFLECT_DIFFUSE) + { + if (s.inside_to_outside) + { + rgb.y += rgb_lookup1[p.last_hit_triangle].y; + } + else + { + rgb.y += rgb_lookup2[p.last_hit_triangle].y; + } + } + p = seed; p.wavelength = GREEN_WAVELENGTH; - hit_triangle = to_diffuse(p, max_steps); + to_diffuse(p, s, max_steps); - if (hit_triangle != -1) - rgb.z += rgb_lookup[hit_triangle].z; + if (p.history & REFLECT_DIFFUSE) + { + if (s.inside_to_outside) + { + rgb.z += rgb_lookup1[p.last_hit_triangle].z; + } + else + { + rgb.z += rgb_lookup2[p.last_hit_triangle].z; + } + } } rgb /= runs; diff --git a/src/photon.h b/src/photon.h index 69ecd34..11bcfc1 100644 --- a/src/photon.h +++ b/src/photon.h @@ -25,6 +25,8 @@ struct Photon struct State { + bool inside_to_outside; + float3 surface_normal; float refractive_index1, refractive_index2; @@ -78,6 +80,8 @@ __device__ int fill_state(State &s, Photon &p) // outside to inside material1 = materials[outer_material_index]; material2 = materials[inner_material_index]; + + s.inside_to_outside = false; } else { @@ -85,6 +89,8 @@ __device__ int fill_state(State &s, Photon &p) material1 = materials[inner_material_index]; material2 = materials[outer_material_index]; s.surface_normal = -s.surface_normal; + + s.inside_to_outside = true; } s.refractive_index1 = interp_property(p.wavelength, material1.refractive_index); |