add 3d support to camera views by displaying images as an anaglyph. alpha coloring is now calculated using the new searching/sorting algorithms.

author: Anthony LaTorre <tlatorre9@gmail.com> 2011-08-25 21:42:04 -0400
committer: Anthony LaTorre <tlatorre9@gmail.com> 2011-08-25 21:42:04 -0400
commit: fa8d1082f9d989f2a3819540a9bf30dc67618709 (patch)
tree: 7560f7e21e0e30ad48cac77295d08bd8ad65c549
parent: 61f09cb550c619c5c2a5300d96f8590be77682bb (diff)
download: chroma-fa8d1082f9d989f2a3819540a9bf30dc67618709.tar.gz
chroma-fa8d1082f9d989f2a3819540a9bf30dc67618709.tar.bz2
chroma-fa8d1082f9d989f2a3819540a9bf30dc67618709.zip
3 files changed, 149 insertions, 105 deletions
diff --git a/camera.py b/camera.py
index 03b07f5..479d47a 100755
--- a/camera.py
+++ b/camera.py
@@ -9,15 +9,16 @@ from chroma.geometry import Mesh, Solid, Geometry
 import chroma.make as make
 import matplotlib.pyplot as plt
 
-from chroma.gpu import GPU, CUDAFuncs
+from chroma.gpu import GPU, CUDAFuncs, to_float3
 from chroma.tools import timeit
 from chroma.transform import rotate
 from chroma.optics import vacuum, lambertian_surface
+import chroma.project as project
 
 import pygame
 from pygame.locals import *
 
-from pycuda import gpuarray
+from pycuda import gpuarray as ga
 from pycuda.characterize import sizeof
 import pycuda.driver as cuda
 
@@ -115,11 +116,12 @@ def get_rays(position, size = (800, 600), width = 0.035, focal_length=0.018):
     return grid, focal_point-grid
 
 class Camera(Thread):
-    def __init__(self, geometry, size=(800,600), device_id=None):
+    def __init__(self, geometry, size=(800,600), device_id=None, enable3d=False):
         Thread.__init__(self)
         self.geometry = geometry
         self.device_id = device_id
         self.size = size
+        self.enable3d = enable3d
 
         self.unique_bvh_layers = np.unique(self.geometry.layers)
         self.currentlayer = None
@@ -159,17 +161,30 @@ class Camera(Thread):
 
         self.nblocks = 64
 
-        self.point = np.array([0, self.scale*1.0, (lower_bound[2]+upper_bound[2])/2])
-        self.axis1 = np.array([1,0,0], float)
-        self.axis2 = np.array([0,0,1], float)
+        self.point = np.array([0, -self.scale*1.0, (lower_bound[2]+upper_bound[2])/2])
+        self.axis1 = np.array([0,0,1], float)
+        self.axis2 = np.array([1,0,0], float)
 
-        origins, directions = get_rays(self.point, self.size)
+        #origins, directions = get_rays(self.point, self.size)
 
-        self.origins_gpu = gpuarray.to_gpu(origins.astype(np.float32).view(gpuarray.vec.float3))
-        self.directions_gpu = gpuarray.to_gpu(directions.astype(np.float32).view(gpuarray.vec.float3))
-        self.pixels_gpu = gpuarray.zeros(self.width*self.height, dtype=np.int32)
+        if self.enable3d:
+            self.point1 = self.point-(30.5e-3,0,0)
+            self.point2 = self.point+(30.5e-3,0,0)
 
-        self.alpha = True
+            origins1, directions1 = project.from_film(self.point1, axis2=np.cross(-self.point1,(0,0,1)), size=self.size)
+            origins2, directions2 = project.from_film(self.point2, axis2=np.cross(-self.point2,(0,0,1)), size=self.size)
+
+            origins = np.concatenate([origins1,origins2])
+            directions = np.concatenate([directions1,directions2])
+        else:
+            origins, directions = project.from_film(self.point, size=self.size)
+
+        self.origins_gpu = ga.to_gpu(to_float3(origins))
+        self.directions_gpu = ga.to_gpu(to_float3(directions))
+        self.pixels_gpu = ga.zeros(self.origins_gpu.size, dtype=np.int32)
+        self.distances_gpu = ga.empty(self.origins_gpu.size, dtype=np.float32)
+
+        self.alpha = False#True
         self.movie = False
         self.movie_index = 0
         self.movie_dir = None
@@ -179,9 +194,9 @@ class Camera(Thread):
     def initialize_render(self):
         self.rng_states_gpu = cuda.mem_alloc(self.width*self.height*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
         self.gpu.kernels.init_rng(np.int32(self.width*self.height), self.rng_states_gpu, np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
-        self.xyz_lookup1_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3)
-        self.xyz_lookup2_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3)
-        self.image_gpu = gpuarray.zeros(self.width*self.height, dtype=gpuarray.vec.float3)
+        self.xyz_lookup1_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3)
+        self.xyz_lookup2_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3)
+        self.image_gpu = ga.zeros(self.width*self.height, dtype=ga.vec.float3)
         self.gpu.context.synchronize()
 
         self.source_position = self.point
@@ -191,39 +206,39 @@ class Camera(Thread):
         self.max_steps = 10
 
     def clear_xyz_lookup(self):
-        self.xyz_lookup1_gpu.fill(gpuarray.vec.make_float3(0.0,0.0,0.0))
-        self.xyz_lookup2_gpu.fill(gpuarray.vec.make_float3(0.0,0.0,0.0))
+        self.xyz_lookup1_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0))
+        self.xyz_lookup2_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0))
 
         self.nlookup_calls = 0
 
     def update_xyz_lookup(self, source_position):
         for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1):
-            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), gpuarray.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), gpuarray.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), ga.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
 
         for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1):
-            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), gpuarray.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), gpuarray.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), ga.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
 
         for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1):
-            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), gpuarray.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), gpuarray.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), ga.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
 
         self.nlookup_calls += 1
 
     def clear_image(self):
-        self.image_gpu.fill(gpuarray.vec.make_float3(0.0,0.0,0.0))
+        self.image_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0))
 
         self.nimages = 0
 
     def update_image(self):
-        self.gpu.kernels.update_xyz_image(np.int32(self.width*self.height), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(685.0), gpuarray.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        self.gpu.kernels.update_xyz_image(np.int32(self.origins_gpu.size), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(685.0), ga.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
 
-        self.gpu.kernels.update_xyz_image(np.int32(self.width*self.height), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(545.0), gpuarray.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        self.gpu.kernels.update_xyz_image(np.int32(self.origins_gpu.size), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(545.0), ga.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
 
-        self.gpu.kernels.update_xyz_image(np.int32(self.width*self.height), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(445.0), gpuarray.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        self.gpu.kernels.update_xyz_image(np.int32(self.origins_gpu.size), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(445.0), ga.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
 
         self.nimages += 1
 
     def process_image(self):
-        self.gpu.kernels.process_image(np.int32(self.width*self.height), self.image_gpu, self.pixels_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.width*self.height)//self.nblocks+1,1))
+        self.gpu.kernels.process_image(np.int32(self.pixels_gpu.size), self.image_gpu, self.pixels_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size)//self.nblocks+1,1))
 
     def screenshot(self, dir='', start=0):
         root, ext = 'screenshot', 'png'
@@ -238,21 +253,25 @@ class Camera(Thread):
         print 'image saved to %s' % filename
 
     def rotate(self, phi, n):
-        self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size), self.origins_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
-        self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size), self.directions_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+        self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size), self.origins_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+        self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size), self.directions_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
 
         self.point = rotate(self.point, phi, n)
         self.axis1 = rotate(self.axis1, phi, n)
         self.axis2 = rotate(self.axis2, phi, n)
 
+        if self.enable3d:
+            self.point1 = rotate(self.point1, phi, n)
+            self.point2 = rotate(self.point2, phi, n)
+
         if self.render:
             self.clear_image()
 
         self.update()
 
     def rotate_around_point(self, phi, n, point, redraw=True):
-        self.gpu.kernels.rotate_around_point(np.int32(self.origins_gpu.size), self.origins_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), gpuarray.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
-        self.gpu.kernels.rotate(np.int32(self.directions_gpu.size), self.directions_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.directions_gpu.size//self.nblocks+1,1))
+        self.gpu.kernels.rotate_around_point(np.int32(self.origins_gpu.size), self.origins_gpu, np.float32(phi), ga.vec.make_float3(*n), ga.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
+        self.gpu.kernels.rotate(np.int32(self.directions_gpu.size), self.directions_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.directions_gpu.size//self.nblocks+1,1))
 
         self.axis1 = rotate(self.axis1, phi, n)
         self.axis2 = rotate(self.axis2, phi, n)
@@ -264,10 +283,14 @@ class Camera(Thread):
             self.update()
 
     def translate(self, v, redraw=True):
-        self.gpu.kernels.translate(np.int32(self.pixels_gpu.size), self.origins_gpu, gpuarray.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks,1))
+        self.gpu.kernels.translate(np.int32(self.pixels_gpu.size), self.origins_gpu, ga.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks,1))
 
         self.point += v
 
+        if self.enable3d:
+            self.point1 += v
+            self.point2 += v
+
         if redraw:
             if self.render:
                 self.clear_image()
@@ -282,11 +305,39 @@ class Camera(Thread):
             self.process_image()
         else:
             if self.alpha:
-                self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+                self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
             else:
-                self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+                self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+
+        if self.enable3d:
+            baseline = ga.min(self.distances_gpu).get().item()
+
+            if baseline < 1e9:
+                d1 = self.point1 - self.point
+                v1 = d1/np.linalg.norm(d1)
+                v1 *= baseline/60 - np.linalg.norm(d1)
+
+                self.gpu.kernels.translate(np.int32(self.pixels_gpu.size//2), self.origins_gpu[:self.pixels_gpu.size//2], ga.vec.make_float3(*v1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1))
+
+                self.point1 += v1
 
-        pygame.surfarray.blit_array(self.screen, self.pixels_gpu.get().reshape(self.size))
+                d2 = self.point2 - self.point
+                v2 = d2/np.linalg.norm(d2)
+                v2 *= baseline/60 - np.linalg.norm(d2)
+
+                self.gpu.kernels.translate(np.int32(self.pixels_gpu.size//2), self.origins_gpu[self.pixels_gpu.size//2:], ga.vec.make_float3(*v2), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1))
+
+                self.point2 += v2
+
+        pixels = self.pixels_gpu.get()
+
+        if self.enable3d:
+            pixels1 = pixels[:len(pixels)//2]
+            pixels2 = pixels[len(pixels)//2:]
+
+            pixels = (pixels1 & 0xff0000) | (pixels2 & 0xff)
+
+        pygame.surfarray.blit_array(self.screen, pixels.reshape(self.size))
         if self.doom_mode:
             self.screen.blit(self.doom_hud, self.doom_rect)
         pygame.display.flip()
@@ -331,7 +382,7 @@ class Camera(Thread):
 
             length = np.linalg.norm(movement)
 
-            mouse_direction = movement[0]*self.axis1 + movement[1]*self.axis2
+            mouse_direction = movement[0]*self.axis2 - movement[1]*self.axis1
             mouse_direction /= np.linalg.norm(mouse_direction)
 
             if pygame.key.get_mods() & (KMOD_LSHIFT | KMOD_RSHIFT):
@@ -339,7 +390,7 @@ class Camera(Thread):
                 self.translate(v)
             else:
                 phi = np.float32(2*np.pi*length/float(self.width))
-                n = rotate(mouse_direction, np.pi/2, -np.cross(self.axis1,self.axis2))
+                n = rotate(mouse_direction, np.pi/2, np.cross(self.axis1,self.axis2))
 
                 if pygame.key.get_mods() & KMOD_LCTRL:
                     self.rotate_around_point(phi, n, self.point)
@@ -348,11 +399,11 @@ class Camera(Thread):
 
         elif event.type == KEYDOWN:
             if event.key == K_a:
-                v = self.scale*self.axis1/10.0
+                v = self.scale*self.axis2/10.0
                 self.translate(v)
 
             elif event.key == K_d:
-                v = -self.scale*self.axis1/10.0
+                v = -self.scale*self.axis2/10.0
                 self.translate(v)
 
             elif event.key == K_w:
@@ -363,10 +414,6 @@ class Camera(Thread):
                 v = -self.scale*np.cross(self.axis1,self.axis2)/10.0
                 self.translate(v)
 
-            elif event.key == K_SPACE:
-                v = self.scale*self.axis2/10.0
-                self.translate(v)
-
             elif event.key == K_F6:
                 self.clear_xyz_lookup()
                 self.clear_image()
@@ -443,9 +490,9 @@ class Camera(Thread):
                 else:
                     accelerate_factor = 1.0
                 #print 'raw:', spnav_event
-                v1 = self.axis1
-                v2 = self.axis2
-                v3 = np.cross(self.axis1,self.axis2)
+                v1 = self.axis2
+                v2 = self.axis1
+                v3 = -np.cross(self.axis1,self.axis2)
 
                 v = -v1*spnav_event.motion.x + v2*spnav_event.motion.y \
                     + v3*spnav_event.motion.z
@@ -589,6 +636,8 @@ if __name__ == '__main__':
                       help='bits for z-ordering space axes', default=10)
     parser.add_option('-r', '--resolution', dest='resolution',
                       help='specify resolution', default='1024,576')
+    parser.add_option('--3d', action='store_true', dest='enable3d',
+                      help='enable 3d', default=False)
     parser.add_option('-i', dest='io_file', default=None)
     options, args = parser.parse_args()
 
@@ -619,9 +668,9 @@ if __name__ == '__main__':
 
     geometry = build(obj, options.bits)
     if options.io_file is not None:
-        camera = EventViewer(geometry, options.io_file, size=size)
+        camera = EventViewer(geometry, options.io_file, size=size, enable3d=options.enable3d)
     else:
-        camera = Camera(geometry, size)
+        camera = Camera(geometry, size, enable3d=options.enable3d)
 
     camera.start()
     camera.join()
diff --git a/src/alpha.h b/src/alpha.h
index e4a3a40..263fa1e 100644
--- a/src/alpha.h
+++ b/src/alpha.h
@@ -4,59 +4,14 @@
 #include "linalg.h"
 #include "intersect.h"
 #include "mesh.h"
+#include "sorting.h"
 
-template <class T>
-__device__ void swap(T &a, T &b)
-{
-	T temp = a;
-	a = b;
-	b = temp;
-}
+#include "stdio.h"
 
 #define ALPHA_DEPTH 10
 
-struct HitList
-{
-	unsigned int size;
-	unsigned int indices[ALPHA_DEPTH];
-	float distances[ALPHA_DEPTH];
-};
-
-__device__ void add_to_hit_list(const float &distance, const int &index, HitList &h)
-{
-	unsigned int i;
-	if (h.size >= ALPHA_DEPTH)
-	{
-		if (distance > h.distances[ALPHA_DEPTH-1])
-			return;
-
-		i = ALPHA_DEPTH-1;
-	}
-	else
-	{
-		i = h.size;
-		h.size += 1;
-	}
-
-	h.indices[i] = index;
-	h.distances[i] = distance;
-
-	while (i > 0 && h.distances[i-1] > h.distances[i])
-	{
-		swap(h.distances[i-1], h.distances[i]);
-		swap(h.indices[i-1], h.indices[i]);
-
-		i -= 1;
-	}
-}
-
-__device__ int get_color_alpha(const float3 &origin, const float3& direction)
+__device__ int get_color_alpha(const float3 &origin, const float3& direction, bool &hit, float &distance)
 {
-	HitList h;
-	h.size = 0;
-
-	float distance;
-
 	if (!intersect_node(origin, direction, g_start_node, -1.0f))
 		return 0;
 
@@ -69,6 +24,10 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction)
 
 	unsigned int i;
 
+	float distances[ALPHA_DEPTH];
+	unsigned int indices[ALPHA_DEPTH];
+	unsigned int n=0;
+
 	do
 	{
 		unsigned int first_child = tex1Dfetch(node_map, *node);
@@ -106,9 +65,26 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction)
 
 				if (intersect_triangle(origin, direction, v0, v1, v2, distance))
 				{
-					add_to_hit_list(distance, i, h);
+					if (n < 1)
+					{
+						distances[0] = distance;
+						indices[0] = i;
+					}
+					else
+					{
+						unsigned long j = searchsorted(n, distances, distance);
+
+						if (j <= ALPHA_DEPTH-1)
+						{
+							insert(ALPHA_DEPTH, distances, j, distance);
+							insert(ALPHA_DEPTH, indices, j, i);
+						}
+					}
+					
+					if (n < ALPHA_DEPTH)
+						n++;
 				}
-
+				
 			} // triangle loop
 
 			node--;
@@ -118,16 +94,25 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction)
 	} // while loop
 	while (node != head);
 
-	if (h.size < 1)
+	if (n < 1)
+	{
+		hit = false;
 		return 0;
+	}
+
+	hit = true;
+	distance = distances[0];
 
 	float scale = 1.0f;
 	float fr = 0.0f;
 	float fg = 0.0f;
 	float fb = 0.0f;
-	for (i=0; i < h.size; i++)
+	unsigned int index;
+	for (i=0; i < n; i++)
 	{
-		uint4 triangle_data = g_triangles[h.indices[i]];
+		index = indices[i];
+
+		uint4 triangle_data = g_triangles[index];
 
 		float3 v0 = g_vertices[triangle_data.x];
 		float3 v1 = g_vertices[triangle_data.y];
@@ -138,11 +123,11 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction)
 		if (cos_theta < 0.0f)
 			cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction);
 
-		unsigned int r0 = 0xff & (g_colors[h.indices[i]] >> 16);
-		unsigned int g0 = 0xff & (g_colors[h.indices[i]] >> 8);
-		unsigned int b0 = 0xff & g_colors[h.indices[i]];
+		unsigned int r0 = 0xff & (g_colors[index] >> 16);
+		unsigned int g0 = 0xff & (g_colors[index] >> 8);
+		unsigned int b0 = 0xff & g_colors[index];
 
-		float alpha = (255 - (0xff & (g_colors[h.indices[i]] >> 24)))/255.0f;
+		float alpha = (255 - (0xff & (g_colors[index] >> 24)))/255.0f;
 
 		fr += r0*scale*cos_theta*alpha;
 		fg += g0*scale*cos_theta*alpha;
diff --git a/src/kernel.cu b/src/kernel.cu
index 307412e..71b4153 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -239,7 +239,7 @@ __global__ void process_image(int nthreads, float3 *image, int *pixels, int nima
 
 } // process_image
 
-__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels)
+__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels, float *distances)
 {
 	int id = blockIdx.x*blockDim.x + threadIdx.x;
 
@@ -257,6 +257,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
 	if (triangle_index == -1)
 	{
 		pixels[id] = 0;
+		distances[id] = 1e9f;
 	}
 	else
 	{
@@ -267,6 +268,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
 		float3 v2 = g_vertices[triangle_data.z];
 
 		pixels[id] = get_color(direction, v0, v1, v2, g_colors[triangle_index]);
+		distances[id] = distance;
 	}
 
 } // ray_trace
@@ -277,7 +279,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
    color whose brightness is determined by the cosine of the angle between
    the ray and the normal of the triangle it intersected, else set the pixel
    to 0. */
-__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels)
+__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels, float *distances)
 {
 	int id = blockIdx.x*blockDim.x + threadIdx.x;
 
@@ -288,7 +290,15 @@ __global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directi
 	float3 direction = directions[id];
 	direction /= norm(direction);
 
-	pixels[id] = get_color_alpha(position, direction);
+	bool hit;
+	float distance;
+
+	pixels[id] = get_color_alpha(position, direction, hit, distance);
+
+	if (hit)
+		distances[id] = distance;
+	else
+		distances[id] = 1e9;
 
 } // ray_trace
author	Anthony LaTorre <tlatorre9@gmail.com>	2011-08-25 21:42:04 -0400
committer	Anthony LaTorre <tlatorre9@gmail.com>	2011-08-25 21:42:04 -0400
commit	fa8d1082f9d989f2a3819540a9bf30dc67618709 (patch)
tree	7560f7e21e0e30ad48cac77295d08bd8ad65c549
parent	61f09cb550c619c5c2a5300d96f8590be77682bb (diff)
download	chroma-fa8d1082f9d989f2a3819540a9bf30dc67618709.tar.gz chroma-fa8d1082f9d989f2a3819540a9bf30dc67618709.tar.bz2 chroma-fa8d1082f9d989f2a3819540a9bf30dc67618709.zip