4 files changed, 91 insertions, 41 deletions
diff --git a/camera.py b/camera.py
index 479d47a..1e31a81 100755
--- a/camera.py
+++ b/camera.py
@@ -165,26 +165,34 @@ class Camera(Thread):
         self.axis1 = np.array([0,0,1], float)
         self.axis2 = np.array([1,0,0], float)
 
-        #origins, directions = get_rays(self.point, self.size)
+        self.film_width = 0.035
 
         if self.enable3d:
-            self.point1 = self.point-(30.5e-3,0,0)
-            self.point2 = self.point+(30.5e-3,0,0)
+            self.point1 = self.point-(self.scale/60,0,0)
+            self.point2 = self.point+(self.scale/60,0,0)
 
-            origins1, directions1 = project.from_film(self.point1, axis2=np.cross(-self.point1,(0,0,1)), size=self.size)
-            origins2, directions2 = project.from_film(self.point2, axis2=np.cross(-self.point2,(0,0,1)), size=self.size)
+            self.viewing_angle = 0.0
+
+            origins1, directions1 = project.from_film(self.point1, size=self.size, width=self.film_width)
+            origins2, directions2 = project.from_film(self.point2, size=self.size, width=self.film_width)
 
             origins = np.concatenate([origins1,origins2])
             directions = np.concatenate([directions1,directions2])
+
+            scope_origins, scope_directions = project.from_film(self.point, size=np.array(self.size)/4.0, width=self.film_width/4.0)
+
+            self.scope_origins_gpu = ga.to_gpu(to_float3(scope_origins))
+            self.scope_directions_gpu = ga.to_gpu(to_float3(scope_directions))
+
+            self.distances_gpu = ga.empty(self.scope_origins_gpu.size, dtype=np.float32)
         else:
-            origins, directions = project.from_film(self.point, size=self.size)
+            origins, directions = project.from_film(self.point, size=self.size, width=self.film_width)
 
         self.origins_gpu = ga.to_gpu(to_float3(origins))
         self.directions_gpu = ga.to_gpu(to_float3(directions))
         self.pixels_gpu = ga.zeros(self.origins_gpu.size, dtype=np.int32)
-        self.distances_gpu = ga.empty(self.origins_gpu.size, dtype=np.float32)
 
-        self.alpha = False#True
+        self.alpha = True
         self.movie = False
         self.movie_index = 0
         self.movie_dir = None
@@ -192,11 +200,11 @@ class Camera(Thread):
 
     @timeit
     def initialize_render(self):
-        self.rng_states_gpu = cuda.mem_alloc(self.width*self.height*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
-        self.gpu.kernels.init_rng(np.int32(self.width*self.height), self.rng_states_gpu, np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        self.rng_states_gpu = cuda.mem_alloc(self.origins_gpu.size*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
+        self.gpu.kernels.init_rng(np.int32(self.origins_gpu.size), self.rng_states_gpu, np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
         self.xyz_lookup1_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3)
         self.xyz_lookup2_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3)
-        self.image_gpu = ga.zeros(self.width*self.height, dtype=ga.vec.float3)
+        self.image_gpu = ga.zeros(self.pixels_gpu.size, dtype=ga.vec.float3)
         self.gpu.context.synchronize()
 
         self.source_position = self.point
@@ -212,14 +220,14 @@ class Camera(Thread):
         self.nlookup_calls = 0
 
     def update_xyz_lookup(self, source_position):
-        for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1):
-            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), ga.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        for i in range(self.xyz_lookup1_gpu.size//(self.origins_gpu.size)+1):
+            self.gpu.kernels.update_xyz_lookup(np.int32(self.origins_gpu.size), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.origins_gpu.size), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), ga.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
 
-        for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1):
-            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), ga.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        for i in range(self.xyz_lookup1_gpu.size//(self.origins_gpu.size)+1):
+            self.gpu.kernels.update_xyz_lookup(np.int32(self.origins_gpu.size), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.origins_gpu.size), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), ga.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
 
-        for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1):
-            self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), ga.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1))
+        for i in range(self.xyz_lookup1_gpu.size//(self.origins_gpu.size)+1):
+            self.gpu.kernels.update_xyz_lookup(np.int32(self.origins_gpu.size), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.origins_gpu.size), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), ga.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
 
         self.nlookup_calls += 1
 
@@ -261,6 +269,9 @@ class Camera(Thread):
         self.axis2 = rotate(self.axis2, phi, n)
 
         if self.enable3d:
+            self.gpu.kernels.rotate(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+            self.gpu.kernels.rotate(np.int32(self.scope_directions_gpu.size), self.scope_directions_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+
             self.point1 = rotate(self.point1, phi, n)
             self.point2 = rotate(self.point2, phi, n)
 
@@ -276,6 +287,10 @@ class Camera(Thread):
         self.axis1 = rotate(self.axis1, phi, n)
         self.axis2 = rotate(self.axis2, phi, n)
 
+        if self.enable3d:
+            self.gpu.kernels.rotate_around_point(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, np.float32(phi), ga.vec.make_float3(*n), ga.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1))
+            self.gpu.kernels.rotate(np.int32(self.scope_directions_gpu.size), self.scope_directions_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.directions_gpu.size//self.nblocks+1,1))
+
         if redraw:
             if self.render:
                 self.clear_image()
@@ -288,6 +303,8 @@ class Camera(Thread):
         self.point += v
 
         if self.enable3d:
+            self.gpu.kernels.translate(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, ga.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks,1))
+
             self.point1 += v
             self.point2 += v
 
@@ -297,7 +314,7 @@ class Camera(Thread):
 
             self.update()
 
-    def update(self):
+    def update_pixels(self):
         if self.render:
             while self.nlookup_calls < 10:
                 self.update_xyz_lookup(self.source_position)
@@ -305,11 +322,14 @@ class Camera(Thread):
             self.process_image()
         else:
             if self.alpha:
-                self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+                self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
             else:
-                self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
+                self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1))
 
+    def update(self):
         if self.enable3d:
+            self.gpu.kernels.distance_to_mesh(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, self.scope_directions_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.scope_origins_gpu.size//self.nblocks,1))
+
             baseline = ga.min(self.distances_gpu).get().item()
 
             if baseline < 1e9:
@@ -329,6 +349,24 @@ class Camera(Thread):
 
                 self.point2 += v2
 
+            direction = np.cross(self.axis1,self.axis2)
+            direction /= np.linalg.norm(direction)
+            direction1 = self.point + direction*baseline - self.point1
+            direction1 /= np.linalg.norm(direction1)
+
+            new_viewing_angle = np.arccos(direction1.dot(direction))
+
+            phi = new_viewing_angle - self.viewing_angle
+
+            self.gpu.kernels.rotate_around_point(np.int32(self.pixels_gpu.size//2), self.origins_gpu[:self.pixels_gpu.size//2], np.float32(phi), ga.vec.make_float3(*self.axis1), ga.vec.make_float3(*self.point1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1))
+            self.gpu.kernels.rotate_around_point(np.int32(self.pixels_gpu.size//2), self.origins_gpu[self.pixels_gpu.size//2:], np.float32(-phi), ga.vec.make_float3(*self.axis1), ga.vec.make_float3(*self.point2), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1))
+            self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size//2), self.directions_gpu[:self.pixels_gpu.size//2], np.float32(phi), ga.vec.make_float3(*self.axis1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1))
+            self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size//2), self.directions_gpu[self.pixels_gpu.size//2:], np.float32(-phi), ga.vec.make_float3(*self.axis1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1))
+
+            self.viewing_angle = new_viewing_angle
+
+        self.update_pixels()
+
         pixels = self.pixels_gpu.get()
 
         if self.enable3d:
@@ -386,7 +424,7 @@ class Camera(Thread):
             mouse_direction /= np.linalg.norm(mouse_direction)
 
             if pygame.key.get_mods() & (KMOD_LSHIFT | KMOD_RSHIFT):
-                v = mouse_direction*self.scale*length/float(self.width)
+                v = -mouse_direction*self.scale*length/float(self.width)
                 self.translate(v)
             else:
                 phi = np.float32(2*np.pi*length/float(self.width))
@@ -549,12 +587,13 @@ class Camera(Thread):
                 self.spnav = False
 
         self.update()
+
+        if self.enable3d:
+            self.update()
         
         self.done = False
         self.clicked = False
 
-        #current_layer = 0
-
         while not self.done:
             self.clock.tick(20)
 
diff --git a/gpu.py b/gpu.py
index faa282c..585ab5f 100644
--- a/gpu.py
+++ b/gpu.py
@@ -107,7 +107,7 @@ class GPU(object):
         cuda_options = ['-I' + dirname(chroma.src.__file__), '--use_fast_math', '--ptxas-options=-v']
 
         self.module = SourceModule(chroma.src.kernel, options=cuda_options, no_extern_c=True)
-        self.kernels = CUDAFuncs(self.module, ['ray_trace', 'ray_trace_alpha', 'rotate', 'rotate_around_point', 'translate', 'update_xyz_lookup', 'update_xyz_image', 'process_image', 'init_rng'])
+        self.kernels = CUDAFuncs(self.module, ['ray_trace', 'ray_trace_alpha', 'distance_to_mesh', 'rotate', 'rotate_around_point', 'translate', 'update_xyz_lookup', 'update_xyz_image', 'process_image', 'init_rng'])
 
         self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids'])
 
diff --git a/src/alpha.h b/src/alpha.h
index 263fa1e..ac75834 100644
--- a/src/alpha.h
+++ b/src/alpha.h
@@ -10,8 +10,10 @@
 
 #define ALPHA_DEPTH 10
 
-__device__ int get_color_alpha(const float3 &origin, const float3& direction, bool &hit, float &distance)
+__device__ int get_color_alpha(const float3 &origin, const float3& direction)
 {
+	float distance;
+
 	if (!intersect_node(origin, direction, g_start_node, -1.0f))
 		return 0;
 
@@ -95,13 +97,7 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction, bo
 	while (node != head);
 
 	if (n < 1)
-	{
-		hit = false;
 		return 0;
-	}
-
-	hit = true;
-	distance = distances[0];
 
 	float scale = 1.0f;
 	float fr = 0.0f;
diff --git a/src/kernel.cu b/src/kernel.cu
index 71b4153..d36d260 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -239,7 +239,29 @@ __global__ void process_image(int nthreads, float3 *image, int *pixels, int nima
 
 } // process_image
 
-__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels, float *distances)
+__global__ void distance_to_mesh(int nthreads, float3 *positions, float3 *directions, float *distances)
+{
+	/* For each ray index below nthreads, normalize directions[] and store in
+	   distances[] the distance filled in by intersect_mesh(), or 1e9 when
+	   intersect_mesh() returns -1 (treated here as "no triangle hit"). */
+	int ray = blockIdx.x*blockDim.x + threadIdx.x;
+
+	// Threads whose index is not below nthreads have nothing to do.
+	if (ray >= nthreads)
+		return;
+
+	float3 origin = positions[ray];
+	float3 heading = directions[ray];
+	heading /= norm(heading);
+
+	float range;
+	const bool missed = (intersect_mesh(origin, heading, range) == -1);
+
+	// 1e9f is the miss sentinel; range is only read when a triangle was hit.
+	distances[ray] = missed ? 1e9f : range;
+} // distance_to_mesh
+
+__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels)
 {
 	int id = blockIdx.x*blockDim.x + threadIdx.x;
 
@@ -257,7 +279,6 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
 	if (triangle_index == -1)
 	{
 		pixels[id] = 0;
-		distances[id] = 1e9f;
 	}
 	else
 	{
@@ -268,7 +289,6 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
 		float3 v2 = g_vertices[triangle_data.z];
 
 		pixels[id] = get_color(direction, v0, v1, v2, g_colors[triangle_index]);
-		distances[id] = distance;
 	}
 
 } // ray_trace
@@ -279,7 +299,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
    color whose brightness is determined by the cosine of the angle between
    the ray and the normal of the triangle it intersected, else set the pixel
    to 0. */
-__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels, float *distances)
+__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels)
 {
 	int id = blockIdx.x*blockDim.x + threadIdx.x;
 
@@ -293,12 +313,7 @@ __global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directi
 	bool hit;
 	float distance;
 
-	pixels[id] = get_color_alpha(position, direction, hit, distance);
-
-	if (hit)
-		distances[id] = distance;
-	else
-		distances[id] = 1e9;
+	pixels[id] = get_color_alpha(position, direction);
 
 } // ray_trace