diff options
-rwxr-xr-x | camera.py | 85 | ||||
-rw-r--r-- | gpu.py | 2 | ||||
-rw-r--r-- | src/alpha.h | 10 | ||||
-rw-r--r-- | src/kernel.cu | 35 |
4 files changed, 91 insertions, 41 deletions
@@ -165,26 +165,34 @@ class Camera(Thread): self.axis1 = np.array([0,0,1], float) self.axis2 = np.array([1,0,0], float) - #origins, directions = get_rays(self.point, self.size) + self.film_width = 0.035 if self.enable3d: - self.point1 = self.point-(30.5e-3,0,0) - self.point2 = self.point+(30.5e-3,0,0) + self.point1 = self.point-(self.scale/60,0,0) + self.point2 = self.point+(self.scale/60,0,0) - origins1, directions1 = project.from_film(self.point1, axis2=np.cross(-self.point1,(0,0,1)), size=self.size) - origins2, directions2 = project.from_film(self.point2, axis2=np.cross(-self.point2,(0,0,1)), size=self.size) + self.viewing_angle = 0.0 + + origins1, directions1 = project.from_film(self.point1, size=self.size, width=self.film_width) + origins2, directions2 = project.from_film(self.point2, size=self.size, width=self.film_width) origins = np.concatenate([origins1,origins2]) directions = np.concatenate([directions1,directions2]) + + scope_origins, scope_directions = project.from_film(self.point, size=np.array(self.size)/4.0, width=self.film_width/4.0) + + self.scope_origins_gpu = ga.to_gpu(to_float3(scope_origins)) + self.scope_directions_gpu = ga.to_gpu(to_float3(scope_directions)) + + self.distances_gpu = ga.empty(self.scope_origins_gpu.size, dtype=np.float32) else: - origins, directions = project.from_film(self.point, size=self.size) + origins, directions = project.from_film(self.point, size=self.size, width=self.film_width) self.origins_gpu = ga.to_gpu(to_float3(origins)) self.directions_gpu = ga.to_gpu(to_float3(directions)) self.pixels_gpu = ga.zeros(self.origins_gpu.size, dtype=np.int32) - self.distances_gpu = ga.empty(self.origins_gpu.size, dtype=np.float32) - self.alpha = False#True + self.alpha = True self.movie = False self.movie_index = 0 self.movie_dir = None @@ -192,11 +200,11 @@ class Camera(Thread): @timeit def initialize_render(self): - self.rng_states_gpu = cuda.mem_alloc(self.width*self.height*sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) - self.gpu.kernels.init_rng(np.int32(self.width*self.height), self.rng_states_gpu, np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + self.rng_states_gpu = cuda.mem_alloc(self.origins_gpu.size*sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) + self.gpu.kernels.init_rng(np.int32(self.origins_gpu.size), self.rng_states_gpu, np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1)) self.xyz_lookup1_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3) self.xyz_lookup2_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3) - self.image_gpu = ga.zeros(self.width*self.height, dtype=ga.vec.float3) + self.image_gpu = ga.zeros(self.pixels_gpu.size, dtype=ga.vec.float3) self.gpu.context.synchronize() self.source_position = self.point @@ -212,14 +220,14 @@ class Camera(Thread): self.nlookup_calls = 0 def update_xyz_lookup(self, source_position): - for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1): - self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), ga.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + for i in range(self.xyz_lookup1_gpu.size//(self.origins_gpu.size)+1): + self.gpu.kernels.update_xyz_lookup(np.int32(self.origins_gpu.size), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.origins_gpu.size), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), ga.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1)) - for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1): - self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), ga.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + for i in range(self.xyz_lookup1_gpu.size//(self.origins_gpu.size)+1): + self.gpu.kernels.update_xyz_lookup(np.int32(self.origins_gpu.size), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.origins_gpu.size), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), ga.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1)) - for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1): - self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), ga.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + for i in range(self.xyz_lookup1_gpu.size//(self.origins_gpu.size)+1): + self.gpu.kernels.update_xyz_lookup(np.int32(self.origins_gpu.size), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.origins_gpu.size), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), ga.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1)) self.nlookup_calls += 1 @@ -261,6 +269,9 @@ class Camera(Thread): self.axis2 = rotate(self.axis2, phi, n) if self.enable3d: + self.gpu.kernels.rotate(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + self.gpu.kernels.rotate(np.int32(self.scope_directions_gpu.size), self.scope_directions_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + self.point1 = rotate(self.point1, phi, n) self.point2 = rotate(self.point2, phi, n) @@ -276,6 +287,10 @@ class Camera(Thread): self.axis1 = rotate(self.axis1, phi, n) self.axis2 = rotate(self.axis2, phi, n) + if self.enable3d: + self.gpu.kernels.rotate_around_point(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, np.float32(phi), ga.vec.make_float3(*n), ga.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1)) + self.gpu.kernels.rotate(np.int32(self.scope_directions_gpu.size), self.scope_directions_gpu, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.directions_gpu.size//self.nblocks+1,1)) + if redraw: if self.render: self.clear_image() @@ -288,6 +303,8 @@ class Camera(Thread): self.point += v if self.enable3d: + self.gpu.kernels.translate(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, ga.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks,1)) + self.point1 += v self.point2 += v @@ -297,7 +314,7 @@ class Camera(Thread): self.update() - def update(self): + def update_pixels(self): if self.render: while self.nlookup_calls < 10: self.update_xyz_lookup(self.source_position) @@ -305,11 +322,14 @@ class Camera(Thread): self.process_image() else: if self.alpha: - self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) else: - self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + def update(self): if self.enable3d: + self.gpu.kernels.distance_to_mesh(np.int32(self.scope_origins_gpu.size), self.scope_origins_gpu, self.scope_directions_gpu, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.scope_origins_gpu.size//self.nblocks,1)) + baseline = ga.min(self.distances_gpu).get().item() if baseline < 1e9: @@ -329,6 +349,24 @@ class Camera(Thread): self.point2 += v2 + direction = np.cross(self.axis1,self.axis2) + direction /= np.linalg.norm(direction) + direction1 = self.point + direction*baseline - self.point1 + direction1 /= np.linalg.norm(direction1) + + new_viewing_angle = np.arccos(direction1.dot(direction)) + + phi = new_viewing_angle - self.viewing_angle + + self.gpu.kernels.rotate_around_point(np.int32(self.pixels_gpu.size//2), self.origins_gpu[:self.pixels_gpu.size//2], np.float32(phi), ga.vec.make_float3(*self.axis1), ga.vec.make_float3(*self.point1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1)) + self.gpu.kernels.rotate_around_point(np.int32(self.pixels_gpu.size//2), self.origins_gpu[self.pixels_gpu.size//2:], np.float32(-phi), ga.vec.make_float3(*self.axis1), ga.vec.make_float3(*self.point2), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1)) + self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size//2), self.directions_gpu[:self.pixels_gpu.size//2], np.float32(phi), ga.vec.make_float3(*self.axis1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1)) + self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size//2), self.directions_gpu[self.pixels_gpu.size//2:], np.float32(-phi), ga.vec.make_float3(*self.axis1), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size//2)//self.nblocks+1,1)) + + self.viewing_angle = new_viewing_angle + + self.update_pixels() + pixels = self.pixels_gpu.get() if self.enable3d: @@ -386,7 +424,7 @@ class Camera(Thread): mouse_direction /= np.linalg.norm(mouse_direction) if pygame.key.get_mods() & (KMOD_LSHIFT | KMOD_RSHIFT): - v = mouse_direction*self.scale*length/float(self.width) + v = -mouse_direction*self.scale*length/float(self.width) self.translate(v) else: phi = np.float32(2*np.pi*length/float(self.width)) @@ -549,12 +587,13 @@ class Camera(Thread): self.spnav = False self.update() + + if self.enable3d: + self.update() self.done = False self.clicked = False - #current_layer = 0 - while not self.done: self.clock.tick(20) @@ -107,7 +107,7 @@ class GPU(object): cuda_options = ['-I' + dirname(chroma.src.__file__), '--use_fast_math', '--ptxas-options=-v'] self.module = SourceModule(chroma.src.kernel, options=cuda_options, no_extern_c=True) - self.kernels = CUDAFuncs(self.module, ['ray_trace', 'ray_trace_alpha', 'rotate', 'rotate_around_point', 'translate', 'update_xyz_lookup', 'update_xyz_image', 'process_image', 'init_rng']) + self.kernels = CUDAFuncs(self.module, ['ray_trace', 'ray_trace_alpha', 'distance_to_mesh', 'rotate', 'rotate_around_point', 'translate', 'update_xyz_lookup', 'update_xyz_image', 'process_image', 'init_rng']) self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids']) diff --git a/src/alpha.h b/src/alpha.h index 263fa1e..ac75834 100644 --- a/src/alpha.h +++ b/src/alpha.h @@ -10,8 +10,10 @@ #define ALPHA_DEPTH 10 -__device__ int get_color_alpha(const float3 &origin, const float3& direction, bool &hit, float &distance) +__device__ int get_color_alpha(const float3 &origin, const float3& direction) { + float distance; + if (!intersect_node(origin, direction, g_start_node, -1.0f)) return 0; @@ -95,13 +97,7 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction, bo while (node != head); if (n < 1) - { - hit = false; return 0; - } - - hit = true; - distance = distances[0]; float scale = 1.0f; float fr = 0.0f; diff --git a/src/kernel.cu b/src/kernel.cu index 71b4153..d36d260 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -239,7 +239,29 @@ __global__ void process_image(int nthreads, float3 *image, int *pixels, int nima } // process_image -__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels, float *distances) +__global__ void distance_to_mesh(int nthreads, float3 *positions, float3 *directions, float *distances) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + float3 position = positions[id]; + float3 direction = directions[id]; + direction /= norm(direction); + + float distance; + + int triangle_index = intersect_mesh(position, direction, distance); + + if (triangle_index == -1) + distances[id] = 1e9; + else + distances[id] = distance; + +} // distance_to_mesh + +__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -257,7 +279,6 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i if (triangle_index == -1) { pixels[id] = 0; - distances[id] = 1e9f; } else { @@ -268,7 +289,6 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i float3 v2 = g_vertices[triangle_data.z]; pixels[id] = get_color(direction, v0, v1, v2, g_colors[triangle_index]); - distances[id] = distance; } } // ray_trace @@ -279,7 +299,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i color whose brightness is determined by the cosine of the angle between the ray and the normal of the triangle it intersected, else set the pixel to 0. */ -__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels, float *distances) +__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -293,12 +313,7 @@ __global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directi bool hit; float distance; - pixels[id] = get_color_alpha(position, direction, hit, distance); - - if (hit) - distances[id] = distance; - else - distances[id] = 1e9; + pixels[id] = get_color_alpha(position, direction); } // ray_trace |