diff options
-rw-r--r-- | detectors/lbne.py | 2 | ||||
-rw-r--r-- | geometry.py | 29 | ||||
-rwxr-xr-x | render.py | 2 | ||||
-rw-r--r-- | src/alpha.h | 157 | ||||
-rw-r--r-- | src/intersect.h | 4 | ||||
-rw-r--r-- | src/kernel.cu | 22 | ||||
-rw-r--r-- | src/mesh.h | 19 | ||||
-rwxr-xr-x | view.py | 6 |
8 files changed, 197 insertions, 44 deletions
diff --git a/detectors/lbne.py b/detectors/lbne.py index d7b6843..8fe2c18 100644 --- a/detectors/lbne.py +++ b/detectors/lbne.py @@ -20,7 +20,7 @@ def build_lbne(radius, height, nstrings, pmts_per_string, endcap_spacing): # outer cylinder cylinder_mesh = cylinder(radius, radius, height+height/(pmts_per_string-1), theta=(2*np.pi/nstrings)/4) cylinder_mesh.vertices = rotate(cylinder_mesh.vertices, np.pi/2, (-1,0,0)) - lbne.add_solid(Solid(cylinder_mesh, water, vacuum, black_surface, 0xff)) + lbne.add_solid(Solid(cylinder_mesh, water, vacuum, black_surface, 0x990000ff)) lbne.pmtids = [] diff --git a/geometry.py b/geometry.py index 875d403..c76b089 100644 --- a/geometry.py +++ b/geometry.py @@ -58,7 +58,7 @@ class Mesh(object): return Mesh(np.concatenate((self.vertices, other.vertices)), np.concatenate((self.triangles, other.triangles + len(self.vertices)))) class Solid(object): - def __init__(self, mesh, material1=None, material2=None, surface=None, color=0xffffffff): + def __init__(self, mesh, material1=None, material2=None, surface=None, color=0xffffff): self.mesh = mesh if np.iterable(material1): @@ -349,7 +349,19 @@ class Geometry(object): if unique_bit_shifted_zvalues.size == 1: break - def load(self, module, color=False): + def load_colors(self, module): + self.colors = \ + np.concatenate([solid.color for solid in self.solids])[reorder] + + if not hasattr(self, 'colors_gpu'): + raise Exception('cannot load colors before geometry is loaded.') + + self.colors_gpu.set(self.colors.astype(np.uint32)) + + set_colors = module.get_function('set_colors') + set_colors(self.colors_gpu, block=(1,1,1), grid=(1,1)) + + def load(self, module): """ Load the bounding volume hierarchy onto the GPU module, bind it to the appropriate textures, and return a list @@ -404,11 +416,7 @@ class Geometry(object): triangles['x'] = self.mesh.triangles[:,0] triangles['y'] = self.mesh.triangles[:,1] triangles['z'] = self.mesh.triangles[:,2] - - if color: - triangles['w'] = self.colors - else: - triangles['w'] = ((self.material1_index & 0xff) << 24) | ((self.material2_index & 0xff) << 16) | ((self.surface_index & 0xff) << 8) + triangles['w'] = ((self.material1_index & 0xff) << 24) | ((self.material2_index & 0xff) << 16) | ((self.surface_index & 0xff) << 8) lower_bounds = np.empty(self.lower_bounds.shape[0], dtype=gpuarray.vec.float4) lower_bounds['x'] = self.lower_bounds[:,0] @@ -422,6 +430,7 @@ class Geometry(object): self.vertices_gpu = cuda.to_device(vertices) self.triangles_gpu = cuda.to_device(triangles) + self.colors_gpu = gpuarray.to_gpu(self.colors.astype(np.uint32)) self.lower_bounds_gpu = cuda.to_device(lower_bounds) self.upper_bounds_gpu = cuda.to_device(upper_bounds) self.node_map_gpu = cuda.to_device(self.node_map) @@ -449,12 +458,8 @@ class Geometry(object): print format_array('node_length', self.node_length) print '%-15s %6s %6s' % ('total', '', format_size(vertices.nbytes + triangles.nbytes + self.lower_bounds.nbytes + self.upper_bounds.nbytes + self.node_map.nbytes + self.node_length.nbytes)) - #set_pointer = module.get_function('set_pointer') - #set_pointer(self.triangles_gpu, self.vertices_gpu, - # block=(1,1,1), grid=(1,1)) - set_global_mesh_variables = module.get_function('set_global_mesh_variables') - set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, np.int32(self.node_map.size-1), np.int32(self.first_node), block=(1,1,1), grid=(1,1)) + set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(self.node_map.size-1), np.uint32(self.first_node), block=(1,1,1), grid=(1,1)) self.lower_bounds_tex = module.get_texref('lower_bounds') self.upper_bounds_tex = module.get_texref('upper_bounds') @@ -24,7 +24,7 @@ def render(viewable, size=(800,600), name='', bits=8, make_movie=False): print 'device %s' % autoinit.device.name() module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) - geometry.load(module, color=False) + geometry.load(module) cuda_build_rgb_lookup = module.get_function('build_rgb_lookup') cuda_render = module.get_function('render') cuda_rotate = module.get_function('rotate') diff --git a/src/alpha.h b/src/alpha.h new file mode 100644 index 0000000..afd7ea5 --- /dev/null +++ b/src/alpha.h @@ -0,0 +1,157 @@ +#ifndef __ALPHA_H__ +#define __ALPHA_H__ + +#include "linalg.h" +#include "intersect.h" +#include "mesh.h" + +template <class T> +__device__ void swap(T &a, T &b) +{ + T temp = a; + a = b; + b = temp; +} + +#define ALPHA_DEPTH 5 + +struct HitList +{ + int size; + int indices[ALPHA_DEPTH]; + float distances[ALPHA_DEPTH]; +}; + +__device__ void add_to_hit_list(const float &distance, const int &index, HitList &h) +{ + int i; + if (h.size >= ALPHA_DEPTH) + { + if (distance > h.distances[ALPHA_DEPTH-1]) + return; + + i = ALPHA_DEPTH-1; + } + else + { + i = h.size; + h.size += 1; + } + + h.indices[i] = index; + h.distances[i] = distance; + + while (i > 0 && h.distances[i-1] > h.distances[i]) + { + swap(h.distances[i-1], h.distances[i]); + swap(h.indices[i-1], h.indices[i]); + + i -= 1; + } +} + +__device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& direction) +{ + HitList h; + h.size = 0; + + float distance; + + if (!intersect_node(origin, direction, g_start_node)) + return 0; + + int stack[STACK_SIZE]; + + int *head = &stack[0]; + int *node = &stack[1]; + int *tail = &stack[STACK_SIZE-1]; + *node = g_start_node; + + int i; + + do + { + int first_child = tex1Dfetch(node_map, *node); + int child_count = tex1Dfetch(node_length, *node); + + while (*node >= g_first_node && child_count == 1) + { + *node = first_child; + first_child = tex1Dfetch(node_map, *node); + child_count = tex1Dfetch(node_length, *node); + } + + if (*node >= g_first_node) + { + for (i=0; i < child_count; i++) + { + if (intersect_node(origin, direction, first_child+i)) + { + *node = first_child+i; + node++; + } + } + + node--; + } + else // node is a leaf + { + for (i=0; i < child_count; i++) + { + uint4 triangle_data = g_triangles[first_child+i]; + + float3 v0 = g_vertices[triangle_data.x]; + float3 v1 = g_vertices[triangle_data.y]; + float3 v2 = g_vertices[triangle_data.z]; + + if (intersect_triangle(origin, direction, v0, v1, v2, distance)) + { + add_to_hit_list(distance, first_child+i, h); + } + + } // triangle loop + + node--; + + } // node is a leaf + + } // while loop + while (node != head); + + if (h.size < 1) + return 0; + + float scale = 1.0; + unsigned int r = 0; + unsigned int g = 0; + unsigned int b = 0; + for (i=0; i < h.size; i++) + { + uint4 triangle_data = g_triangles[h.indices[i]]; + + float3 v0 = g_vertices[triangle_data.x]; + float3 v1 = g_vertices[triangle_data.y]; + float3 v2 = g_vertices[triangle_data.z]; + + float cos_theta = dot(normalize(cross(v1-v0,v2-v1)),-direction); + + if (cos_theta < 0.0f) + cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction); + + unsigned int r0 = 0xff & (g_colors[h.indices[i]] >> 16); + unsigned int g0 = 0xff & (g_colors[h.indices[i]] >> 8); + unsigned int b0 = 0xff & g_colors[h.indices[i]]; + + float alpha = (255 - (0xff & (g_colors[h.indices[i]] >> 24)))/255.0f; + + r += floorf(r0*scale*cos_theta*alpha); + g += floorf(g0*scale*cos_theta*alpha); + b += floorf(b0*scale*cos_theta*alpha); + + scale *= (1.0f-alpha); + } + + return r << 16 | g << 8 | b; +} + +#endif diff --git a/src/intersect.h b/src/intersect.h index b1713d7..c400f1c 100644 --- a/src/intersect.h +++ b/src/intersect.h @@ -9,7 +9,7 @@ #include "matrix.h" #include "rotate.h" -#define EPSILON 1.0e-3f +#define EPSILON 0.0f /* Test the intersection between a ray starting from `origin` traveling in the direction `direction` and a triangle defined by the vertices `v0`, `v1`, and @@ -69,7 +69,7 @@ __device__ int get_color(const float3 &direction, const float3 &v0, const float3 unsigned int b = 0xFF & base_color; if (scale < 0.0f) - scale = dot(-normalize(cross(v1-v0,v2-v0)),-direction); + scale = dot(-normalize(cross(v1-v0,v2-v1)),-direction); r = floorf(r*scale); g = floorf(g*scale); diff --git a/src/kernel.cu b/src/kernel.cu index 711ae80..0dc5639 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -7,6 +7,7 @@ #include "rotate.h" #include "mesh.h" #include "photon.h" +#include "alpha.h" #define RED_WAVELENGTH 685 #define BLUE_WAVELENGTH 465 @@ -18,7 +19,7 @@ __device__ void fAtomicAdd(float *addr, float data) data = atomicExch(addr, data+atomicExch(addr, 0.0f)); } -__device__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max_steps) +__device__ __noinline__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max_steps) { int steps = 0; while (steps < max_steps) @@ -289,24 +290,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i float3 direction = directions[id]; direction /= norm(direction); - float distance; - - int triangle_index = intersect_mesh(position, direction, distance); - - if (triangle_index == -1) - { - pixels[id] = 0x000000; - } - else - { - uint4 triangle_data = g_triangles[triangle_index]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - pixels[id] = get_color(direction, v0, v1, v2, triangle_data.w); - } + pixels[id] = get_color_alpha(position, direction); } // ray_trace @@ -8,8 +8,9 @@ /* flattened triangle mesh */ __device__ float3 *g_vertices; __device__ uint4 *g_triangles; -__device__ int g_start_node; -__device__ int g_first_node; +__device__ unsigned int *g_colors; +__device__ unsigned int g_start_node; +__device__ unsigned int g_first_node; /* lower/upper bounds for the bounding box associated with each node/leaf */ texture<float4, 1, cudaReadModeElementType> upper_bounds; @@ -47,7 +48,7 @@ __device__ bool intersect_node(const float3 &origin, const float3 &direction, co direction `direction` and the global mesh texture. If the ray intersects the texture return the index of the triangle which the ray intersected, else return -1. */ -__device__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1) +__device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1) { int triangle_index = -1; @@ -133,14 +134,20 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, flo extern "C" { -__global__ void set_global_mesh_variables(uint4 *triangle_ptr, float3 *vertex_ptr, int start_node, int first_node) +__global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node) { - g_triangles = triangle_ptr; - g_vertices = vertex_ptr; + g_triangles = triangles; + g_vertices = vertices; + g_colors = colors; g_start_node = start_node; g_first_node = first_node; } +__global__ void set_colors(unsigned int *colors) +{ + g_colors = colors; +} + } // extern "c" #endif @@ -70,7 +70,7 @@ def build(obj, bits): geometry.add_solid(obj) elif isinstance(obj, Mesh): geometry = Geometry() - geometry.add_solid(Solid(obj, vacuum, vacuum, surface=lambertian_surface)) + geometry.add_solid(Solid(obj, vacuum, vacuum, surface=lambertian_surface, color=0x99ffffff)) else: raise Exception('cannot build type %s' % type(obj)) @@ -161,7 +161,7 @@ def view(viewable, size=(800,600), name='', bits=8, load_bvh=False): print 'device %s' % autoinit.device.name() module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) - geometry.load(module, color=True) + geometry.load(module) cuda_raytrace = module.get_function('ray_trace') cuda_rotate = module.get_function('rotate') cuda_translate = module.get_function('translate') @@ -198,7 +198,7 @@ def view(viewable, size=(800,600), name='', bits=8, load_bvh=False): cuda.Context.synchronize() elapsed = time.time() - t0 - #print 'elapsed %f sec' % elapsed + print 'elapsed %f sec' % elapsed pygame.surfarray.blit_array(screen, pixels_gpu.get().reshape(size)) pygame.display.flip() |