summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--detectors/lbne.py2
-rw-r--r--geometry.py29
-rwxr-xr-xrender.py2
-rw-r--r--src/alpha.h157
-rw-r--r--src/intersect.h4
-rw-r--r--src/kernel.cu22
-rw-r--r--src/mesh.h19
-rwxr-xr-xview.py6
8 files changed, 197 insertions, 44 deletions
diff --git a/detectors/lbne.py b/detectors/lbne.py
index d7b6843..8fe2c18 100644
--- a/detectors/lbne.py
+++ b/detectors/lbne.py
@@ -20,7 +20,7 @@ def build_lbne(radius, height, nstrings, pmts_per_string, endcap_spacing):
# outer cylinder
cylinder_mesh = cylinder(radius, radius, height+height/(pmts_per_string-1), theta=(2*np.pi/nstrings)/4)
cylinder_mesh.vertices = rotate(cylinder_mesh.vertices, np.pi/2, (-1,0,0))
- lbne.add_solid(Solid(cylinder_mesh, water, vacuum, black_surface, 0xff))
+ lbne.add_solid(Solid(cylinder_mesh, water, vacuum, black_surface, 0x990000ff))
lbne.pmtids = []
diff --git a/geometry.py b/geometry.py
index 875d403..c76b089 100644
--- a/geometry.py
+++ b/geometry.py
@@ -58,7 +58,7 @@ class Mesh(object):
return Mesh(np.concatenate((self.vertices, other.vertices)), np.concatenate((self.triangles, other.triangles + len(self.vertices))))
class Solid(object):
- def __init__(self, mesh, material1=None, material2=None, surface=None, color=0xffffffff):
+ def __init__(self, mesh, material1=None, material2=None, surface=None, color=0xffffff):
self.mesh = mesh
if np.iterable(material1):
@@ -349,7 +349,19 @@ class Geometry(object):
if unique_bit_shifted_zvalues.size == 1:
break
- def load(self, module, color=False):
+ def load_colors(self, module):
+ self.colors = \
+ np.concatenate([solid.color for solid in self.solids])[reorder]
+
+ if not hasattr(self, 'colors_gpu'):
+ raise Exception('cannot load colors before geometry is loaded.')
+
+ self.colors_gpu.set(self.colors.astype(np.uint32))
+
+ set_colors = module.get_function('set_colors')
+ set_colors(self.colors_gpu, block=(1,1,1), grid=(1,1))
+
+ def load(self, module):
"""
Load the bounding volume hierarchy onto the GPU module,
bind it to the appropriate textures, and return a list
@@ -404,11 +416,7 @@ class Geometry(object):
triangles['x'] = self.mesh.triangles[:,0]
triangles['y'] = self.mesh.triangles[:,1]
triangles['z'] = self.mesh.triangles[:,2]
-
- if color:
- triangles['w'] = self.colors
- else:
- triangles['w'] = ((self.material1_index & 0xff) << 24) | ((self.material2_index & 0xff) << 16) | ((self.surface_index & 0xff) << 8)
+ triangles['w'] = ((self.material1_index & 0xff) << 24) | ((self.material2_index & 0xff) << 16) | ((self.surface_index & 0xff) << 8)
lower_bounds = np.empty(self.lower_bounds.shape[0], dtype=gpuarray.vec.float4)
lower_bounds['x'] = self.lower_bounds[:,0]
@@ -422,6 +430,7 @@ class Geometry(object):
self.vertices_gpu = cuda.to_device(vertices)
self.triangles_gpu = cuda.to_device(triangles)
+ self.colors_gpu = gpuarray.to_gpu(self.colors.astype(np.uint32))
self.lower_bounds_gpu = cuda.to_device(lower_bounds)
self.upper_bounds_gpu = cuda.to_device(upper_bounds)
self.node_map_gpu = cuda.to_device(self.node_map)
@@ -449,12 +458,8 @@ class Geometry(object):
print format_array('node_length', self.node_length)
print '%-15s %6s %6s' % ('total', '', format_size(vertices.nbytes + triangles.nbytes + self.lower_bounds.nbytes + self.upper_bounds.nbytes + self.node_map.nbytes + self.node_length.nbytes))
- #set_pointer = module.get_function('set_pointer')
- #set_pointer(self.triangles_gpu, self.vertices_gpu,
- # block=(1,1,1), grid=(1,1))
-
set_global_mesh_variables = module.get_function('set_global_mesh_variables')
- set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, np.int32(self.node_map.size-1), np.int32(self.first_node), block=(1,1,1), grid=(1,1))
+ set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(self.node_map.size-1), np.uint32(self.first_node), block=(1,1,1), grid=(1,1))
self.lower_bounds_tex = module.get_texref('lower_bounds')
self.upper_bounds_tex = module.get_texref('upper_bounds')
diff --git a/render.py b/render.py
index d7a9ec4..1890471 100755
--- a/render.py
+++ b/render.py
@@ -24,7 +24,7 @@ def render(viewable, size=(800,600), name='', bits=8, make_movie=False):
print 'device %s' % autoinit.device.name()
module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
- geometry.load(module, color=False)
+ geometry.load(module)
cuda_build_rgb_lookup = module.get_function('build_rgb_lookup')
cuda_render = module.get_function('render')
cuda_rotate = module.get_function('rotate')
diff --git a/src/alpha.h b/src/alpha.h
new file mode 100644
index 0000000..afd7ea5
--- /dev/null
+++ b/src/alpha.h
@@ -0,0 +1,157 @@
+#ifndef __ALPHA_H__
+#define __ALPHA_H__
+
+#include "linalg.h"
+#include "intersect.h"
+#include "mesh.h"
+
+template <class T>
+__device__ void swap(T &a, T &b)
+{
+ T temp = a;
+ a = b;
+ b = temp;
+}
+
+#define ALPHA_DEPTH 5
+
+struct HitList
+{
+ int size;
+ int indices[ALPHA_DEPTH];
+ float distances[ALPHA_DEPTH];
+};
+
+__device__ void add_to_hit_list(const float &distance, const int &index, HitList &h)
+{
+ int i;
+ if (h.size >= ALPHA_DEPTH)
+ {
+ if (distance > h.distances[ALPHA_DEPTH-1])
+ return;
+
+ i = ALPHA_DEPTH-1;
+ }
+ else
+ {
+ i = h.size;
+ h.size += 1;
+ }
+
+ h.indices[i] = index;
+ h.distances[i] = distance;
+
+ while (i > 0 && h.distances[i-1] > h.distances[i])
+ {
+ swap(h.distances[i-1], h.distances[i]);
+ swap(h.indices[i-1], h.indices[i]);
+
+ i -= 1;
+ }
+}
+
+__device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& direction)
+{
+ HitList h;
+ h.size = 0;
+
+ float distance;
+
+ if (!intersect_node(origin, direction, g_start_node))
+ return 0;
+
+ int stack[STACK_SIZE];
+
+ int *head = &stack[0];
+ int *node = &stack[1];
+ int *tail = &stack[STACK_SIZE-1];
+ *node = g_start_node;
+
+ int i;
+
+ do
+ {
+ int first_child = tex1Dfetch(node_map, *node);
+ int child_count = tex1Dfetch(node_length, *node);
+
+ while (*node >= g_first_node && child_count == 1)
+ {
+ *node = first_child;
+ first_child = tex1Dfetch(node_map, *node);
+ child_count = tex1Dfetch(node_length, *node);
+ }
+
+ if (*node >= g_first_node)
+ {
+ for (i=0; i < child_count; i++)
+ {
+ if (intersect_node(origin, direction, first_child+i))
+ {
+ *node = first_child+i;
+ node++;
+ }
+ }
+
+ node--;
+ }
+ else // node is a leaf
+ {
+ for (i=0; i < child_count; i++)
+ {
+ uint4 triangle_data = g_triangles[first_child+i];
+
+ float3 v0 = g_vertices[triangle_data.x];
+ float3 v1 = g_vertices[triangle_data.y];
+ float3 v2 = g_vertices[triangle_data.z];
+
+ if (intersect_triangle(origin, direction, v0, v1, v2, distance))
+ {
+ add_to_hit_list(distance, first_child+i, h);
+ }
+
+ } // triangle loop
+
+ node--;
+
+ } // node is a leaf
+
+ } // while loop
+ while (node != head);
+
+ if (h.size < 1)
+ return 0;
+
+ float scale = 1.0;
+ unsigned int r = 0;
+ unsigned int g = 0;
+ unsigned int b = 0;
+ for (i=0; i < h.size; i++)
+ {
+ uint4 triangle_data = g_triangles[h.indices[i]];
+
+ float3 v0 = g_vertices[triangle_data.x];
+ float3 v1 = g_vertices[triangle_data.y];
+ float3 v2 = g_vertices[triangle_data.z];
+
+ float cos_theta = dot(normalize(cross(v1-v0,v2-v1)),-direction);
+
+ if (cos_theta < 0.0f)
+ cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction);
+
+ unsigned int r0 = 0xff & (g_colors[h.indices[i]] >> 16);
+ unsigned int g0 = 0xff & (g_colors[h.indices[i]] >> 8);
+ unsigned int b0 = 0xff & g_colors[h.indices[i]];
+
+ float alpha = (255 - (0xff & (g_colors[h.indices[i]] >> 24)))/255.0f;
+
+ r += floorf(r0*scale*cos_theta*alpha);
+ g += floorf(g0*scale*cos_theta*alpha);
+ b += floorf(b0*scale*cos_theta*alpha);
+
+ scale *= (1.0f-alpha);
+ }
+
+ return r << 16 | g << 8 | b;
+}
+
+#endif
diff --git a/src/intersect.h b/src/intersect.h
index b1713d7..c400f1c 100644
--- a/src/intersect.h
+++ b/src/intersect.h
@@ -9,7 +9,7 @@
#include "matrix.h"
#include "rotate.h"
-#define EPSILON 1.0e-3f
+#define EPSILON 0.0f
/* Test the intersection between a ray starting from `origin` traveling in the
direction `direction` and a triangle defined by the vertices `v0`, `v1`, and
@@ -69,7 +69,7 @@ __device__ int get_color(const float3 &direction, const float3 &v0, const float3
unsigned int b = 0xFF & base_color;
if (scale < 0.0f)
- scale = dot(-normalize(cross(v1-v0,v2-v0)),-direction);
+ scale = dot(-normalize(cross(v1-v0,v2-v1)),-direction);
r = floorf(r*scale);
g = floorf(g*scale);
diff --git a/src/kernel.cu b/src/kernel.cu
index 711ae80..0dc5639 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -7,6 +7,7 @@
#include "rotate.h"
#include "mesh.h"
#include "photon.h"
+#include "alpha.h"
#define RED_WAVELENGTH 685
#define BLUE_WAVELENGTH 465
@@ -18,7 +19,7 @@ __device__ void fAtomicAdd(float *addr, float data)
data = atomicExch(addr, data+atomicExch(addr, 0.0f));
}
-__device__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max_steps)
+__device__ __noinline__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max_steps)
{
int steps = 0;
while (steps < max_steps)
@@ -289,24 +290,7 @@ __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, i
float3 direction = directions[id];
direction /= norm(direction);
- float distance;
-
- int triangle_index = intersect_mesh(position, direction, distance);
-
- if (triangle_index == -1)
- {
- pixels[id] = 0x000000;
- }
- else
- {
- uint4 triangle_data = g_triangles[triangle_index];
-
- float3 v0 = g_vertices[triangle_data.x];
- float3 v1 = g_vertices[triangle_data.y];
- float3 v2 = g_vertices[triangle_data.z];
-
- pixels[id] = get_color(direction, v0, v1, v2, triangle_data.w);
- }
+ pixels[id] = get_color_alpha(position, direction);
} // ray_trace
diff --git a/src/mesh.h b/src/mesh.h
index 6f678c0..ec5a508 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -8,8 +8,9 @@
/* flattened triangle mesh */
__device__ float3 *g_vertices;
__device__ uint4 *g_triangles;
-__device__ int g_start_node;
-__device__ int g_first_node;
+__device__ unsigned int *g_colors;
+__device__ unsigned int g_start_node;
+__device__ unsigned int g_first_node;
/* lower/upper bounds for the bounding box associated with each node/leaf */
texture<float4, 1, cudaReadModeElementType> upper_bounds;
@@ -47,7 +48,7 @@ __device__ bool intersect_node(const float3 &origin, const float3 &direction, co
direction `direction` and the global mesh texture. If the ray intersects
the texture return the index of the triangle which the ray intersected,
else return -1. */
-__device__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1)
+__device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1)
{
int triangle_index = -1;
@@ -133,14 +134,20 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, flo
extern "C"
{
-__global__ void set_global_mesh_variables(uint4 *triangle_ptr, float3 *vertex_ptr, int start_node, int first_node)
+__global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node)
{
- g_triangles = triangle_ptr;
- g_vertices = vertex_ptr;
+ g_triangles = triangles;
+ g_vertices = vertices;
+ g_colors = colors;
g_start_node = start_node;
g_first_node = first_node;
}
+__global__ void set_colors(unsigned int *colors)
+{
+ g_colors = colors;
+}
+
} // extern "c"
#endif
diff --git a/view.py b/view.py
index c01dd3f..11c4b57 100755
--- a/view.py
+++ b/view.py
@@ -70,7 +70,7 @@ def build(obj, bits):
geometry.add_solid(obj)
elif isinstance(obj, Mesh):
geometry = Geometry()
- geometry.add_solid(Solid(obj, vacuum, vacuum, surface=lambertian_surface))
+ geometry.add_solid(Solid(obj, vacuum, vacuum, surface=lambertian_surface, color=0x99ffffff))
else:
raise Exception('cannot build type %s' % type(obj))
@@ -161,7 +161,7 @@ def view(viewable, size=(800,600), name='', bits=8, load_bvh=False):
print 'device %s' % autoinit.device.name()
module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
- geometry.load(module, color=True)
+ geometry.load(module)
cuda_raytrace = module.get_function('ray_trace')
cuda_rotate = module.get_function('rotate')
cuda_translate = module.get_function('translate')
@@ -198,7 +198,7 @@ def view(viewable, size=(800,600), name='', bits=8, load_bvh=False):
cuda.Context.synchronize()
elapsed = time.time() - t0
- #print 'elapsed %f sec' % elapsed
+ print 'elapsed %f sec' % elapsed
pygame.surfarray.blit_array(screen, pixels_gpu.get().reshape(size))
pygame.display.flip()