summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xcamera.py5
-rw-r--r--geometry.py75
-rw-r--r--gpu.py18
-rw-r--r--src/alpha.h38
-rw-r--r--src/mesh.h39
-rw-r--r--src/tools.cu17
6 files changed, 90 insertions, 102 deletions
diff --git a/camera.py b/camera.py
index 797fdbe..1e9ceb1 100755
--- a/camera.py
+++ b/camera.py
@@ -104,7 +104,7 @@ class Camera(Thread):
self.xyz_lookup1_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3)
self.xyz_lookup2_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3)
self.image_gpu = gpuarray.zeros(self.width*self.height, dtype=gpuarray.vec.float3)
- self.context.synchronize()
+ self.gpu.context.synchronize()
self.source_position = self.point
@@ -382,8 +382,6 @@ class Camera(Thread):
self.done = False
self.clicked = False
- #shift = False
- #ctrl = False
#current_layer = 0
@@ -416,7 +414,6 @@ class EventViewer(Camera):
self.T = self.f.Get('T')
self.T.GetEntry(0)
- #@timeit
def color_hit_pmts(self):
self.gpu.reset_colors()
diff --git a/geometry.py b/geometry.py
index a98b5cd..0657a77 100644
--- a/geometry.py
+++ b/geometry.py
@@ -1,8 +1,5 @@
import sys
import numpy as np
-import pycuda.driver as cuda
-from pycuda import gpuarray
-from itertools import *
from itertoolset import *
from tools import timeit
@@ -159,7 +156,7 @@ def interleave(arr, bits):
if len(arr.shape) != 2 or arr.shape[1] != 3:
raise Exception('shape mismatch')
- z = np.zeros(arr.shape[0], dtype=np.uint32)
+ z = np.zeros(arr.shape[0], dtype=np.uint64)
for i in range(bits):
z |= (arr[:,2] & 1 << i) << (2*i) | \
(arr[:,1] & 1 << i) << (2*i+1) | \
@@ -174,13 +171,13 @@ def morton_order(mesh, bits):
"""
lower_bound, upper_bound = mesh.get_bounds()
- if bits <= 0 or bits > 10:
- raise Exception('number of bits must be in the range (0,10].')
+ if bits <= 0 or bits > 21:
+ raise Exception('number of bits must be in the range (0,21].')
max_value = 2**bits - 1
def quantize(x):
- return np.uint32((x-lower_bound)*max_value/(upper_bound-lower_bound))
+ return np.uint64((x-lower_bound)*max_value/(upper_bound-lower_bound))
mean_positions = quantize(np.mean(mesh.assemble(), axis=1))
@@ -212,7 +209,6 @@ class Geometry(object):
return len(self.solids)-1
@timeit
- #@profile
def build(self, bits=8, shift=3):
offsets = [ (0,0) ]
for solid in self.solids:
@@ -275,61 +271,40 @@ class Geometry(object):
self.upper_bounds = np.empty((unique_zvalues.size,3), dtype=np.float32)
assembled_mesh = self.mesh.assemble(group=False)
- zi1_array = np.searchsorted(zvalues_mesh, unique_zvalues)
- zi2_array = np.searchsorted(zvalues_mesh, unique_zvalues, side='right')
+ self.node_map = np.searchsorted(zvalues_mesh, unique_zvalues)
+ self.node_map_end = np.searchsorted(zvalues_mesh, unique_zvalues, side='right')
- for i, (zi1, zi2) in enumerate(zip(zi1_array,zi2_array)):
- self.lower_bounds[i] = assembled_mesh[zi1*3:zi2*3].min(0)
- self.upper_bounds[i] = assembled_mesh[zi1*3:zi2*3].max(0)
+ for i, (zi1, zi2) in enumerate(izip(self.node_map, self.node_map_end)):
+ self.lower_bounds[i] = assembled_mesh[zi1*3:zi2*3].min(axis=0)
+ self.upper_bounds[i] = assembled_mesh[zi1*3:zi2*3].max(axis=0)
- zvalues = np.array(unique_zvalues, copy=True)
- self.node_map = np.array(zi1_array, copy=True)
- self.node_length = zi2_array - zi1_array
self.layers = np.zeros(unique_zvalues.size, dtype=np.uint32)
self.first_node = unique_zvalues.size
begin_last_layer = 0
- layer = 1
+ for layer in count(1):
+ bit_shifted_zvalues = unique_zvalues >> shift
+ unique_zvalues = np.unique(bit_shifted_zvalues)
- while True:
- bit_shifted_zvalues = zvalues >> shift
- unique_bit_shifted_zvalues = np.unique(bit_shifted_zvalues)
- zvalues = np.empty(unique_bit_shifted_zvalues.size, dtype=np.uint64)
+ i0 = begin_last_layer + bit_shifted_zvalues.size
- self.lower_bounds.resize(\
- (self.lower_bounds.shape[0]+unique_bit_shifted_zvalues.size,3))
- self.upper_bounds.resize(\
- (self.upper_bounds.shape[0]+unique_bit_shifted_zvalues.size,3))
+ self.node_map.resize(self.node_map.size+unique_zvalues.size)
+ self.node_map[i0:] = np.searchsorted(bit_shifted_zvalues, unique_zvalues) + begin_last_layer
+ self.node_map_end.resize(self.node_map_end.size+unique_zvalues.size)
+ self.node_map_end[i0:] = np.searchsorted(bit_shifted_zvalues, unique_zvalues, side='right') + begin_last_layer
- self.node_map.resize(\
- self.node_map.size+unique_bit_shifted_zvalues.size)
- self.node_length.resize(\
- self.node_length.size+unique_bit_shifted_zvalues.size)
+ self.layers.resize(self.layers.size+unique_zvalues.size)
+ self.layers[i0:] = layer
- self.layers.resize(self.layers.size+unique_bit_shifted_zvalues.size)
+ self.lower_bounds.resize((self.lower_bounds.shape[0]+unique_zvalues.size,3))
+ self.upper_bounds.resize((self.upper_bounds.shape[0]+unique_zvalues.size,3))
- for i, z in enumerate(unique_bit_shifted_zvalues):
- i1 = np.searchsorted(bit_shifted_zvalues, z) + \
- begin_last_layer
- i2 = np.searchsorted(bit_shifted_zvalues, z, side='right') + \
- begin_last_layer
-
- zvalues[i] = z
-
- i += begin_last_layer + bit_shifted_zvalues.size
-
- self.lower_bounds[i] = self.lower_bounds[i1:i2].min(axis=0)
- self.upper_bounds[i] = self.upper_bounds[i1:i2].max(axis=0)
-
- self.node_map[i] = i1
- self.node_length[i] = i2-i1
- self.layers[i] = layer
+ for i, zi1, zi2 in izip(count(i0), self.node_map[i0:], self.node_map_end[i0:]):
+ self.lower_bounds[i] = self.lower_bounds[zi1:zi2].min(axis=0)
+ self.upper_bounds[i] = self.upper_bounds[zi1:zi2].max(axis=0)
begin_last_layer += bit_shifted_zvalues.size
- layer += 1
-
- if unique_bit_shifted_zvalues.size == 1:
+ if unique_zvalues.size == 1:
break
-
diff --git a/gpu.py b/gpu.py
index a21bc38..9017f20 100644
--- a/gpu.py
+++ b/gpu.py
@@ -72,13 +72,13 @@ class GPU(object):
device = cuda.Device(device_id)
self.context = device.make_context()
print 'device %s' % self.context.get_device().name()
- self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+ self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True)
self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids'])
self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate'])
self.nthread_per_block = 64
self.max_blocks = 1024
- self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+ self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True)
self.daq_funcs = CUDAFuncs(self.daq_module,
['reset_earliest_time_int', 'run_daq',
'convert_sortable_int_to_float'])
@@ -90,8 +90,8 @@ class GPU(object):
print format_array('lower_bounds', self.lower_bounds_gpu)
print format_array('upper_bounds', self.upper_bounds_gpu)
print format_array('node_map', self.node_map_gpu)
- print format_array('node_length', self.node_length_gpu)
- print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_length_gpu.nbytes))
+ print format_array('node_map_end', self.node_map_end_gpu)
+ print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_map_end_gpu.nbytes))
def load_geometry(self, geometry):
if not hasattr(geometry, 'mesh'):
@@ -155,7 +155,7 @@ class GPU(object):
self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32))
self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32))
- self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32))
+ self.node_map_end_gpu = gpuarray.to_gpu(geometry.node_map_end.astype(np.uint32))
self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32))
self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
@@ -163,27 +163,25 @@ class GPU(object):
self.lower_bounds_tex = self.module.get_texref('lower_bounds')
self.upper_bounds_tex = self.module.get_texref('upper_bounds')
self.node_map_tex = self.module.get_texref('node_map')
- self.node_length_tex = self.module.get_texref('node_length')
+ self.node_map_end_tex = self.module.get_texref('node_map_end')
self.lower_bounds_tex.set_address(self.lower_bounds_gpu.gpudata, self.lower_bounds_gpu.nbytes)
self.upper_bounds_tex.set_address(self.upper_bounds_gpu.gpudata, self.upper_bounds_gpu.nbytes)
self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes)
- self.node_length_tex.set_address(self.node_length_gpu.gpudata, self.node_length_gpu.nbytes)
+ self.node_map_end_tex.set_address(self.node_map_end_gpu.gpudata, self.node_map_end_gpu.nbytes)
self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
- self.node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+ self.node_map_end_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
self.geometry = geometry
self.print_device_usage()
- #@timeit
def reset_colors(self):
self.colors_gpu.set(self.geometry.colors.astype(np.uint32))
- #@timeit
def color_solids(self, solid_ids, colors):
solid_ids_gpu = gpuarray.to_gpu(np.array(solid_ids, dtype=np.int32))
solid_colors_gpu = gpuarray.to_gpu(np.array(colors, dtype=np.uint32))
diff --git a/src/alpha.h b/src/alpha.h
index 5e3e803..a9963eb 100644
--- a/src/alpha.h
+++ b/src/alpha.h
@@ -17,14 +17,14 @@ __device__ void swap(T &a, T &b)
struct HitList
{
- int size;
- int indices[ALPHA_DEPTH];
+ unsigned int size;
+ unsigned int indices[ALPHA_DEPTH];
float distances[ALPHA_DEPTH];
};
__device__ void add_to_hit_list(const float &distance, const int &index, HitList &h)
{
- int i;
+ unsigned int i;
if (h.size >= ALPHA_DEPTH)
{
if (distance > h.distances[ALPHA_DEPTH-1])
@@ -50,7 +50,7 @@ __device__ void add_to_hit_list(const float &distance, const int &index, HitList
}
}
-__device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& direction)
+__device__ int get_color_alpha(const float3 &origin, const float3& direction)
{
HitList h;
h.size = 0;
@@ -60,34 +60,34 @@ __device__ __noinline__ int get_color_alpha(const float3 &origin, const float3&
if (!intersect_node(origin, direction, g_start_node))
return 0;
- int stack[STACK_SIZE];
+ unsigned int stack[STACK_SIZE];
- int *head = &stack[0];
- int *node = &stack[1];
- int *tail = &stack[STACK_SIZE-1];
+ unsigned int *head = &stack[0];
+ unsigned int *node = &stack[1];
+ unsigned int *tail = &stack[STACK_SIZE-1];
*node = g_start_node;
- int i;
+ unsigned int i;
do
{
- int first_child = tex1Dfetch(node_map, *node);
- int child_count = tex1Dfetch(node_length, *node);
+ unsigned int first_child = tex1Dfetch(node_map, *node);
+ unsigned int stop = tex1Dfetch(node_map_end, *node);
- while (*node >= g_first_node && child_count == 1)
+ while (*node >= g_first_node && stop == first_child+1)
{
*node = first_child;
first_child = tex1Dfetch(node_map, *node);
- child_count = tex1Dfetch(node_length, *node);
+ stop = tex1Dfetch(node_map_end, *node);
}
if (*node >= g_first_node)
{
- for (i=0; i < child_count; i++)
+ for (i=first_child; i < stop; i++)
{
- if (intersect_node(origin, direction, first_child+i))
+ if (intersect_node(origin, direction, i))
{
- *node = first_child+i;
+ *node = i;
node++;
}
}
@@ -96,9 +96,9 @@ __device__ __noinline__ int get_color_alpha(const float3 &origin, const float3&
}
else // node is a leaf
{
- for (i=0; i < child_count; i++)
+ for (i=first_child; i < stop; i++)
{
- uint4 triangle_data = g_triangles[first_child+i];
+ uint4 triangle_data = g_triangles[i];
float3 v0 = g_vertices[triangle_data.x];
float3 v1 = g_vertices[triangle_data.y];
@@ -106,7 +106,7 @@ __device__ __noinline__ int get_color_alpha(const float3 &origin, const float3&
if (intersect_triangle(origin, direction, v0, v1, v2, distance))
{
- add_to_hit_list(distance, first_child+i, h);
+ add_to_hit_list(distance, i, h);
}
} // triangle loop
diff --git a/src/mesh.h b/src/mesh.h
index e0170ff..7c148af 100644
--- a/src/mesh.h
+++ b/src/mesh.h
@@ -18,7 +18,8 @@ texture<float4, 1, cudaReadModeElementType> lower_bounds;
/* map to child nodes/triangles and the number of child nodes/triangles */
texture<unsigned int, 1, cudaReadModeElementType> node_map;
-texture<unsigned int, 1, cudaReadModeElementType> node_length;
+texture<unsigned int, 1, cudaReadModeElementType> node_map_end;
+//texture<unsigned int, 1, cudaReadModeElementType> node_length;
__device__ float3 make_float3(const float4 &a)
{
@@ -48,7 +49,7 @@ __device__ bool intersect_node(const float3 &origin, const float3 &direction, co
direction `direction` and the global mesh texture. If the ray intersects
the texture return the index of the triangle which the ray intersected,
else return -1. */
-__device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1)
+__device__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1)
{
int triangle_index = -1;
@@ -57,34 +58,34 @@ __device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& d
if (!intersect_node(origin, direction, g_start_node))
return -1;
- int stack[STACK_SIZE];
+ unsigned int stack[STACK_SIZE];
- int *head = &stack[0];
- int *node = &stack[1];
- int *tail = &stack[STACK_SIZE-1];
+ unsigned int *head = &stack[0];
+ unsigned int *node = &stack[1];
+ unsigned int *tail = &stack[STACK_SIZE-1];
*node = g_start_node;
- int i;
+ unsigned int i;
do
{
- int first_child = tex1Dfetch(node_map, *node);
- int child_count = tex1Dfetch(node_length, *node);
+ unsigned int first_child = tex1Dfetch(node_map, *node);
+ unsigned int stop = tex1Dfetch(node_map_end, *node);
- while (*node >= g_first_node && child_count == 1)
+ while (*node >= g_first_node && stop == first_child+1)
{
*node = first_child;
first_child = tex1Dfetch(node_map, *node);
- child_count = tex1Dfetch(node_length, *node);
+ stop = tex1Dfetch(node_map_end, *node);
}
if (*node >= g_first_node)
{
- for (i=0; i < child_count; i++)
+ for (i=first_child; i < stop; i++)
{
- if (intersect_node(origin, direction, first_child+i))
+ if (intersect_node(origin, direction, i))
{
- *node = first_child+i;
+ *node = i;
node++;
}
}
@@ -93,12 +94,12 @@ __device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& d
}
else // node is a leaf
{
- for (i=0; i < child_count; i++)
+ for (i=first_child; i < stop; i++)
{
- if (last_hit_triangle == first_child+i)
+ if (last_hit_triangle == i)
continue;
- uint4 triangle_data = g_triangles[first_child+i];
+ uint4 triangle_data = g_triangles[i];
float3 v0 = g_vertices[triangle_data.x];
float3 v1 = g_vertices[triangle_data.y];
@@ -108,14 +109,14 @@ __device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& d
{
if (triangle_index == -1)
{
- triangle_index = first_child + i;
+ triangle_index = i;
min_distance = distance;
continue;
}
if (distance < min_distance)
{
- triangle_index = first_child + i;
+ triangle_index = i;
min_distance = distance;
}
}
diff --git a/src/tools.cu b/src/tools.cu
new file mode 100644
index 0000000..3d3fed7
--- /dev/null
+++ b/src/tools.cu
@@ -0,0 +1,17 @@
+//--*-c-*-
+
+extern "C"
+{
+
+__global__ void interleave(int nthreads, unsigned long long *x, unsigned long long *y, unsigned long long *z, int bits, unsigned long long *dest)
+{
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ for (int i=0; i < bits; i++)
+ dest[id] |= (x[id] & 1 << i) << (2*i) | (y[id] & 1 << i) << (2*i+1) | (z[id] & 1 << i) << (2*i+2);
+}
+
+}