diff options
-rwxr-xr-x | camera.py | 5 | ||||
-rw-r--r-- | geometry.py | 75 | ||||
-rw-r--r-- | gpu.py | 18 | ||||
-rw-r--r-- | src/alpha.h | 38 | ||||
-rw-r--r-- | src/mesh.h | 39 | ||||
-rw-r--r-- | src/tools.cu | 17 |
6 files changed, 90 insertions, 102 deletions
@@ -104,7 +104,7 @@ class Camera(Thread): self.xyz_lookup1_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3) self.xyz_lookup2_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3) self.image_gpu = gpuarray.zeros(self.width*self.height, dtype=gpuarray.vec.float3) - self.context.synchronize() + self.gpu.context.synchronize() self.source_position = self.point @@ -382,8 +382,6 @@ class Camera(Thread): self.done = False self.clicked = False - #shift = False - #ctrl = False #current_layer = 0 @@ -416,7 +414,6 @@ class EventViewer(Camera): self.T = self.f.Get('T') self.T.GetEntry(0) - #@timeit def color_hit_pmts(self): self.gpu.reset_colors() diff --git a/geometry.py b/geometry.py index a98b5cd..0657a77 100644 --- a/geometry.py +++ b/geometry.py @@ -1,8 +1,5 @@ import sys import numpy as np -import pycuda.driver as cuda -from pycuda import gpuarray -from itertools import * from itertoolset import * from tools import timeit @@ -159,7 +156,7 @@ def interleave(arr, bits): if len(arr.shape) != 2 or arr.shape[1] != 3: raise Exception('shape mismatch') - z = np.zeros(arr.shape[0], dtype=np.uint32) + z = np.zeros(arr.shape[0], dtype=np.uint64) for i in range(bits): z |= (arr[:,2] & 1 << i) << (2*i) | \ (arr[:,1] & 1 << i) << (2*i+1) | \ @@ -174,13 +171,13 @@ def morton_order(mesh, bits): """ lower_bound, upper_bound = mesh.get_bounds() - if bits <= 0 or bits > 10: - raise Exception('number of bits must be in the range (0,10].') + if bits <= 0 or bits > 21: + raise Exception('number of bits must be in the range (0,21].') max_value = 2**bits - 1 def quantize(x): - return np.uint32((x-lower_bound)*max_value/(upper_bound-lower_bound)) + return np.uint64((x-lower_bound)*max_value/(upper_bound-lower_bound)) mean_positions = quantize(np.mean(mesh.assemble(), axis=1)) @@ -212,7 +209,6 @@ class Geometry(object): return len(self.solids)-1 @timeit - #@profile def build(self, bits=8, shift=3): offsets = [ (0,0) ] for solid in self.solids: @@ -275,61 +271,40 @@ class Geometry(object): self.upper_bounds = np.empty((unique_zvalues.size,3), dtype=np.float32) assembled_mesh = self.mesh.assemble(group=False) - zi1_array = np.searchsorted(zvalues_mesh, unique_zvalues) - zi2_array = np.searchsorted(zvalues_mesh, unique_zvalues, side='right') + self.node_map = np.searchsorted(zvalues_mesh, unique_zvalues) + self.node_map_end = np.searchsorted(zvalues_mesh, unique_zvalues, side='right') - for i, (zi1, zi2) in enumerate(zip(zi1_array,zi2_array)): - self.lower_bounds[i] = assembled_mesh[zi1*3:zi2*3].min(0) - self.upper_bounds[i] = assembled_mesh[zi1*3:zi2*3].max(0) + for i, (zi1, zi2) in enumerate(izip(self.node_map, self.node_map_end)): + self.lower_bounds[i] = assembled_mesh[zi1*3:zi2*3].min(axis=0) + self.upper_bounds[i] = assembled_mesh[zi1*3:zi2*3].max(axis=0) - zvalues = np.array(unique_zvalues, copy=True) - self.node_map = np.array(zi1_array, copy=True) - self.node_length = zi2_array - zi1_array self.layers = np.zeros(unique_zvalues.size, dtype=np.uint32) self.first_node = unique_zvalues.size begin_last_layer = 0 - layer = 1 + for layer in count(1): + bit_shifted_zvalues = unique_zvalues >> shift + unique_zvalues = np.unique(bit_shifted_zvalues) - while True: - bit_shifted_zvalues = zvalues >> shift - unique_bit_shifted_zvalues = np.unique(bit_shifted_zvalues) - zvalues = np.empty(unique_bit_shifted_zvalues.size, dtype=np.uint64) + i0 = begin_last_layer + bit_shifted_zvalues.size - self.lower_bounds.resize(\ - (self.lower_bounds.shape[0]+unique_bit_shifted_zvalues.size,3)) - self.upper_bounds.resize(\ - (self.upper_bounds.shape[0]+unique_bit_shifted_zvalues.size,3)) + self.node_map.resize(self.node_map.size+unique_zvalues.size) + self.node_map[i0:] = np.searchsorted(bit_shifted_zvalues, unique_zvalues) + begin_last_layer + self.node_map_end.resize(self.node_map_end.size+unique_zvalues.size) + self.node_map_end[i0:] = np.searchsorted(bit_shifted_zvalues, unique_zvalues, side='right') + begin_last_layer - self.node_map.resize(\ - self.node_map.size+unique_bit_shifted_zvalues.size) - self.node_length.resize(\ - self.node_length.size+unique_bit_shifted_zvalues.size) + self.layers.resize(self.layers.size+unique_zvalues.size) + self.layers[i0:] = layer - self.layers.resize(self.layers.size+unique_bit_shifted_zvalues.size) + self.lower_bounds.resize((self.lower_bounds.shape[0]+unique_zvalues.size,3)) + self.upper_bounds.resize((self.upper_bounds.shape[0]+unique_zvalues.size,3)) - for i, z in enumerate(unique_bit_shifted_zvalues): - i1 = np.searchsorted(bit_shifted_zvalues, z) + \ - begin_last_layer - i2 = np.searchsorted(bit_shifted_zvalues, z, side='right') + \ - begin_last_layer - - zvalues[i] = z - - i += begin_last_layer + bit_shifted_zvalues.size - - self.lower_bounds[i] = self.lower_bounds[i1:i2].min(axis=0) - self.upper_bounds[i] = self.upper_bounds[i1:i2].max(axis=0) - - self.node_map[i] = i1 - self.node_length[i] = i2-i1 - self.layers[i] = layer + for i, zi1, zi2 in izip(count(i0), self.node_map[i0:], self.node_map_end[i0:]): + self.lower_bounds[i] = self.lower_bounds[zi1:zi2].min(axis=0) + self.upper_bounds[i] = self.upper_bounds[zi1:zi2].max(axis=0) begin_last_layer += bit_shifted_zvalues.size - layer += 1 - - if unique_bit_shifted_zvalues.size == 1: + if unique_zvalues.size == 1: break - @@ -72,13 +72,13 @@ class GPU(object): device = cuda.Device(device_id) self.context = device.make_context() print 'device %s' % self.context.get_device().name() - self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) + self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True) self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids']) self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate']) self.nthread_per_block = 64 self.max_blocks = 1024 - self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) + self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True) self.daq_funcs = CUDAFuncs(self.daq_module, ['reset_earliest_time_int', 'run_daq', 'convert_sortable_int_to_float']) @@ -90,8 +90,8 @@ class GPU(object): print format_array('lower_bounds', self.lower_bounds_gpu) print format_array('upper_bounds', self.upper_bounds_gpu) print format_array('node_map', self.node_map_gpu) - print format_array('node_length', self.node_length_gpu) - print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_length_gpu.nbytes)) + print format_array('node_map_end', self.node_map_end_gpu) + print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_map_end_gpu.nbytes)) def load_geometry(self, geometry): if not hasattr(geometry, 'mesh'): @@ -155,7 +155,7 @@ class GPU(object): self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32)) self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32)) - self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32)) + self.node_map_end_gpu = gpuarray.to_gpu(geometry.node_map_end.astype(np.uint32)) self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32)) self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1)) @@ -163,27 +163,25 @@ class GPU(object): self.lower_bounds_tex = self.module.get_texref('lower_bounds') self.upper_bounds_tex = self.module.get_texref('upper_bounds') self.node_map_tex = self.module.get_texref('node_map') - self.node_length_tex = self.module.get_texref('node_length') + self.node_map_end_tex = self.module.get_texref('node_map_end') self.lower_bounds_tex.set_address(self.lower_bounds_gpu.gpudata, self.lower_bounds_gpu.nbytes) self.upper_bounds_tex.set_address(self.upper_bounds_gpu.gpudata, self.upper_bounds_gpu.nbytes) self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes) - self.node_length_tex.set_address(self.node_length_gpu.gpudata, self.node_length_gpu.nbytes) + self.node_map_end_tex.set_address(self.node_map_end_gpu.gpudata, self.node_map_end_gpu.nbytes) self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - self.node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + self.node_map_end_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) self.geometry = geometry self.print_device_usage() - #@timeit def reset_colors(self): self.colors_gpu.set(self.geometry.colors.astype(np.uint32)) - #@timeit def color_solids(self, solid_ids, colors): solid_ids_gpu = gpuarray.to_gpu(np.array(solid_ids, dtype=np.int32)) solid_colors_gpu = gpuarray.to_gpu(np.array(colors, dtype=np.uint32)) diff --git a/src/alpha.h b/src/alpha.h index 5e3e803..a9963eb 100644 --- a/src/alpha.h +++ b/src/alpha.h @@ -17,14 +17,14 @@ __device__ void swap(T &a, T &b) struct HitList { - int size; - int indices[ALPHA_DEPTH]; + unsigned int size; + unsigned int indices[ALPHA_DEPTH]; float distances[ALPHA_DEPTH]; }; __device__ void add_to_hit_list(const float &distance, const int &index, HitList &h) { - int i; + unsigned int i; if (h.size >= ALPHA_DEPTH) { if (distance > h.distances[ALPHA_DEPTH-1]) @@ -50,7 +50,7 @@ __device__ void add_to_hit_list(const float &distance, const int &index, HitList } } -__device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& direction) +__device__ int get_color_alpha(const float3 &origin, const float3& direction) { HitList h; h.size = 0; @@ -60,34 +60,34 @@ __device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& if (!intersect_node(origin, direction, g_start_node)) return 0; - int stack[STACK_SIZE]; + unsigned int stack[STACK_SIZE]; - int *head = &stack[0]; - int *node = &stack[1]; - int *tail = &stack[STACK_SIZE-1]; + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; *node = g_start_node; - int i; + unsigned int i; do { - int first_child = tex1Dfetch(node_map, *node); - int child_count = tex1Dfetch(node_length, *node); + unsigned int first_child = tex1Dfetch(node_map, *node); + unsigned int stop = tex1Dfetch(node_map_end, *node); - while (*node >= g_first_node && child_count == 1) + while (*node >= g_first_node && stop == first_child+1) { *node = first_child; first_child = tex1Dfetch(node_map, *node); - child_count = tex1Dfetch(node_length, *node); + stop = tex1Dfetch(node_map_end, *node); } if (*node >= g_first_node) { - for (i=0; i < child_count; i++) + for (i=first_child; i < stop; i++) { - if (intersect_node(origin, direction, first_child+i)) + if (intersect_node(origin, direction, i)) { - *node = first_child+i; + *node = i; node++; } } @@ -96,9 +96,9 @@ __device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& } else // node is a leaf { - for (i=0; i < child_count; i++) + for (i=first_child; i < stop; i++) { - uint4 triangle_data = g_triangles[first_child+i]; + uint4 triangle_data = g_triangles[i]; float3 v0 = g_vertices[triangle_data.x]; float3 v1 = g_vertices[triangle_data.y]; @@ -106,7 +106,7 @@ __device__ __noinline__ int get_color_alpha(const float3 &origin, const float3& if (intersect_triangle(origin, direction, v0, v1, v2, distance)) { - add_to_hit_list(distance, first_child+i, h); + add_to_hit_list(distance, i, h); } } // triangle loop @@ -18,7 +18,8 @@ texture<float4, 1, cudaReadModeElementType> lower_bounds; /* map to child nodes/triangles and the number of child nodes/triangles */ texture<unsigned int, 1, cudaReadModeElementType> node_map; -texture<unsigned int, 1, cudaReadModeElementType> node_length; +texture<unsigned int, 1, cudaReadModeElementType> node_map_end; +//texture<unsigned int, 1, cudaReadModeElementType> node_length; __device__ float3 make_float3(const float4 &a) { @@ -48,7 +49,7 @@ __device__ bool intersect_node(const float3 &origin, const float3 &direction, co direction `direction` and the global mesh texture. If the ray intersects the texture return the index of the triangle which the ray intersected, else return -1. */ -__device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1) +__device__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1) { int triangle_index = -1; @@ -57,34 +58,34 @@ __device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& d if (!intersect_node(origin, direction, g_start_node)) return -1; - int stack[STACK_SIZE]; + unsigned int stack[STACK_SIZE]; - int *head = &stack[0]; - int *node = &stack[1]; - int *tail = &stack[STACK_SIZE-1]; + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; *node = g_start_node; - int i; + unsigned int i; do { - int first_child = tex1Dfetch(node_map, *node); - int child_count = tex1Dfetch(node_length, *node); + unsigned int first_child = tex1Dfetch(node_map, *node); + unsigned int stop = tex1Dfetch(node_map_end, *node); - while (*node >= g_first_node && child_count == 1) + while (*node >= g_first_node && stop == first_child+1) { *node = first_child; first_child = tex1Dfetch(node_map, *node); - child_count = tex1Dfetch(node_length, *node); + stop = tex1Dfetch(node_map_end, *node); } if (*node >= g_first_node) { - for (i=0; i < child_count; i++) + for (i=first_child; i < stop; i++) { - if (intersect_node(origin, direction, first_child+i)) + if (intersect_node(origin, direction, i)) { - *node = first_child+i; + *node = i; node++; } } @@ -93,12 +94,12 @@ __device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& d } else // node is a leaf { - for (i=0; i < child_count; i++) + for (i=first_child; i < stop; i++) { - if (last_hit_triangle == first_child+i) + if (last_hit_triangle == i) continue; - uint4 triangle_data = g_triangles[first_child+i]; + uint4 triangle_data = g_triangles[i]; float3 v0 = g_vertices[triangle_data.x]; float3 v1 = g_vertices[triangle_data.y]; @@ -108,14 +109,14 @@ __device__ __noinline__ int intersect_mesh(const float3 &origin, const float3& d { if (triangle_index == -1) { - triangle_index = first_child + i; + triangle_index = i; min_distance = distance; continue; } if (distance < min_distance) { - triangle_index = first_child + i; + triangle_index = i; min_distance = distance; } } diff --git a/src/tools.cu b/src/tools.cu new file mode 100644 index 0000000..3d3fed7 --- /dev/null +++ b/src/tools.cu @@ -0,0 +1,17 @@ +//--*-c-*- + +extern "C" +{ + +__global__ void interleave(int nthreads, unsigned long long *x, unsigned long long *y, unsigned long long *z, int bits, unsigned long long *dest) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + for (int i=0; i < bits; i++) + dest[id] |= (x[id] & 1 << i) << (2*i) | (y[id] & 1 << i) << (2*i+1) | (z[id] & 1 << i) << (2*i+2); +} + +} |