diff options
-rw-r--r-- | __init__.py | 2 | ||||
-rw-r--r-- | detectors/lbne.py | 30 | ||||
-rw-r--r-- | geometry.py | 53 | ||||
-rw-r--r-- | gpu.py | 73 | ||||
-rw-r--r-- | likelihood.py | 109 | ||||
-rw-r--r-- | photon.py | 12 | ||||
-rw-r--r-- | src/kernel.cu | 27 | ||||
-rwxr-xr-x | view.py | 36 |
8 files changed, 218 insertions, 124 deletions
diff --git a/__init__.py b/__init__.py index d3a7f3d..7387dc2 100644 --- a/__init__.py +++ b/__init__.py @@ -2,7 +2,5 @@ import geometry import materials import transform import stl -import view import photon -import gpu import layout diff --git a/detectors/lbne.py b/detectors/lbne.py index 54e11a9..7cd666a 100644 --- a/detectors/lbne.py +++ b/detectors/lbne.py @@ -1,3 +1,4 @@ +import sys import numpy as np from copy import deepcopy from chroma import layout @@ -5,10 +6,7 @@ from chroma.stl import read_stl from chroma.transform import rotate from chroma.geometry import Geometry, Solid from chroma.materials import glass, h2o -from chroma.photon import uniform_sphere -from chroma.gpu import GPU from itertools import product -from histogram import * endcap_spacing = .485 @@ -19,7 +17,6 @@ nstrings = 324//10 pmts_per_string = 102//10 class LBNE(Geometry): - """Miniature version of the LBNE water Cerenkov detector geometry.""" def __init__(self): super(LBNE, self).__init__() @@ -53,28 +50,3 @@ class LBNE(Geometry): pmt.mesh = rotate(pmt.mesh, +np.pi/2, (0,1,0)) pmt.mesh += (x,y,-height/2-height/(pmts_per_string-1)/2) self.add_solid(pmt) - - def load_geometry(self): - self.gpu = GPU() - self.texrefs = self.gpu.load_geometry(self) - self.propagate = self.gpu.get_function('propagate') - -if __name__ == '__main__': - import sys - import optparse - import pickle - - parser = optparse.OptionParser('prog filename') - parser.add_option('-b', '--bits', type='int', dest='bits', - help='bits for z-ordering space axes', default=6) - options, args = parser.parse_args() - - if len(args) < 1: - sys.exit(parser.format_help()) - - lbne = LBNE() - lbne.build(bits=options.bits) - - f = open(args[0], 'wb') - pickle.dump(lbne, f) - f.close() diff --git a/geometry.py b/geometry.py index 3a11daf..e3748e6 100644 --- a/geometry.py +++ b/geometry.py @@ -1,5 +1,7 @@ import numpy as np from itertools import chain +import pycuda.driver as cuda +from pycuda import gpuarray def compress(data, selectors): """ @@ -235,8 +237,8 @@ class Geometry(object): self.lower_bounds = np.empty((len(nodes),3)) self.upper_bounds = np.empty((len(nodes),3)) - self.child_map = np.empty(len(nodes)) - self.child_len = np.empty(len(nodes)) + self.child_map = np.empty(len(nodes), dtype=np.uint32) + self.child_len = np.empty(len(nodes), dtype=np.uint32) self.first_leaf = None for i, node in enumerate(nodes): @@ -256,3 +258,50 @@ class Geometry(object): if self.first_leaf is None: self.first_leaf = i + + def load(self, module): + """ + Load the bounding volume hierarchy onto the GPU module, + bind it to the appropriate textures, and return a list + of the texture references. + """ + mesh = np.empty(self.mesh.shape[0]*3, dtype=gpuarray.vec.float4) + mesh['x'] = self.mesh[:,:,0].flatten() + mesh['y'] = self.mesh[:,:,1].flatten() + mesh['z'] = self.mesh[:,:,2].flatten() + + lower_bounds = np.empty(self.lower_bounds.shape[0], dtype=gpuarray.vec.float4) + lower_bounds['x'] = self.lower_bounds[:,0] + lower_bounds['y'] = self.lower_bounds[:,1] + lower_bounds['z'] = self.lower_bounds[:,2] + + upper_bounds = np.empty(self.upper_bounds.shape[0], dtype=gpuarray.vec.float4) + upper_bounds['x'] = self.upper_bounds[:,0] + upper_bounds['y'] = self.upper_bounds[:,1] + upper_bounds['z'] = self.upper_bounds[:,2] + + self.mesh_gpu = cuda.to_device(mesh) + self.lower_bounds_gpu = cuda.to_device(lower_bounds) + self.upper_bounds_gpu = cuda.to_device(upper_bounds) + self.child_map_gpu = cuda.to_device(self.child_map) + self.child_len_gpu = cuda.to_device(self.child_len) + + mesh_tex = module.get_texref('mesh') + lower_bounds_tex = module.get_texref('lower_bounds') + upper_bounds_tex = module.get_texref('upper_bounds') + child_map_tex = module.get_texref('child_map_arr') + child_len_tex = module.get_texref('child_len_arr') + + mesh_tex.set_address(self.mesh_gpu, mesh.nbytes) + lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes) + upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes) + child_map_tex.set_address(self.child_map_gpu, self.child_map.nbytes) + child_len_tex.set_address(self.child_len_gpu, self.child_len.nbytes) + + mesh_tex.set_format(cuda.array_format.FLOAT, 4) + lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) + upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) + child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + + return [mesh_tex, lower_bounds_tex, upper_bounds_tex, child_map_tex, child_len_tex] @@ -1,73 +0,0 @@ -import os -import numpy as np -from pycuda import autoinit -from pycuda.compiler import SourceModule -import pycuda.driver as cuda -from pycuda import gpuarray -import layout - -float3 = gpuarray.vec.float3 -float4 = gpuarray.vec.float4 - -source = open(layout.source + '/kernel.cu').read() - -def make_vector(arr, dtype=float3): - if len(arr.shape) != 2 or arr.shape[-1] != 3: - raise Exception('shape mismatch') - - v = np.empty(arr.shape[0], dtype) - v['x'] = arr[:,0] - v['y'] = arr[:,1] - v['z'] = arr[:,2] - - return v - -class GPU(object): - """ - Object to handle all of the texture allocation/referencing when loading - a geometry onto the GPU. - """ - def __init__(self): - print 'device %s' % autoinit.device.name() - - self.module = SourceModule(source, options=['-I' + layout.source], - no_extern_c=True, cache_dir=False) - - self.get_function = self.module.get_function - - def load_geometry(self, geometry): - """ - Load all the textures from `geometry` onto the GPU and return a list - of texture references. - """ - self.mesh_vec = make_vector(geometry.mesh.reshape(geometry.mesh.shape[0]*3,3), float4) - self.lower_bounds_vec = make_vector(geometry.lower_bounds, float4) - self.upper_bounds_vec = make_vector(geometry.upper_bounds, float4) - self.uchild_map = geometry.child_map.astype(np.uint32) - self.uchild_len = geometry.child_len.astype(np.uint32) - - self.mesh_gpu = cuda.to_device(self.mesh_vec) - self.lower_bounds_gpu = cuda.to_device(self.lower_bounds_vec) - self.upper_bounds_gpu = cuda.to_device(self.upper_bounds_vec) - self.child_map_gpu = cuda.to_device(self.uchild_map) - self.child_len_gpu = cuda.to_device(self.uchild_len) - - self.mesh_tex = self.module.get_texref('mesh') - self.lower_bounds_tex = self.module.get_texref('lower_bounds') - self.upper_bounds_tex = self.module.get_texref('upper_bounds') - self.child_map_tex = self.module.get_texref('child_map_arr') - self.child_len_tex = self.module.get_texref('child_len_arr') - - self.mesh_tex.set_address(self.mesh_gpu, self.mesh_vec.nbytes) - self.lower_bounds_tex.set_address(self.lower_bounds_gpu, self.lower_bounds_vec.nbytes) - self.upper_bounds_tex.set_address(self.upper_bounds_gpu, self.upper_bounds_vec.nbytes) - self.child_map_tex.set_address(self.child_map_gpu, self.uchild_map.nbytes) - self.child_len_tex.set_address(self.child_len_gpu, self.uchild_len.nbytes) - - self.mesh_tex.set_format(cuda.array_format.FLOAT, 4) - self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) - self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) - self.child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - self.child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - - return [self.mesh_tex, self.lower_bounds_tex, self.upper_bounds_tex, self.child_map_tex, self.child_len_tex] diff --git a/likelihood.py b/likelihood.py new file mode 100644 index 0000000..ae712d8 --- /dev/null +++ b/likelihood.py @@ -0,0 +1,109 @@ +import sys +import numpy as np + +from pycuda import autoinit +from pycuda.compiler import SourceModule +from pycuda import gpuarray +import pycuda.driver as cuda + +from uncertainties import ufloat, umath + +from detectors import LBNE + +from histogram import Histogram + +from photon import uniform_sphere + +import layout + +print 'device %s' % autoinit.device.name() + +source = open(layout.source + '/kernel.cu').read() +module = SourceModule(source, options=['-I' + layout.source], no_extern_c=True, cache_dir=False) + +geometry = LBNE() +geometry.build(bits=6) +texrefs = geometry.load(module) + +propagate = module.get_function('propagate') + +nblocks = 64 + +def generate_event(z=0.0, nphotons=1000): + origins = np.tile(gpuarray.vec.make_float3(0,0,z), (nphotons,1)) + origins_gpu = cuda.to_device(origins) + + directions = uniform_sphere(nphotons) + directions_float3 = np.empty(nphotons, dtype=gpuarray.vec.float3) + directions_float3['x'] = directions[:,0] + directions_float3['y'] = directions[:,1] + directions_float3['z'] = directions[:,2] + directions_gpu = cuda.to_device(directions_float3) + + dest = np.empty(nphotons, dtype=np.int32) + dest_gpu = cuda.to_device(dest) + + propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1)) + + cuda.memcpy_dtoh(dest, dest_gpu) + + triangles = dest[(dest != -1)] + + event_bincount = np.zeros(len(geometry.solids)) + gpu_bincount = np.bincount(geometry.solid_index[triangles]) + event_bincount[:gpu_bincount.size] = gpu_bincount + + return event_bincount + +def likelihood(event_bincount, z=0.0, nphotons=1000, neval=1000): + origins = np.tile(gpuarray.vec.make_float3(0,0,z), (nphotons,1)) + origins_gpu = cuda.to_device(origins) + + bincount = np.zeros((neval, len(geometry.solids))) + for i in range(neval): + print '\reval %i' % i, + sys.stdout.flush() + + directions = uniform_sphere(nphotons) + directions_float3 = np.empty(nphotons, dtype=gpuarray.vec.float3) + directions_float3['x'] = directions[:,0] + directions_float3['y'] = directions[:,1] + directions_float3['z'] = directions[:,2] + directions_gpu = cuda.to_device(directions_float3) + + dest = np.empty(nphotons, dtype=np.int32) + dest_gpu = cuda.to_device(dest) + + propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1)) + + cuda.memcpy_dtoh(dest, dest_gpu) + + triangles = dest[(dest != -1)] + + gpu_bincount = np.bincount(geometry.solid_index[triangles]) + bincount[i][:gpu_bincount.size] = gpu_bincount + print + + histograms = [] + for i in range(len(geometry.solids)): + h = Histogram(100, (-0.5, 99.5)) + h.fill(bincount[:,i]) + h.normalize() + histograms.append(h) + + log_likelihood = ufloat((0,0)) + for i, h in enumerate(histograms): + probability = ufloat((h.eval(event_bincount[i]), h.eval_err(event_bincount[i]))) + + if probability.nominal_value == 0.0: + return None + + log_likelihood += umath.log(probability) + + return -log_likelihood + +if __name__ == '__main__': + event_bincount = generate_event() + + for z in np.arange(-2.5,2.5,0.1): + print 'z, likelihood = (%f, %s)' % (z, likelihood(event_bincount,z)) @@ -1,6 +1,6 @@ import numpy as np -def uniform_sphere(size=None): +def uniform_sphere(size=None, dtype=np.double): """ Generate random points isotropically distributed across the unit sphere. @@ -17,13 +17,13 @@ def uniform_sphere(size=None): c = np.sqrt(1-u**2) - points = np.empty((x.size, 3)) + if size is None: + return np.array([c*np.cos(theta), c*np.sin(theta), u]) + + points = np.empty((size, 3), dtype) points[:,0] = c*np.cos(theta) points[:,1] = c*np.sin(theta) points[:,2] = u - if size is None: - return points[0] - else: - return points + return points diff --git a/src/kernel.cu b/src/kernel.cu index ed53032..0d5e54b 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -1,5 +1,6 @@ //-*-c-*- #include <math_constants.h> +#include <curand_kernel.h> #include "linalg.h" #include "matrix.h" @@ -111,9 +112,30 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con return triangle_idx; } +__device__ curandState rng_states[256*512]; + extern "C" { +/* Initialize random number states */ +__global__ void init_rng(unsigned long long seed, unsigned long long offset) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + curand_init(seed, idx, offset, rng_states+idx); +} + +/* */ +__global__ void uniform_sphere(int max_idx, float3 *points) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + + if (idx > max_idx) + return; + + //curandState rng = *(rng_states+idx); + +} + /* Translate `points` by the vector `v` */ __global__ void translate(int max_idx, float3 *points, float3 v) { @@ -176,7 +198,7 @@ __global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int intersects the mesh set the hit_solid array value associated with the photon to the triangle index of the triangle the photon intersected, else set the hit_solid array value to -1. */ -__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *hit_solids) +__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *hit_triangles) { int idx = blockIdx.x*blockDim.x + threadIdx.x; @@ -187,7 +209,8 @@ __global__ void propagate(int max_idx, float3 *origins, float3 *directions, int float3 direction = *(directions+idx); direction /= norm(direction); - *(hit_solids+idx) = intersect_mesh(origin, direction, first_leaf); + *(hit_triangles+idx) = intersect_mesh(origin, direction, first_leaf); + } // propagate } // extern "c" @@ -4,11 +4,14 @@ import numpy as np import pygame from pygame.locals import * +import layout from camera import * from geometry import * from transform import * -from gpu import * +from pycuda import autoinit +from pycuda.compiler import SourceModule +from pycuda import gpuarray def view(geometry, name=''): """ @@ -23,11 +26,14 @@ def view(geometry, name=''): scale = np.linalg.norm(upper_bound-lower_bound) - gpu = GPU() - gpu.load_geometry(geometry) - cuda_raytrace = gpu.get_function('ray_trace') - cuda_rotate = gpu.get_function('rotate') - cuda_translate = gpu.get_function('translate') + print 'device %s' % autoinit.device.name() + + source = open(layout.source + '/kernel.cu').read() + module = SourceModule(source, options=['-I' + layout.source], no_extern_c=True, cache_dir=False) + texrefs = geometry.load(module) + cuda_raytrace = module.get_function('ray_trace') + cuda_rotate = module.get_function('rotate') + cuda_translate = module.get_function('translate') pygame.init() size = width, height = 800, 600 @@ -44,14 +50,24 @@ def view(geometry, name=''): camera.position(point) - origin, direction = camera.get_rays() + origins, directions = camera.get_rays() + + origins_float3 = np.empty(origins.shape[0], dtype=gpuarray.vec.float3) + origins_float3['x'] = origins[:,0] + origins_float3['y'] = origins[:,1] + origins_float3['z'] = origins[:,2] + + directions_float3 = np.empty(directions.shape[0], dtype=gpuarray.vec.float3) + directions_float3['x'] = directions[:,0] + directions_float3['y'] = directions[:,1] + directions_float3['z'] = directions[:,2] + + origins_gpu = cuda.to_device(origins_float3) + directions_gpu = cuda.to_device(directions_float3) pixels = np.empty(width*height, dtype=np.int32) pixels_gpu = cuda.to_device(pixels) - origins_gpu = cuda.to_device(make_vector(origin)) - directions_gpu = cuda.to_device(make_vector(direction)) - nblocks = 64 gpu_kwargs = {'block': (nblocks,1,1), 'grid':(pixels.size/nblocks+1,1)} |