8 files changed, 218 insertions, 124 deletions
diff --git a/__init__.py b/__init__.py
index d3a7f3d..7387dc2 100644
--- a/__init__.py
+++ b/__init__.py
@@ -2,7 +2,5 @@ import geometry
 import materials
 import transform
 import stl
-import view
 import photon
-import gpu
 import layout
diff --git a/detectors/lbne.py b/detectors/lbne.py
index 54e11a9..7cd666a 100644
--- a/detectors/lbne.py
+++ b/detectors/lbne.py
@@ -1,3 +1,4 @@
+import sys
 import numpy as np
 from copy import deepcopy
 from chroma import layout
@@ -5,10 +6,7 @@ from chroma.stl import read_stl
 from chroma.transform import rotate
 from chroma.geometry import Geometry, Solid
 from chroma.materials import glass, h2o
-from chroma.photon import uniform_sphere
-from chroma.gpu import GPU
 from itertools import product
-from histogram import *
 
 endcap_spacing = .485
 
@@ -19,7 +17,6 @@ nstrings = 324//10
 pmts_per_string = 102//10
 
 class LBNE(Geometry):
-    """Miniature version of the LBNE water Cerenkov detector geometry."""
     def __init__(self):
         super(LBNE, self).__init__()
 
@@ -53,28 +50,3 @@ class LBNE(Geometry):
                 pmt.mesh = rotate(pmt.mesh, +np.pi/2, (0,1,0))
                 pmt.mesh += (x,y,-height/2-height/(pmts_per_string-1)/2)
                 self.add_solid(pmt)
-
-    def load_geometry(self):
-        self.gpu = GPU()
-        self.texrefs = self.gpu.load_geometry(self)
-        self.propagate = self.gpu.get_function('propagate')
-
-if __name__ == '__main__':
-    import sys
-    import optparse
-    import pickle
-
-    parser = optparse.OptionParser('prog filename')
-    parser.add_option('-b', '--bits', type='int', dest='bits',
-                      help='bits for z-ordering space axes', default=6)
-    options, args = parser.parse_args()
-
-    if len(args) < 1:
-        sys.exit(parser.format_help())
-
-    lbne = LBNE()
-    lbne.build(bits=options.bits)
-
-    f = open(args[0], 'wb')
-    pickle.dump(lbne, f)
-    f.close()
diff --git a/geometry.py b/geometry.py
index 3a11daf..e3748e6 100644
--- a/geometry.py
+++ b/geometry.py
@@ -1,5 +1,7 @@
 import numpy as np
 from itertools import chain
+import pycuda.driver as cuda
+from pycuda import gpuarray
 
 def compress(data, selectors):
     """
@@ -235,8 +237,8 @@ class Geometry(object):
 
         self.lower_bounds = np.empty((len(nodes),3))
         self.upper_bounds = np.empty((len(nodes),3))
-        self.child_map = np.empty(len(nodes))
-        self.child_len = np.empty(len(nodes))
+        self.child_map = np.empty(len(nodes), dtype=np.uint32)
+        self.child_len = np.empty(len(nodes), dtype=np.uint32)
         self.first_leaf = None
 
         for i, node in enumerate(nodes):
@@ -256,3 +258,50 @@ class Geometry(object):
 
                 if self.first_leaf is None:
                     self.first_leaf = i
+
+    def load(self, module):
+        """
+        Load the bounding volume hierarchy onto the GPU module,
+        bind it to the appropriate textures, and return a list
+        of the texture references.
+        """
+        mesh = np.empty(self.mesh.shape[0]*3, dtype=gpuarray.vec.float4)
+        mesh['x'] = self.mesh[:,:,0].flatten()
+        mesh['y'] = self.mesh[:,:,1].flatten()
+        mesh['z'] = self.mesh[:,:,2].flatten()
+
+        lower_bounds = np.empty(self.lower_bounds.shape[0], dtype=gpuarray.vec.float4)
+        lower_bounds['x'] = self.lower_bounds[:,0]
+        lower_bounds['y'] = self.lower_bounds[:,1]
+        lower_bounds['z'] = self.lower_bounds[:,2]
+
+        upper_bounds = np.empty(self.upper_bounds.shape[0], dtype=gpuarray.vec.float4)
+        upper_bounds['x'] = self.upper_bounds[:,0]
+        upper_bounds['y'] = self.upper_bounds[:,1]
+        upper_bounds['z'] = self.upper_bounds[:,2]
+
+        self.mesh_gpu = cuda.to_device(mesh)
+        self.lower_bounds_gpu = cuda.to_device(lower_bounds)
+        self.upper_bounds_gpu = cuda.to_device(upper_bounds)
+        self.child_map_gpu = cuda.to_device(self.child_map)
+        self.child_len_gpu = cuda.to_device(self.child_len)
+
+        mesh_tex = module.get_texref('mesh')
+        lower_bounds_tex = module.get_texref('lower_bounds')
+        upper_bounds_tex = module.get_texref('upper_bounds')
+        child_map_tex = module.get_texref('child_map_arr')
+        child_len_tex = module.get_texref('child_len_arr')
+
+        mesh_tex.set_address(self.mesh_gpu, mesh.nbytes)
+        lower_bounds_tex.set_address(self.lower_bounds_gpu, lower_bounds.nbytes)
+        upper_bounds_tex.set_address(self.upper_bounds_gpu, upper_bounds.nbytes)
+        child_map_tex.set_address(self.child_map_gpu, self.child_map.nbytes)
+        child_len_tex.set_address(self.child_len_gpu, self.child_len.nbytes)
+
+        mesh_tex.set_format(cuda.array_format.FLOAT, 4)
+        lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
+        upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
+        child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+        child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
+
+        return [mesh_tex, lower_bounds_tex, upper_bounds_tex, child_map_tex, child_len_tex]
diff --git a/gpu.py b/gpu.py
deleted file mode 100644
index 6ab525d..0000000
--- a/gpu.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import os
-import numpy as np
-from pycuda import autoinit
-from pycuda.compiler import SourceModule
-import pycuda.driver as cuda
-from pycuda import gpuarray
-import layout
-
-float3 = gpuarray.vec.float3
-float4 = gpuarray.vec.float4
-
-source = open(layout.source + '/kernel.cu').read()
-
-def make_vector(arr, dtype=float3):
-    if len(arr.shape) != 2 or arr.shape[-1] != 3:
-        raise Exception('shape mismatch')
-
-    v = np.empty(arr.shape[0], dtype)
-    v['x'] = arr[:,0]
-    v['y'] = arr[:,1]
-    v['z'] = arr[:,2]
-
-    return v
-
-class GPU(object):
-    """
-    Object to handle all of the texture allocation/referencing when loading
-    a geometry onto the GPU.
-    """
-    def __init__(self):
-        print 'device %s' % autoinit.device.name()
-
-        self.module = SourceModule(source, options=['-I' + layout.source],
-                                   no_extern_c=True, cache_dir=False)
-
-        self.get_function = self.module.get_function
-
-    def load_geometry(self, geometry):
-        """
-        Load all the textures from `geometry` onto the GPU and return a list
-        of texture references.
-        """
-        self.mesh_vec = make_vector(geometry.mesh.reshape(geometry.mesh.shape[0]*3,3), float4)
-        self.lower_bounds_vec = make_vector(geometry.lower_bounds, float4)
-        self.upper_bounds_vec = make_vector(geometry.upper_bounds, float4)
-        self.uchild_map = geometry.child_map.astype(np.uint32)
-        self.uchild_len = geometry.child_len.astype(np.uint32)
-
-        self.mesh_gpu = cuda.to_device(self.mesh_vec)
-        self.lower_bounds_gpu = cuda.to_device(self.lower_bounds_vec)
-        self.upper_bounds_gpu = cuda.to_device(self.upper_bounds_vec)
-        self.child_map_gpu = cuda.to_device(self.uchild_map)
-        self.child_len_gpu = cuda.to_device(self.uchild_len)
-
-        self.mesh_tex = self.module.get_texref('mesh')
-        self.lower_bounds_tex = self.module.get_texref('lower_bounds')
-        self.upper_bounds_tex = self.module.get_texref('upper_bounds')
-        self.child_map_tex = self.module.get_texref('child_map_arr')
-        self.child_len_tex = self.module.get_texref('child_len_arr')
-        
-        self.mesh_tex.set_address(self.mesh_gpu, self.mesh_vec.nbytes)
-        self.lower_bounds_tex.set_address(self.lower_bounds_gpu, self.lower_bounds_vec.nbytes)
-        self.upper_bounds_tex.set_address(self.upper_bounds_gpu, self.upper_bounds_vec.nbytes)
-        self.child_map_tex.set_address(self.child_map_gpu, self.uchild_map.nbytes)
-        self.child_len_tex.set_address(self.child_len_gpu, self.uchild_len.nbytes)
-
-        self.mesh_tex.set_format(cuda.array_format.FLOAT, 4)
-        self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
-        self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4)
-        self.child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-        self.child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
-
-        return [self.mesh_tex, self.lower_bounds_tex, self.upper_bounds_tex, self.child_map_tex, self.child_len_tex]
diff --git a/likelihood.py b/likelihood.py
new file mode 100644
index 0000000..ae712d8
--- /dev/null
+++ b/likelihood.py
@@ -0,0 +1,109 @@
+import sys
+import numpy as np
+
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+from pycuda import gpuarray
+import pycuda.driver as cuda
+
+from uncertainties import ufloat, umath
+
+from detectors import LBNE
+
+from histogram import Histogram
+
+from photon import uniform_sphere
+
+import layout
+
+print 'device %s' % autoinit.device.name()
+
+source = open(layout.source + '/kernel.cu').read()
+module = SourceModule(source, options=['-I' + layout.source], no_extern_c=True, cache_dir=False)
+
+geometry = LBNE()
+geometry.build(bits=6)
+texrefs = geometry.load(module)
+
+propagate = module.get_function('propagate')
+
+nblocks = 64
+
+def generate_event(z=0.0, nphotons=1000):
+    origins = np.tile(gpuarray.vec.make_float3(0,0,z), (nphotons,1))
+    origins_gpu = cuda.to_device(origins)
+
+    directions = uniform_sphere(nphotons)
+    directions_float3 = np.empty(nphotons, dtype=gpuarray.vec.float3)
+    directions_float3['x'] = directions[:,0]
+    directions_float3['y'] = directions[:,1]
+    directions_float3['z'] = directions[:,2]
+    directions_gpu = cuda.to_device(directions_float3)
+
+    dest = np.empty(nphotons, dtype=np.int32)
+    dest_gpu = cuda.to_device(dest)
+
+    propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1))
+
+    cuda.memcpy_dtoh(dest, dest_gpu)
+
+    triangles = dest[(dest != -1)]
+
+    event_bincount = np.zeros(len(geometry.solids))
+    gpu_bincount = np.bincount(geometry.solid_index[triangles])
+    event_bincount[:gpu_bincount.size] = gpu_bincount
+
+    return event_bincount
+
+def likelihood(event_bincount, z=0.0, nphotons=1000, neval=1000):
+    origins = np.tile(gpuarray.vec.make_float3(0,0,z), (nphotons,1))
+    origins_gpu = cuda.to_device(origins)
+
+    bincount = np.zeros((neval, len(geometry.solids)))
+    for i in range(neval):
+        print '\reval %i' % i,
+        sys.stdout.flush()
+
+        directions = uniform_sphere(nphotons)
+        directions_float3 = np.empty(nphotons, dtype=gpuarray.vec.float3)
+        directions_float3['x'] = directions[:,0]
+        directions_float3['y'] = directions[:,1]
+        directions_float3['z'] = directions[:,2]
+        directions_gpu = cuda.to_device(directions_float3)
+
+        dest = np.empty(nphotons, dtype=np.int32)
+        dest_gpu = cuda.to_device(dest)
+
+        propagate(np.int32(nphotons), origins_gpu, directions_gpu, np.int32(geometry.first_leaf), dest_gpu, block=(nblocks,1,1), grid=(nphotons//nblocks+1,1))
+
+        cuda.memcpy_dtoh(dest, dest_gpu)
+
+        triangles = dest[(dest != -1)]
+
+        gpu_bincount = np.bincount(geometry.solid_index[triangles])
+        bincount[i][:gpu_bincount.size] = gpu_bincount
+    print
+
+    histograms = []
+    for i in range(len(geometry.solids)):
+        h = Histogram(100, (-0.5, 99.5))
+        h.fill(bincount[:,i])
+        h.normalize()
+        histograms.append(h)
+
+    log_likelihood = ufloat((0,0))
+    for i, h in enumerate(histograms):
+        probability = ufloat((h.eval(event_bincount[i]), h.eval_err(event_bincount[i])))
+
+        if probability.nominal_value == 0.0:
+            return None
+
+        log_likelihood += umath.log(probability)
+
+    return -log_likelihood
+
+if __name__ == '__main__':
+    event_bincount = generate_event()
+
+    for z in np.arange(-2.5,2.5,0.1):
+        print 'z, likelihood = (%f, %s)' % (z, likelihood(event_bincount,z))
diff --git a/photon.py b/photon.py
index d1574ea..f8ece6a 100644
--- a/photon.py
+++ b/photon.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-def uniform_sphere(size=None):
+def uniform_sphere(size=None, dtype=np.double):
     """
     Generate random points isotropically distributed across the unit sphere.
 
@@ -17,13 +17,13 @@ def uniform_sphere(size=None):
 
     c = np.sqrt(1-u**2)
 
-    points = np.empty((x.size, 3))
+    if size is None:
+        return np.array([c*np.cos(theta), c*np.sin(theta), u])
+
+    points = np.empty((size, 3), dtype)
 
     points[:,0] = c*np.cos(theta)
     points[:,1] = c*np.sin(theta)
     points[:,2] = u
 
-    if size is None:
-        return points[0]
-    else:
-        return points
+    return points
diff --git a/src/kernel.cu b/src/kernel.cu
index ed53032..0d5e54b 100644
--- a/src/kernel.cu
+++ b/src/kernel.cu
@@ -1,5 +1,6 @@
 //-*-c-*-
 #include <math_constants.h>
+#include <curand_kernel.h>
 
 #include "linalg.h"
 #include "matrix.h"
@@ -111,9 +112,30 @@ __device__ int intersect_mesh(const float3 &origin, const float3& direction, con
 	return triangle_idx;
 }
 
+__device__ curandState rng_states[256*512];
+
 extern "C"
 {
 
+/* Initialize random number states */
+__global__ void init_rng(unsigned long long seed, unsigned long long offset)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	curand_init(seed, idx, offset, rng_states+idx);
+}
+
+/* */
+__global__ void uniform_sphere(int max_idx, float3 *points)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+
+	if (idx > max_idx)
+		return;
+
+	//curandState rng = *(rng_states+idx);
+
+}
+
 /* Translate `points` by the vector `v` */
 __global__ void translate(int max_idx, float3 *points, float3 v)
 {
@@ -176,7 +198,7 @@ __global__ void ray_trace(int max_idx, float3 *origins, float3 *directions, int
    intersects the mesh set the hit_solid array value associated with the
    photon to the triangle index of the triangle the photon intersected, else
    set the hit_solid array value to -1. */
-__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *hit_solids)
+__global__ void propagate(int max_idx, float3 *origins, float3 *directions, int first_leaf, int *hit_triangles)
 {
 	int idx = blockIdx.x*blockDim.x + threadIdx.x;
 
@@ -187,7 +209,8 @@ __global__ void propagate(int max_idx, float3 *origins, float3 *directions, int
 	float3 direction = *(directions+idx);
 	direction /= norm(direction);
 
-	*(hit_solids+idx) = intersect_mesh(origin, direction, first_leaf);
+	*(hit_triangles+idx) = intersect_mesh(origin, direction, first_leaf);
+
 } // propagate
 
 } // extern "c"
diff --git a/view.py b/view.py
index 2c37d2e..7e9e815 100755
--- a/view.py
+++ b/view.py
@@ -4,11 +4,14 @@ import numpy as np
 import pygame
 from pygame.locals import *
 
+import layout
 from camera import *
 from geometry import *
 from transform import *
-from gpu import *
 
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+from pycuda import gpuarray
 
 def view(geometry, name=''):
     """
@@ -23,11 +26,14 @@ def view(geometry, name=''):
 
     scale = np.linalg.norm(upper_bound-lower_bound)
 
-    gpu = GPU()
-    gpu.load_geometry(geometry)
-    cuda_raytrace = gpu.get_function('ray_trace')
-    cuda_rotate = gpu.get_function('rotate')
-    cuda_translate = gpu.get_function('translate')
+    print 'device %s' % autoinit.device.name()
+
+    source = open(layout.source + '/kernel.cu').read()
+    module = SourceModule(source, options=['-I' + layout.source], no_extern_c=True, cache_dir=False)
+    texrefs = geometry.load(module)
+    cuda_raytrace = module.get_function('ray_trace')
+    cuda_rotate = module.get_function('rotate')
+    cuda_translate = module.get_function('translate')
 
     pygame.init()
     size = width, height = 800, 600
@@ -44,14 +50,24 @@ def view(geometry, name=''):
 
     camera.position(point)
 
-    origin, direction = camera.get_rays()
+    origins, directions = camera.get_rays()
+
+    origins_float3 = np.empty(origins.shape[0], dtype=gpuarray.vec.float3)
+    origins_float3['x'] = origins[:,0]
+    origins_float3['y'] = origins[:,1]
+    origins_float3['z'] = origins[:,2]
+
+    directions_float3 = np.empty(directions.shape[0], dtype=gpuarray.vec.float3)
+    directions_float3['x'] = directions[:,0]
+    directions_float3['y'] = directions[:,1]
+    directions_float3['z'] = directions[:,2]
+
+    origins_gpu = cuda.to_device(origins_float3)
+    directions_gpu = cuda.to_device(directions_float3)
 
     pixels = np.empty(width*height, dtype=np.int32)
     pixels_gpu = cuda.to_device(pixels)
 
-    origins_gpu = cuda.to_device(make_vector(origin))
-    directions_gpu = cuda.to_device(make_vector(direction))
-
     nblocks = 64
 
     gpu_kwargs = {'block': (nblocks,1,1), 'grid':(pixels.size/nblocks+1,1)}