Move CUDA source inside chroma package, rename tests directory to test

author: Stan Seibert <stan@mtrr.org> 2011-09-16 15:02:02 -0400
committer: Stan Seibert <stan@mtrr.org> 2011-09-16 15:02:02 -0400
commit: 142b3c3caff164deb9bc7b2848e58e52387723ff (patch)
tree: 417da3ad69a2756aff7a21dca4b08733d3e87afb /test
parent: 084dfd08b714faefaea77cb7dc04d2e93dc04b1d (diff)
download: chroma-142b3c3caff164deb9bc7b2848e58e52387723ff.tar.gz
chroma-142b3c3caff164deb9bc7b2848e58e52387723ff.tar.bz2
chroma-142b3c3caff164deb9bc7b2848e58e52387723ff.zip
17 files changed, 1309 insertions, 0 deletions
diff --git a/test/__init__.py b/test/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/__init__.py
diff --git a/test/data/ray_intersection.npz b/test/data/ray_intersection.npz
new file mode 100644
index 0000000..fba0e40
--- /dev/null
+++ b/test/data/ray_intersection.npz
diff --git a/test/linalg_test.cu b/test/linalg_test.cu
new file mode 100644
index 0000000..4e9c983
--- /dev/null
+++ b/test/linalg_test.cu
@@ -0,0 +1,128 @@
+//-*-c-*-
+
+#include "linalg.h"
+
+extern "C"
+{
+
+__global__ void float3add(float3 *a, float3 *b, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] + b[idx];
+}
+
+__global__ void float3addequal(float3 *a, float3 *b)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] += b[idx];
+}
+
+__global__ void float3sub(float3 *a, float3 *b, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] - b[idx];
+}
+
+__global__ void float3subequal(float3 *a, float3 *b)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] -= b[idx];
+}
+
+__global__ void float3addfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] + c;
+}
+
+__global__ void float3addfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] += c;
+}
+
+__global__ void floataddfloat3(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = c + a[idx];
+}
+
+__global__ void float3subfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] - c;
+}
+
+__global__ void float3subfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] -= c;
+}
+
+__global__ void floatsubfloat3(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = c - a[idx];
+}
+
+__global__ void float3mulfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx]*c;
+}
+
+__global__ void float3mulfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] *= c;
+}
+
+__global__ void floatmulfloat3(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = c*a[idx];
+}
+
+__global__ void float3divfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx]/c;
+}
+
+__global__ void float3divfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] /= c;
+}
+
+__global__ void floatdivfloat3(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = c/a[idx];
+}
+
+__global__ void dot(float3 *a, float3 *b, float *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = dot(a[idx],b[idx]);
+}
+
+__global__ void cross(float3 *a, float3 *b, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = cross(a[idx],b[idx]);
+}
+
+__global__ void norm(float3 *a, float *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = norm(a[idx]);
+}
+
+__global__ void minusfloat3(float3 *a, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = -a[idx];
+}
+
+} // extern "c"
diff --git a/test/linalg_test.py b/test/linalg_test.py
new file mode 100644
index 0000000..31688d9
--- /dev/null
+++ b/test/linalg_test.py
@@ -0,0 +1,214 @@
+import os
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+float3 = gpuarray.vec.float3
+
+print 'device %s' % autoinit.device.name()
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/linalg_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+float3add = mod.get_function('float3add')
+float3addequal = mod.get_function('float3addequal')
+float3sub = mod.get_function('float3sub')
+float3subequal = mod.get_function('float3subequal')
+float3addfloat = mod.get_function('float3addfloat')
+float3addfloatequal = mod.get_function('float3addfloatequal')
+floataddfloat3 = mod.get_function('floataddfloat3')
+float3subfloat = mod.get_function('float3subfloat')
+float3subfloatequal = mod.get_function('float3subfloatequal')
+floatsubfloat3 = mod.get_function('floatsubfloat3')
+float3mulfloat = mod.get_function('float3mulfloat')
+float3mulfloatequal = mod.get_function('float3mulfloatequal')
+floatmulfloat3 = mod.get_function('floatmulfloat3')
+float3divfloat = mod.get_function('float3divfloat')
+float3divfloatequal = mod.get_function('float3divfloatequal')
+floatdivfloat3 = mod.get_function('floatdivfloat3')
+dot = mod.get_function('dot')
+cross = mod.get_function('cross')
+norm = mod.get_function('norm')
+minusfloat3 = mod.get_function('minusfloat3')
+
+size = {'block': (256,1,1), 'grid': (1,1)}
+
+a = np.empty(size['block'][0], dtype=float3)
+b = np.empty(size['block'][0], dtype=float3)
+c = np.float32(np.random.random_sample())
+
+a['x'] = np.random.random_sample(size=a.size)
+a['y'] = np.random.random_sample(size=a.size)
+a['z'] = np.random.random_sample(size=a.size)
+
+b['x'] = np.random.random_sample(size=b.size)
+b['y'] = np.random.random_sample(size=b.size)
+b['z'] = np.random.random_sample(size=b.size)
+
+def testfloat3add():
+    dest = np.empty(a.size, dtype=float3)
+    float3add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+    if not np.allclose(a['x']+b['x'], dest['x']) or \
+            not np.allclose(a['y']+b['y'], dest['y']) or \
+            not np.allclose(a['z']+b['z'], dest['z']):
+        assert False
+
+def testfloat3sub():
+    dest = np.empty(a.size, dtype=float3)
+    float3sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+    if not np.allclose(a['x']-b['x'], dest['x']) or \
+            not np.allclose(a['y']-b['y'], dest['y']) or \
+            not np.allclose(a['z']-b['z'], dest['z']):
+        assert False
+
+def testfloat3addequal():
+    dest = np.copy(a)
+    float3addequal(cuda.InOut(dest), cuda.In(b), **size)
+    if not np.allclose(a['x']+b['x'], dest['x']) or \
+            not np.allclose(a['y']+b['y'], dest['y']) or \
+            not np.allclose(a['z']+b['z'], dest['z']):
+        assert False
+
+def testfloat3subequal():
+    dest = np.copy(a)
+    float3subequal(cuda.InOut(dest), cuda.In(b), **size)
+    if not np.allclose(a['x']-b['x'], dest['x']) or \
+            not np.allclose(a['y']-b['y'], dest['y']) or \
+            not np.allclose(a['z']-b['z'], dest['z']):
+        assert False
+
+def testfloat3addfloat():
+    dest = np.empty(a.size, dtype=float3)
+    float3addfloat(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(a['x']+c, dest['x']) or \
+            not np.allclose(a['y']+c, dest['y']) or \
+            not np.allclose(a['z']+c, dest['z']):
+        assert False
+
+def testfloat3addfloatequal():
+    dest = np.copy(a)
+    float3addfloatequal(cuda.InOut(dest), c, **size)
+    if not np.allclose(a['x']+c, dest['x']) or \
+            not np.allclose(a['y']+c, dest['y']) or \
+            not np.allclose(a['z']+c, dest['z']):
+        assert False
+
+def testfloataddfloat3():
+    dest = np.empty(a.size, dtype=float3)
+    floataddfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(c+a['x'], dest['x']) or \
+            not np.allclose(c+a['y'], dest['y']) or \
+            not np.allclose(c+a['z'], dest['z']):
+        assert False
+
+def testfloat3subfloat():
+    dest = np.empty(a.size, dtype=float3)
+    float3subfloat(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(a['x']-c, dest['x']) or \
+            not np.allclose(a['y']-c, dest['y']) or \
+            not np.allclose(a['z']-c, dest['z']):
+        assert False
+
+def testfloat3subfloatequal():
+    dest = np.copy(a)
+    float3subfloatequal(cuda.InOut(dest), c, **size)
+    if not np.allclose(a['x']-c, dest['x']) or \
+            not np.allclose(a['y']-c, dest['y']) or \
+            not np.allclose(a['z']-c, dest['z']):
+        assert False
+
+def testfloatsubfloat3():
+    dest = np.empty(a.size, dtype=float3)
+    floatsubfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(c-a['x'], dest['x']) or \
+            not np.allclose(c-a['y'], dest['y']) or \
+            not np.allclose(c-a['z'], dest['z']):
+        assert False
+
+def testfloat3mulfloat():
+    dest = np.empty(a.size, dtype=float3)
+    float3mulfloat(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(a['x']*c, dest['x']) or \
+            not np.allclose(a['y']*c, dest['y']) or \
+            not np.allclose(a['z']*c, dest['z']):
+        assert False
+
+def testfloat3mulfloatequal():
+    dest = np.copy(a)
+    float3mulfloatequal(cuda.InOut(dest), c, **size)
+    if not np.allclose(a['x']*c, dest['x']) or \
+            not np.allclose(a['y']*c, dest['y']) or \
+            not np.allclose(a['z']*c, dest['z']):
+        assert False
+
+def testfloatmulfloat3():
+    dest = np.empty(a.size, dtype=float3)
+    floatmulfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(c*a['x'], dest['x']) or \
+            not np.allclose(c*a['y'], dest['y']) or \
+            not np.allclose(c*a['z'], dest['z']):
+        assert False
+
+def testfloat3divfloat():
+    dest = np.empty(a.size, dtype=float3)
+    float3divfloat(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(a['x']/c, dest['x']) or \
+            not np.allclose(a['y']/c, dest['y']) or \
+            not np.allclose(a['z']/c, dest['z']):
+        assert False
+
+def testfloat3divfloatequal():
+    dest = np.copy(a)
+    float3divfloatequal(cuda.InOut(dest), c, **size)
+    if not np.allclose(a['x']/c, dest['x']) or \
+            not np.allclose(a['y']/c, dest['y']) or \
+            not np.allclose(a['z']/c, dest['z']):
+        assert False
+
+def testfloatdivfloat3():
+    dest = np.empty(a.size, dtype=float3)
+    floatdivfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+    if not np.allclose(c/a['x'], dest['x']) or \
+            not np.allclose(c/a['y'], dest['y']) or \
+            not np.allclose(c/a['z'], dest['z']):
+        assert false
+
+def testdot():
+    dest = np.empty(a.size, dtype=np.float32)
+    dot(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+    if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], dest):
+        assert False
+
+def testcross():
+    dest = np.empty(a.size, dtype=float3)
+    cross(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+    for u, v, wdest in zip(a,b,dest):
+        w = np.cross((u['x'], u['y'], u['z']),(v['x'],v['y'],v['z']))
+        if not np.allclose(wdest['x'], w[0]) or \
+                not np.allclose(wdest['y'], w[1]) or \
+                not np.allclose(wdest['z'], w[2]):
+            print w
+            print wdest
+            assert False
+
+def testnorm():
+    dest = np.empty(a.size, dtype=np.float32)
+    norm(cuda.In(a), cuda.Out(dest), **size)
+
+    for i in range(len(dest)):
+        if not np.allclose(np.linalg.norm((a['x'][i],a['y'][i],a['z'][i])), dest[i]):
+            assert False
+
+def testminusfloat3():
+    dest = np.empty(a.size, dtype=float3)
+    minusfloat3(cuda.In(a), cuda.Out(dest), **size)
+    if not np.allclose(-a['x'], dest['x']) or \
+            not np.allclose(-a['y'], dest['y']) or \
+            not np.allclose(-a['z'], dest['z']):
+        assert False
diff --git a/test/matrix_test.cu b/test/matrix_test.cu
new file mode 100644
index 0000000..d64cb34
--- /dev/null
+++ b/test/matrix_test.cu
@@ -0,0 +1,152 @@
+//-*-c-*-
+
+#include "matrix.h"
+
+__device__ Matrix array2matrix(float *a)
+{
+	return make_matrix(a[0], a[1], a[2],
+			   a[3], a[4], a[5],
+			   a[6], a[7], a[8]);
+}
+
+__device__ void matrix2array(const Matrix &m, float *a)
+{
+	a[0] = m.a00;
+	a[1] = m.a01;
+	a[2] = m.a02;
+	a[3] = m.a10;
+	a[4] = m.a11;
+	a[5] = m.a12;
+	a[6] = m.a20;
+	a[7] = m.a21;
+	a[8] = m.a22;
+}
+
+extern "C"
+{
+
+__global__ void det(float *a, float *dest)
+{
+	Matrix m = array2matrix(a);
+	dest[0] = det(m);
+}
+
+__global__ void inv(float *a, float *dest)
+{
+	Matrix m = array2matrix(a);
+	matrix2array(inv(m), dest);
+}
+
+__global__ void minusmatrix(float *a, float *dest)
+{
+	matrix2array(-array2matrix(a), dest);
+}
+
+__global__ void matrixadd(float *a, float *b, float *dest)
+{
+	matrix2array(array2matrix(a)+array2matrix(b), dest);
+}
+
+__global__ void matrixsub(float *a, float *b, float *dest)
+{
+	matrix2array(array2matrix(a)-array2matrix(b), dest);
+}
+
+__global__ void matrixmul(float *a, float *b, float *dest)
+{
+	matrix2array(array2matrix(a)*array2matrix(b), dest);
+}
+
+__global__ void multiply(float *a, float3 *x, float3 *dest)
+{
+	dest[0] = array2matrix(a)*x[0];
+}
+
+__global__ void matrixaddfloat(float *a, float c, float *dest)
+{
+	matrix2array(array2matrix(a)+c, dest);
+}
+
+__global__ void matrixsubfloat(float *a, float c, float *dest)
+{
+	matrix2array(array2matrix(a)-c, dest);
+}
+
+__global__ void matrixmulfloat(float *a, float c, float *dest)
+{
+	matrix2array(array2matrix(a)*c, dest);
+}
+
+__global__ void matrixdivfloat(float *a, float c, float *dest)
+{
+	matrix2array(array2matrix(a)/c, dest);
+}
+
+__global__ void floataddmatrix(float *a, float c, float *dest)
+{
+	matrix2array(c+array2matrix(a), dest);
+}
+
+__global__ void floatsubmatrix(float *a, float c, float *dest)
+{
+	matrix2array(c-array2matrix(a), dest);
+}
+
+__global__ void floatmulmatrix(float *a, float c, float *dest)
+{
+	matrix2array(c*array2matrix(a), dest);
+}
+
+__global__ void floatdivmatrix(float *a, float c, float *dest)
+{
+	matrix2array(c/array2matrix(a), dest);
+}
+
+__global__ void matrixaddequals(float *a, float *b)
+{
+	Matrix m = array2matrix(a);
+	m += array2matrix(b);
+	matrix2array(m,a);
+}
+
+__global__ void matrixsubequals(float *a, float *b)
+{
+	Matrix m = array2matrix(a);
+	m -= array2matrix(b);
+	matrix2array(m,a);
+}
+
+__global__ void matrixaddequalsfloat(float *a, float c)
+{
+	Matrix m = array2matrix(a);
+	m += c;
+	matrix2array(m,a);
+}
+
+__global__ void matrixsubequalsfloat(float *a, float c)
+{
+	Matrix m = array2matrix(a);
+	m -= c;
+	matrix2array(m,a);
+}
+
+__global__ void matrixmulequalsfloat(float *a, float c)
+{
+	Matrix m = array2matrix(a);
+	m *= c;
+	matrix2array(m,a);
+}
+
+__global__ void matrixdivequalsfloat(float *a, float c)
+{
+	Matrix m = array2matrix(a);
+	m /= c;
+	matrix2array(m,a);
+}
+
+__global__ void outer(float3 a, float3 b, float* dest)
+{
+	matrix2array(outer(a,b), dest);
+}
+
+} // extern "c"
diff --git a/test/matrix_test.py b/test/matrix_test.py
new file mode 100644
index 0000000..c843025
--- /dev/null
+++ b/test/matrix_test.py
@@ -0,0 +1,327 @@
+import os
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+float3 = gpuarray.vec.float3
+
+print 'device %s' % autoinit.device.name()
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/matrix_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+det = mod.get_function('det')
+inv = mod.get_function('inv')
+matrixadd = mod.get_function('matrixadd')
+matrixsub = mod.get_function('matrixsub')
+matrixmul = mod.get_function('matrixmul')
+multiply = mod.get_function('multiply')
+matrixaddfloat = mod.get_function('matrixaddfloat')
+matrixsubfloat = mod.get_function('matrixsubfloat')
+matrixmulfloat = mod.get_function('matrixmulfloat')
+matrixdivfloat = mod.get_function('matrixdivfloat')
+floataddmatrix = mod.get_function('floataddmatrix')
+floatsubmatrix = mod.get_function('floatsubmatrix')
+floatmulmatrix = mod.get_function('floatmulmatrix')
+floatdivmatrix = mod.get_function('floatdivmatrix')
+matrixaddequals = mod.get_function('matrixaddequals')
+matrixsubequals = mod.get_function('matrixsubequals')
+matrixaddequalsfloat = mod.get_function('matrixaddequalsfloat')
+matrixsubequalsfloat = mod.get_function('matrixsubequalsfloat')
+matrixmulequalsfloat = mod.get_function('matrixmulequalsfloat')
+matrixdivequalsfloat = mod.get_function('matrixdivequalsfloat')
+outer = mod.get_function('outer')
+minusmatrix = mod.get_function('minusmatrix')
+
+size = {'block': (1,1,1), 'grid': (1,1)}
+
+for i in range(1):
+    a = np.random.random_sample(size=9).astype(np.float32)
+    b = np.random.random_sample(size=9).astype(np.float32)
+    dest = np.empty(1, dtype=np.float32)
+    c = np.int32(np.random.random_sample())
+
+    print 'testing det...',
+
+    det(cuda.In(a), cuda.Out(dest), **size)
+
+    if not np.allclose(np.float32(np.linalg.det(a.reshape(3,3))), dest[0]):
+        print 'fail'
+        print np.float32(np.linalg.det(a.reshape(3,3)))
+        print dest[0]
+    else:
+        print 'success'
+
+    print 'testing inv...',
+
+    dest = np.empty(9, dtype=np.float32)
+
+    inv(cuda.In(a), cuda.Out(dest), **size)
+
+    if not np.allclose(np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32), dest):
+        print 'fail'
+        print np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32)
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixadd...',
+
+    matrixadd(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if not np.allclose(a+b, dest):
+        print 'fail'
+        print a+b
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixsub...',
+
+    matrixsub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if not np.allclose(a-b, dest):
+        print 'fail'
+        print a-b
+        print dest
+    else:
+        print 'sucess'
+
+    print 'testing matrixmul...',
+
+    matrixmul(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if not np.allclose(np.dot(a.reshape(3,3),b.reshape(3,3)).flatten(), dest):
+        print 'fail'
+        print np.dot(a.reshape(3,3),b.reshape(3,3)).flatten()
+        print dest
+    else:
+        print 'success'
+
+    print 'testing multiply...',
+
+    x_cpu = np.random.random_sample(size=3).astype(np.float32)
+    x_gpu = np.array((x_cpu[0], x_cpu[1], x_cpu[2]), dtype=float3)
+
+    dest = np.empty(1, dtype=float3)
+    
+    multiply(cuda.In(a), cuda.In(x_gpu), cuda.Out(dest), **size)
+
+    m = a.reshape(3,3)
+
+    if not np.allclose(np.dot(x_cpu,m[0]), dest[0]['x']) or \
+            not np.allclose(np.dot(x_cpu,m[1]), dest[0]['y']) or \
+            not np.allclose(np.dot(x_cpu,m[2]), dest[0]['z']):
+        print 'fail'
+        print np.dot(x_cpu,m[0])
+        print np.dot(x_cpu,m[1])
+        print np.dot(x_cpu,m[2])
+        print dest[0]['x']
+        print dest[0]['y']
+        print dest[0]['z']
+    else:
+        print 'success'
+
+    print 'testing matrixaddfloat...',
+
+    dest = np.empty(9, dtype=np.float32)
+
+    matrixaddfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(a+c, dest):
+        print 'fail'
+        print a+c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixsubfloat...',
+
+    matrixsubfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(a-c, dest):
+        print 'fail'
+        print a-c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixmulfloat...',
+
+    matrixmulfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(a*c, dest):
+        print 'fail'
+        print a-c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixdivfloat...',
+
+    matrixdivfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(a/c, dest):
+        print 'fail'
+        print a/c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing floataddmatrix...',
+
+    floataddmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(c+a, dest):
+        print 'fail'
+        print c+a
+        print dest
+    else:
+        print 'success'
+
+    print 'testing floatsubmatrix...',
+
+    floatsubmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(c-a, dest):
+        print 'fail'
+        print c-a
+        print dest
+    else:
+        print 'success'
+
+    print 'testing floatmulmatrix...',
+
+    floatmulmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(c*a, dest):
+        print 'fail'
+        print c*a
+        print dest
+    else:
+        print 'success'
+
+    print 'testing floatdivmatrix...',
+
+    floatdivmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(c/a, dest):
+        print 'fail'
+        print c/a
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixaddequals...',
+
+    dest = np.copy(a)
+
+    matrixaddequals(cuda.InOut(dest), cuda.In(b), **size)
+
+    if not np.allclose(a+b, dest):
+        print 'fail'
+        print a+b
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixsubequals...',
+
+    dest = np.copy(a)
+
+    matrixsubequals(cuda.InOut(dest), cuda.In(b), **size)
+
+    if not np.allclose(a-b, dest):
+        print 'fail'
+        print a-b
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixaddequalsfloat...',
+
+    dest = np.copy(a)
+
+    matrixaddequalsfloat(cuda.InOut(dest), c, **size)
+
+    if not np.allclose(a+c, dest):
+        print 'fail'
+        print a+c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixsubequalsfloat...',
+
+    dest = np.copy(a)
+
+    matrixsubequalsfloat(cuda.InOut(dest), c, **size)
+
+    if not np.allclose(a-c, dest):
+        print 'fail'
+        print a-c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixmulequalsfloat...',
+
+    dest = np.copy(a)
+
+    matrixmulequalsfloat(cuda.InOut(dest), c, **size)
+
+    if not np.allclose(a*c, dest):
+        print 'fail'
+        print a*c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing matrixdivequalsfloat...',
+
+    dest = np.copy(a)
+
+    matrixdivequalsfloat(cuda.InOut(dest), c, **size)
+
+    if not np.allclose(a/c, dest):
+        print 'fail'
+        print a*c
+        print dest
+    else:
+        print 'success'
+
+    print 'testing outer...',
+
+    x1_cpu = np.random.random_sample(size=3).astype(np.float32)
+    x2_cpu = np.random.random_sample(size=3).astype(np.float32)
+
+    x1_gpu = np.array((x1_cpu[0], x1_cpu[1], x1_cpu[2]), dtype=float3)
+    x2_gpu = np.array((x2_cpu[0], x2_cpu[1], x2_cpu[2]), dtype=float3)
+
+    outer(x1_gpu, x2_gpu, cuda.Out(dest), **size)
+
+    if not np.allclose(np.outer(x1_cpu, x2_cpu).flatten(), dest):
+        print 'fail'
+        print np.outer(x1_cpu, x2_cpu).flatten()
+        print dest
+    else:
+        print 'success'
+
+    print 'testing minus matrix...',
+
+    dest = np.copy(a)
+
+    minusmatrix(cuda.In(a), cuda.Out(dest), **size)
+
+    if not np.allclose(-a, dest):
+        print 'fail'
+        print -a
+        print dest
+    else:
+        print 'success'
diff --git a/test/rotate_test.cu b/test/rotate_test.cu
new file mode 100644
index 0000000..6cafc12
--- /dev/null
+++ b/test/rotate_test.cu
@@ -0,0 +1,14 @@
+//-*-c-*-
+
+#include "rotate.h"
+
+extern "C"
+{
+
+__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = rotate(a[idx], phi[idx], n[idx]);
+}
+
+} // extern "c"
diff --git a/test/rotate_test.py b/test/rotate_test.py
new file mode 100644
index 0000000..92eff84
--- /dev/null
+++ b/test/rotate_test.py
@@ -0,0 +1,67 @@
+import os
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+float3 = gpuarray.vec.float3
+
+def rotate(x, phi, n):
+    x = np.asarray(x)
+    n = np.asarray(n)
+
+    r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \
+        np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]])
+
+    return np.inner(x,r)
+
+print 'device %s' % autoinit.device.name()
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/rotate_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+rotate_gpu = mod.get_function('rotate')
+
+size = {'block': (100,1,1), 'grid': (1,1)}
+
+a = np.empty(size['block'][0], dtype=float3)
+n = np.empty(size['block'][0], dtype=float3)
+phi = np.random.random_sample(size=a.size).astype(np.float32)
+
+a['x'] = np.random.random_sample(size=a.size)
+a['y'] = np.random.random_sample(size=a.size)
+a['z'] = np.random.random_sample(size=a.size)
+
+n['x'] = np.random.random_sample(size=n.size)
+n['y'] = np.random.random_sample(size=n.size)
+n['z'] = np.random.random_sample(size=n.size)
+
+a['x'] = np.ones(a.size)
+a['y'] = np.zeros(a.size)
+a['z'] = np.zeros(a.size)
+
+n['x'] = np.zeros(n.size)
+n['y'] = np.zeros(n.size)
+n['z'] = np.ones(n.size)
+
+phi = np.array([np.pi/2]*a.size).astype(np.float32)
+
+def testrotate():
+    dest = np.empty(a.size, dtype=float3)
+    rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size)
+    for v, theta, w, rdest in zip(a,phi,n,dest):
+        r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z']))
+        if not np.allclose(rdest['x'], r[0]) or \
+                not np.allclose(rdest['y'], r[1]) or \
+                not np.allclose(rdest['z'], r[2]):
+            print v
+            print theta
+            print w
+            print r
+            print rdest
+            assert False
+
diff --git a/test/test_fileio.py b/test/test_fileio.py
new file mode 100644
index 0000000..3869a9f
--- /dev/null
+++ b/test/test_fileio.py
@@ -0,0 +1,74 @@
+import unittest
+from chroma.fileio import root
+from chroma import event
+import numpy as np
+
+class TestFileIO(unittest.TestCase):
+    def test_file_write_and_read(self):
+        ev = event.Event(1, event.Vertex('e-', pos=(0,0,1), dir=(1,0,0),
+                                         ke=15.0, pol=(0,1,0)))
+
+        photons_beg = root.make_photon_with_arrays(1)
+        photons_beg.pos[0] = (1,2,3)
+        photons_beg.dir[0] = (4,5,6)
+        photons_beg.pol[0] = (7,8,9)
+        photons_beg.wavelengths[0] = 400.0
+        photons_beg.t[0] = 100.0
+        photons_beg.last_hit_triangles[0] = 5
+        photons_beg.flags[0] = 20
+        ev.photons_beg = photons_beg
+
+        photons_end = root.make_photon_with_arrays(1)
+        photons_end.pos[0] = (1,2,3)
+        photons_end.dir[0] = (4,5,6)
+        photons_end.pol[0] = (7,8,9)
+        photons_end.wavelengths[0] = 400.0
+        photons_end.t[0] = 100.0
+        photons_end.last_hit_triangles[0] = 5
+        photons_end.flags[0] = 20
+        ev.photons_end = photons_end
+
+        ev.vertices = [ev.primary_vertex]
+
+        channels = event.Channels(hit=np.array([True, False]), 
+                                  t=np.array([20.0, 1e9], dtype=np.float32),
+                                  q=np.array([2.0, 0.0], dtype=np.float32),
+                                  flags=np.array([8, 32], dtype=np.uint32))
+        ev.channels = channels
+
+        filename = '/tmp/chroma-filewritertest.root'
+        writer = root.RootWriter(filename)
+        writer.write_event(ev)
+        writer.close()
+
+        # Exercise the RootReader methods
+        reader = root.RootReader(filename)
+        self.assertEquals(len(reader), 1)
+
+        self.assertRaises(StopIteration, reader.prev)
+
+        reader.next()
+
+        self.assertEqual(reader.index(), 0)
+        self.assertRaises(StopIteration, reader.next)
+        
+        reader.jump_to(0)
+
+        # Enough screwing around, let's get the one event in the file
+        newev = reader.current()
+
+        # Now check if everything is correct in the event
+        for attribute in ['id']:
+            self.assertEqual(getattr(ev, attribute), getattr(newev, attribute), 'compare %s' % attribute)
+
+        for attribute in ['pos', 'dir', 'pol', 'ke', 't0']:
+            self.assertTrue(np.allclose(getattr(ev.primary_vertex, attribute), getattr(newev.primary_vertex, attribute)), 'compare %s' % attribute)
+
+            for i in range(len(ev.vertices)):
+                self.assertTrue(np.allclose(getattr(ev.vertices[i], attribute), getattr(newev.vertices[i], attribute)), 'compare %s' % attribute)
+
+        for attribute in ['pos', 'dir', 'pol', 'wavelengths', 't', 'last_hit_triangles', 'flags']:    
+            self.assertTrue(np.allclose(getattr(ev.photons_beg, attribute), 
+                                        getattr(newev.photons_beg, attribute)), 'compare %s' % attribute)
+            self.assertTrue(np.allclose(getattr(ev.photons_end, attribute), 
+                                        getattr(newev.photons_end, attribute)), 'compare %s' % attribute)
diff --git a/test/test_generator_photon.py b/test/test_generator_photon.py
new file mode 100644
index 0000000..13501fe
--- /dev/null
+++ b/test/test_generator_photon.py
@@ -0,0 +1,21 @@
+import unittest
+import itertools
+
+from chroma import generator
+from chroma.generator.vertex import constant_particle_gun
+from chroma import optics
+
+class TestG4ParallelGenerator(unittest.TestCase):
+    def test_center(self):
+        '''Generate Cherenkov light at the center of the world volume'''
+        gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim)
+        vertex = itertools.islice(constant_particle_gun('e-', (0,0,0), (1,0,0), 100), 10)
+        for event in gen.generate_events(vertex):
+            self.assertGreater(len(event.photons_beg.pos), 0)
+
+    def test_off_center(self):
+        '''Generate Cherenkov light at (1 m, 0 m, 0 m)'''
+        gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim)
+        vertex = itertools.islice(constant_particle_gun('e-', (1,0,0), (1,0,0), 100), 10)
+        for event in gen.generate_events(vertex):
+            self.assertGreater(len(event.photons_beg.pos), 0)
diff --git a/test/test_generator_vertex.py b/test/test_generator_vertex.py
new file mode 100644
index 0000000..cec363d
--- /dev/null
+++ b/test/test_generator_vertex.py
@@ -0,0 +1,23 @@
+import unittest
+import itertools
+import numpy as np
+import chroma.generator.vertex
+
+class TestParticleGun(unittest.TestCase):
+    def test_constant_particle_gun_center(self):
+        '''Generate electron vertices at the center of the world volume.'''
+        vertex = chroma.generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100)
+        for ev in itertools.islice(vertex, 100):
+            self.assertEquals(ev.primary_vertex.particle_name, 'e-')
+            self.assertTrue(np.allclose(ev.primary_vertex.pos, [0,0,0]))
+            self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0]))
+            self.assertTrue(np.allclose(ev.primary_vertex.ke, 100))
+
+    def test_off_center(self):
+        '''Generate electron vertices at (1,0,0) in the world volume.'''
+        vertex = chroma.generator.vertex.constant_particle_gun('e-', (1,0,0), (1,0,0), 100)
+        for ev in itertools.islice(vertex, 100):
+            self.assertEquals(ev.primary_vertex.particle_name, 'e-')
+            self.assertTrue(np.allclose(ev.primary_vertex.pos, [1,0,0]))
+            self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0]))
+            self.assertTrue(np.allclose(ev.primary_vertex.ke, 100))
diff --git a/test/test_pdf.py b/test/test_pdf.py
new file mode 100644
index 0000000..571cbd4
--- /dev/null
+++ b/test/test_pdf.py
@@ -0,0 +1,67 @@
+import unittest
+import numpy as np
+import itertools
+
+import chroma.detectors
+from chroma.generator.photon import G4ParallelGenerator
+from chroma.generator.vertex import constant_particle_gun
+from chroma.optics import water_wcsim
+from chroma import gpu
+from chroma.sim import Simulation
+
+class TestPDF(unittest.TestCase):
+    def setUp(self):
+        self.detector = chroma.detectors.microlbne()
+        self.detector.build()
+        self.vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 10)
+
+    def testGPUPDF(self):
+        '''Create a hit count and (q,t) PDF for 10 MeV events in MicroLBNE'''
+
+        g4generator = G4ParallelGenerator(1, water_wcsim)
+
+        context = gpu.create_cuda_context()
+
+        gpu_geometry = gpu.GPUGeometry(self.detector)
+
+        nthreads_per_block, max_blocks = 64, 1024
+
+        rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)
+
+        gpu_daq = gpu.GPUDaq(gpu_geometry, max(self.detector.pmtids))
+        gpu_pdf = gpu.GPUPDF()
+        gpu_pdf.setup_pdf(max(self.detector.pmtids), 100, (-0.5, 999.5), 10, (-0.5, 9.5))
+        
+        gpu_pdf.clear_pdf()
+
+        for ev in g4generator.generate_events(itertools.islice(self.vertex_gen, 10)):
+            gpu_photons = gpu.GPUPhotons(ev.photons_beg)
+            gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block,
+                                  max_blocks)
+            gpu_channels = gpu_daq.acquire(gpu_photons, rng_states,
+                                           nthreads_per_block, max_blocks)
+            gpu_pdf.add_hits_to_pdf(gpu_channels)
+
+        hitcount, pdf = gpu_pdf.get_pdfs()
+        self.assertTrue( (hitcount > 0).any() )
+        self.assertTrue( (pdf > 0).any() )
+
+        # Consistency checks
+        for i, nhits in enumerate(hitcount):
+            self.assertEqual(nhits, pdf[i].sum())
+
+        context.pop()
+
+    def testSimPDF(self):
+        sim = Simulation(self.detector)
+
+        vertex_iter = itertools.islice(self.vertex_gen, 10)
+
+        hitcount, pdf = sim.create_pdf(vertex_iter, 100, (-0.5, 999.5), 10, (-0.5, 9.5))
+
+        self.assertTrue( (hitcount > 0).any() )
+        self.assertTrue( (pdf > 0).any() )
+
+        # Consistency checks
+        for i, nhits in enumerate(hitcount):
+            self.assertEqual(nhits, pdf[i].sum())
diff --git a/test/test_propagation.py b/test/test_propagation.py
new file mode 100644
index 0000000..43ecb9b
--- /dev/null
+++ b/test/test_propagation.py
@@ -0,0 +1,58 @@
+import unittest
+import numpy as np
+
+from chroma.geometry import Solid, Geometry
+from chroma.make import box
+from chroma.sim import Simulation
+from chroma.optics import vacuum
+from chroma.event import Photons
+
+class TestPropagation(unittest.TestCase):
+    def testAbort(self):
+        '''Photons that hit a triangle at normal incidence should not abort.
+
+        Photons that hit a triangle at exactly normal incidence can sometimes
+        produce a dot product that is outside the range allowed by acos().
+        Trigger these with axis aligned photons in a box.
+        '''
+
+        # Setup geometry
+        cube = Geometry(vacuum)
+        cube.add_solid(Solid(box(100,100,100), vacuum, vacuum))
+        cube.pmtids = [0]
+        cube.build(use_cache=False)
+        sim = Simulation(cube, geant4_processes=0)
+
+        # Create initial photons
+        nphotons = 10000
+        pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32)
+        dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32)
+        pol = np.zeros_like(pos)
+        phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32)
+        pol[:,0] = np.cos(phi)
+        pol[:,1] = np.sin(phi)
+        t = np.zeros(nphotons, dtype=np.float32)
+        wavelengths = np.empty(nphotons, np.float32)
+        wavelengths.fill(400.0)
+
+        photons = Photons(pos=pos, dir=dir, pol=pol, t=t,
+                          wavelengths=wavelengths)
+
+        # First make one step to check for strangeness
+        photons_end = sim.simulate([photons], keep_photons_end=True,
+                                   max_steps=1).next().photons_end
+
+        self.assertFalse(np.isnan(photons_end.pos).any())
+        self.assertFalse(np.isnan(photons_end.dir).any())
+        self.assertFalse(np.isnan(photons_end.pol).any())
+        self.assertFalse(np.isnan(photons_end.t).any())
+        self.assertFalse(np.isnan(photons_end.wavelengths).any())
+
+        # Now let it run the usual ten steps
+        photons_end = sim.simulate([photons], keep_photons_end=True,
+                                   max_steps=10).next().photons_end
+        aborted = (photons_end.flags & (1 << 31)) > 0
+        print 'aborted photons: %1.1f' % \
+            (float(np.count_nonzero(aborted)) / nphotons)
+        self.assertFalse(aborted.any())
+        
diff --git a/test/test_ray_intersection.py b/test/test_ray_intersection.py
new file mode 100644
index 0000000..7d0c53c
--- /dev/null
+++ b/test/test_ray_intersection.py
@@ -0,0 +1,27 @@
+import unittest
+import chroma
+import numpy as np
+import os
+from pycuda import gpuarray as ga
+
+class TestRayIntersection(unittest.TestCase):
+    def setUp(self):
+        self.context = chroma.gpu.create_cuda_context()
+        self.module = chroma.gpu.get_cu_module('mesh.h')
+        self.gpu_funcs = chroma.gpu.GPUFuncs(self.module)
+        self.box = chroma.gpu.GPUGeometry(chroma.build(chroma.make.cube()))
+
+        pos, dir = chroma.project.from_film()
+        self.pos_gpu = ga.to_gpu(chroma.gpu.to_float3(pos))
+        self.dir_gpu = ga.to_gpu(chroma.gpu.to_float3(dir))
+
+        testdir = os.path.dirname(os.path.abspath(chroma.tests.__file__))
+        self.dx_standard = np.load(os.path.join(testdir,
+                                                'data/ray_intersection.npz'))
+    def test_intersection_distance(self):
+        dx = ga.zeros(self.pos_gpu.size, dtype=np.float32)
+        self.gpu_funcs.distance_to_mesh(np.int32(self.pos_gpu.size), self.pos_gpu, self.dir_gpu, self.box.gpudata, dx, block=(64,1,1), grid=(self.pos_gpu.size//64+1,1))
+        self.assertTrue((dx.get() == self.dx_standard).all())
+
+    def tearDown(self):
+        self.context.pop()
diff --git a/test/test_rayleigh.py b/test/test_rayleigh.py
new file mode 100644
index 0000000..02ccb41
--- /dev/null
+++ b/test/test_rayleigh.py
@@ -0,0 +1,57 @@
+import unittest
+import numpy as np
+
+from chroma.geometry import Solid, Geometry
+from chroma.make import box
+from chroma.sim import Simulation
+from chroma.optics import water_wcsim
+from chroma.event import Photons
+import histogram
+from histogram.root import rootify
+import ROOT
+ROOT.gROOT.SetBatch(1)
+
+class TestRayleigh(unittest.TestCase):
+    def setUp(self):
+        self.cube = Geometry(water_wcsim)
+        self.cube.add_solid(Solid(box(100,100,100), water_wcsim, water_wcsim))
+        self.cube.pmtids = [0]
+        self.cube.build(use_cache=False)
+        self.sim = Simulation(self.cube, geant4_processes=0)
+
+        nphotons = 100000
+        pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32)
+        dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32)
+        pol = np.zeros_like(pos)
+        phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32)
+        pol[:,0] = np.cos(phi)
+        pol[:,1] = np.sin(phi)
+        t = np.zeros(nphotons, dtype=np.float32)
+        wavelengths = np.empty(nphotons, np.float32)
+        wavelengths.fill(400.0)
+
+        self.photons = Photons(pos=pos, dir=dir, pol=pol, t=t, wavelengths=wavelengths)
+
+    def testAngularDistributionPolarized(self):
+        # Fully polarized photons
+        self.photons.pol[:] = [1.0, 0.0, 0.0]
+
+        photons_end = self.sim.simulate([self.photons], keep_photons_end=True, max_steps=1).next().photons_end
+        aborted = (photons_end.flags & (1 << 31)) > 0
+        self.assertFalse(aborted.any())
+
+        # Compute the dot product between initial and final dir
+        rayleigh_scatters = (photons_end.flags & (1 << 4)) > 0
+        cos_scatter = (self.photons.dir[rayleigh_scatters] * photons_end.dir[rayleigh_scatters]).sum(axis=1)
+        theta_scatter = np.arccos(cos_scatter)
+        h = histogram.Histogram(bins=100, range=(0, np.pi))
+        h.fill(theta_scatter)
+        h = rootify(h)
+
+        # The functional form for polarized light should be
+        # (1 + \cos^2 \theta)\sin \theta according to GEANT4 physics
+        # reference manual.
+        f = ROOT.TF1("pol_func", "[0]*(1+cos(x)**2)*sin(x)", 0, np.pi)
+        h.Fit(f)
+        self.assertGreater(f.GetProb(), 1e-3)
+
diff --git a/test/test_sample_cdf.cu b/test/test_sample_cdf.cu
new file mode 100644
index 0000000..1401772
--- /dev/null
+++ b/test/test_sample_cdf.cu
@@ -0,0 +1,16 @@
+// -*-c++-*-
+#include "random.h"
+
+extern "C" {
+
+__global__ void test_sample_cdf(int offset, int ncdf, 
+				float *cdf_x, float *cdf_y, float *out)
+{
+  int id = blockDim.x * blockIdx.x + threadIdx.x;
+  curandState s;
+  curand_init(0, id, offset, &s);
+
+  out[id] = sample_cdf(&s, ncdf, cdf_x, cdf_y);
+}
+
+}
diff --git a/test/test_sample_cdf.py b/test/test_sample_cdf.py
new file mode 100644
index 0000000..2baa2ac
--- /dev/null
+++ b/test/test_sample_cdf.py
@@ -0,0 +1,64 @@
+import pycuda.autoinit
+import pycuda.driver as cuda
+from pycuda.compiler import SourceModule
+from pycuda import gpuarray
+import numpy as np
+import ROOT
+import os
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/test_sample_cdf.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+test_sample_cdf = mod.get_function('test_sample_cdf')
+
+def compare_sampling(hist, reps=10):
+    nbins = hist.GetNbinsX();
+    xaxis = hist.GetXaxis()
+    intg = hist.GetIntegral()
+    cdf_y = np.empty(nbins+1, dtype=float)
+    cdf_x = np.empty_like(cdf_y)
+
+    cdf_x[0] = xaxis.GetBinLowEdge(1)
+    cdf_y[0] = 0.0
+    for i in xrange(1,len(cdf_x)):
+        cdf_y[i] = intg[i]
+        cdf_x[i] = xaxis.GetBinUpEdge(i)
+
+    cdf_x_gpu = gpuarray.to_gpu(cdf_x.astype(np.float32))
+    cdf_y_gpu = gpuarray.to_gpu(cdf_y.astype(np.float32))
+    block =(128,1,1)
+    grid = (128, 1)
+    out_gpu = gpuarray.empty(shape=int(block[0]*grid[0]), dtype=np.float32)
+
+    out_h = ROOT.TH1D('out_h', '', hist.GetNbinsX(), 
+                      xaxis.GetXmin(),
+                      xaxis.GetXmax())
+    out_h.SetLineColor(ROOT.kGreen)
+
+    for i in xrange(reps):
+        test_sample_cdf(np.int32(i),
+                        np.int32(len(cdf_x_gpu)), 
+                        cdf_x_gpu, cdf_y_gpu, out_gpu, block=block, grid=grid)
+        out = out_gpu.get()
+        for v in out:
+            out_h.Fill(v)
+
+    prob = out_h.KolmogorovTest(hist)
+    return prob, out_h 
+
+def test_sampling():
+    '''Verify that the CDF-based sampler on the GPU reproduces a binned
+    Gaussian distribution'''
+    f = ROOT.TF1('f_gaussian', 'gaus(0)', -5, 5)
+    f.SetParameters(1.0/np.sqrt(np.pi * 2), 0.0, 1.0)
+    gaussian = ROOT.TH1D('gaussian', '', 100, -5, 5)
+    gaussian.Add(f)
+
+    prob, out_h = compare_sampling(gaussian, reps=50)
+
+    assert prob > 0.01
+
author	Stan Seibert <stan@mtrr.org>	2011-09-16 15:02:02 -0400
committer	Stan Seibert <stan@mtrr.org>	2011-09-16 15:02:02 -0400
commit	142b3c3caff164deb9bc7b2848e58e52387723ff (patch)
tree	417da3ad69a2756aff7a21dca4b08733d3e87afb /test
parent	084dfd08b714faefaea77cb7dc04d2e93dc04b1d (diff)
download	chroma-142b3c3caff164deb9bc7b2848e58e52387723ff.tar.gz chroma-142b3c3caff164deb9bc7b2848e58e52387723ff.tar.bz2 chroma-142b3c3caff164deb9bc7b2848e58e52387723ff.zip