summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
Diffstat (limited to 'tests')
-rw-r--r-- tests/linalg_test.cu 128
-rw-r--r-- tests/linalg_test.py 214
-rw-r--r-- tests/matrix_test.cu 152
-rw-r--r-- tests/matrix_test.py 327
-rw-r--r-- tests/rotate_test.cu 14
-rw-r--r-- tests/rotate_test.py 67
6 files changed, 902 insertions, 0 deletions
diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu
new file mode 100644
index 0000000..4e9c983
--- /dev/null
+++ b/tests/linalg_test.cu
@@ -0,0 +1,128 @@
+//-*-c-*-
+
+#include "linalg.h"
+
+extern "C"
+{
+
+__global__ void float3add(float3 *a, float3 *b, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = a[idx] + b[idx];
+}
+
+__global__ void float3addequal(float3 *a, float3 *b)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ a[idx] += b[idx];
+}
+
+__global__ void float3sub(float3 *a, float3 *b, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = a[idx] - b[idx];
+}
+
+__global__ void float3subequal(float3 *a, float3 *b)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ a[idx] -= b[idx];
+}
+
+__global__ void float3addfloat(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = a[idx] + c;
+}
+
+__global__ void float3addfloatequal(float3 *a, float c)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ a[idx] += c;
+}
+
+__global__ void floataddfloat3(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = c + a[idx];
+}
+
+__global__ void float3subfloat(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = a[idx] - c;
+}
+
+__global__ void float3subfloatequal(float3 *a, float c)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ a[idx] -= c;
+}
+
+__global__ void floatsubfloat3(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = c - a[idx];
+}
+
+__global__ void float3mulfloat(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = a[idx]*c;
+}
+
+__global__ void float3mulfloatequal(float3 *a, float c)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ a[idx] *= c;
+}
+
+__global__ void floatmulfloat3(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = c*a[idx];
+}
+
+__global__ void float3divfloat(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = a[idx]/c;
+}
+
+__global__ void float3divfloatequal(float3 *a, float c)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ a[idx] /= c;
+}
+
+__global__ void floatdivfloat3(float3 *a, float c, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = c/a[idx];
+}
+
+__global__ void dot(float3 *a, float3 *b, float *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = dot(a[idx],b[idx]);
+}
+
+__global__ void cross(float3 *a, float3 *b, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = cross(a[idx],b[idx]);
+}
+
+__global__ void norm(float3 *a, float *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = norm(a[idx]);
+}
+
+__global__ void minusfloat3(float3 *a, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = -a[idx];
+}
+
+} // extern "C"
diff --git a/tests/linalg_test.py b/tests/linalg_test.py
new file mode 100644
index 0000000..31688d9
--- /dev/null
+++ b/tests/linalg_test.py
@@ -0,0 +1,214 @@
+import os
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+float3 = gpuarray.vec.float3
+
+print 'device %s' % autoinit.device.name()
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/linalg_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+float3add = mod.get_function('float3add')
+float3addequal = mod.get_function('float3addequal')
+float3sub = mod.get_function('float3sub')
+float3subequal = mod.get_function('float3subequal')
+float3addfloat = mod.get_function('float3addfloat')
+float3addfloatequal = mod.get_function('float3addfloatequal')
+floataddfloat3 = mod.get_function('floataddfloat3')
+float3subfloat = mod.get_function('float3subfloat')
+float3subfloatequal = mod.get_function('float3subfloatequal')
+floatsubfloat3 = mod.get_function('floatsubfloat3')
+float3mulfloat = mod.get_function('float3mulfloat')
+float3mulfloatequal = mod.get_function('float3mulfloatequal')
+floatmulfloat3 = mod.get_function('floatmulfloat3')
+float3divfloat = mod.get_function('float3divfloat')
+float3divfloatequal = mod.get_function('float3divfloatequal')
+floatdivfloat3 = mod.get_function('floatdivfloat3')
+dot = mod.get_function('dot')
+cross = mod.get_function('cross')
+norm = mod.get_function('norm')
+minusfloat3 = mod.get_function('minusfloat3')
+
+size = {'block': (256,1,1), 'grid': (1,1)}
+
+a = np.empty(size['block'][0], dtype=float3)
+b = np.empty(size['block'][0], dtype=float3)
+c = np.float32(np.random.random_sample())
+
+a['x'] = np.random.random_sample(size=a.size)
+a['y'] = np.random.random_sample(size=a.size)
+a['z'] = np.random.random_sample(size=a.size)
+
+b['x'] = np.random.random_sample(size=b.size)
+b['y'] = np.random.random_sample(size=b.size)
+b['z'] = np.random.random_sample(size=b.size)
+
+def testfloat3add():
+ dest = np.empty(a.size, dtype=float3)
+ float3add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+ if not np.allclose(a['x']+b['x'], dest['x']) or \
+ not np.allclose(a['y']+b['y'], dest['y']) or \
+ not np.allclose(a['z']+b['z'], dest['z']):
+ assert False
+
+def testfloat3sub():
+ dest = np.empty(a.size, dtype=float3)
+ float3sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+ if not np.allclose(a['x']-b['x'], dest['x']) or \
+ not np.allclose(a['y']-b['y'], dest['y']) or \
+ not np.allclose(a['z']-b['z'], dest['z']):
+ assert False
+
+def testfloat3addequal():
+ dest = np.copy(a)
+ float3addequal(cuda.InOut(dest), cuda.In(b), **size)
+ if not np.allclose(a['x']+b['x'], dest['x']) or \
+ not np.allclose(a['y']+b['y'], dest['y']) or \
+ not np.allclose(a['z']+b['z'], dest['z']):
+ assert False
+
+def testfloat3subequal():
+ dest = np.copy(a)
+ float3subequal(cuda.InOut(dest), cuda.In(b), **size)
+ if not np.allclose(a['x']-b['x'], dest['x']) or \
+ not np.allclose(a['y']-b['y'], dest['y']) or \
+ not np.allclose(a['z']-b['z'], dest['z']):
+ assert False
+
+def testfloat3addfloat():
+ dest = np.empty(a.size, dtype=float3)
+ float3addfloat(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(a['x']+c, dest['x']) or \
+ not np.allclose(a['y']+c, dest['y']) or \
+ not np.allclose(a['z']+c, dest['z']):
+ assert False
+
+def testfloat3addfloatequal():
+ dest = np.copy(a)
+ float3addfloatequal(cuda.InOut(dest), c, **size)
+ if not np.allclose(a['x']+c, dest['x']) or \
+ not np.allclose(a['y']+c, dest['y']) or \
+ not np.allclose(a['z']+c, dest['z']):
+ assert False
+
+def testfloataddfloat3():
+ dest = np.empty(a.size, dtype=float3)
+ floataddfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(c+a['x'], dest['x']) or \
+ not np.allclose(c+a['y'], dest['y']) or \
+ not np.allclose(c+a['z'], dest['z']):
+ assert False
+
+def testfloat3subfloat():
+ dest = np.empty(a.size, dtype=float3)
+ float3subfloat(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(a['x']-c, dest['x']) or \
+ not np.allclose(a['y']-c, dest['y']) or \
+ not np.allclose(a['z']-c, dest['z']):
+ assert False
+
+def testfloat3subfloatequal():
+ dest = np.copy(a)
+ float3subfloatequal(cuda.InOut(dest), c, **size)
+ if not np.allclose(a['x']-c, dest['x']) or \
+ not np.allclose(a['y']-c, dest['y']) or \
+ not np.allclose(a['z']-c, dest['z']):
+ assert False
+
+def testfloatsubfloat3():
+ dest = np.empty(a.size, dtype=float3)
+ floatsubfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(c-a['x'], dest['x']) or \
+ not np.allclose(c-a['y'], dest['y']) or \
+ not np.allclose(c-a['z'], dest['z']):
+ assert False
+
+def testfloat3mulfloat():
+ dest = np.empty(a.size, dtype=float3)
+ float3mulfloat(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(a['x']*c, dest['x']) or \
+ not np.allclose(a['y']*c, dest['y']) or \
+ not np.allclose(a['z']*c, dest['z']):
+ assert False
+
+def testfloat3mulfloatequal():
+ dest = np.copy(a)
+ float3mulfloatequal(cuda.InOut(dest), c, **size)
+ if not np.allclose(a['x']*c, dest['x']) or \
+ not np.allclose(a['y']*c, dest['y']) or \
+ not np.allclose(a['z']*c, dest['z']):
+ assert False
+
+def testfloatmulfloat3():
+ dest = np.empty(a.size, dtype=float3)
+ floatmulfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(c*a['x'], dest['x']) or \
+ not np.allclose(c*a['y'], dest['y']) or \
+ not np.allclose(c*a['z'], dest['z']):
+ assert False
+
+def testfloat3divfloat():
+ dest = np.empty(a.size, dtype=float3)
+ float3divfloat(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(a['x']/c, dest['x']) or \
+ not np.allclose(a['y']/c, dest['y']) or \
+ not np.allclose(a['z']/c, dest['z']):
+ assert False
+
+def testfloat3divfloatequal():
+ dest = np.copy(a)
+ float3divfloatequal(cuda.InOut(dest), c, **size)
+ if not np.allclose(a['x']/c, dest['x']) or \
+ not np.allclose(a['y']/c, dest['y']) or \
+ not np.allclose(a['z']/c, dest['z']):
+ assert False
+
+def testfloatdivfloat3():
+ dest = np.empty(a.size, dtype=float3)
+ floatdivfloat3(cuda.In(a), c, cuda.Out(dest), **size)
+ if not np.allclose(c/a['x'], dest['x']) or \
+ not np.allclose(c/a['y'], dest['y']) or \
+ not np.allclose(c/a['z'], dest['z']):
+ assert false
+
+def testdot():
+ dest = np.empty(a.size, dtype=np.float32)
+ dot(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+ if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], dest):
+ assert False
+
+def testcross():
+ dest = np.empty(a.size, dtype=float3)
+ cross(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+ for u, v, wdest in zip(a,b,dest):
+ w = np.cross((u['x'], u['y'], u['z']),(v['x'],v['y'],v['z']))
+ if not np.allclose(wdest['x'], w[0]) or \
+ not np.allclose(wdest['y'], w[1]) or \
+ not np.allclose(wdest['z'], w[2]):
+ print w
+ print wdest
+ assert False
+
+def testnorm():
+ dest = np.empty(a.size, dtype=np.float32)
+ norm(cuda.In(a), cuda.Out(dest), **size)
+
+ for i in range(len(dest)):
+ if not np.allclose(np.linalg.norm((a['x'][i],a['y'][i],a['z'][i])), dest[i]):
+ assert False
+
+def testminusfloat3():
+ dest = np.empty(a.size, dtype=float3)
+ minusfloat3(cuda.In(a), cuda.Out(dest), **size)
+ if not np.allclose(-a['x'], dest['x']) or \
+ not np.allclose(-a['y'], dest['y']) or \
+ not np.allclose(-a['z'], dest['z']):
+ assert False
diff --git a/tests/matrix_test.cu b/tests/matrix_test.cu
new file mode 100644
index 0000000..d64cb34
--- /dev/null
+++ b/tests/matrix_test.cu
@@ -0,0 +1,152 @@
+//-*-c-*-
+
+#include "matrix.h"
+
+__device__ Matrix array2matrix(float *a)
+{
+ return make_matrix(a[0], a[1], a[2],
+ a[3], a[4], a[5],
+ a[6], a[7], a[8]);
+}
+
+__device__ void matrix2array(const Matrix &m, float *a)
+{
+ a[0] = m.a00;
+ a[1] = m.a01;
+ a[2] = m.a02;
+ a[3] = m.a10;
+ a[4] = m.a11;
+ a[5] = m.a12;
+ a[6] = m.a20;
+ a[7] = m.a21;
+ a[8] = m.a22;
+}
+
+extern "C"
+{
+
+__global__ void det(float *a, float *dest)
+{
+ Matrix m = array2matrix(a);
+ dest[0] = det(m);
+}
+
+__global__ void inv(float *a, float *dest)
+{
+ Matrix m = array2matrix(a);
+ matrix2array(inv(m), dest);
+}
+
+__global__ void minusmatrix(float *a, float *dest)
+{
+ matrix2array(-array2matrix(a), dest);
+}
+
+__global__ void matrixadd(float *a, float *b, float *dest)
+{
+ matrix2array(array2matrix(a)+array2matrix(b), dest);
+}
+
+__global__ void matrixsub(float *a, float *b, float *dest)
+{
+ matrix2array(array2matrix(a)-array2matrix(b), dest);
+}
+
+__global__ void matrixmul(float *a, float *b, float *dest)
+{
+ matrix2array(array2matrix(a)*array2matrix(b), dest);
+}
+
+__global__ void multiply(float *a, float3 *x, float3 *dest)
+{
+ dest[0] = array2matrix(a)*x[0];
+}
+
+__global__ void matrixaddfloat(float *a, float c, float *dest)
+{
+ matrix2array(array2matrix(a)+c, dest);
+}
+
+__global__ void matrixsubfloat(float *a, float c, float *dest)
+{
+ matrix2array(array2matrix(a)-c, dest);
+}
+
+__global__ void matrixmulfloat(float *a, float c, float *dest)
+{
+ matrix2array(array2matrix(a)*c, dest);
+}
+
+__global__ void matrixdivfloat(float *a, float c, float *dest)
+{
+ matrix2array(array2matrix(a)/c, dest);
+}
+
+__global__ void floataddmatrix(float *a, float c, float *dest)
+{
+ matrix2array(c+array2matrix(a), dest);
+}
+
+__global__ void floatsubmatrix(float *a, float c, float *dest)
+{
+ matrix2array(c-array2matrix(a), dest);
+}
+
+__global__ void floatmulmatrix(float *a, float c, float *dest)
+{
+ matrix2array(c*array2matrix(a), dest);
+}
+
+__global__ void floatdivmatrix(float *a, float c, float *dest)
+{
+ matrix2array(c/array2matrix(a), dest);
+}
+
+__global__ void matrixaddequals(float *a, float *b)
+{
+ Matrix m = array2matrix(a);
+ m += array2matrix(b);
+ matrix2array(m,a);
+}
+
+__global__ void matrixsubequals(float *a, float *b)
+{
+ Matrix m = array2matrix(a);
+ m -= array2matrix(b);
+ matrix2array(m,a);
+}
+
+__global__ void matrixaddequalsfloat(float *a, float c)
+{
+ Matrix m = array2matrix(a);
+ m += c;
+ matrix2array(m,a);
+}
+
+__global__ void matrixsubequalsfloat(float *a, float c)
+{
+ Matrix m = array2matrix(a);
+ m -= c;
+ matrix2array(m,a);
+}
+
+__global__ void matrixmulequalsfloat(float *a, float c)
+{
+ Matrix m = array2matrix(a);
+ m *= c;
+ matrix2array(m,a);
+}
+
+__global__ void matrixdivequalsfloat(float *a, float c)
+{
+ Matrix m = array2matrix(a);
+ m /= c;
+ matrix2array(m,a);
+}
+
+__global__ void outer(float3 a, float3 b, float* dest)
+{
+ matrix2array(outer(a,b), dest);
+}
+
+} // extern "C"
diff --git a/tests/matrix_test.py b/tests/matrix_test.py
new file mode 100644
index 0000000..c843025
--- /dev/null
+++ b/tests/matrix_test.py
@@ -0,0 +1,327 @@
+import os
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+float3 = gpuarray.vec.float3
+
+print 'device %s' % autoinit.device.name()
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/matrix_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+det = mod.get_function('det')
+inv = mod.get_function('inv')
+matrixadd = mod.get_function('matrixadd')
+matrixsub = mod.get_function('matrixsub')
+matrixmul = mod.get_function('matrixmul')
+multiply = mod.get_function('multiply')
+matrixaddfloat = mod.get_function('matrixaddfloat')
+matrixsubfloat = mod.get_function('matrixsubfloat')
+matrixmulfloat = mod.get_function('matrixmulfloat')
+matrixdivfloat = mod.get_function('matrixdivfloat')
+floataddmatrix = mod.get_function('floataddmatrix')
+floatsubmatrix = mod.get_function('floatsubmatrix')
+floatmulmatrix = mod.get_function('floatmulmatrix')
+floatdivmatrix = mod.get_function('floatdivmatrix')
+matrixaddequals = mod.get_function('matrixaddequals')
+matrixsubequals = mod.get_function('matrixsubequals')
+matrixaddequalsfloat = mod.get_function('matrixaddequalsfloat')
+matrixsubequalsfloat = mod.get_function('matrixsubequalsfloat')
+matrixmulequalsfloat = mod.get_function('matrixmulequalsfloat')
+matrixdivequalsfloat = mod.get_function('matrixdivequalsfloat')
+outer = mod.get_function('outer')
+minusmatrix = mod.get_function('minusmatrix')
+
+size = {'block': (1,1,1), 'grid': (1,1)}
+
+for i in range(1):
+ a = np.random.random_sample(size=9).astype(np.float32)
+ b = np.random.random_sample(size=9).astype(np.float32)
+ dest = np.empty(1, dtype=np.float32)
+ c = np.int32(np.random.random_sample())
+
+ print 'testing det...',
+
+ det(cuda.In(a), cuda.Out(dest), **size)
+
+ if not np.allclose(np.float32(np.linalg.det(a.reshape(3,3))), dest[0]):
+ print 'fail'
+ print np.float32(np.linalg.det(a.reshape(3,3)))
+ print dest[0]
+ else:
+ print 'success'
+
+ print 'testing inv...',
+
+ dest = np.empty(9, dtype=np.float32)
+
+ inv(cuda.In(a), cuda.Out(dest), **size)
+
+ if not np.allclose(np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32), dest):
+ print 'fail'
+ print np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32)
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixadd...',
+
+ matrixadd(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+ if not np.allclose(a+b, dest):
+ print 'fail'
+ print a+b
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixsub...',
+
+ matrixsub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+ if not np.allclose(a-b, dest):
+ print 'fail'
+ print a-b
+ print dest
+ else:
+ print 'sucess'
+
+ print 'testing matrixmul...',
+
+ matrixmul(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+ if not np.allclose(np.dot(a.reshape(3,3),b.reshape(3,3)).flatten(), dest):
+ print 'fail'
+ print np.dot(a.reshape(3,3),b.reshape(3,3)).flatten()
+ print dest
+ else:
+ print 'success'
+
+ print 'testing multiply...',
+
+ x_cpu = np.random.random_sample(size=3).astype(np.float32)
+ x_gpu = np.array((x_cpu[0], x_cpu[1], x_cpu[2]), dtype=float3)
+
+ dest = np.empty(1, dtype=float3)
+
+ multiply(cuda.In(a), cuda.In(x_gpu), cuda.Out(dest), **size)
+
+ m = a.reshape(3,3)
+
+ if not np.allclose(np.dot(x_cpu,m[0]), dest[0]['x']) or \
+ not np.allclose(np.dot(x_cpu,m[1]), dest[0]['y']) or \
+ not np.allclose(np.dot(x_cpu,m[2]), dest[0]['z']):
+ print 'fail'
+ print np.dot(x_cpu,m[0])
+ print np.dot(x_cpu,m[1])
+ print np.dot(x_cpu,m[2])
+ print dest[0]['x']
+ print dest[0]['y']
+ print dest[0]['z']
+ else:
+ print 'success'
+
+ print 'testing matrixaddfloat...',
+
+ dest = np.empty(9, dtype=np.float32)
+
+ matrixaddfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(a+c, dest):
+ print 'fail'
+ print a+c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixsubfloat...',
+
+ matrixsubfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(a-c, dest):
+ print 'fail'
+ print a-c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixmulfloat...',
+
+ matrixmulfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(a*c, dest):
+ print 'fail'
+ print a-c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixdivfloat...',
+
+ matrixdivfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(a/c, dest):
+ print 'fail'
+ print a/c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing floataddmatrix...',
+
+ floataddmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(c+a, dest):
+ print 'fail'
+ print c+a
+ print dest
+ else:
+ print 'success'
+
+ print 'testing floatsubmatrix...',
+
+ floatsubmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(c-a, dest):
+ print 'fail'
+ print c-a
+ print dest
+ else:
+ print 'success'
+
+ print 'testing floatmulmatrix...',
+
+ floatmulmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(c*a, dest):
+ print 'fail'
+ print c*a
+ print dest
+ else:
+ print 'success'
+
+ print 'testing floatdivmatrix...',
+
+ floatdivmatrix(cuda.In(a), c, cuda.Out(dest), **size)
+
+ if not np.allclose(c/a, dest):
+ print 'fail'
+ print c/a
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixaddequals...',
+
+ dest = np.copy(a)
+
+ matrixaddequals(cuda.InOut(dest), cuda.In(b), **size)
+
+ if not np.allclose(a+b, dest):
+ print 'fail'
+ print a+b
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixsubequals...',
+
+ dest = np.copy(a)
+
+ matrixsubequals(cuda.InOut(dest), cuda.In(b), **size)
+
+ if not np.allclose(a-b, dest):
+ print 'fail'
+ print a-b
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixaddequalsfloat...',
+
+ dest = np.copy(a)
+
+ matrixaddequalsfloat(cuda.InOut(dest), c, **size)
+
+ if not np.allclose(a+c, dest):
+ print 'fail'
+ print a+c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixsubequalsfloat...',
+
+ dest = np.copy(a)
+
+ matrixsubequalsfloat(cuda.InOut(dest), c, **size)
+
+ if not np.allclose(a-c, dest):
+ print 'fail'
+ print a-c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixmulequalsfloat...',
+
+ dest = np.copy(a)
+
+ matrixmulequalsfloat(cuda.InOut(dest), c, **size)
+
+ if not np.allclose(a*c, dest):
+ print 'fail'
+ print a*c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing matrixdivequalsfloat...',
+
+ dest = np.copy(a)
+
+ matrixdivequalsfloat(cuda.InOut(dest), c, **size)
+
+ if not np.allclose(a/c, dest):
+ print 'fail'
+ print a*c
+ print dest
+ else:
+ print 'success'
+
+ print 'testing outer...',
+
+ x1_cpu = np.random.random_sample(size=3).astype(np.float32)
+ x2_cpu = np.random.random_sample(size=3).astype(np.float32)
+
+ x1_gpu = np.array((x1_cpu[0], x1_cpu[1], x1_cpu[2]), dtype=float3)
+ x2_gpu = np.array((x2_cpu[0], x2_cpu[1], x2_cpu[2]), dtype=float3)
+
+ outer(x1_gpu, x2_gpu, cuda.Out(dest), **size)
+
+ if not np.allclose(np.outer(x1_cpu, x2_cpu).flatten(), dest):
+ print 'fail'
+ print np.outer(x1_cpu, x2_cpu).flatten()
+ print dest
+ else:
+ print 'success'
+
+ print 'testing minus matrix...',
+
+ dest = np.copy(a)
+
+ minusmatrix(cuda.In(a), cuda.Out(dest), **size)
+
+ if not np.allclose(-a, dest):
+ print 'fail'
+ print -a
+ print dest
+ else:
+ print 'success'
diff --git a/tests/rotate_test.cu b/tests/rotate_test.cu
new file mode 100644
index 0000000..6cafc12
--- /dev/null
+++ b/tests/rotate_test.cu
@@ -0,0 +1,14 @@
+//-*-c-*-
+
+#include "rotate.h"
+
+extern "C"
+{
+
+__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest)
+{
+ int idx = blockIdx.x*blockDim.x + threadIdx.x;
+ dest[idx] = rotate(a[idx], phi[idx], n[idx]);
+}
+
+} // extern "C"
diff --git a/tests/rotate_test.py b/tests/rotate_test.py
new file mode 100644
index 0000000..92eff84
--- /dev/null
+++ b/tests/rotate_test.py
@@ -0,0 +1,67 @@
+import os
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+float3 = gpuarray.vec.float3
+
+def rotate(x, phi, n):
+ x = np.asarray(x)
+ n = np.asarray(n)
+
+ r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \
+ np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]])
+
+ return np.inner(x,r)
+
+print 'device %s' % autoinit.device.name()
+
+current_directory = os.path.split(os.path.realpath(__file__))[0]
+source_directory = current_directory + '/../src'
+
+source = open(current_directory + '/rotate_test.cu').read()
+
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+
+rotate_gpu = mod.get_function('rotate')
+
+size = {'block': (100,1,1), 'grid': (1,1)}
+
+a = np.empty(size['block'][0], dtype=float3)
+n = np.empty(size['block'][0], dtype=float3)
+phi = np.random.random_sample(size=a.size).astype(np.float32)
+
+a['x'] = np.random.random_sample(size=a.size)
+a['y'] = np.random.random_sample(size=a.size)
+a['z'] = np.random.random_sample(size=a.size)
+
+n['x'] = np.random.random_sample(size=n.size)
+n['y'] = np.random.random_sample(size=n.size)
+n['z'] = np.random.random_sample(size=n.size)
+
+a['x'] = np.ones(a.size)
+a['y'] = np.zeros(a.size)
+a['z'] = np.zeros(a.size)
+
+n['x'] = np.zeros(n.size)
+n['y'] = np.zeros(n.size)
+n['z'] = np.ones(n.size)
+
+phi = np.array([np.pi/2]*a.size).astype(np.float32)
+
+def testrotate():
+ dest = np.empty(a.size, dtype=float3)
+ rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size)
+ for v, theta, w, rdest in zip(a,phi,n,dest):
+ r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z']))
+ if not np.allclose(rdest['x'], r[0]) or \
+ not np.allclose(rdest['y'], r[1]) or \
+ not np.allclose(rdest['z'], r[2]):
+ print v
+ print theta
+ print w
+ print r
+ print rdest
+ assert False
+