diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/linalg_test.cu | 48 | ||||
-rw-r--r-- | tests/linalg_test.py | 360 | ||||
-rw-r--r-- | tests/matrix_test.cu | 145 | ||||
-rw-r--r-- | tests/matrix_test.py | 310 |
4 files changed, 643 insertions, 220 deletions
diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu index 13d2ed0..81043f1 100644 --- a/tests/linalg_test.cu +++ b/tests/linalg_test.cu @@ -3,78 +3,102 @@ extern "C" { -__global__ void add(float3 *a, float3 *b, float3 *dest) +__global__ void float3add(float3 *a, float3 *b, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; dest[idx] = a[idx] + b[idx]; } -__global__ void addequal(float3 *a, float3 *b) +__global__ void float3addequal(float3 *a, float3 *b) { int idx = blockIdx.x*blockDim.x + threadIdx.x; a[idx] += b[idx]; } -__global__ void sub(float3 *a, float3 *b, float3 *dest) +__global__ void float3sub(float3 *a, float3 *b, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; dest[idx] = a[idx] - b[idx]; } -__global__ void subequal(float3 *a, float3 *b) +__global__ void float3subequal(float3 *a, float3 *b) { int idx = blockIdx.x*blockDim.x + threadIdx.x; a[idx] -= b[idx]; } -__global__ void addfloat(float3 *a, float c, float3 *dest) +__global__ void float3addfloat(float3 *a, float c, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; dest[idx] = a[idx] + c; } -__global__ void addfloatequal(float3 *a, float c) +__global__ void float3addfloatequal(float3 *a, float c) { int idx = blockIdx.x*blockDim.x + threadIdx.x; a[idx] += c; } -__global__ void subfloat(float3 *a, float c, float3 *dest) +__global__ void floataddfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c + a[idx]; +} + +__global__ void float3subfloat(float3 *a, float c, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; dest[idx] = a[idx] - c; } -__global__ void subfloatequal(float3 *a, float c) +__global__ void float3subfloatequal(float3 *a, float c) { int idx = blockIdx.x*blockDim.x + threadIdx.x; a[idx] -= c; } -__global__ void mulfloat(float3 *a, float c, float3 *dest) +__global__ void floatsubfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c - a[idx]; +} + +__global__ void float3mulfloat(float3 *a, float c, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; dest[idx] = a[idx]*c; } -__global__ void mulfloatequal(float3 *a, float c) +__global__ void float3mulfloatequal(float3 *a, float c) { int idx = blockIdx.x*blockDim.x + threadIdx.x; a[idx] *= c; } -__global__ void divfloat(float3 *a, float c, float3 *dest) +__global__ void floatmulfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c*a[idx]; +} + +__global__ void float3divfloat(float3 *a, float c, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; dest[idx] = a[idx]/c; } -__global__ void divfloatequal(float3 *a, float c) +__global__ void float3divfloatequal(float3 *a, float c) { int idx = blockIdx.x*blockDim.x + threadIdx.x; a[idx] /= c; } +__global__ void floatdivfloat3(float3 *a, float c, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = c/a[idx]; +} + __global__ void dot(float3 *a, float3 *b, float* dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; diff --git a/tests/linalg_test.py b/tests/linalg_test.py index bc9720b..b490813 100644 --- a/tests/linalg_test.py +++ b/tests/linalg_test.py @@ -1,4 +1,3 @@ -import sys import numpy as np from pycuda import autoinit from pycuda.compiler import SourceModule @@ -13,219 +12,164 @@ source = open('../linalg.h').read() + open('linalg_test.cu').read() mod = SourceModule(source, no_extern_c=True, arch='sm_13') -add = mod.get_function('add') -addequal = mod.get_function('addequal') -sub = mod.get_function('sub') -subequal = mod.get_function('subequal') -addfloat = mod.get_function('addfloat') -addfloatequal = mod.get_function('addfloatequal') -subfloat = mod.get_function('subfloat') -subfloatequal = mod.get_function('subfloatequal') -mulfloat = mod.get_function('mulfloat') -mulfloatequal = mod.get_function('mulfloatequal') -divfloat = mod.get_function('divfloat') -divfloatequal = mod.get_function('divfloatequal') +float3add = mod.get_function('float3add') +float3addequal = mod.get_function('float3addequal') +float3sub = mod.get_function('float3sub') +float3subequal = mod.get_function('float3subequal') +float3addfloat = mod.get_function('float3addfloat') +float3addfloatequal = mod.get_function('float3addfloatequal') +floataddfloat3 = mod.get_function('floataddfloat3') +float3subfloat = mod.get_function('float3subfloat') +float3subfloatequal = mod.get_function('float3subfloatequal') +floatsubfloat3 = mod.get_function('floatsubfloat3') +float3mulfloat = mod.get_function('float3mulfloat') +float3mulfloatequal = mod.get_function('float3mulfloatequal') +floatmulfloat3 = mod.get_function('floatmulfloat3') +float3divfloat = mod.get_function('float3divfloat') +float3divfloatequal = mod.get_function('float3divfloatequal') +floatdivfloat3 = mod.get_function('floatdivfloat3') dot = mod.get_function('dot') -size = {'block': (10,1,1), 'grid': (1,1)} - -for i in range(1): - a = np.zeros(size['block'][0], dtype=float3) - b = np.zeros(a.size, dtype=float3) - dest = np.zeros(a.size, dtype=float3) - c = np.float32(np.random.random_sample()) - destfloat = np.zeros(a.size, dtype=np.float32) - - a['x'] = np.random.random_sample(size=a.size) - a['y'] = np.random.random_sample(size=a.size) - a['z'] = np.random.random_sample(size=a.size) - - print a['x'] - print a['y'] - print a['z'] - print c - - print 'testing add...', - - add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - - if (a['x'] + b['x'] != dest['x']).any() or \ - (a['y'] + b['y'] != dest['y']).any() or \ - (a['z'] + b['z'] != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing sub...', - - sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) - - if (a['x'] - b['x'] != dest['x']).any() or \ - (a['y'] - b['y'] != dest['y']).any() or \ - (a['z'] - b['z'] != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing addfloat...', - - addfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if (a['x'] + c != dest['x']).any() or \ - (a['y'] + c != dest['y']).any() or \ - (a['z'] + c != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing subfloat...', - - subfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if (a['x'] - c != dest['x']).any() or \ - (a['y'] - c != dest['y']).any() or \ - (a['z'] - c != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing mulfloat...', - - mulfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if (a['x']*c != dest['x']).any() or \ - (a['y']*c != dest['y']).any() or \ - (a['z']*c != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing divfloat...', - - divfloat(cuda.In(a), c, cuda.Out(dest), **size) - - if (a['x']/c != dest['x']).any() or \ - (a['y']/c != dest['y']).any() or \ - (a['z']/c != dest['z']).any(): - print 'fail' - print a['x']/c - print a['y']/c - print a['z']/c - print dest['x'] - print dest['y'] - print dest['z'] - else: - print 'success' - - print 'testing dot...', - - dot(cuda.In(a), cuda.In(b), cuda.Out(destfloat), **size) - - if (a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'] != destfloat).any(): - print 'fail' - else: - print 'sucess' - - print 'testing addequal...', - +size = {'block': (100,1,1), 'grid': (1,1)} + +a = np.empty(size['block'][0], dtype=float3) +b = np.empty(size['block'][0], dtype=float3) +c = np.float32(np.random.random_sample()) + +a['x'] = np.random.random_sample(size=a.size) +a['y'] = np.random.random_sample(size=a.size) +a['z'] = np.random.random_sample(size=a.size) + +def testfloat3add(): + dest = np.empty(a.size, dtype=float3) + float3add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + if not np.allclose(a['x']+b['x'], dest['x']) or \ + not np.allclose(a['y']+b['y'], dest['y']) or \ + not np.allclose(a['z']+b['z'], dest['z']): + assert False + +def testfloat3sub(): + dest = np.empty(a.size, dtype=float3) + float3sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + if not np.allclose(a['x']-b['x'], dest['x']) or \ + not np.allclose(a['y']-b['y'], dest['y']) or \ + not np.allclose(a['z']-b['z'], dest['z']): + assert False + +def testfloat3addequal(): dest = np.copy(a) + float3addequal(cuda.InOut(dest), cuda.In(b), **size) + if not np.allclose(a['x']+b['x'], dest['x']) or \ + not np.allclose(a['y']+b['y'], dest['y']) or \ + not np.allclose(a['z']+b['z'], dest['z']): + assert False - addequal(cuda.InOut(dest), cuda.In(b), **size) - - if (a['x'] + b['x'] != dest['x']).any() or \ - (a['y'] + b['y'] != dest['y']).any() or \ - (a['z'] + b['z'] != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing subequal...', - +def testfloat3subequal(): dest = np.copy(a) - - subequal(cuda.InOut(dest), cuda.In(b), **size) - - if (a['x'] - b['x'] != dest['x']).any() or \ - (a['y'] - b['y'] != dest['y']).any() or \ - (a['z'] - b['z'] != dest['z']).any(): - print 'fail' - else: - print 'success' - - print 'testing addfloatequal...', - + float3subequal(cuda.InOut(dest), cuda.In(b), **size) + if not np.allclose(a['x']-b['x'], dest['x']) or \ + not np.allclose(a['y']-b['y'], dest['y']) or \ + not np.allclose(a['z']-b['z'], dest['z']): + assert False + +def testfloat3addfloat(): + dest = np.empty(a.size, dtype=float3) + float3addfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']+c, dest['x']) or \ + not np.allclose(a['y']+c, dest['y']) or \ + not np.allclose(a['z']+c, dest['z']): + assert False + +def testfloat3addfloatequal(): dest = np.copy(a) - - addfloatequal(cuda.InOut(dest), c, **size) - - if (a['x'] + c != dest['x']).any() or \ - (a['y'] + c != dest['y']).any() or \ - (a['z'] + c != dest['z']).any(): - print 'fail' - print a['x'] + c - print a['y'] + c - print a['z'] + c - print dest['x'] - print dest['y'] - print dest['z'] - else: - print 'success' - - print 'testing subfloatequal...', - + float3addfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']+c, dest['x']) or \ + not np.allclose(a['y']+c, dest['y']) or \ + not np.allclose(a['z']+c, dest['z']): + assert False + +def testfloataddfloat3(): + dest = np.empty(a.size, dtype=float3) + floataddfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c+a['x'], dest['x']) or \ + not np.allclose(c+a['y'], dest['y']) or \ + not np.allclose(c+a['z'], dest['z']): + assert False + +def testfloat3subfloat(): + dest = np.empty(a.size, dtype=float3) + float3subfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']-c, dest['x']) or \ + not np.allclose(a['y']-c, dest['y']) or \ + not np.allclose(a['z']-c, dest['z']): + assert False + +def testfloat3subfloatequal(): dest = np.copy(a) - - subfloatequal(cuda.InOut(dest), c, **size) - - if (a['x'] - c != dest['x']).any() or \ - (a['y'] - c != dest['y']).any() or \ - (a['z'] - c != dest['z']).any(): - print 'fail' - print a['x'] - c - print a['y'] - c - print a['z'] - c - print dest['x'] - print dest['y'] - print dest['z'] - else: - print 'success' - - print 'testing mulfloatequal...', - + float3subfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']-c, dest['x']) or \ + not np.allclose(a['y']-c, dest['y']) or \ + not np.allclose(a['z']-c, dest['z']): + assert False + +def testfloatsubfloat3(): + dest = np.empty(a.size, dtype=float3) + floatsubfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c-a['x'], dest['x']) or \ + not np.allclose(c-a['y'], dest['y']) or \ + not np.allclose(c-a['z'], dest['z']): + assert False + +def testfloat3mulfloat(): + dest = np.empty(a.size, dtype=float3) + float3mulfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']*c, dest['x']) or \ + not np.allclose(a['y']*c, dest['y']) or \ + not np.allclose(a['z']*c, dest['z']): + assert False + +def testfloat3mulfloatequal(): dest = np.copy(a) - - mulfloatequal(cuda.InOut(dest), c, **size) - - if (a['x']*c != dest['x']).any() or \ - (a['y']*c != dest['y']).any() or \ - (a['z']*c != dest['z']).any(): - print 'fail' - print a['x']*c - print a['y']*c - print a['z']*c - print dest['x'] - print dest['y'] - print dest['z'] - else: - print 'success' - - print 'testing divfloatequal...', - + float3mulfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']*c, dest['x']) or \ + not np.allclose(a['y']*c, dest['y']) or \ + not np.allclose(a['z']*c, dest['z']): + assert False + +def testfloatmulfloat3(): + dest = np.empty(a.size, dtype=float3) + floatmulfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c*a['x'], dest['x']) or \ + not np.allclose(c*a['y'], dest['y']) or \ + not np.allclose(c*a['z'], dest['z']): + assert False + +def testfloat3divfloat(): + dest = np.empty(a.size, dtype=float3) + float3divfloat(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(a['x']/c, dest['x']) or \ + not np.allclose(a['y']/c, dest['y']) or \ + not np.allclose(a['z']/c, dest['z']): + assert False + +def testfloat3divfloatequal(): dest = np.copy(a) - - divfloatequal(cuda.InOut(dest), c, **size) - - if (a['x']/c != dest['x']).any() or \ - (a['y']/c != dest['y']).any() or \ - (a['z']/c != dest['z']).any(): - print 'fail' - print a['x']/c - print a['y']/c - print a['z']/c - print dest['x'] - print dest['y'] - print dest['z'] - else: - print 'success' - - + float3divfloatequal(cuda.InOut(dest), c, **size) + if not np.allclose(a['x']/c, dest['x']) or \ + not np.allclose(a['y']/c, dest['y']) or \ + not np.allclose(a['z']/c, dest['z']): + assert False + +def testfloatdivfloat3(): + dest = np.empty(a.size, dtype=float3) + floatdivfloat3(cuda.In(a), c, cuda.Out(dest), **size) + if not np.allclose(c/a['x'], dest['x']) or \ + not np.allclose(c/a['y'], dest['y']) or \ + not np.allclose(c/a['z'], dest['z']): + assert false + +def testdot(): + dest = np.empty(a.size, dtype=np.float32) + dot(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], dest): + assert False diff --git a/tests/matrix_test.cu b/tests/matrix_test.cu new file mode 100644 index 0000000..5c88a92 --- /dev/null +++ b/tests/matrix_test.cu @@ -0,0 +1,145 @@ +//-*-c-*- + +__device__ Matrix array2matrix(float *a) +{ + return make_matrix(a[0], a[1], a[2], + a[3], a[4], a[5], + a[6], a[7], a[8]); +} + +__device__ void matrix2array(const Matrix &m, float *a) +{ + a[0] = m.a00; + a[1] = m.a01; + a[2] = m.a02; + a[3] = m.a10; + a[4] = m.a11; + a[5] = m.a12; + a[6] = m.a20; + a[7] = m.a21; + a[8] = m.a22; +} + +extern "C" +{ + +__global__ void det(float *a, float *dest) +{ + Matrix m = array2matrix(a); + dest[0] = det(m); +} + +__global__ void inv(float *a, float *dest) +{ + Matrix m = array2matrix(a); + matrix2array(inv(m), dest); +} + +__global__ void matrixadd(float *a, float *b, float *dest) +{ + matrix2array(array2matrix(a)+array2matrix(b), dest); +} + +__global__ void matrixsub(float *a, float *b, float *dest) +{ + matrix2array(array2matrix(a)-array2matrix(b), dest); +} + +__global__ void matrixmul(float *a, float *b, float *dest) +{ + matrix2array(array2matrix(a)*array2matrix(b), dest); +} + +__global__ void multiply(float *a, float3 *x, float3 *dest) +{ + dest[0] = array2matrix(a)*x[0]; +} + +__global__ void matrixaddfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)+c, dest); +} + +__global__ void matrixsubfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)-c, dest); +} + +__global__ void matrixmulfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)*c, dest); +} + +__global__ void matrixdivfloat(float *a, float c, float *dest) +{ + matrix2array(array2matrix(a)/c, dest); +} + +__global__ void floataddmatrix(float *a, float c, float *dest) +{ + matrix2array(c+array2matrix(a), dest); +} + +__global__ void floatsubmatrix(float *a, float c, float *dest) +{ + matrix2array(c-array2matrix(a), dest); +} + +__global__ void floatmulmatrix(float *a, float c, float *dest) +{ + matrix2array(c*array2matrix(a), dest); +} + +__global__ void floatdivmatrix(float *a, float c, float *dest) +{ + matrix2array(c/array2matrix(a), dest); +} + +__global__ void matrixaddequals(float *a, float *b) +{ + Matrix m = array2matrix(a); + m += array2matrix(b); + matrix2array(m,a); +} + +__global__ void matrixsubequals(float *a, float *b) +{ + Matrix m = array2matrix(a); + m -= array2matrix(b); + matrix2array(m,a); +} + +__global__ void matrixaddequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m += c; + matrix2array(m,a); +} + +__global__ void matrixsubequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m -= c; + matrix2array(m,a); +} + +__global__ void matrixmulequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m *= c; + matrix2array(m,a); +} + +__global__ void matrixdivequalsfloat(float *a, float c) +{ + Matrix m = array2matrix(a); + m /= c; + matrix2array(m,a); +} + +__global__ void outer(float3 a, float3 b, float* dest) +{ + matrix2array(outer(a,b), dest); +} + +} // extern "c" diff --git a/tests/matrix_test.py b/tests/matrix_test.py new file mode 100644 index 0000000..412acf9 --- /dev/null +++ b/tests/matrix_test.py @@ -0,0 +1,310 @@ +import numpy as np +from pycuda import autoinit +from pycuda.compiler import SourceModule +import pycuda.driver as cuda +from pycuda import gpuarray + +float3 = gpuarray.vec.float3 + +print 'device %s' % autoinit.device.name() + +source = open('../matrix.h').read() + open('../linalg.h').read() + \ + open('matrix_test.cu').read() + +mod = SourceModule(source, no_extern_c=True, arch='sm_13') + +det = mod.get_function('det') +inv = mod.get_function('inv') +matrixadd = mod.get_function('matrixadd') +matrixsub = mod.get_function('matrixsub') +matrixmul = mod.get_function('matrixmul') +multiply = mod.get_function('multiply') +matrixaddfloat = mod.get_function('matrixaddfloat') +matrixsubfloat = mod.get_function('matrixsubfloat') +matrixmulfloat = mod.get_function('matrixmulfloat') +matrixdivfloat = mod.get_function('matrixdivfloat') +floataddmatrix = mod.get_function('floataddmatrix') +floatsubmatrix = mod.get_function('floatsubmatrix') +floatmulmatrix = mod.get_function('floatmulmatrix') +floatdivmatrix = mod.get_function('floatdivmatrix') +matrixaddequals = mod.get_function('matrixaddequals') +matrixsubequals = mod.get_function('matrixsubequals') +matrixaddequalsfloat = mod.get_function('matrixaddequalsfloat') +matrixsubequalsfloat = mod.get_function('matrixsubequalsfloat') +matrixmulequalsfloat = mod.get_function('matrixmulequalsfloat') +matrixdivequalsfloat = mod.get_function('matrixdivequalsfloat') +outer = mod.get_function('outer') + +size = {'block': (1,1,1), 'grid': (1,1)} + +for i in range(1): + a = np.random.random_sample(size=9).astype(np.float32) + b = np.random.random_sample(size=9).astype(np.float32) + dest = np.empty(1, dtype=np.float32) + c = np.int32(np.random.random_sample()) + + print 'testing det...', + + det(cuda.In(a), cuda.Out(dest), **size) + + if not np.allclose(np.float32(np.linalg.det(a.reshape(3,3))), dest[0]): + print 'fail' + print np.float32(np.linalg.det(a.reshape(3,3))) + print dest[0] + else: + print 'success' + + print 'testing inv...', + + dest = np.empty(9, dtype=np.float32) + + inv(cuda.In(a), cuda.Out(dest), **size) + + if not np.allclose(np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32), dest): + print 'fail' + print np.linalg.inv(a.reshape(3,3)).flatten().astype(np.float32) + print dest + else: + print 'success' + + print 'testing matrixadd...', + + matrixadd(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + + if not np.allclose(a+b, dest): + print 'fail' + print a+b + print dest + else: + print 'success' + + print 'testing matrixsub...', + + matrixsub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + + if not np.allclose(a-b, dest): + print 'fail' + print a-b + print dest + else: + print 'sucess' + + print 'testing matrixmul...', + + matrixmul(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + + if not np.allclose(np.dot(a.reshape(3,3),b.reshape(3,3)).flatten(), dest): + print 'fail' + print np.dot(a.reshape(3,3),b.reshape(3,3)).flatten() + print dest + else: + print 'success' + + print 'testing multiply...', + + x_cpu = np.random.random_sample(size=3).astype(np.float32) + x_gpu = np.array((x_cpu[0], x_cpu[1], x_cpu[2]), dtype=float3) + + dest = np.empty(1, dtype=float3) + + multiply(cuda.In(a), cuda.In(x_gpu), cuda.Out(dest), **size) + + m = a.reshape(3,3) + + if not np.allclose(np.dot(x_cpu,m[0]), dest[0]['x']) or \ + not np.allclose(np.dot(x_cpu,m[1]), dest[0]['y']) or \ + not np.allclose(np.dot(x_cpu,m[2]), dest[0]['z']): + print 'fail' + print np.dot(x_cpu,m[0]) + print np.dot(x_cpu,m[1]) + print np.dot(x_cpu,m[2]) + print dest[0]['x'] + print dest[0]['y'] + print dest[0]['z'] + else: + print 'success' + + print 'testing matrixaddfloat...', + + dest = np.empty(9, dtype=np.float32) + + matrixaddfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a+c, dest): + print 'fail' + print a+c + print dest + else: + print 'success' + + print 'testing matrixsubfloat...', + + matrixsubfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a-c, dest): + print 'fail' + print a-c + print dest + else: + print 'success' + + print 'testing matrixmulfloat...', + + matrixmulfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a*c, dest): + print 'fail' + print a-c + print dest + else: + print 'success' + + print 'testing matrixdivfloat...', + + matrixdivfloat(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(a/c, dest): + print 'fail' + print a/c + print dest + else: + print 'success' + + print 'testing floataddmatrix...', + + floataddmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c+a, dest): + print 'fail' + print c+a + print dest + else: + print 'success' + + print 'testing floatsubmatrix...', + + floatsubmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c-a, dest): + print 'fail' + print c-a + print dest + else: + print 'success' + + print 'testing floatmulmatrix...', + + floatmulmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c*a, dest): + print 'fail' + print c*a + print dest + else: + print 'success' + + print 'testing floatdivmatrix...', + + floatdivmatrix(cuda.In(a), c, cuda.Out(dest), **size) + + if not np.allclose(c/a, dest): + print 'fail' + print c/a + print dest + else: + print 'success' + + print 'testing matrixaddequals...', + + dest = np.copy(a) + + matrixaddequals(cuda.InOut(dest), cuda.In(b), **size) + + if not np.allclose(a+b, dest): + print 'fail' + print a+b + print dest + else: + print 'success' + + print 'testing matrixsubequals...', + + dest = np.copy(a) + + matrixsubequals(cuda.InOut(dest), cuda.In(b), **size) + + if not np.allclose(a-b, dest): + print 'fail' + print a-b + print dest + else: + print 'success' + + print 'testing matrixaddequalsfloat...', + + dest = np.copy(a) + + matrixaddequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a+c, dest): + print 'fail' + print a+c + print dest + else: + print 'success' + + print 'testing matrixsubequalsfloat...', + + dest = np.copy(a) + + matrixsubequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a-c, dest): + print 'fail' + print a-c + print dest + else: + print 'success' + + print 'testing matrixmulequalsfloat...', + + dest = np.copy(a) + + matrixmulequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a*c, dest): + print 'fail' + print a*c + print dest + else: + print 'success' + + print 'testing matrixdivequalsfloat...', + + dest = np.copy(a) + + matrixdivequalsfloat(cuda.InOut(dest), c, **size) + + if not np.allclose(a/c, dest): + print 'fail' + print a*c + print dest + else: + print 'success' + + print 'testing outer...', + + x1_cpu = np.random.random_sample(size=3).astype(np.float32) + x2_cpu = np.random.random_sample(size=3).astype(np.float32) + + x1_gpu = np.array((x1_cpu[0], x1_cpu[1], x1_cpu[2]), dtype=float3) + x2_gpu = np.array((x2_cpu[0], x2_cpu[1], x2_cpu[2]), dtype=float3) + + outer(x1_gpu, x2_gpu, cuda.Out(dest), **size) + + if not np.allclose(np.outer(x1_cpu, x2_cpu).flatten(), dest): + print 'fail' + print np.outer(x1_cpu, x2_cpu).flatten() + print dest + else: + print 'success' |