diff options
author | Anthony LaTorre <telatorre@gmail.com> | 2011-05-05 16:28:09 -0400 |
---|---|---|
committer | Anthony LaTorre <telatorre@gmail.com> | 2011-05-05 16:28:09 -0400 |
commit | 7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece (patch) | |
tree | e32fee7d8da724b64ba0bcbc9ed4026cccd04feb | |
parent | 48cb6fc276143567e13bfec6846721beb4ca2f46 (diff) | |
download | chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.gz chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.bz2 chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.zip |
finished basic linear algebra operations and tests
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | linalg.h | 10 |
| -rw-r--r-- | linalg_test.cu | 99 |
| -rw-r--r-- | tests/linalg_test.cu | 84 |
| -rw-r--r-- | tests/linalg_test.py | 231 |

4 files changed, 320 insertions, 104 deletions
```diff
@@ -3,7 +3,7 @@
 
 __device__ __host__ float3 operator+ (const float3 &a, const float3 &b)
 {
-    return make_float3(a.x+b.x, a.y+b.y, a.z+b.y);
+    return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
 }
 
 __device__ __host__ void operator+= (float3 &a, const float3 &b)
@@ -30,7 +30,7 @@ __device__ __host__ float3 operator+ (const float3 &a, const float &c)
     return make_float3(a.x+c, a.y+c, a.z+c);
 }
 
-__device__ __host__ float3 operator+= (const float3 &a, const float &c)
+__device__ __host__ void operator+= (float3 &a, const float &c)
 {
     a.x += c;
     a.y += c;
@@ -47,7 +47,7 @@ __device__ __host__ float3 operator- (const float3 &a, const float &c)
     return make_float3(a.x-c, a.y-c, a.z-c);
 }
 
-__device__ __host__ float3 operator-= (const float3 &a, const float &c)
+__device__ __host__ void operator-= (float3 &a, const float &c)
 {
     a.x -= c;
     a.y -= c;
@@ -64,7 +64,7 @@ __device__ __host__ float3 operator* (const float3 &a, const float &c)
     return make_float3(a.x*c, a.y*c, a.z*c);
 }
 
-__device__ __host__ float3 operator*= (const float3 &a, const float &c)
+__device__ __host__ void operator*= (float3 &a, const float &c)
 {
     a.x *= c;
     a.y *= c;
@@ -81,7 +81,7 @@ __device__ __host__ float3 operator/ (const float3 &a, const float &c)
     return make_float3(a.x/c, a.y/c, a.z/c);
 }
 
-__device__ __host__ float3 operator/= (const float3 &a, const float &c)
+__device__ __host__ void operator/= (float3 &a, const float &c)
 {
     a.x /= c;
     a.y /= c;
diff --git a/linalg_test.cu b/linalg_test.cu
deleted file mode 100644
index c4647a7..0000000
--- a/linalg_test.cu
+++ /dev/null
@@ -1,99 +0,0 @@
-//-*-c-*-
-
-#include "linalg.h"
-
-extern "C"
-{
-
-__global__ void add(int n, float3 *a, float3 *b, float3 *out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = a[i] + b[i];
-}
-
-__global__ void addequal(int n, float3 *a, float3 *b)
-{
-    int i;
-    for (i=0; i < n; i++)
-        a[i] += b[i];
-}
-
-__global__ void sub(int n, float3 *a, float3 *b, float3 *out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = a[i] - b[i];
-}
-
-__global__ void subequal(int n, float3 *a, float3 *b)
-{
-    int i;
-    for (i=0; i < n; i++)
-        a[i] -= b[i];
-}
-
-__gloabl__ void addfloat(int n, float3 *a, float c, float3 *out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = a[i] + c;
-}
-
-__global__ void addfloatequal(int n, float3 *a, float c)
-{
-    int i;
-    for (i=0; i < n; i++)
-        a += c;
-}
-
-__global__ void subfloat(int n, float3 *a, float c, float3 *out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = a[i] - c;
-}
-
-__global__ void subfloatequal(int n, float3 *a, float c)
-{
-    int i;
-    for (i=0; i < n; i++)
-        a[i] -= c;
-}
-
-__global__ void mulfloat(int n, float3 *a, float c, float3 *out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = a[i]*c;
-}
-
-__global__ void mulfloatequal(int n, float3 *a, float c)
-{
-    int i;
-    for (i=0; i < n; i++)
-        a *= c;
-}
-
-__global__ void divfloat(int n, float3 *a, float c, float3 *out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = a[i]/c;
-}
-
-__global__ void divfloatequal(int n, float3 *a, float c)
-{
-    int i;
-    for (i=0; i < n; i++)
-        a /= c;
-}
-
-__global__ void dot(int n, float3 *a, float3 *b, float* out)
-{
-    int i;
-    for (i=0; i < n; i++)
-        out[i] = dot(a,b);
-}
-
-} // extern "c"
diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu
new file mode 100644
index 0000000..13d2ed0
--- /dev/null
+++ b/tests/linalg_test.cu
@@ -0,0 +1,84 @@
+//-*-c-*-
+
+extern "C"
+{
+
+__global__ void add(float3 *a, float3 *b, float3 *dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = a[idx] + b[idx];
+}
+
+__global__ void addequal(float3 *a, float3 *b)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    a[idx] += b[idx];
+}
+
+__global__ void sub(float3 *a, float3 *b, float3 *dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = a[idx] - b[idx];
+}
+
+__global__ void subequal(float3 *a, float3 *b)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    a[idx] -= b[idx];
+}
+
+__global__ void addfloat(float3 *a, float c, float3 *dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = a[idx] + c;
+}
+
+__global__ void addfloatequal(float3 *a, float c)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    a[idx] += c;
+}
+
+__global__ void subfloat(float3 *a, float c, float3 *dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = a[idx] - c;
+}
+
+__global__ void subfloatequal(float3 *a, float c)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    a[idx] -= c;
+}
+
+__global__ void mulfloat(float3 *a, float c, float3 *dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = a[idx]*c;
+}
+
+__global__ void mulfloatequal(float3 *a, float c)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    a[idx] *= c;
+}
+
+__global__ void divfloat(float3 *a, float c, float3 *dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = a[idx]/c;
+}
+
+__global__ void divfloatequal(float3 *a, float c)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    a[idx] /= c;
+}
+
+__global__ void dot(float3 *a, float3 *b, float* dest)
+{
+    int idx = blockIdx.x*blockDim.x + threadIdx.x;
+    dest[idx] = dot(a[idx],b[idx]);
+}
+
+} // extern "c"
diff --git a/tests/linalg_test.py b/tests/linalg_test.py
new file mode 100644
index 0000000..bc9720b
--- /dev/null
+++ b/tests/linalg_test.py
@@ -0,0 +1,231 @@
+import sys
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+float3 = gpuarray.vec.float3
+
+print 'device %s' % autoinit.device.name()
+
+source = open('../linalg.h').read() + open('linalg_test.cu').read()
+
+mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+
+add = mod.get_function('add')
+addequal = mod.get_function('addequal')
+sub = mod.get_function('sub')
+subequal = mod.get_function('subequal')
+addfloat = mod.get_function('addfloat')
+addfloatequal = mod.get_function('addfloatequal')
+subfloat = mod.get_function('subfloat')
+subfloatequal = mod.get_function('subfloatequal')
+mulfloat = mod.get_function('mulfloat')
+mulfloatequal = mod.get_function('mulfloatequal')
+divfloat = mod.get_function('divfloat')
+divfloatequal = mod.get_function('divfloatequal')
+dot = mod.get_function('dot')
+
+size = {'block': (10,1,1), 'grid': (1,1)}
+
+for i in range(1):
+    a = np.zeros(size['block'][0], dtype=float3)
+    b = np.zeros(a.size, dtype=float3)
+    dest = np.zeros(a.size, dtype=float3)
+    c = np.float32(np.random.random_sample())
+    destfloat = np.zeros(a.size, dtype=np.float32)
+
+    a['x'] = np.random.random_sample(size=a.size)
+    a['y'] = np.random.random_sample(size=a.size)
+    a['z'] = np.random.random_sample(size=a.size)
+
+    print a['x']
+    print a['y']
+    print a['z']
+    print c
+
+    print 'testing add...',
+
+    add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if (a['x'] + b['x'] != dest['x']).any() or \
+       (a['y'] + b['y'] != dest['y']).any() or \
+       (a['z'] + b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing sub...',
+
+    sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if (a['x'] - b['x'] != dest['x']).any() or \
+       (a['y'] - b['y'] != dest['y']).any() or \
+       (a['z'] - b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing addfloat...',
+
+    addfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x'] + c != dest['x']).any() or \
+       (a['y'] + c != dest['y']).any() or \
+       (a['z'] + c != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing subfloat...',
+
+    subfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x'] - c != dest['x']).any() or \
+       (a['y'] - c != dest['y']).any() or \
+       (a['z'] - c != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing mulfloat...',
+
+    mulfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x']*c != dest['x']).any() or \
+       (a['y']*c != dest['y']).any() or \
+       (a['z']*c != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing divfloat...',
+
+    divfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x']/c != dest['x']).any() or \
+       (a['y']/c != dest['y']).any() or \
+       (a['z']/c != dest['z']).any():
+        print 'fail'
+        print a['x']/c
+        print a['y']/c
+        print a['z']/c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing dot...',
+
+    dot(cuda.In(a), cuda.In(b), cuda.Out(destfloat), **size)
+
+    if (a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'] != destfloat).any():
+        print 'fail'
+    else:
+        print 'sucess'
+
+    print 'testing addequal...',
+
+    dest = np.copy(a)
+
+    addequal(cuda.InOut(dest), cuda.In(b), **size)
+
+    if (a['x'] + b['x'] != dest['x']).any() or \
+       (a['y'] + b['y'] != dest['y']).any() or \
+       (a['z'] + b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing subequal...',
+
+    dest = np.copy(a)
+
+    subequal(cuda.InOut(dest), cuda.In(b), **size)
+
+    if (a['x'] - b['x'] != dest['x']).any() or \
+       (a['y'] - b['y'] != dest['y']).any() or \
+       (a['z'] - b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing addfloatequal...',
+
+    dest = np.copy(a)
+
+    addfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x'] + c != dest['x']).any() or \
+       (a['y'] + c != dest['y']).any() or \
+       (a['z'] + c != dest['z']).any():
+        print 'fail'
+        print a['x'] + c
+        print a['y'] + c
+        print a['z'] + c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing subfloatequal...',
+
+    dest = np.copy(a)
+
+    subfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x'] - c != dest['x']).any() or \
+       (a['y'] - c != dest['y']).any() or \
+       (a['z'] - c != dest['z']).any():
+        print 'fail'
+        print a['x'] - c
+        print a['y'] - c
+        print a['z'] - c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing mulfloatequal...',
+
+    dest = np.copy(a)
+
+    mulfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x']*c != dest['x']).any() or \
+       (a['y']*c != dest['y']).any() or \
+       (a['z']*c != dest['z']).any():
+        print 'fail'
+        print a['x']*c
+        print a['y']*c
+        print a['z']*c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing divfloatequal...',
+
+    dest = np.copy(a)
+
+    divfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x']/c != dest['x']).any() or \
+       (a['y']/c != dest['y']).any() or \
+       (a['z']/c != dest['z']).any():
+        print 'fail'
+        print a['x']/c
+        print a['y']/c
+        print a['z']/c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+
```