finished basic linear algebra operations and tests

author: Anthony LaTorre <telatorre@gmail.com> 2011-05-05 16:28:09 -0400
committer: Anthony LaTorre <telatorre@gmail.com> 2011-05-05 16:28:09 -0400
commit: 7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece (patch)
tree: e32fee7d8da724b64ba0bcbc9ed4026cccd04feb
parent: 48cb6fc276143567e13bfec6846721beb4ca2f46 (diff)
download: chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.gz
chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.bz2
chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.zip
4 files changed, 320 insertions, 104 deletions
diff --git a/linalg.h b/linalg.h
index 4d0344e..698b6f5 100644
--- a/linalg.h
+++ b/linalg.h
@@ -3,7 +3,7 @@
 
 __device__ __host__ float3 operator+ (const float3 &a, const float3 &b)
 {
-	return make_float3(a.x+b.x, a.y+b.y, a.z+b.y);
+	return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
 }
 
 __device__ __host__ void operator+= (float3 &a, const float3 &b)
@@ -30,7 +30,7 @@ __device__ __host__ float3 operator+ (const float3 &a, const float &c)
 	return make_float3(a.x+c, a.y+c, a.z+c);
 }
 
-__device__ __host__ float3 operator+= (const float3 &a, const float &c)
+__device__ __host__ void operator+= (float3 &a, const float &c)
 {
 	a.x += c;
 	a.y += c;
@@ -47,7 +47,7 @@ __device__ __host__ float3 operator- (const float3 &a, const float &c)
 	return make_float3(a.x-c, a.y-c, a.z-c);
 }
 
-__device__ __host__ float3 operator-= (const float3 &a, const float &c)
+__device__ __host__ void operator-= (float3 &a, const float &c)
 {
 	a.x -= c;
 	a.y -= c;
@@ -64,7 +64,7 @@ __device__ __host__ float3 operator* (const float3 &a, const float &c)
 	return make_float3(a.x*c, a.y*c, a.z*c);
 }
 
-__device__ __host__ float3 operator*= (const float3 &a, const float &c)
+__device__ __host__ void operator*= (float3 &a, const float &c)
 {
 	a.x *= c;
 	a.y *= c;
@@ -81,7 +81,7 @@ __device__ __host__ float3 operator/ (const float3 &a, const float &c)
 	return make_float3(a.x/c, a.y/c, a.z/c);
 }
 
-__device__ __host__ float3 operator/= (const float3 &a, const float &c)
+__device__ __host__ void operator/= (float3 &a, const float &c)
 {
 	a.x /= c;
 	a.y /= c;
diff --git a/linalg_test.cu b/linalg_test.cu
deleted file mode 100644
index c4647a7..0000000
--- a/linalg_test.cu
+++ /dev/null
@@ -1,99 +0,0 @@
-//-*-c-*-
-
-#include "linalg.h"
-
-extern "C"
-{
-
-__global__ void add(int n, float3 *a, float3 *b, float3 *out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = a[i] + b[i];
-}
-
-__global__ void addequal(int n, float3 *a, float3 *b)
-{
-	int i;
-	for (i=0; i < n; i++)
-		a[i] += b[i];
-}
-
-__global__ void sub(int n, float3 *a, float3 *b, float3 *out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = a[i] - b[i];
-}
-
-__global__ void subequal(int n, float3 *a, float3 *b)
-{
-	int i;
-	for (i=0; i < n; i++)
-		a[i] -= b[i];
-}
-
-__gloabl__ void addfloat(int n, float3 *a, float c, float3 *out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = a[i] + c;
-}
-
-__global__ void addfloatequal(int n, float3 *a, float c)
-{
-	int i;
-	for (i=0; i < n; i++)
-		a += c;
-}
-
-__global__ void subfloat(int n, float3 *a, float c, float3 *out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = a[i] - c;
-}
-
-__global__ void subfloatequal(int n, float3 *a, float c)
-{
-	int i;
-	for (i=0; i < n; i++)
-		a[i] -= c;
-}
-
-__global__ void mulfloat(int n, float3 *a, float c, float3 *out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = a[i]*c;
-}
-
-__global__ void mulfloatequal(int n, float3 *a, float c)
-{
-	int i;
-	for (i=0; i < n; i++)
-		a *= c;
-}
-
-__global__ void divfloat(int n, float3 *a, float c, float3 *out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = a[i]/c;
-}
-
-__global__ void divfloatequal(int n, float3 *a, float c)
-{
-	int i;
-	for (i=0; i < n; i++)
-		a /= c;
-}
-
-__global__ void dot(int n, float3 *a, float3 *b, float* out)
-{
-	int i;
-	for (i=0; i < n; i++)
-		out[i] = dot(a,b);
-}
-
-} // extern "c"
diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu
new file mode 100644
index 0000000..13d2ed0
--- /dev/null
+++ b/tests/linalg_test.cu
@@ -0,0 +1,84 @@
+//-*-c-*-
+
+extern "C"
+{
+
+__global__ void add(float3 *a, float3 *b, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] + b[idx];
+}
+
+__global__ void addequal(float3 *a, float3 *b)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] += b[idx];
+}
+
+__global__ void sub(float3 *a, float3 *b, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] - b[idx];
+}
+
+__global__ void subequal(float3 *a, float3 *b)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] -= b[idx];
+}
+
+__global__ void addfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] + c;
+}
+
+__global__ void addfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] += c;
+}
+
+__global__ void subfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx] - c;
+}
+
+__global__ void subfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] -= c;
+}
+
+__global__ void mulfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx]*c;
+}
+
+__global__ void mulfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] *= c;
+}
+
+__global__ void divfloat(float3 *a, float c, float3 *dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = a[idx]/c;
+}
+
+__global__ void divfloatequal(float3 *a, float c)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	a[idx] /= c;
+}
+
+__global__ void dot(float3 *a, float3 *b, float* dest)
+{
+	int idx = blockIdx.x*blockDim.x + threadIdx.x;
+	dest[idx] = dot(a[idx],b[idx]);
+}
+
+} // extern "c"
diff --git a/tests/linalg_test.py b/tests/linalg_test.py
new file mode 100644
index 0000000..bc9720b
--- /dev/null
+++ b/tests/linalg_test.py
@@ -0,0 +1,231 @@
+import sys
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+float3 = gpuarray.vec.float3
+
+print 'device %s' % autoinit.device.name()
+
+source = open('../linalg.h').read() + open('linalg_test.cu').read()
+
+mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+
+add = mod.get_function('add')
+addequal = mod.get_function('addequal')
+sub = mod.get_function('sub')
+subequal = mod.get_function('subequal')
+addfloat = mod.get_function('addfloat')
+addfloatequal = mod.get_function('addfloatequal')
+subfloat = mod.get_function('subfloat')
+subfloatequal = mod.get_function('subfloatequal')
+mulfloat = mod.get_function('mulfloat')
+mulfloatequal = mod.get_function('mulfloatequal')
+divfloat = mod.get_function('divfloat')
+divfloatequal = mod.get_function('divfloatequal')
+dot = mod.get_function('dot')
+
+size = {'block': (10,1,1), 'grid': (1,1)}
+
+for i in range(1):
+    a = np.zeros(size['block'][0], dtype=float3)
+    b = np.zeros(a.size, dtype=float3)
+    dest = np.zeros(a.size, dtype=float3)
+    c = np.float32(np.random.random_sample())
+    destfloat = np.zeros(a.size, dtype=np.float32)
+
+    for vec in (a, b):  # fill b as well: an all-zero b cannot expose a wrong-component bug
+        for axis in ('x', 'y', 'z'):
+            vec[axis] = np.random.random_sample(size=vec.size)
+
+    print a['x']
+    print a['y']
+    print a['z']
+    print c
+
+    print 'testing add...',
+
+    add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if (a['x'] + b['x'] != dest['x']).any() or \
+            (a['y'] + b['y'] != dest['y']).any() or \
+            (a['z'] + b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing sub...',
+
+    sub(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+
+    if (a['x'] - b['x'] != dest['x']).any() or \
+            (a['y'] - b['y'] != dest['y']).any() or \
+            (a['z'] - b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing addfloat...',
+
+    addfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x'] + c != dest['x']).any() or \
+            (a['y'] + c != dest['y']).any() or \
+            (a['z'] + c != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing subfloat...',
+
+    subfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x'] - c != dest['x']).any() or \
+            (a['y'] - c != dest['y']).any() or \
+            (a['z'] - c != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing mulfloat...',
+
+    mulfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if (a['x']*c != dest['x']).any() or \
+            (a['y']*c != dest['y']).any() or \
+            (a['z']*c != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing divfloat...',
+
+    divfloat(cuda.In(a), c, cuda.Out(dest), **size)
+
+    if not np.allclose(a['x']/c, dest['x']) or \
+            not np.allclose(a['y']/c, dest['y']) or \
+            not np.allclose(a['z']/c, dest['z']):  # sm_13 division may be a few ulp off numpy
+        print 'fail'
+        print a['x']/c
+        print a['y']/c
+        print a['z']/c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing dot...',  # compared with a tolerance: the device may fuse multiply-add
+
+    dot(cuda.In(a), cuda.In(b), cuda.Out(destfloat), **size)
+
+    if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], destfloat):
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing addequal...',
+
+    dest = np.copy(a)
+
+    addequal(cuda.InOut(dest), cuda.In(b), **size)
+
+    if (a['x'] + b['x'] != dest['x']).any() or \
+            (a['y'] + b['y'] != dest['y']).any() or \
+            (a['z'] + b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing subequal...',
+
+    dest = np.copy(a)
+
+    subequal(cuda.InOut(dest), cuda.In(b), **size)
+
+    if (a['x'] - b['x'] != dest['x']).any() or \
+            (a['y'] - b['y'] != dest['y']).any() or \
+            (a['z'] - b['z'] != dest['z']).any():
+        print 'fail'
+    else:
+        print 'success'
+
+    print 'testing addfloatequal...',
+
+    dest = np.copy(a)
+
+    addfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x'] + c != dest['x']).any() or \
+            (a['y'] + c != dest['y']).any() or \
+            (a['z'] + c != dest['z']).any():
+        print 'fail'
+        print a['x'] + c
+        print a['y'] + c
+        print a['z'] + c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing subfloatequal...',
+
+    dest = np.copy(a)
+
+    subfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x'] - c != dest['x']).any() or \
+            (a['y'] - c != dest['y']).any() or \
+            (a['z'] - c != dest['z']).any():
+        print 'fail'
+        print a['x'] - c
+        print a['y'] - c
+        print a['z'] - c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing mulfloatequal...',
+
+    dest = np.copy(a)
+
+    mulfloatequal(cuda.InOut(dest), c, **size)
+
+    if (a['x']*c != dest['x']).any() or \
+            (a['y']*c != dest['y']).any() or \
+            (a['z']*c != dest['z']).any():
+        print 'fail'
+        print a['x']*c
+        print a['y']*c
+        print a['z']*c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+    print 'testing divfloatequal...',
+
+    dest = np.copy(a)
+
+    divfloatequal(cuda.InOut(dest), c, **size)
+
+    if not np.allclose(a['x']/c, dest['x']) or \
+            not np.allclose(a['y']/c, dest['y']) or \
+            not np.allclose(a['z']/c, dest['z']):  # sm_13 division may be a few ulp off numpy
+        print 'fail'
+        print a['x']/c
+        print a['y']/c
+        print a['z']/c
+        print dest['x']
+        print dest['y']
+        print dest['z']
+    else:
+        print 'success'
+
+
author	Anthony LaTorre <telatorre@gmail.com>	2011-05-05 16:28:09 -0400
committer	Anthony LaTorre <telatorre@gmail.com>	2011-05-05 16:28:09 -0400
commit	7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece (patch)
tree	e32fee7d8da724b64ba0bcbc9ed4026cccd04feb
parent	48cb6fc276143567e13bfec6846721beb4ca2f46 (diff)
download	chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.gz chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.bz2 chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.zip