summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Anthony LaTorre <telatorre@gmail.com> 2011-05-05 16:28:09 -0400
committer: Anthony LaTorre <telatorre@gmail.com> 2011-05-05 16:28:09 -0400
commit: 7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece (patch)
tree: e32fee7d8da724b64ba0bcbc9ed4026cccd04feb
parent: 48cb6fc276143567e13bfec6846721beb4ca2f46 (diff)
download: chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.gz
chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.tar.bz2
chroma-7a4c43ed53fe0f6a61484be3e082a1d21dbd2ece.zip
finished basic linear algebra operations and tests
-rw-r--r--linalg.h10
-rw-r--r--linalg_test.cu99
-rw-r--r--tests/linalg_test.cu84
-rw-r--r--tests/linalg_test.py231
4 files changed, 320 insertions, 104 deletions
diff --git a/linalg.h b/linalg.h
index 4d0344e..698b6f5 100644
--- a/linalg.h
+++ b/linalg.h
@@ -3,7 +3,7 @@
__device__ __host__ float3 operator+ (const float3 &a, const float3 &b)
{
- return make_float3(a.x+b.x, a.y+b.y, a.z+b.y);
+ return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
__device__ __host__ void operator+= (float3 &a, const float3 &b)
@@ -30,7 +30,7 @@ __device__ __host__ float3 operator+ (const float3 &a, const float &c)
return make_float3(a.x+c, a.y+c, a.z+c);
}
-__device__ __host__ float3 operator+= (const float3 &a, const float &c)
+__device__ __host__ void operator+= (float3 &a, const float &c)
{
a.x += c;
a.y += c;
@@ -47,7 +47,7 @@ __device__ __host__ float3 operator- (const float3 &a, const float &c)
return make_float3(a.x-c, a.y-c, a.z-c);
}
-__device__ __host__ float3 operator-= (const float3 &a, const float &c)
+__device__ __host__ void operator-= (float3 &a, const float &c)
{
a.x -= c;
a.y -= c;
@@ -64,7 +64,7 @@ __device__ __host__ float3 operator* (const float3 &a, const float &c)
return make_float3(a.x*c, a.y*c, a.z*c);
}
-__device__ __host__ float3 operator*= (const float3 &a, const float &c)
+__device__ __host__ void operator*= (float3 &a, const float &c)
{
a.x *= c;
a.y *= c;
@@ -81,7 +81,7 @@ __device__ __host__ float3 operator/ (const float3 &a, const float &c)
return make_float3(a.x/c, a.y/c, a.z/c);
}
-__device__ __host__ float3 operator/= (const float3 &a, const float &c)
+__device__ __host__ void operator/= (float3 &a, const float &c)
{
a.x /= c;
a.y /= c;
diff --git a/linalg_test.cu b/linalg_test.cu
deleted file mode 100644
index c4647a7..0000000
--- a/linalg_test.cu
+++ /dev/null
@@ -1,99 +0,0 @@
-//-*-c-*-
-
-#include "linalg.h"
-
-extern "C"
-{
-
-__global__ void add(int n, float3 *a, float3 *b, float3 *out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = a[i] + b[i];
-}
-
-__global__ void addequal(int n, float3 *a, float3 *b)
-{
- int i;
- for (i=0; i < n; i++)
- a[i] += b[i];
-}
-
-__global__ void sub(int n, float3 *a, float3 *b, float3 *out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = a[i] - b[i];
-}
-
-__global__ void subequal(int n, float3 *a, float3 *b)
-{
- int i;
- for (i=0; i < n; i++)
- a[i] -= b[i];
-}
-
-__gloabl__ void addfloat(int n, float3 *a, float c, float3 *out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = a[i] + c;
-}
-
-__global__ void addfloatequal(int n, float3 *a, float c)
-{
- int i;
- for (i=0; i < n; i++)
- a += c;
-}
-
-__global__ void subfloat(int n, float3 *a, float c, float3 *out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = a[i] - c;
-}
-
-__global__ void subfloatequal(int n, float3 *a, float c)
-{
- int i;
- for (i=0; i < n; i++)
- a[i] -= c;
-}
-
-__global__ void mulfloat(int n, float3 *a, float c, float3 *out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = a[i]*c;
-}
-
-__global__ void mulfloatequal(int n, float3 *a, float c)
-{
- int i;
- for (i=0; i < n; i++)
- a *= c;
-}
-
-__global__ void divfloat(int n, float3 *a, float c, float3 *out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = a[i]/c;
-}
-
-__global__ void divfloatequal(int n, float3 *a, float c)
-{
- int i;
- for (i=0; i < n; i++)
- a /= c;
-}
-
-__global__ void dot(int n, float3 *a, float3 *b, float* out)
-{
- int i;
- for (i=0; i < n; i++)
- out[i] = dot(a,b);
-}
-
-} // extern "c"
diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu
new file mode 100644
index 0000000..13d2ed0
--- /dev/null
+++ b/tests/linalg_test.cu
@@ -0,0 +1,84 @@
+//-*-c-*-
+
+extern "C"
+{
+
+__global__ void add(float3 *a, float3 *b, float3 *dest) // dest[i] = a[i] + b[i]
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = a[i] + b[i];
+}
+
+__global__ void addequal(float3 *a, float3 *b) // a[i] += b[i], in place
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    a[i] += b[i];
+}
+
+__global__ void sub(float3 *a, float3 *b, float3 *dest) // dest[i] = a[i] - b[i]
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = a[i] - b[i];
+}
+
+__global__ void subequal(float3 *a, float3 *b) // a[i] -= b[i], in place
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    a[i] -= b[i];
+}
+
+__global__ void addfloat(float3 *a, float c, float3 *dest) // dest[i] = a[i] + c
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = a[i] + c;
+}
+
+__global__ void addfloatequal(float3 *a, float c) // a[i] += c, in place
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    a[i] += c;
+}
+
+__global__ void subfloat(float3 *a, float c, float3 *dest) // dest[i] = a[i] - c
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = a[i] - c;
+}
+
+__global__ void subfloatequal(float3 *a, float c) // a[i] -= c, in place
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    a[i] -= c;
+}
+
+__global__ void mulfloat(float3 *a, float c, float3 *dest) // dest[i] = a[i] * c
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = a[i] * c;
+}
+
+__global__ void mulfloatequal(float3 *a, float c) // a[i] *= c, in place
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    a[i] *= c;
+}
+
+__global__ void divfloat(float3 *a, float c, float3 *dest) // dest[i] = a[i] / c
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = a[i] / c;
+}
+
+__global__ void divfloatequal(float3 *a, float c) // a[i] /= c, in place
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    a[i] /= c;
+}
+
+__global__ void dot(float3 *a, float3 *b, float* dest) // dest[i] = dot(a[i], b[i]); float3 dot() is defined outside this file
+{
+    const int i = threadIdx.x + blockDim.x*blockIdx.x; // flat 1D thread index; no bounds guard
+    dest[i] = dot(a[i], b[i]);
+}
+
+} // extern "c"
diff --git a/tests/linalg_test.py b/tests/linalg_test.py
new file mode 100644
index 0000000..bc9720b
--- /dev/null
+++ b/tests/linalg_test.py
@@ -0,0 +1,108 @@
+import sys
+import numpy as np
+from pycuda import autoinit
+from pycuda.compiler import SourceModule
+import pycuda.driver as cuda
+from pycuda import gpuarray
+
+# Runs every kernel in linalg_test.cu on random float3 data and compares the
+# result against the same expression evaluated with numpy on the host.
+
+float3 = gpuarray.vec.float3
+AXES = ('x', 'y', 'z')
+
+# Host and device results are compared with a relative tolerance instead of
+# '!=': device division and multiply-adds are not guaranteed to be
+# bit-identical to numpy's float32 arithmetic.
+RTOL = 1e-5
+
+print 'device %s' % autoinit.device.name()
+
+source = open('../linalg.h').read() + open('linalg_test.cu').read()
+
+mod = SourceModule(source, no_extern_c=True, arch='sm_13')
+
+add = mod.get_function('add')
+addequal = mod.get_function('addequal')
+sub = mod.get_function('sub')
+subequal = mod.get_function('subequal')
+addfloat = mod.get_function('addfloat')
+addfloatequal = mod.get_function('addfloatequal')
+subfloat = mod.get_function('subfloat')
+subfloatequal = mod.get_function('subfloatequal')
+mulfloat = mod.get_function('mulfloat')
+mulfloatequal = mod.get_function('mulfloatequal')
+divfloat = mod.get_function('divfloat')
+divfloatequal = mod.get_function('divfloatequal')
+dot = mod.get_function('dot')
+
+# One block of 10 threads; the kernels have no bounds check, so every array
+# below holds exactly size['block'][0] elements.
+size = {'block': (10,1,1), 'grid': (1,1)}
+
+# (name, out-of-place kernel, in-place kernel, host reference operation)
+vector_ops = [('add', add, addequal, np.add),
+              ('sub', sub, subequal, np.subtract)]
+scalar_ops = [('addfloat', addfloat, addfloatequal, np.add),
+              ('subfloat', subfloat, subfloatequal, np.subtract),
+              ('mulfloat', mulfloat, mulfloatequal, np.multiply),
+              ('divfloat', divfloat, divfloatequal, np.divide)]
+
+def random_float3(n):
+    """Return n float3 records whose x, y, z are uniform samples in [0, 1)."""
+    v = np.zeros(n, dtype=float3)
+    for axis in AXES:
+        v[axis] = np.random.random_sample(size=n)
+    return v
+
+def report(expected, actual):
+    """Print 'success' if every actual array is close to its expected
+    counterpart, otherwise 'fail' followed by both sets of arrays."""
+    if all(np.allclose(act, exp, rtol=RTOL) for exp, act in zip(expected, actual)):
+        print 'success'
+    else:
+        print 'fail'
+        for exp in expected:
+            print exp
+        for act in actual:
+            print act
+
+for i in range(1):
+    a = random_float3(size['block'][0])
+    # b must be random too: with b left at zero the add, sub and dot checks
+    # would pass even if a kernel read the wrong component of b.
+    b = random_float3(a.size)
+    dest = np.zeros(a.size, dtype=float3)
+    c = np.float32(np.random.random_sample())
+    destfloat = np.zeros(a.size, dtype=np.float32)
+
+    print a['x']
+    print a['y']
+    print a['z']
+    print c
+
+    for name, kernel, _, op in vector_ops:
+        print 'testing %s...' % name,
+        kernel(cuda.In(a), cuda.In(b), cuda.Out(dest), **size)
+        report([op(a[k], b[k]) for k in AXES], [dest[k] for k in AXES])
+
+    for name, kernel, _, op in scalar_ops:
+        print 'testing %s...' % name,
+        kernel(cuda.In(a), c, cuda.Out(dest), **size)
+        report([op(a[k], c) for k in AXES], [dest[k] for k in AXES])
+
+    print 'testing dot...',
+    dot(cuda.In(a), cuda.In(b), cuda.Out(destfloat), **size)
+    report([a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z']], [destfloat])
+
+    for name, _, kernel, op in vector_ops:
+        print 'testing %sequal...' % name,
+        dest = np.copy(a)
+        kernel(cuda.InOut(dest), cuda.In(b), **size)
+        report([op(a[k], b[k]) for k in AXES], [dest[k] for k in AXES])
+
+    for name, _, kernel, op in scalar_ops:
+        print 'testing %sequal...' % name,
+        dest = np.copy(a)
+        kernel(cuda.InOut(dest), c, **size)
+        report([op(a[k], c) for k in AXES], [dest[k] for k in AXES])