3 files changed, 203 insertions, 0 deletions
diff --git a/.hgignore b/.hgignore
new file mode 100644
index 0000000..ea5e656
--- /dev/null
+++ b/.hgignore
@@ -0,0 +1,3 @@
+syntax:glob
+*.pyc
+*~
+\ No newline at end of file
diff --git a/linalg.h b/linalg.h
new file mode 100644
index 0000000..4d0344e
--- /dev/null
+++ b/linalg.h
@@ -0,0 +1,101 @@
+#ifndef __LINALG_H__
+#define __LINALG_H__
+
+// Component-wise sum of two float3 vectors: (a.x+b.x, a.y+b.y, a.z+b.z).
+__device__ __host__ float3 operator+ (const float3 &a, const float3 &b) {
+	return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
+}
+
+// In-place component-wise addition: each component of b is added to a.
+__device__ __host__ void operator+= (float3 &a, const float3 &b) {
+	a.x = a.x + b.x;
+	a.y = a.y + b.y;
+	a.z = a.z + b.z;
+}
+
+// Component-wise difference of two float3 vectors (a minus b).
+__device__ __host__ float3 operator- (const float3 &a, const float3 &b) {
+	return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+
+// In-place component-wise subtraction: each component of b is subtracted from a.
+__device__ __host__ void operator-= (float3 &a, const float3 &b) {
+	a.x = a.x - b.x;
+	a.y = a.y - b.y;
+	a.z = a.z - b.z;
+}
+
+// Adds the scalar c to every component of a and returns the result.
+__device__ __host__ float3 operator+ (const float3 &a, const float &c) {
+	return make_float3(a.x + c, a.y + c, a.z + c);
+}
+
+// In-place scalar add: adds c to every component of a. `a` is non-const so it can be modified.
+__device__ __host__ void operator+= (float3 &a, const float &c) {
+	a.x += c;
+	a.y += c;
+	a.z += c;
+}
+
+// Scalar-on-the-left addition: returns (c+a.x, c+a.y, c+a.z).
+__device__ __host__ float3 operator+ (const float &c, const float3 &a) {
+	return make_float3(c + a.x, c + a.y, c + a.z);
+}
+
+// Subtracts the scalar c from every component of a and returns the result.
+__device__ __host__ float3 operator- (const float3 &a, const float &c) {
+	return make_float3(a.x - c, a.y - c, a.z - c);
+}
+
+// In-place scalar subtract: subtracts c from every component of a. `a` is non-const so it can be modified.
+__device__ __host__ void operator-= (float3 &a, const float &c) {
+	a.x -= c;
+	a.y -= c;
+	a.z -= c;
+}
+
+// Scalar-on-the-left subtraction: returns (c-a.x, c-a.y, c-a.z).
+__device__ __host__ float3 operator- (const float &c, const float3& a) {
+	return make_float3(c - a.x, c - a.y, c - a.z);
+}
+
+// Scales every component of a by the scalar c and returns the result.
+__device__ __host__ float3 operator* (const float3 &a, const float &c) {
+	return make_float3(a.x * c, a.y * c, a.z * c);
+}
+
+// In-place scalar multiply: scales every component of a by c. `a` is non-const so it can be modified.
+__device__ __host__ void operator*= (float3 &a, const float &c) {
+	a.x *= c;
+	a.y *= c;
+	a.z *= c;
+}
+
+// Scalar-on-the-left multiplication: returns (c*a.x, c*a.y, c*a.z).
+__device__ __host__ float3 operator* (const float &c, const float3& a) {
+	return make_float3(c * a.x, c * a.y, c * a.z);
+}
+
+// Divides every component of a by the scalar c and returns the result.
+__device__ __host__ float3 operator/ (const float3 &a, const float &c) {
+	return make_float3(a.x / c, a.y / c, a.z / c);
+}
+
+// In-place scalar divide: divides every component of a by c. `a` is non-const so it can be modified.
+__device__ __host__ void operator/= (float3 &a, const float &c) {
+	a.x /= c;
+	a.y /= c;
+	a.z /= c;
+}
+
+// Scalar-on-the-left division: returns (c/a.x, c/a.y, c/a.z).
+__device__ __host__ float3 operator/ (const float &c, const float3 &a) {
+	return make_float3(c / a.x, c / a.y, c / a.z);
+}
+
+// Dot product of a and b: the sum of the products of matching components.
+__device__ __host__ float dot(const float3 &a, const float3 &b) {
+	return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+
+#endif
diff --git a/linalg_test.cu b/linalg_test.cu
new file mode 100644
index 0000000..c4647a7
--- /dev/null
+++ b/linalg_test.cu
@@ -0,0 +1,99 @@
+//-*-c-*-
+
+#include "linalg.h"
+
+extern "C"
+{
+
+// Test kernel: out[i] = a[i] + b[i] for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void add(int n, float3 *a, float3 *b, float3 *out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = a[i] + b[i];
+}
+
+// Test kernel: a[i] += b[i] for 0 <= i < n. Grid-stride loop so each element is updated by exactly one x-thread.
+__global__ void addequal(int n, float3 *a, float3 *b) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		a[i] += b[i];
+}
+
+// Test kernel: out[i] = a[i] - b[i] for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void sub(int n, float3 *a, float3 *b, float3 *out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = a[i] - b[i];
+}
+
+// Test kernel: a[i] -= b[i] for 0 <= i < n. Grid-stride loop so each element is updated by exactly one x-thread.
+__global__ void subequal(int n, float3 *a, float3 *b) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		a[i] -= b[i];
+}
+
+// Test kernel: out[i] = a[i] + c for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void addfloat(int n, float3 *a, float c, float3 *out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = a[i] + c;
+}
+
+// Test kernel: a[i] += c for 0 <= i < n. Grid-stride loop so each element is updated by exactly one x-thread.
+__global__ void addfloatequal(int n, float3 *a, float c) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		a[i] += c;  // operate on the element, not on the pointer itself
+}
+
+// Test kernel: out[i] = a[i] - c for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void subfloat(int n, float3 *a, float c, float3 *out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = a[i] - c;
+}
+
+// Test kernel: a[i] -= c for 0 <= i < n. Grid-stride loop so each element is updated by exactly one x-thread.
+__global__ void subfloatequal(int n, float3 *a, float c) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		a[i] -= c;
+}
+
+// Test kernel: out[i] = a[i] * c for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void mulfloat(int n, float3 *a, float c, float3 *out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = a[i]*c;
+}
+
+// Test kernel: a[i] *= c for 0 <= i < n. Grid-stride loop so each element is updated by exactly one x-thread.
+__global__ void mulfloatequal(int n, float3 *a, float c) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		a[i] *= c;  // operate on the element, not on the pointer itself
+}
+
+// Test kernel: out[i] = a[i] / c for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void divfloat(int n, float3 *a, float c, float3 *out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = a[i]/c;
+}
+
+// Test kernel: a[i] /= c for 0 <= i < n. Grid-stride loop so each element is updated by exactly one x-thread.
+__global__ void divfloatequal(int n, float3 *a, float c) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		a[i] /= c;  // operate on the element, not on the pointer itself
+}
+
+// Test kernel: out[i] = dot(a[i], b[i]) for 0 <= i < n. Grid-stride loop over the x dimension of the launch.
+__global__ void dot(int n, float3 *a, float3 *b, float* out) {
+	const long long stride = (long long)gridDim.x * blockDim.x;  // total threads along x
+	for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride)
+		out[i] = dot(a[i], b[i]);  // pass the elements so the float3 overload is selected
+}
+
+} // extern "C"