diff options
-rw-r--r-- | linalg.h | 5 | ||||
-rw-r--r-- | rotate.h | 24 | ||||
-rw-r--r-- | tests/linalg_test.cu | 6 | ||||
-rw-r--r-- | tests/linalg_test.py | 15 | ||||
-rw-r--r-- | tests/rotate_test.cu | 14 | ||||
-rw-r--r-- | tests/rotate_test.py | 63 |
6 files changed, 127 insertions, 0 deletions
@@ -98,4 +98,9 @@ __device__ __host__ float dot(const float3 &a, const float3 &b) return a.x*b.x + a.y*b.y + a.z*b.z; } +__device__ __host__ float3 cross(const float3 &a, const float3 &b) +{ + return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); +} + #endif diff --git a/rotate.h b/rotate.h new file mode 100644 index 0000000..52d6d6a --- /dev/null +++ b/rotate.h @@ -0,0 +1,24 @@ +#ifndef __ROTATE_H__ +#define __ROTATE_H__ + +__device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1}; +__device__ const Matrix ZERO_MATRIX = {0,0,0,0,0,0,0,0,0}; + +__device__ __host__ Matrix make_rotation_matrix(float phi, const float3 &n) +{ + /* rotate points counterclockwise, when looking towards +infinity, + through an angle `phi` about the axis `n`. */ + + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + + return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) + + sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0); +} + +__device__ __host__ float3 rotate(const float3 &a, float phi, const float3 &n) +{ + return make_rotation_matrix(phi,n)*a; +} + +#endif diff --git a/tests/linalg_test.cu b/tests/linalg_test.cu index 81043f1..bce5ea6 100644 --- a/tests/linalg_test.cu +++ b/tests/linalg_test.cu @@ -105,4 +105,10 @@ __global__ void dot(float3 *a, float3 *b, float* dest) dest[idx] = dot(a[idx],b[idx]); } +__global__ void cross(float3 *a, float3 *b, float3* dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = cross(a[idx],b[idx]); +} + } // extern "c" diff --git a/tests/linalg_test.py b/tests/linalg_test.py index b490813..f5e947e 100644 --- a/tests/linalg_test.py +++ b/tests/linalg_test.py @@ -29,6 +29,7 @@ float3divfloat = mod.get_function('float3divfloat') float3divfloatequal = mod.get_function('float3divfloatequal') floatdivfloat3 = mod.get_function('floatdivfloat3') dot = mod.get_function('dot') +cross = mod.get_function('cross') size = {'block': (100,1,1), 'grid': (1,1)} @@ -40,6 +41,10 @@ a['x'] = np.random.random_sample(size=a.size) a['y'] = np.random.random_sample(size=a.size) a['z'] = np.random.random_sample(size=a.size) +b['x'] = np.random.random_sample(size=b.size) +b['y'] = np.random.random_sample(size=b.size) +b['z'] = np.random.random_sample(size=b.size) + def testfloat3add(): dest = np.empty(a.size, dtype=float3) float3add(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) @@ -173,3 +178,13 @@ def testdot(): dot(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) if not np.allclose(a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z'], dest): assert False + +def testcross(): + dest = np.empty(a.size, dtype=float3) + cross(cuda.In(a), cuda.In(b), cuda.Out(dest), **size) + for u, v, wdest in zip(a,b,dest): + w = np.cross((u['x'], u['y'], u['z']),(v['x'],v['y'],v['z'])) + if not np.allclose(wdest['x'], w[0]) or \ + not np.allclose(wdest['y'], w[1]) or \ + not np.allclose(wdest['z'], w[2]): + assert False diff --git a/tests/rotate_test.cu b/tests/rotate_test.cu new file mode 100644 index 0000000..96e4d75 --- /dev/null +++ b/tests/rotate_test.cu @@ -0,0 +1,14 @@ +//-*-c-*- + + + +extern "C" +{ + +__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest) +{ + int idx = blockIdx.x*blockDim.x + threadIdx.x; + dest[idx] = rotate(a[idx], phi[idx], n[idx]); +} + +} // extern "c" diff --git a/tests/rotate_test.py b/tests/rotate_test.py new file mode 100644 index 0000000..8c2cfcb --- /dev/null +++ b/tests/rotate_test.py @@ -0,0 +1,63 @@ +import numpy as np +from pycuda import autoinit +from pycuda.compiler import SourceModule +import pycuda.driver as cuda +from pycuda import gpuarray +float3 = gpuarray.vec.float3 + +def rotate(x, phi, n): + x = np.asarray(x) + n = np.asarray(n) + + r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \ + np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]]) + + return np.inner(x,r) + +print 'device %s' % autoinit.device.name() + +source = open('../linalg.h').read() + open('../matrix.h').read() + \ + open('../rotate.h').read() + open('rotate_test.cu').read() +mod = SourceModule(source, no_extern_c=True, arch='sm_13') + +rotate_gpu = mod.get_function('rotate') + +size = {'block': (100,1,1), 'grid': (1,1)} + +a = np.empty(size['block'][0], dtype=float3) +n = np.empty(size['block'][0], dtype=float3) +phi = np.random.random_sample(size=a.size).astype(np.float32) + +a['x'] = np.random.random_sample(size=a.size) +a['y'] = np.random.random_sample(size=a.size) +a['z'] = np.random.random_sample(size=a.size) + +n['x'] = np.random.random_sample(size=n.size) +n['y'] = np.random.random_sample(size=n.size) +n['z'] = np.random.random_sample(size=n.size) + +a['x'] = np.ones(a.size) +a['y'] = np.zeros(a.size) +a['z'] = np.zeros(a.size) + +n['x'] = np.zeros(n.size) +n['y'] = np.zeros(n.size) +n['z'] = np.ones(n.size) + +phi = np.array([np.pi/2]*a.size).astype(np.float32) + +def testrotate(): + dest = np.empty(a.size, dtype=float3) + rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size) + for v, theta, w, rdest in zip(a,phi,n,dest): + r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z'])) + if not np.allclose(rdest['x'], r[0]) or \ + not np.allclose(rdest['y'], r[1]) or \ + not np.allclose(rdest['z'], r[2]): + print v + print theta + print w + print r + print rdest + assert False + |