diff options
author | Anthony LaTorre <devnull@localhost> | 2012-12-21 11:46:24 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2021-05-09 08:42:39 -0700 |
commit | 79e3643283034e1923c2a84fe0c4d0eb52b97570 (patch) | |
tree | 3c41502f8f99b51940f31ef20afedb56f08927ab | |
parent | 581f4305ffe86cb42d5bcaa9bf081e6ea4abceca (diff) | |
download | chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.tar.gz chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.tar.bz2 chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.zip |
rotate() does not need to create a matrix. update test. remove superflous function in last commit.
-rw-r--r-- | chroma/cuda/rotate.h | 11 | ||||
-rw-r--r-- | chroma/cuda/transform.cu | 11 | ||||
-rw-r--r-- | test/rotate_test.cu | 4 | ||||
-rw-r--r-- | test/rotate_test.py | 74 |
4 files changed, 41 insertions, 59 deletions
diff --git a/chroma/cuda/rotate.h b/chroma/cuda/rotate.h index 15f8037..f93abcc 100644 --- a/chroma/cuda/rotate.h +++ b/chroma/cuda/rotate.h @@ -21,6 +21,17 @@ make_rotation_matrix(float phi, const float3 &n) __device__ float3 rotate(const float3 &a, float phi, const float3 &n) { + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); + + return a*cos_phi + n*dot(a,n)*(1.0f-cos_phi) + cross(a,n)*sin_phi; +} + +/* rotate points counterclockwise, when looking towards +infinity, + through an angle `phi` about the axis `n`. */ +__device__ float3 +rotate_with_matrix(const float3 &a, float phi, const float3 &n) +{ return make_rotation_matrix(phi,n)*a; } diff --git a/chroma/cuda/transform.cu b/chroma/cuda/transform.cu index 1c5a6cb..1f4405e 100644 --- a/chroma/cuda/transform.cu +++ b/chroma/cuda/transform.cu @@ -31,17 +31,6 @@ rotate(int nthreads, float3 *a, float phi, float3 axis) a[id] = rotate(a[id], phi, axis); } -__global__ void -rotate_(int nthreads, float3 *a, float phi, float3 axis) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] = rotate(a[id], phi, axis); -} - /* Rotate the points `a` through an angle `phi` counter-clockwise (when looking towards +infinity along `axis`) about the axis defined by the point `point` and the vector `axis` . */ diff --git a/test/rotate_test.cu b/test/rotate_test.cu index 6cafc12..5cd3a3a 100644 --- a/test/rotate_test.cu +++ b/test/rotate_test.cu @@ -5,10 +5,10 @@ extern "C" { -__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest) +__global__ void rotate(float3 *a, float *phi, float3 n, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = rotate(a[idx], phi[idx], n[idx]); + dest[idx] = rotate(a[idx], phi[idx], n); } } // extern "c" diff --git a/test/rotate_test.py b/test/rotate_test.py index 7ac7804..c7cd58e 100644 --- a/test/rotate_test.py +++ b/test/rotate_test.py @@ -1,67 +1,49 @@ import os import numpy as np +import time from pycuda import autoinit from pycuda.compiler import SourceModule import pycuda.driver as cuda -from pycuda import gpuarray -float3 = gpuarray.vec.float3 - -def rotate(x, phi, n): - x = np.asarray(x) - n = np.asarray(n) - - r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \ - np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]]) - - return np.inner(x,r) +import pycuda.gpuarray as ga +from chroma.gpu.tools import to_float3 +from chroma.transform import rotate, normalize +from chroma.cuda import srcdir as source_directory print 'device %s' % autoinit.device.name() current_directory = os.path.split(os.path.realpath(__file__))[0] -from chroma.cuda import srcdir as source_directory - source = open(current_directory + '/rotate_test.cu').read() - -mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True) rotate_gpu = mod.get_function('rotate') -size = {'block': (100,1,1), 'grid': (1,1)} +nthreads_per_block = 1024 +blocks = 4096 + +def test_rotate(): + n = nthreads_per_block*blocks -a = np.empty(size['block'][0], dtype=float3) -n = np.empty(size['block'][0], dtype=float3) -phi = np.random.random_sample(size=a.size).astype(np.float32) + a = np.random.rand(n,3).astype(np.float32) + t = np.random.rand(n).astype(np.float32)*2*np.pi + w = normalize(np.random.rand(3)) -a['x'] = np.random.random_sample(size=a.size) -a['y'] = np.random.random_sample(size=a.size) -a['z'] = np.random.random_sample(size=a.size) + a_gpu = ga.to_gpu(to_float3(a)) + t_gpu = ga.to_gpu(t) -n['x'] = np.random.random_sample(size=n.size) -n['y'] = np.random.random_sample(size=n.size) -n['z'] = np.random.random_sample(size=n.size) + dest_gpu = ga.empty(n,dtype=ga.vec.float3) -a['x'] = np.ones(a.size) -a['y'] = np.zeros(a.size) -a['z'] = np.zeros(a.size) + t0 = time.time() + rotate_gpu(a_gpu,t_gpu,ga.vec.make_float3(*w),dest_gpu, + block=(nthreads_per_block,1,1),grid=(blocks,1)) + autoinit.context.synchronize() + elapsed = time.time() - t0; -n['x'] = np.zeros(n.size) -n['y'] = np.zeros(n.size) -n['z'] = np.ones(n.size) + print 'elapsed %f sec' % elapsed -phi = np.array([np.pi/2]*a.size).astype(np.float32) + r = rotate(a,t,w) -def testrotate(): - dest = np.empty(a.size, dtype=float3) - rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size) - for v, theta, w, rdest in zip(a,phi,n,dest): - r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z'])) - if not np.allclose(rdest['x'], r[0]) or \ - not np.allclose(rdest['y'], r[1]) or \ - not np.allclose(rdest['z'], r[2]): - print v - print theta - print w - print r - print rdest - assert False + assert np.allclose(r,dest_gpu.get().view(np.float32).reshape((-1,3)), + atol=1e-5) +if __name__ == '__main__': + test_rotate() |