diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/rotate_test.cu | 4 | ||||
-rw-r--r-- | test/rotate_test.py | 74 |
2 files changed, 30 insertions, 48 deletions
diff --git a/test/rotate_test.cu b/test/rotate_test.cu index 6cafc12..5cd3a3a 100644 --- a/test/rotate_test.cu +++ b/test/rotate_test.cu @@ -5,10 +5,10 @@ extern "C" { -__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest) +__global__ void rotate(float3 *a, float *phi, float3 n, float3 *dest) { int idx = blockIdx.x*blockDim.x + threadIdx.x; - dest[idx] = rotate(a[idx], phi[idx], n[idx]); + dest[idx] = rotate(a[idx], phi[idx], n); } } // extern "c" diff --git a/test/rotate_test.py b/test/rotate_test.py index 7ac7804..c7cd58e 100644 --- a/test/rotate_test.py +++ b/test/rotate_test.py @@ -1,67 +1,49 @@ import os import numpy as np +import time from pycuda import autoinit from pycuda.compiler import SourceModule import pycuda.driver as cuda -from pycuda import gpuarray -float3 = gpuarray.vec.float3 - -def rotate(x, phi, n): - x = np.asarray(x) - n = np.asarray(n) - - r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \ - np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]]) - - return np.inner(x,r) +import pycuda.gpuarray as ga +from chroma.gpu.tools import to_float3 +from chroma.transform import rotate, normalize +from chroma.cuda import srcdir as source_directory print 'device %s' % autoinit.device.name() current_directory = os.path.split(os.path.realpath(__file__))[0] -from chroma.cuda import srcdir as source_directory - source = open(current_directory + '/rotate_test.cu').read() - -mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False) +mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True) rotate_gpu = mod.get_function('rotate') -size = {'block': (100,1,1), 'grid': (1,1)} +nthreads_per_block = 1024 +blocks = 4096 + +def test_rotate(): + n = nthreads_per_block*blocks -a = np.empty(size['block'][0], dtype=float3) -n = np.empty(size['block'][0], dtype=float3) -phi = np.random.random_sample(size=a.size).astype(np.float32) + a = np.random.rand(n,3).astype(np.float32) + t = np.random.rand(n).astype(np.float32)*2*np.pi + w = normalize(np.random.rand(3)) -a['x'] = np.random.random_sample(size=a.size) -a['y'] = np.random.random_sample(size=a.size) -a['z'] = np.random.random_sample(size=a.size) + a_gpu = ga.to_gpu(to_float3(a)) + t_gpu = ga.to_gpu(t) -n['x'] = np.random.random_sample(size=n.size) -n['y'] = np.random.random_sample(size=n.size) -n['z'] = np.random.random_sample(size=n.size) + dest_gpu = ga.empty(n,dtype=ga.vec.float3) -a['x'] = np.ones(a.size) -a['y'] = np.zeros(a.size) -a['z'] = np.zeros(a.size) + t0 = time.time() + rotate_gpu(a_gpu,t_gpu,ga.vec.make_float3(*w),dest_gpu, + block=(nthreads_per_block,1,1),grid=(blocks,1)) + autoinit.context.synchronize() + elapsed = time.time() - t0; -n['x'] = np.zeros(n.size) -n['y'] = np.zeros(n.size) -n['z'] = np.ones(n.size) + print 'elapsed %f sec' % elapsed -phi = np.array([np.pi/2]*a.size).astype(np.float32) + r = rotate(a,t,w) -def testrotate(): - dest = np.empty(a.size, dtype=float3) - rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size) - for v, theta, w, rdest in zip(a,phi,n,dest): - r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z'])) - if not np.allclose(rdest['x'], r[0]) or \ - not np.allclose(rdest['y'], r[1]) or \ - not np.allclose(rdest['z'], r[2]): - print v - print theta - print w - print r - print rdest - assert False + assert np.allclose(r,dest_gpu.get().view(np.float32).reshape((-1,3)), + atol=1e-5) +if __name__ == '__main__': + test_rotate() |