summaryrefslogtreecommitdiff
path: root/test/rotate_test.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/rotate_test.py')
-rw-r--r--test/rotate_test.py74
1 files changed, 28 insertions, 46 deletions
diff --git a/test/rotate_test.py b/test/rotate_test.py
index 7ac7804..c7cd58e 100644
--- a/test/rotate_test.py
+++ b/test/rotate_test.py
@@ -1,67 +1,49 @@
import os
import numpy as np
+import time
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
-from pycuda import gpuarray
-float3 = gpuarray.vec.float3
-
-def rotate(x, phi, n):
- x = np.asarray(x)
- n = np.asarray(n)
-
- r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \
- np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]])
-
- return np.inner(x,r)
+import pycuda.gpuarray as ga
+from chroma.gpu.tools import to_float3
+from chroma.transform import rotate, normalize
+from chroma.cuda import srcdir as source_directory
print 'device %s' % autoinit.device.name()
current_directory = os.path.split(os.path.realpath(__file__))[0]
-from chroma.cuda import srcdir as source_directory
-
source = open(current_directory + '/rotate_test.cu').read()
-
-mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True)
rotate_gpu = mod.get_function('rotate')
-size = {'block': (100,1,1), 'grid': (1,1)}
+nthreads_per_block = 1024
+blocks = 4096
+
+def test_rotate():
+ n = nthreads_per_block*blocks
-a = np.empty(size['block'][0], dtype=float3)
-n = np.empty(size['block'][0], dtype=float3)
-phi = np.random.random_sample(size=a.size).astype(np.float32)
+ a = np.random.rand(n,3).astype(np.float32)
+ t = np.random.rand(n).astype(np.float32)*2*np.pi
+ w = normalize(np.random.rand(3))
-a['x'] = np.random.random_sample(size=a.size)
-a['y'] = np.random.random_sample(size=a.size)
-a['z'] = np.random.random_sample(size=a.size)
+ a_gpu = ga.to_gpu(to_float3(a))
+ t_gpu = ga.to_gpu(t)
-n['x'] = np.random.random_sample(size=n.size)
-n['y'] = np.random.random_sample(size=n.size)
-n['z'] = np.random.random_sample(size=n.size)
+ dest_gpu = ga.empty(n,dtype=ga.vec.float3)
-a['x'] = np.ones(a.size)
-a['y'] = np.zeros(a.size)
-a['z'] = np.zeros(a.size)
+ t0 = time.time()
+ rotate_gpu(a_gpu,t_gpu,ga.vec.make_float3(*w),dest_gpu,
+ block=(nthreads_per_block,1,1),grid=(blocks,1))
+ autoinit.context.synchronize()
+ elapsed = time.time() - t0;
-n['x'] = np.zeros(n.size)
-n['y'] = np.zeros(n.size)
-n['z'] = np.ones(n.size)
+ print 'elapsed %f sec' % elapsed
-phi = np.array([np.pi/2]*a.size).astype(np.float32)
+ r = rotate(a,t,w)
-def testrotate():
- dest = np.empty(a.size, dtype=float3)
- rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size)
- for v, theta, w, rdest in zip(a,phi,n,dest):
- r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z']))
- if not np.allclose(rdest['x'], r[0]) or \
- not np.allclose(rdest['y'], r[1]) or \
- not np.allclose(rdest['z'], r[2]):
- print v
- print theta
- print w
- print r
- print rdest
- assert False
+ assert np.allclose(r,dest_gpu.get().view(np.float32).reshape((-1,3)),
+ atol=1e-5)
+if __name__ == '__main__':
+ test_rotate()