rotate() does not need to create a matrix. update test. remove superflous function in last commit.

author: Anthony LaTorre <devnull@localhost> 2012-12-21 11:46:24 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2021-05-09 08:42:39 -0700
commit: 79e3643283034e1923c2a84fe0c4d0eb52b97570 (patch)
tree: 3c41502f8f99b51940f31ef20afedb56f08927ab
parent: 581f4305ffe86cb42d5bcaa9bf081e6ea4abceca (diff)
download: chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.tar.gz
chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.tar.bz2
chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.zip
4 files changed, 41 insertions, 59 deletions
diff --git a/chroma/cuda/rotate.h b/chroma/cuda/rotate.h
index 15f8037..f93abcc 100644
--- a/chroma/cuda/rotate.h
+++ b/chroma/cuda/rotate.h
@@ -21,6 +21,17 @@ make_rotation_matrix(float phi, const float3 &n)
 __device__ float3
 rotate(const float3 &a, float phi, const float3 &n)
 {
+    float cos_phi = cosf(phi);
+    float sin_phi = sinf(phi);
+
+    return a*cos_phi + n*dot(a,n)*(1.0f-cos_phi) + cross(a,n)*sin_phi;
+}
+
+/* rotate points counterclockwise, when looking towards +infinity,
+   through an angle `phi` about the axis `n`. */
+__device__ float3
+rotate_with_matrix(const float3 &a, float phi, const float3 &n)
+{
     return make_rotation_matrix(phi,n)*a;
 }
 
diff --git a/chroma/cuda/transform.cu b/chroma/cuda/transform.cu
index 1c5a6cb..1f4405e 100644
--- a/chroma/cuda/transform.cu
+++ b/chroma/cuda/transform.cu
@@ -31,17 +31,6 @@ rotate(int nthreads, float3 *a, float phi, float3 axis)
     a[id] = rotate(a[id], phi, axis);
 }
 
-__global__ void
-rotate_(int nthreads, float3 *a, float phi, float3 axis)
-{
-    int id = blockIdx.x*blockDim.x + threadIdx.x;
-
-    if (id >= nthreads)
-	return;
-
-    a[id] = rotate(a[id], phi, axis);
-}
-
 /* Rotate the points `a` through an angle `phi` counter-clockwise
    (when looking towards +infinity along `axis`) about the axis defined
    by the point `point` and the vector `axis` . */
diff --git a/test/rotate_test.cu b/test/rotate_test.cu
index 6cafc12..5cd3a3a 100644
--- a/test/rotate_test.cu
+++ b/test/rotate_test.cu
@@ -5,10 +5,10 @@
 extern "C"
 {
 
-__global__ void rotate(float3 *a, float *phi, float3 *n, float3 *dest)
+__global__ void rotate(float3 *a, float *phi, float3 n, float3 *dest)
 {
 	int idx = blockIdx.x*blockDim.x + threadIdx.x;
-	dest[idx] = rotate(a[idx], phi[idx], n[idx]);
+	dest[idx] = rotate(a[idx], phi[idx], n);
 }
 
 } // extern "c"
diff --git a/test/rotate_test.py b/test/rotate_test.py
index 7ac7804..c7cd58e 100644
--- a/test/rotate_test.py
+++ b/test/rotate_test.py
@@ -1,67 +1,49 @@
 import os
 import numpy as np
+import time
 from pycuda import autoinit
 from pycuda.compiler import SourceModule
 import pycuda.driver as cuda
-from pycuda import gpuarray
-float3 = gpuarray.vec.float3
-
-def rotate(x, phi, n):
-    x = np.asarray(x)
-    n = np.asarray(n)
-
-    r = np.cos(phi)*np.identity(3) + (1-np.cos(phi))*np.outer(n,n) + \
-        np.sin(phi)*np.array([[0,n[2],-n[1]],[-n[2],0,n[0]],[n[1],-n[0],0]])
-
-    return np.inner(x,r)
+import pycuda.gpuarray as ga
+from chroma.gpu.tools import to_float3
+from chroma.transform import rotate, normalize
+from chroma.cuda import srcdir as source_directory
 
 print 'device %s' % autoinit.device.name()
 
 current_directory = os.path.split(os.path.realpath(__file__))[0]
-from chroma.cuda import srcdir as source_directory
-
 source = open(current_directory + '/rotate_test.cu').read()
-
-mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True, cache_dir=False)
+mod = SourceModule(source, options=['-I' + source_directory], no_extern_c=True)
 
 rotate_gpu = mod.get_function('rotate')
 
-size = {'block': (100,1,1), 'grid': (1,1)}
+nthreads_per_block = 1024
+blocks = 4096
+
+def test_rotate():
+    n = nthreads_per_block*blocks
 
-a = np.empty(size['block'][0], dtype=float3)
-n = np.empty(size['block'][0], dtype=float3)
-phi = np.random.random_sample(size=a.size).astype(np.float32)
+    a = np.random.rand(n,3).astype(np.float32)
+    t = np.random.rand(n).astype(np.float32)*2*np.pi
+    w = normalize(np.random.rand(3))
 
-a['x'] = np.random.random_sample(size=a.size)
-a['y'] = np.random.random_sample(size=a.size)
-a['z'] = np.random.random_sample(size=a.size)
+    a_gpu = ga.to_gpu(to_float3(a))
+    t_gpu = ga.to_gpu(t)
 
-n['x'] = np.random.random_sample(size=n.size)
-n['y'] = np.random.random_sample(size=n.size)
-n['z'] = np.random.random_sample(size=n.size)
+    dest_gpu = ga.empty(n,dtype=ga.vec.float3)
 
-a['x'] = np.ones(a.size)
-a['y'] = np.zeros(a.size)
-a['z'] = np.zeros(a.size)
+    t0 = time.time()
+    rotate_gpu(a_gpu,t_gpu,ga.vec.make_float3(*w),dest_gpu,
+               block=(nthreads_per_block,1,1),grid=(blocks,1))
+    autoinit.context.synchronize()
+    elapsed = time.time() - t0;
 
-n['x'] = np.zeros(n.size)
-n['y'] = np.zeros(n.size)
-n['z'] = np.ones(n.size)
+    print 'elapsed %f sec' % elapsed
 
-phi = np.array([np.pi/2]*a.size).astype(np.float32)
+    r = rotate(a,t,w)
 
-def testrotate():
-    dest = np.empty(a.size, dtype=float3)
-    rotate_gpu(cuda.In(a), cuda.In(phi), cuda.In(n), cuda.Out(dest), **size)
-    for v, theta, w, rdest in zip(a,phi,n,dest):
-        r = rotate((v['x'], v['y'], v['z']), theta, (w['x'], w['y'], w['z']))
-        if not np.allclose(rdest['x'], r[0]) or \
-                not np.allclose(rdest['y'], r[1]) or \
-                not np.allclose(rdest['z'], r[2]):
-            print v
-            print theta
-            print w
-            print r
-            print rdest
-            assert False
+    assert np.allclose(r,dest_gpu.get().view(np.float32).reshape((-1,3)),
+                       atol=1e-5)
 
+if __name__ == '__main__':
+    test_rotate()
author	Anthony LaTorre <devnull@localhost>	2012-12-21 11:46:24 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2021-05-09 08:42:39 -0700
commit	79e3643283034e1923c2a84fe0c4d0eb52b97570 (patch)
tree	3c41502f8f99b51940f31ef20afedb56f08927ab
parent	581f4305ffe86cb42d5bcaa9bf081e6ea4abceca (diff)
download	chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.tar.gz chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.tar.bz2 chroma-79e3643283034e1923c2a84fe0c4d0eb52b97570.zip