"""Check the float3 kernels compiled from ``../linalg.h`` and ``linalg_test.cu``.

Every kernel is launched once on a small batch of random vectors and its
output is compared with the same operation evaluated by NumPy on the host.
One ``testing <name>... success`` or ``testing <name>... fail`` line is
printed per kernel; on failure the expected and actual values follow.
"""
import sys

import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray

float3 = gpuarray.vec.float3

# Single-argument print(...) is used throughout: it parses identically as a
# Python 2 print statement and as a Python 3 function call.
print('device %s' % autoinit.device.name())


def _read_text(path):
    """Return the text stored at *path*; the file is closed before returning."""
    with open(path) as handle:
        return handle.read()


source = _read_text('../linalg.h') + _read_text('linalg_test.cu')
# NOTE(review): arch is pinned to 'sm_13'; recent CUDA toolkits may no longer
# accept that target -- confirm against the toolchain in use.
mod = SourceModule(source, no_extern_c=True, arch='sm_13')

add = mod.get_function('add')
addequal = mod.get_function('addequal')
sub = mod.get_function('sub')
subequal = mod.get_function('subequal')
addfloat = mod.get_function('addfloat')
addfloatequal = mod.get_function('addfloatequal')
subfloat = mod.get_function('subfloat')
subfloatequal = mod.get_function('subfloatequal')
mulfloat = mod.get_function('mulfloat')
mulfloatequal = mod.get_function('mulfloatequal')
divfloat = mod.get_function('divfloat')
divfloatequal = mod.get_function('divfloatequal')
dot = mod.get_function('dot')

# Launch configuration shared by every kernel call; the number of test
# vectors is taken from the block's x dimension.
size = {'block': (10,1,1), 'grid': (1,1)}

# Field names of the float3 record dtype that are compared one by one.
_AXES = ('x', 'y', 'z')

# Tolerances for the dot-product check only.
# NOTE(review): the device may round a sum of products differently from NumPy
# (for example if the compiler fuses the multiply and the add), so exact
# equality is not assumed here -- confirm these bounds on the target hardware.
_DOT_RTOL = 1e-5
_DOT_ATOL = 1e-6


def _random_float3(count):
    """Return *count* float3 records with x, y and z drawn from random_sample."""
    vectors = np.zeros(count, dtype=float3)
    for axis in _AXES:
        vectors[axis] = np.random.random_sample(size=count)
    return vectors


def _is_vector(operand):
    """Tell a float3 array operand apart from a scalar operand."""
    return isinstance(operand, np.ndarray)


def _device_arg(operand):
    """Wrap array operands as kernel inputs; scalars are passed through as-is."""
    return cuda.In(operand) if _is_vector(operand) else operand


def _expected_fields(ufunc, vectors, operand):
    """Evaluate ``ufunc(vectors, operand)`` on the host, one field at a time.

    *operand* is either another float3 array (combined field with field) or a
    scalar (combined with every field).  Returns a dict keyed by field name.
    """
    expected = {}
    for axis in _AXES:
        rhs = operand[axis] if _is_vector(operand) else operand
        expected[axis] = ufunc(vectors[axis], rhs)
    return expected


def _announce(name):
    """Print the ``testing <name>...`` prefix and stay on the same line."""
    sys.stdout.write('testing %s... ' % name)
    # Flush so the label is visible even if the kernel launch that follows
    # aborts the process.
    sys.stdout.flush()


def _report(expected, actual):
    """Print 'success' if every field of *actual* equals *expected* exactly.

    Otherwise print 'fail' followed by the expected x, y, z arrays and then
    the actual x, y, z arrays.
    """
    mismatch = any((expected[axis] != actual[axis]).any() for axis in _AXES)
    if not mismatch:
        print('success')
        return
    print('fail')
    for axis in _AXES:
        print(expected[axis])
    for axis in _AXES:
        print(actual[axis])


def run_tests():
    """Launch each kernel once and compare its output with NumPy."""
    count = size['block'][0]
    a = _random_float3(count)
    # b must be non-zero as well: with an all-zero b the add/sub checks reduce
    # to comparing a with itself and the dot check to comparing 0 with 0.
    b = _random_float3(count)
    c = np.float32(np.random.random_sample())

    for vectors in (a, b):
        for axis in _AXES:
            print(vectors[axis])
    print(c)

    # (label, kernel, host-side reference ufunc, right-hand operand)
    out_of_place = [
        ('add', add, np.add, b),
        ('sub', sub, np.subtract, b),
        ('addfloat', addfloat, np.add, c),
        ('subfloat', subfloat, np.subtract, c),
        ('mulfloat', mulfloat, np.multiply, c),
        ('divfloat', divfloat, np.divide, c),
    ]
    for name, kernel, ufunc, operand in out_of_place:
        _announce(name)
        # A fresh zeroed buffer per test, so a kernel that writes nothing
        # cannot pass on the leftovers of the previous test.
        dest = np.zeros(count, dtype=float3)
        kernel(cuda.In(a), _device_arg(operand), cuda.Out(dest), **size)
        _report(_expected_fields(ufunc, a, operand), dest)

    _announce('dot')
    destfloat = np.zeros(count, dtype=np.float32)
    dot(cuda.In(a), cuda.In(b), cuda.Out(destfloat), **size)
    expected_dot = a['x']*b['x'] + a['y']*b['y'] + a['z']*b['z']
    if np.allclose(destfloat, expected_dot, rtol=_DOT_RTOL, atol=_DOT_ATOL):
        print('success')
    else:
        print('fail')
        print(expected_dot)
        print(destfloat)

    # In-place variants: the buffer starts as a copy of a and is overwritten
    # by the kernel, so a itself stays available for the host-side reference.
    in_place = [
        ('addequal', addequal, np.add, b),
        ('subequal', subequal, np.subtract, b),
        ('addfloatequal', addfloatequal, np.add, c),
        ('subfloatequal', subfloatequal, np.subtract, c),
        ('mulfloatequal', mulfloatequal, np.multiply, c),
        ('divfloatequal', divfloatequal, np.divide, c),
    ]
    for name, kernel, ufunc, operand in in_place:
        _announce(name)
        dest = np.copy(a)
        kernel(cuda.InOut(dest), _device_arg(operand), **size)
        _report(_expected_fields(ufunc, a, operand), dest)


# The checks run whenever the file is executed or imported.
run_tests()