"""Render a detector mesh with the CUDA ``intersect_mesh`` kernel.

Compiles ``src/kernel.cu``, uploads the camera rays and the geometry built
by ``detectors.build_lbne()`` to the GPU, then runs 10 frames.  Each frame
rotates and translates the rays on the device, launches ``intersect_mesh``,
prints the kernel timing and blits the resulting pixel buffer to a pygame
window.
"""
import os
import time

import numpy as np
from pycuda import autoinit
from pycuda.compiler import SourceModule
import pycuda.driver as cuda
from pycuda import gpuarray

from stl import *
from geometry import *
from materials import *
from camera import *
from vector import *
import pygame
import detectors

# raw_input only exists on Python 2; fall back to input() elsewhere.
try:
    prompt = raw_input
except NameError:
    prompt = input

print('device %s' % autoinit.device.name())

# --- Compile the CUDA module that lives next to this script ---------------
source_directory = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), 'src')

# Use a context manager so the source file handle is closed deterministically.
with open(os.path.join(source_directory, 'kernel.cu')) as source_file:
    source = source_file.read()

mod = SourceModule(source, options=['-I ' + source_directory],
                   no_extern_c=True, cache_dir=False)

intersect_mesh = mod.get_function('intersect_mesh')
rotate = mod.get_function('rotate')
translate = mod.get_function('translate')

# --- Window and camera rays -----------------------------------------------
size = width, height = 800, 600
screen = pygame.display.set_mode(size)

camera = Camera(size)
camera.position((0, 100, 10))

origin, direction = camera.get_rays()

# Normalise every direction vector in one vectorised pass instead of an
# interpreted per-row loop.
# NOTE(review): assumes get_rays() returns an (N, 3) float array, for which
# this is equivalent to dividing each row by its Euclidean norm -- confirm.
direction /= np.sqrt((direction ** 2).sum(axis=-1))[..., np.newaxis]

origin, direction = make_vector(origin), make_vector(direction)

origin_gpu = cuda.to_device(origin)
direction_gpu = cuda.to_device(direction)

# --- Geometry ---------------------------------------------------------------
# Alternative geometry sources kept for reference:
#solid = Solid(read_stl('models/lionsolid.stl'), vacuum, vacuum)
#geometry = Geometry()
#geometry.add_solid(solid)
#geometry.build(bits=3)
#geometry = detectors.load_lbne('./detectors/lbne_8bit.pkl')
geometry = detectors.build_lbne()

# Flatten the mesh to one row per vertex before packing it as float4.
mesh = geometry.mesh
mesh = mesh.reshape(mesh.shape[0]*3, 3)
mesh = make_vector(mesh, dtype=gpuarray.vec.float4)

lower_bound = make_vector(geometry.lower_bound, dtype=gpuarray.vec.float4)
upper_bound = make_vector(geometry.upper_bound, dtype=gpuarray.vec.float4)
child_map = geometry.child_map.astype(np.uint32)
child_len = geometry.child_len.astype(np.uint32)
first_leaf = np.int32(geometry.first_leaf)

mesh_gpu = cuda.to_device(mesh)
lower_bound_gpu = cuda.to_device(lower_bound)
upper_bound_gpu = cuda.to_device(upper_bound)
child_map_gpu = cuda.to_device(child_map)
child_len_gpu = cuda.to_device(child_len)

# --- Texture references -----------------------------------------------------
# Look up the texture references declared in the kernel module, point each
# one at its device buffer, then declare the element format of each texture.
mesh_tex = mod.get_texref('mesh')
lower_bound_tex = mod.get_texref('lower_bound_arr')
upper_bound_tex = mod.get_texref('upper_bound_arr')
child_map_tex = mod.get_texref('child_map_arr')
child_len_tex = mod.get_texref('child_len_arr')

mesh_tex.set_address(mesh_gpu, mesh.nbytes)
lower_bound_tex.set_address(lower_bound_gpu, lower_bound.nbytes)
upper_bound_tex.set_address(upper_bound_gpu, upper_bound.nbytes)
child_map_tex.set_address(child_map_gpu, child_map.nbytes)
child_len_tex.set_address(child_len_gpu, child_len.nbytes)

mesh_tex.set_format(cuda.array_format.FLOAT, 4)
lower_bound_tex.set_format(cuda.array_format.FLOAT, 4)
upper_bound_tex.set_format(cuda.array_format.FLOAT, 4)
child_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)
child_len_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1)

texrefs = [mesh_tex, lower_bound_tex, upper_bound_tex,
           child_map_tex, child_len_tex]

# --- Output buffers ---------------------------------------------------------
# One int32 slot per screen pixel; the host arrays are uninitialised and only
# serve to size the device allocations / receive results.
pixels = np.empty(width*height, dtype=np.int32)
pixels_gpu = cuda.to_device(pixels)

states = np.empty(width*height, dtype=np.int32)
states_gpu = cuda.to_device(states)

# --- Launch configuration (identical for every kernel call) -----------------
block_size = 64
block = (block_size, 1, 1)
# The +1 adds a spare block so a count that is not a multiple of block_size
# is still fully covered.
grid = (width*height//block_size + 1, 1)

# Per-frame rotation: pi/100 radians about the z axis.
rotation_angle = np.float32(np.pi/100)
rotation_axis = gpuarray.vec.make_float3(0, 0, 1)

# NOTE(review): loop extent reconstructed from the use of ``i`` and the
# per-frame timing/blit; the exit prompt is taken to follow the loop.
for i in range(10):
    # Move the camera rays on the device for this frame.
    rotate(np.int32(origin.size), origin_gpu, rotation_angle, rotation_axis,
           block=block, grid=grid)

    rotate(np.int32(direction.size), direction_gpu, rotation_angle,
           rotation_axis, block=block, grid=grid)

    frame_angle = i*np.pi/100
    translate(np.int32(origin.size), origin_gpu,
              gpuarray.vec.make_float3(-.5*np.sin(frame_angle),
                                       -.5*np.cos(frame_angle), 0),
              block=block, grid=grid)

    # Time only the intersection kernel; synchronize so the launch has
    # actually finished before the clock is read.
    t0 = time.time()
    intersect_mesh(np.int32(origin.size), origin_gpu, direction_gpu,
                   pixels_gpu, first_leaf, states_gpu,
                   block=block, grid=grid, texrefs=texrefs)
    cuda.Context.synchronize()
    elapsed = time.time() - t0

    print('%i triangles, %i photons, %f sec; (%f photons/sec)' %
          (mesh.size//3, pixels.size, elapsed, pixels.size/elapsed))

    # Copy the frame back to the host and show it.
    cuda.memcpy_dtoh(pixels, pixels_gpu)

    pygame.surfarray.blit_array(screen, pixels.reshape(size))
    pygame.display.flip()

prompt('press enter to exit')