import numpy as np import numpy.ma as ma from pycuda.tools import make_default_context from pycuda.compiler import SourceModule import pycuda.driver as cuda from pycuda import gpuarray from copy import copy from itertools import izip import src from geometry import standard_wavelengths from color import map_to_color cuda.init() def format_size(size): if size < 1e3: return '%.1f%s' % (size, ' ') elif size < 1e6: return '%.1f%s' % (size/1e3, 'K') elif size < 1e9: return '%.1f%s' % (size/1e6, 'M') else: return '%.1f%s' % (size/1e9, 'G') def format_array(name, array): return '%-15s %6s %6s' % \ (name, format_size(len(array)), format_size(array.nbytes)) class GPU(object): def __init__(self): self.context = make_default_context() print 'device %s' % self.context.get_device().name() self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) def print_device_usage(self): print 'device usage:' print format_array('vertices', self.vertices_gpu) print format_array('triangles', self.triangles_gpu) print format_array('lower_bounds', self.lower_bounds_gpu) print format_array('upper_bounds', self.upper_bounds_gpu) print format_array('node_map', self.node_map_gpu) print format_array('node_length', self.node_length_gpu) print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_length_gpu.nbytes)) def load_geometry(self, geometry): if not hasattr(geometry, 'mesh'): geometry.build(bits=8) set_wavelength_range = self.module.get_function('set_wavelength_range') set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) set_material = self.module.get_function('set_material') self.materials = [] for i in range(len(geometry.unique_materials)): material = copy(geometry.unique_materials[i]) if material is None: raise Exception('one or more triangles is missing a material.') material.refractive_index_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.refractive_index[:,0], material.refractive_index[:,1]).astype(np.float32)) material.absorption_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32)) material.scattering_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32)) set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) self.materials.append(material) set_surface = self.module.get_function('set_surface') self.surfaces = [] for i in range(len(geometry.unique_surfaces)): surface = copy(geometry.unique_surfaces[i]) if surface is None: continue surface.detect_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.detect[:,0], surface.detect[:,1]).astype(np.float32)) surface.absorb_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.absorb[:,0], surface.absorb[:,1]).astype(np.float32)) surface.reflect_diffuse_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32)) surface.reflect_specular_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32)) set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) self.surfaces.append(surface) self.vertices_gpu = gpuarray.to_gpu(geometry.mesh.vertices.astype(np.float32).view(gpuarray.vec.float3)) triangles = \ np.empty(len(geometry.mesh.triangles), dtype=gpuarray.vec.uint4) triangles['x'] = geometry.mesh.triangles[:,0] triangles['y'] = geometry.mesh.triangles[:,1] triangles['z'] = geometry.mesh.triangles[:,2] triangles['w'] = ((geometry.material1_index & 0xff) << 24) | ((geometry.material2_index & 0xff) << 16) | ((geometry.surface_index & 0xff) << 8) self.triangles_gpu = gpuarray.to_gpu(triangles) lower_bounds_float4 = np.empty(geometry.lower_bounds.shape[0], dtype=gpuarray.vec.float4) lower_bounds_float4['x'] = geometry.lower_bounds[:,0] lower_bounds_float4['y'] = geometry.lower_bounds[:,1] lower_bounds_float4['z'] = geometry.lower_bounds[:,2] self.lower_bounds_gpu = gpuarray.to_gpu(lower_bounds_float4) upper_bounds_float4 = np.empty(geometry.upper_bounds.shape[0], dtype=gpuarray.vec.float4) upper_bounds_float4['x'] = geometry.upper_bounds[:,0] upper_bounds_float4['y'] = geometry.upper_bounds[:,1] upper_bounds_float4['z'] = geometry.upper_bounds[:,2] self.upper_bounds_gpu = gpuarray.to_gpu(upper_bounds_float4) self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32)) self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32)) self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32)) self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32)) set_global_mesh_variables = self.module.get_function('set_global_mesh_variables') set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1)) self.lower_bounds_tex = self.module.get_texref('lower_bounds') self.upper_bounds_tex = self.module.get_texref('upper_bounds') self.node_map_tex = self.module.get_texref('node_map') self.node_length_tex = self.module.get_texref('node_length') self.lower_bounds_tex.set_address(self.lower_bounds_gpu.gpudata, self.lower_bounds_gpu.nbytes) self.upper_bounds_tex.set_address(self.upper_bounds_gpu.gpudata, self.upper_bounds_gpu.nbytes) self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes) self.node_length_tex.set_address(self.node_length_gpu.gpudata, self.node_length_gpu.nbytes) self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) self.node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) self.print_device_usage() def color_solids(self, solid_ids, colors): solid_id_map = self.solid_id_map_gpu.get() triangle_colors = self.colors_gpu.get() for i, color in izip(solid_ids, colors): triangle_colors[solid_id_map == i] = color self.colors_gpu.set(triangle_colors)