From 4bf95f452e3275c12026b16b51dc646846598f19 Mon Sep 17 00:00:00 2001 From: Anthony LaTorre Date: Wed, 3 Aug 2011 14:32:49 -0400 Subject: add a GPU class to handle both the gpu context and module; since the geometry requires global device pointers, there should be a one to one correspondence between modules and contexts. the current plan is to perform all gpu operations within this class. also add a simple color map to display hit pmt charge and timing information. --- gpu.py | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 gpu.py (limited to 'gpu.py') diff --git a/gpu.py b/gpu.py new file mode 100644 index 0000000..e5fd5f1 --- /dev/null +++ b/gpu.py @@ -0,0 +1,140 @@ +import numpy as np +import numpy.ma as ma +from pycuda.tools import make_default_context +from pycuda.compiler import SourceModule +import pycuda.driver as cuda +from pycuda import gpuarray +from copy import copy +from itertools import izip + +import src +from geometry import standard_wavelengths +from color import map_to_color + +cuda.init() + +def format_size(size): + if size < 1e3: + return '%.1f%s' % (size, ' ') + elif size < 1e6: + return '%.1f%s' % (size/1e3, 'K') + elif size < 1e9: + return '%.1f%s' % (size/1e6, 'M') + else: + return '%.1f%s' % (size/1e9, 'G') + +def format_array(name, array): + return '%-15s %6s %6s' % \ + (name, format_size(len(array)), format_size(array.nbytes)) + +class GPU(object): + def __init__(self): + self.context = make_default_context() + print 'device %s' % self.context.get_device().name() + self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) + + def print_device_usage(self): + print 'device usage:' + print format_array('vertices', self.vertices_gpu) + print format_array('triangles', self.triangles_gpu) + print format_array('lower_bounds', self.lower_bounds_gpu) + print format_array('upper_bounds', self.upper_bounds_gpu) + print format_array('node_map', self.node_map_gpu) + print format_array('node_length', self.node_length_gpu) + print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_length_gpu.nbytes)) + + def load_geometry(self, geometry): + if not hasattr(geometry, 'mesh'): + geometry.build(bits=8) + + set_wavelength_range = self.module.get_function('set_wavelength_range') + set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) + + set_material = self.module.get_function('set_material') + self.materials = [] + for i in range(len(geometry.unique_materials)): + material = copy(geometry.unique_materials[i]) + + if material is None: + raise Exception('one or more triangles is missing a material.') + + material.refractive_index_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.refractive_index[:,0], material.refractive_index[:,1]).astype(np.float32)) + material.absorption_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32)) + material.scattering_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32)) + + set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) + + self.materials.append(material) + + set_surface = self.module.get_function('set_surface') + self.surfaces = [] + for i in range(len(geometry.unique_surfaces)): + surface = copy(geometry.unique_surfaces[i]) + + if surface is None: + continue + + surface.detect_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.detect[:,0], surface.detect[:,1]).astype(np.float32)) + surface.absorb_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.absorb[:,0], surface.absorb[:,1]).astype(np.float32)) + surface.reflect_diffuse_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32)) + surface.reflect_specular_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32)) + + set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) + + self.surfaces.append(surface) + + self.vertices_gpu = gpuarray.to_gpu(geometry.mesh.vertices.astype(np.float32).view(gpuarray.vec.float3)) + + triangles = \ + np.empty(len(geometry.mesh.triangles), dtype=gpuarray.vec.uint4) + triangles['x'] = geometry.mesh.triangles[:,0] + triangles['y'] = geometry.mesh.triangles[:,1] + triangles['z'] = geometry.mesh.triangles[:,2] + triangles['w'] = ((geometry.material1_index & 0xff) << 24) | ((geometry.material2_index & 0xff) << 16) | ((geometry.surface_index & 0xff) << 8) + self.triangles_gpu = gpuarray.to_gpu(triangles) + + lower_bounds_float4 = np.empty(geometry.lower_bounds.shape[0], dtype=gpuarray.vec.float4) + lower_bounds_float4['x'] = geometry.lower_bounds[:,0] + lower_bounds_float4['y'] = geometry.lower_bounds[:,1] + lower_bounds_float4['z'] = geometry.lower_bounds[:,2] + self.lower_bounds_gpu = gpuarray.to_gpu(lower_bounds_float4) + + upper_bounds_float4 = np.empty(geometry.upper_bounds.shape[0], dtype=gpuarray.vec.float4) + upper_bounds_float4['x'] = geometry.upper_bounds[:,0] + upper_bounds_float4['y'] = geometry.upper_bounds[:,1] + upper_bounds_float4['z'] = geometry.upper_bounds[:,2] + self.upper_bounds_gpu = gpuarray.to_gpu(upper_bounds_float4) + + self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32)) + self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32)) + self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32)) + self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32)) + + set_global_mesh_variables = self.module.get_function('set_global_mesh_variables') + set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1)) + + self.lower_bounds_tex = self.module.get_texref('lower_bounds') + self.upper_bounds_tex = self.module.get_texref('upper_bounds') + self.node_map_tex = self.module.get_texref('node_map') + self.node_length_tex = self.module.get_texref('node_length') + + self.lower_bounds_tex.set_address(self.lower_bounds_gpu.gpudata, self.lower_bounds_gpu.nbytes) + self.upper_bounds_tex.set_address(self.upper_bounds_gpu.gpudata, self.upper_bounds_gpu.nbytes) + self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes) + self.node_length_tex.set_address(self.node_length_gpu.gpudata, self.node_length_gpu.nbytes) + + self.lower_bounds_tex.set_format(cuda.array_format.FLOAT, 4) + self.upper_bounds_tex.set_format(cuda.array_format.FLOAT, 4) + self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + self.node_length_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) + + self.print_device_usage() + + def color_solids(self, solid_ids, colors): + solid_id_map = self.solid_id_map_gpu.get() + triangle_colors = self.colors_gpu.get() + + for i, color in izip(solid_ids, colors): + triangle_colors[solid_id_map == i] = color + + self.colors_gpu.set(triangle_colors) -- cgit