diff options
author | Stan Seibert <stan@mtrr.org> | 2011-08-04 11:53:30 -0400 |
---|---|---|
committer | Stan Seibert <stan@mtrr.org> | 2011-08-04 11:53:30 -0400 |
commit | 9b8a7902e01bea0f87154857163624b8da040ec5 (patch) | |
tree | 4e307be8fcd9e58ace564742552cd4ba3785557d | |
parent | cf87548f09607927d49defa5c1bbb3e4fc877286 (diff) | |
download | chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.gz chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.bz2 chroma-9b8a7902e01bea0f87154857163624b8da040ec5.zip |
Implement propagate and daq functions in gpu module.
-rw-r--r-- | gpu.py | 160 |
1 files changed, 150 insertions, 10 deletions
@@ -2,6 +2,7 @@ import numpy as np import numpy.ma as ma from pycuda.tools import make_default_context from pycuda.compiler import SourceModule +from pycuda.characterize import sizeof import pycuda.driver as cuda from pycuda import gpuarray from copy import copy @@ -27,11 +28,39 @@ def format_array(name, array): return '%-15s %6s %6s' % \ (name, format_size(len(array)), format_size(array.nbytes)) +class CUDAFuncs(object): + '''simple container class for GPU functions as attributes''' + def __init__(self, cuda_module, func_names): + '''Extract __global__ functions listed in func_names from + the PyCUDA module object. The functions are assigned + to attributes of this object with the same name.''' + for name in func_names: + setattr(self, name, cuda_module.get_function(name)) + class GPU(object): - def __init__(self): - self.context = make_default_context() + def __init__(self, device_id=None): + '''Initialize a GPU context on the specified device. If device_id==None, + the default device is used.''' + + if device_id == None: + self.context = make_default_context() + else: + device = pycuda.driver.Device(device_id) + self.context = device.make_context() print 'device %s' % self.context.get_device().name() self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) + self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', + 'set_surface', + 'set_global_mesh_variables']) + + + self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate']) + self.nblocks = 64 + self.max_nthreads = 200000 + self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) + self.daq_funcs = CUDAFuncs(self.daq_module, + ['reset_earliest_time_int', 'run_daq', + 'convert_sortable_int_to_float']) def print_device_usage(self): print 'device usage:' @@ -47,10 +76,8 @@ class GPU(object): if not hasattr(geometry, 'mesh'): geometry.build(bits=8) - set_wavelength_range = self.module.get_function('set_wavelength_range') - set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) + self.geo_funcs.set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) - set_material = self.module.get_function('set_material') self.materials = [] for i in range(len(geometry.unique_materials)): material = copy(geometry.unique_materials[i]) @@ -62,11 +89,10 @@ class GPU(object): material.absorption_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32)) material.scattering_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32)) - set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) + self.geo_funcs.set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) self.materials.append(material) - set_surface = self.module.get_function('set_surface') self.surfaces = [] for i in range(len(geometry.unique_surfaces)): surface = copy(geometry.unique_surfaces[i]) @@ -79,7 +105,7 @@ class GPU(object): surface.reflect_diffuse_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32)) surface.reflect_specular_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32)) - set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) + self.geo_funcs.set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) self.surfaces.append(surface) @@ -110,8 +136,7 @@ class GPU(object): self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32)) self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32)) - set_global_mesh_variables = self.module.get_function('set_global_mesh_variables') - set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1)) + self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1)) self.lower_bounds_tex = self.module.get_texref('lower_bounds') self.upper_bounds_tex = self.module.get_texref('upper_bounds') @@ -138,3 +163,118 @@ class GPU(object): triangle_colors[solid_id_map == i] = color self.colors_gpu.set(triangle_colors) + + def setup_propagate(self, seed=1): + self.rng_states_gpu = cuda.mem_alloc(self.max_nthreads*sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) + self.prop_funcs.init_rng(np.int32(self.max_nthreads), self.rng_states_gpu, + np.uint64(seed), np.uint64(0), + block=(self.nblocks,1,1), + grid=(self.max_nthreads//self.nblocks+1,1)) + #self.context.synchronize() + + def load_photons(self, pos, dir, pol, wavelength, t0, + histories=None, last_hit_triangles=None): + '''Load N photons onto the GPU. + + pos: numpy.array(shape=(N, 3)) of photon starting positions (meters) + dir: numpy.array(shape=(N, 3)) of photon starting directions (unit vectors) + pol: numpy.array(shape=(N, 3)) of photon polarization directions (unit vectors) + wavelength: numpy.array(shape=N) of photon wavelengths (nm) + t0: numpy.array(shape=N) of photon start times (s) + + Optional args will be loaded with defaults on GPU if not set: + histories: Bitmask of interactions that have occurred over history of photon + last_hit_triangles: The triangle ID number that the photon last interacted with, + if any. -1 if no triangle was hit in the last step + ''' + self.nphotons = len(pos) + assert self.nphotons < self.max_nthreads + assert len(dir) == self.nphotons + assert len(pol) == self.nphotons + assert len(wavelength) == self.nphotons + assert len(t0) == self.nphotons + + self.positions_gpu = gpuarray.to_gpu(pos.astype(np.float32).view(gpuarray.vec.float3)) + self.directions_gpu = gpuarray.to_gpu(dir.astype(np.float32).view(gpuarray.vec.float3)) + self.polarizations_gpu = gpuarray.to_gpu(pol.astype(np.float32).view(gpuarray.vec.float3)) + self.wavelengths_gpu = gpuarray.to_gpu(wavelength.astype(np.float32)) + self.times_gpu = gpuarray.to_gpu(t0.astype(np.float32)) + + if histories: + self.histories_gpu = gpuarray.to_gpu(histories.astype(np.uint32)) + else: + self.histories_gpu = gpuarray.zeros(self.nphotons, dtype=np.uint32) + + if last_hit_triangles: + self.last_hit_triangles_gpu = gpuarray.to_gpu(last_hit_triangles.astype(np.int32)) + else: + self.last_hit_triangles_gpu = gpuarray.empty(self.nphotons, dtype=np.int32) + self.last_hit_triangles_gpu.fill(-1) + + self.per_photon_kernel_config = {'block': (self.nblocks,1,1), + 'grid': (self.nphotons//self.nblocks+1,1)} + + def propagate(self, max_steps=10): + '''Propagate photons on GPU to termination or max_steps, whichever comes first. + + May be called repeatedly without reloading photon information if single-stepping + through photon history. + ''' + self.prop_funcs.propagate(np.int32(self.nphotons), + self.rng_states_gpu, self.positions_gpu, + self.directions_gpu, + self.wavelengths_gpu, self.polarizations_gpu, + self.times_gpu, self.histories_gpu, + self.last_hit_triangles_gpu, + np.int32(max_steps), **self.per_photon_kernel_config) + #self.context.synchronize() + + def get_photons(self): + '''Returns a dictionary of current photon state information. + + Contents of dictionary have the same names as the parameters to load_photons(). + ''' + return { 'pos' : self.positions_gpu.get().view(np.float32).reshape(self.positions_gpu.size, 3), + 'dir' : self.directions_gpu.get().view(np.float32).reshape(self.directions_gpu.size, 3), + 'pol' : self.polarizations_gpu.get().view(np.float32).reshape(self.polarization_gpu.size, 3), + 'wavelength' : self.wavelengths_gpu.get(), + 't0' : self.times_gpu.get(), + 'histories' : self.histories_gpu.get(), + 'last_hit_triangles' : self.last_hit_triangles_gpu.get()} + + def setup_daq(self, max_pmt_id, pmt_rms=1.2e-9): + self.earliest_time_gpu = gpuarray.GPUArray(shape=(max_pmt_id+1,), dtype=np.float32) + self.earliest_time_int_gpu = gpuarray.GPUArray(shape=self.earliest_time_gpu.shape, + dtype=np.uint32) + self.daq_pmt_rms = pmt_rms + + def run_daq(self): + self.daq_funcs.reset_earliest_time_int(np.float32(1e9), + np.int32(len(self.earliest_time_int_gpu)), + self.earliest_time_int_gpu, + block=(self.nblocks,1,1), + grid=(len(self.earliest_time_int_gpu)//self.nblocks+1,1)) + #self.context.synchronize() + self.daq_funcs.run_daq(self.rng_states_gpu, np.uint32(0x1 << 2), + np.float32(self.daq_pmt_rms), + np.int32(len(self.positions_gpu)), + self.times_gpu, self.histories_gpu, + self.last_hit_triangles_gpu, + self.solid_id_map_gpu, + np.int32(len(self.earliest_time_int_gpu)), + self.earliest_time_int_gpu, + block=(self.nblocks,1,1), grid=(self.nphotons//self.nblocks+1,1)) + #self.context.synchronize() + self.daq_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)), + self.earliest_time_int_gpu, + self.earliest_time_gpu, + block=(self.nblocks,1,1), + grid=(len(self.earliest_time_int_gpu)//self.nblocks+1,1)) + #self.context.synchronize() + + + def get_hits(self): + return self.earliest_time_gpu.get() + + def shutdown(self): + self.context.pop() |