summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Seibert <stan@mtrr.org>2011-08-04 11:53:30 -0400
committerStan Seibert <stan@mtrr.org>2011-08-04 11:53:30 -0400
commit9b8a7902e01bea0f87154857163624b8da040ec5 (patch)
tree4e307be8fcd9e58ace564742552cd4ba3785557d
parentcf87548f09607927d49defa5c1bbb3e4fc877286 (diff)
downloadchroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.gz
chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.bz2
chroma-9b8a7902e01bea0f87154857163624b8da040ec5.zip
Implement propagate and daq functions in gpu module.
-rw-r--r--gpu.py160
1 files changed, 150 insertions, 10 deletions
diff --git a/gpu.py b/gpu.py
index e5fd5f1..11a83ee 100644
--- a/gpu.py
+++ b/gpu.py
@@ -2,6 +2,7 @@ import numpy as np
import numpy.ma as ma
from pycuda.tools import make_default_context
from pycuda.compiler import SourceModule
+from pycuda.characterize import sizeof
import pycuda.driver as cuda
from pycuda import gpuarray
from copy import copy
@@ -27,11 +28,39 @@ def format_array(name, array):
return '%-15s %6s %6s' % \
(name, format_size(len(array)), format_size(array.nbytes))
+class CUDAFuncs(object):
+ '''simple container class for GPU functions as attributes'''
+ def __init__(self, cuda_module, func_names):
+ '''Extract __global__ functions listed in func_names from
+ the PyCUDA module object. The functions are assigned
+ to attributes of this object with the same name.'''
+ for name in func_names:
+ setattr(self, name, cuda_module.get_function(name))
+
class GPU(object):
- def __init__(self):
- self.context = make_default_context()
+ def __init__(self, device_id=None):
+ '''Initialize a GPU context on the specified device. If device_id==None,
+ the default device is used.'''
+
+ if device_id == None:
+ self.context = make_default_context()
+ else:
+ device = pycuda.driver.Device(device_id)
+ self.context = device.make_context()
print 'device %s' % self.context.get_device().name()
self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+ self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material',
+ 'set_surface',
+ 'set_global_mesh_variables'])
+
+
+ self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate'])
+ self.nblocks = 64
+ self.max_nthreads = 200000
+ self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+ self.daq_funcs = CUDAFuncs(self.daq_module,
+ ['reset_earliest_time_int', 'run_daq',
+ 'convert_sortable_int_to_float'])
def print_device_usage(self):
print 'device usage:'
@@ -47,10 +76,8 @@ class GPU(object):
if not hasattr(geometry, 'mesh'):
geometry.build(bits=8)
- set_wavelength_range = self.module.get_function('set_wavelength_range')
- set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1))
+ self.geo_funcs.set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1))
- set_material = self.module.get_function('set_material')
self.materials = []
for i in range(len(geometry.unique_materials)):
material = copy(geometry.unique_materials[i])
@@ -62,11 +89,10 @@ class GPU(object):
material.absorption_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32))
material.scattering_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32))
- set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1))
+ self.geo_funcs.set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1))
self.materials.append(material)
- set_surface = self.module.get_function('set_surface')
self.surfaces = []
for i in range(len(geometry.unique_surfaces)):
surface = copy(geometry.unique_surfaces[i])
@@ -79,7 +105,7 @@ class GPU(object):
surface.reflect_diffuse_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32))
surface.reflect_specular_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32))
- set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1))
+ self.geo_funcs.set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1))
self.surfaces.append(surface)
@@ -110,8 +136,7 @@ class GPU(object):
self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32))
self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32))
- set_global_mesh_variables = self.module.get_function('set_global_mesh_variables')
- set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
+ self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
self.lower_bounds_tex = self.module.get_texref('lower_bounds')
self.upper_bounds_tex = self.module.get_texref('upper_bounds')
@@ -138,3 +163,118 @@ class GPU(object):
triangle_colors[solid_id_map == i] = color
self.colors_gpu.set(triangle_colors)
+
+ def setup_propagate(self, seed=1):
+ self.rng_states_gpu = cuda.mem_alloc(self.max_nthreads*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
+ self.prop_funcs.init_rng(np.int32(self.max_nthreads), self.rng_states_gpu,
+ np.uint64(seed), np.uint64(0),
+ block=(self.nblocks,1,1),
+ grid=(self.max_nthreads//self.nblocks+1,1))
+ #self.context.synchronize()
+
+ def load_photons(self, pos, dir, pol, wavelength, t0,
+ histories=None, last_hit_triangles=None):
+ '''Load N photons onto the GPU.
+
+ pos: numpy.array(shape=(N, 3)) of photon starting positions (meters)
+ dir: numpy.array(shape=(N, 3)) of photon starting directions (unit vectors)
+ pol: numpy.array(shape=(N, 3)) of photon polarization directions (unit vectors)
+ wavelength: numpy.array(shape=N) of photon wavelengths (nm)
+ t0: numpy.array(shape=N) of photon start times (s)
+
+ Optional args will be loaded with defaults on GPU if not set:
+ histories: Bitmask of interactions that have occurred over history of photon
+ last_hit_triangles: The triangle ID number that the photon last interacted with,
+ if any. -1 if no triangle was hit in the last step
+ '''
+ self.nphotons = len(pos)
+ assert self.nphotons < self.max_nthreads
+ assert len(dir) == self.nphotons
+ assert len(pol) == self.nphotons
+ assert len(wavelength) == self.nphotons
+ assert len(t0) == self.nphotons
+
+ self.positions_gpu = gpuarray.to_gpu(pos.astype(np.float32).view(gpuarray.vec.float3))
+ self.directions_gpu = gpuarray.to_gpu(dir.astype(np.float32).view(gpuarray.vec.float3))
+ self.polarizations_gpu = gpuarray.to_gpu(pol.astype(np.float32).view(gpuarray.vec.float3))
+ self.wavelengths_gpu = gpuarray.to_gpu(wavelength.astype(np.float32))
+ self.times_gpu = gpuarray.to_gpu(t0.astype(np.float32))
+
+ if histories:
+ self.histories_gpu = gpuarray.to_gpu(histories.astype(np.uint32))
+ else:
+ self.histories_gpu = gpuarray.zeros(self.nphotons, dtype=np.uint32)
+
+ if last_hit_triangles:
+ self.last_hit_triangles_gpu = gpuarray.to_gpu(last_hit_triangles.astype(np.int32))
+ else:
+ self.last_hit_triangles_gpu = gpuarray.empty(self.nphotons, dtype=np.int32)
+ self.last_hit_triangles_gpu.fill(-1)
+
+ self.per_photon_kernel_config = {'block': (self.nblocks,1,1),
+ 'grid': (self.nphotons//self.nblocks+1,1)}
+
+ def propagate(self, max_steps=10):
+ '''Propagate photons on GPU to termination or max_steps, whichever comes first.
+
+ May be called repeatedly without reloading photon information if single-stepping
+ through photon history.
+ '''
+ self.prop_funcs.propagate(np.int32(self.nphotons),
+ self.rng_states_gpu, self.positions_gpu,
+ self.directions_gpu,
+ self.wavelengths_gpu, self.polarizations_gpu,
+ self.times_gpu, self.histories_gpu,
+ self.last_hit_triangles_gpu,
+ np.int32(max_steps), **self.per_photon_kernel_config)
+ #self.context.synchronize()
+
+ def get_photons(self):
+ '''Returns a dictionary of current photon state information.
+
+ Contents of dictionary have the same names as the parameters to load_photons().
+ '''
+ return { 'pos' : self.positions_gpu.get().view(np.float32).reshape(self.positions_gpu.size, 3),
+ 'dir' : self.directions_gpu.get().view(np.float32).reshape(self.directions_gpu.size, 3),
+ 'pol' : self.polarizations_gpu.get().view(np.float32).reshape(self.polarization_gpu.size, 3),
+ 'wavelength' : self.wavelengths_gpu.get(),
+ 't0' : self.times_gpu.get(),
+ 'histories' : self.histories_gpu.get(),
+ 'last_hit_triangles' : self.last_hit_triangles_gpu.get()}
+
+ def setup_daq(self, max_pmt_id, pmt_rms=1.2e-9):
+ self.earliest_time_gpu = gpuarray.GPUArray(shape=(max_pmt_id+1,), dtype=np.float32)
+ self.earliest_time_int_gpu = gpuarray.GPUArray(shape=self.earliest_time_gpu.shape,
+ dtype=np.uint32)
+ self.daq_pmt_rms = pmt_rms
+
+ def run_daq(self):
+ self.daq_funcs.reset_earliest_time_int(np.float32(1e9),
+ np.int32(len(self.earliest_time_int_gpu)),
+ self.earliest_time_int_gpu,
+ block=(self.nblocks,1,1),
+ grid=(len(self.earliest_time_int_gpu)//self.nblocks+1,1))
+ #self.context.synchronize()
+ self.daq_funcs.run_daq(self.rng_states_gpu, np.uint32(0x1 << 2),
+ np.float32(self.daq_pmt_rms),
+ np.int32(len(self.positions_gpu)),
+ self.times_gpu, self.histories_gpu,
+ self.last_hit_triangles_gpu,
+ self.solid_id_map_gpu,
+ np.int32(len(self.earliest_time_int_gpu)),
+ self.earliest_time_int_gpu,
+ block=(self.nblocks,1,1), grid=(self.nphotons//self.nblocks+1,1))
+ #self.context.synchronize()
+ self.daq_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)),
+ self.earliest_time_int_gpu,
+ self.earliest_time_gpu,
+ block=(self.nblocks,1,1),
+ grid=(len(self.earliest_time_int_gpu)//self.nblocks+1,1))
+ #self.context.synchronize()
+
+
+ def get_hits(self):
+ return self.earliest_time_gpu.get()
+
+ def shutdown(self):
+ self.context.pop()