Implement propagate and daq functions in gpu module.

author: Stan Seibert <stan@mtrr.org> 2011-08-04 11:53:30 -0400
committer: Stan Seibert <stan@mtrr.org> 2011-08-04 11:53:30 -0400
commit: 9b8a7902e01bea0f87154857163624b8da040ec5 (patch)
tree: 4e307be8fcd9e58ace564742552cd4ba3785557d
parent: cf87548f09607927d49defa5c1bbb3e4fc877286 (diff)
download: chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.gz
chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.bz2
chroma-9b8a7902e01bea0f87154857163624b8da040ec5.zip
1 files changed, 150 insertions, 10 deletions
diff --git a/gpu.py b/gpu.py
index e5fd5f1..11a83ee 100644
--- a/gpu.py
+++ b/gpu.py
@@ -2,6 +2,7 @@ import numpy as np
 import numpy.ma as ma
 from pycuda.tools import make_default_context
 from pycuda.compiler import SourceModule
+from pycuda.characterize import sizeof
 import pycuda.driver as cuda
 from pycuda import gpuarray
 from copy import copy
@@ -27,11 +28,39 @@ def format_array(name, array):
     return '%-15s %6s %6s' % \
         (name, format_size(len(array)), format_size(array.nbytes))
 
+class CUDAFuncs(object):
+    '''simple container class for GPU functions as attributes'''
+    def __init__(self, cuda_module, func_names):
+        '''Extract __global__ functions listed in func_names from
+        the PyCUDA module object.  The functions are assigned
+        to attributes of this object with the same name.'''
+        for name in func_names:
+            setattr(self, name, cuda_module.get_function(name))
+
 class GPU(object):
-    def __init__(self):
-        self.context = make_default_context()
+    def __init__(self, device_id=None):
+        '''Initialize a GPU context on the specified device.  If device_id==None,
+        the default device is used.'''
+
+        if device_id == None:
+            self.context = make_default_context()
+        else:
+            device = pycuda.driver.Device(device_id)
+            self.context = device.make_context()
         print 'device %s' % self.context.get_device().name()
         self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+        self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material',
+                                                 'set_surface', 
+                                                 'set_global_mesh_variables'])
+                                                 
+                                                 
+        self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate'])
+        self.nblocks = 64
+        self.max_nthreads = 200000
+        self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
+        self.daq_funcs = CUDAFuncs(self.daq_module, 
+                              ['reset_earliest_time_int', 'run_daq',
+                               'convert_sortable_int_to_float'])
 
     def print_device_usage(self):
         print 'device usage:'
@@ -47,10 +76,8 @@ class GPU(object):
         if not hasattr(geometry, 'mesh'):
             geometry.build(bits=8)
 
-        set_wavelength_range = self.module.get_function('set_wavelength_range')
-        set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1))
+        self.geo_funcs.set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1))
 
-        set_material = self.module.get_function('set_material')
         self.materials = []
         for i in range(len(geometry.unique_materials)):
             material = copy(geometry.unique_materials[i])
@@ -62,11 +89,10 @@ class GPU(object):
             material.absorption_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32))
             material.scattering_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32))
 
-            set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1))
+            self.geo_funcs.set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1))
 
             self.materials.append(material)
 
-        set_surface = self.module.get_function('set_surface')
         self.surfaces = []
         for i in range(len(geometry.unique_surfaces)):
             surface = copy(geometry.unique_surfaces[i])
@@ -79,7 +105,7 @@ class GPU(object):
             surface.reflect_diffuse_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32))
             surface.reflect_specular_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32))
 
-            set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1))
+            self.geo_funcs.set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1))
 
             self.surfaces.append(surface)
 
@@ -110,8 +136,7 @@ class GPU(object):
         self.node_length_gpu = gpuarray.to_gpu(geometry.node_length.astype(np.uint32))
         self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32))
 
-        set_global_mesh_variables = self.module.get_function('set_global_mesh_variables')
-        set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
+        self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), block=(1,1,1), grid=(1,1))
 
         self.lower_bounds_tex = self.module.get_texref('lower_bounds')
         self.upper_bounds_tex = self.module.get_texref('upper_bounds')
@@ -138,3 +163,118 @@ class GPU(object):
             triangle_colors[solid_id_map == i] = color
 
         self.colors_gpu.set(triangle_colors)
+
+    def setup_propagate(self, seed=1):
+        self.rng_states_gpu = cuda.mem_alloc(self.max_nthreads*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
+        self.prop_funcs.init_rng(np.int32(self.max_nthreads), self.rng_states_gpu, 
+                                 np.uint64(seed), np.uint64(0), 
+                                 block=(self.nblocks,1,1), 
+                                 grid=(self.max_nthreads//self.nblocks+1,1))
+        #self.context.synchronize()
+
+    def load_photons(self, pos, dir, pol, wavelength, t0, 
+                     histories=None, last_hit_triangles=None):
+        '''Load N photons onto the GPU.
+
+           pos: numpy.array(shape=(N, 3)) of photon starting positions (meters)
+           dir: numpy.array(shape=(N, 3)) of photon starting directions (unit vectors)
+           pol: numpy.array(shape=(N, 3)) of photon polarization directions (unit vectors)
+           wavelength: numpy.array(shape=N) of photon wavelengths (nm)
+           t0: numpy.array(shape=N) of photon start times (s)
+           
+           Optional args will be loaded with defaults on GPU if not set:
+           histories: Bitmask of interactions that have occurred over history of photon
+           last_hit_triangles: The triangle ID number that the photon last interacted with,
+                               if any.  -1 if no triangle was hit in the last step
+        '''
+        self.nphotons = len(pos)
+        assert self.nphotons < self.max_nthreads
+        assert len(dir) == self.nphotons
+        assert len(pol) == self.nphotons
+        assert len(wavelength) == self.nphotons
+        assert len(t0) == self.nphotons
+
+        self.positions_gpu = gpuarray.to_gpu(pos.astype(np.float32).view(gpuarray.vec.float3))
+        self.directions_gpu = gpuarray.to_gpu(dir.astype(np.float32).view(gpuarray.vec.float3))
+        self.polarizations_gpu = gpuarray.to_gpu(pol.astype(np.float32).view(gpuarray.vec.float3))
+        self.wavelengths_gpu = gpuarray.to_gpu(wavelength.astype(np.float32))
+        self.times_gpu = gpuarray.to_gpu(t0.astype(np.float32))
+
+        if histories:
+            self.histories_gpu = gpuarray.to_gpu(histories.astype(np.uint32))
+        else:
+            self.histories_gpu = gpuarray.zeros(self.nphotons, dtype=np.uint32)
+
+        if last_hit_triangles:
+            self.last_hit_triangles_gpu = gpuarray.to_gpu(last_hit_triangles.astype(np.int32))
+        else:
+            self.last_hit_triangles_gpu = gpuarray.empty(self.nphotons, dtype=np.int32)
+            self.last_hit_triangles_gpu.fill(-1)
+
+        self.per_photon_kernel_config = {'block': (self.nblocks,1,1), 
+                                         'grid': (self.nphotons//self.nblocks+1,1)}
+
+    def propagate(self, max_steps=10):
+        '''Propagate photons on GPU to termination or max_steps, whichever comes first.
+
+        May be called repeatedly without reloading photon information if single-stepping
+        through photon history.
+        '''
+        self.prop_funcs.propagate(np.int32(self.nphotons), 
+                                 self.rng_states_gpu, self.positions_gpu, 
+                                 self.directions_gpu, 
+                                 self.wavelengths_gpu, self.polarizations_gpu, 
+                                 self.times_gpu, self.histories_gpu, 
+                                 self.last_hit_triangles_gpu, 
+                                 np.int32(max_steps), **self.per_photon_kernel_config)
+        #self.context.synchronize()
+
+    def get_photons(self):
+        '''Returns a dictionary of current photon state information.
+
+        Contents of dictionary have the same names as the parameters to load_photons().
+        '''
+        return { 'pos' : self.positions_gpu.get().view(np.float32).reshape(self.positions_gpu.size, 3),
+                 'dir' : self.directions_gpu.get().view(np.float32).reshape(self.directions_gpu.size, 3),
+                 'pol' : self.polarizations_gpu.get().view(np.float32).reshape(self.polarization_gpu.size, 3),
+                 'wavelength' : self.wavelengths_gpu.get(),
+                 't0' : self.times_gpu.get(),
+                 'histories' : self.histories_gpu.get(),
+                 'last_hit_triangles' : self.last_hit_triangles_gpu.get()}
+    
+    def setup_daq(self, max_pmt_id, pmt_rms=1.2e-9):
+        self.earliest_time_gpu = gpuarray.GPUArray(shape=(max_pmt_id+1,), dtype=np.float32)
+        self.earliest_time_int_gpu = gpuarray.GPUArray(shape=self.earliest_time_gpu.shape, 
+                                                       dtype=np.uint32)
+        self.daq_pmt_rms = pmt_rms
+
+    def run_daq(self):
+        self.daq_funcs.reset_earliest_time_int(np.float32(1e9), 
+                                               np.int32(len(self.earliest_time_int_gpu)), 
+                                               self.earliest_time_int_gpu, 
+                                               block=(self.nblocks,1,1), 
+                                               grid=(len(self.earliest_time_int_gpu)//self.nblocks+1,1))
+        #self.context.synchronize()
+        self.daq_funcs.run_daq(self.rng_states_gpu, np.uint32(0x1 << 2), 
+                               np.float32(self.daq_pmt_rms), 
+                               np.int32(len(self.positions_gpu)), 
+                               self.times_gpu, self.histories_gpu,
+                               self.last_hit_triangles_gpu, 
+                               self.solid_id_map_gpu, 
+                               np.int32(len(self.earliest_time_int_gpu)), 
+                               self.earliest_time_int_gpu, 
+                               block=(self.nblocks,1,1), grid=(self.nphotons//self.nblocks+1,1))
+        #self.context.synchronize()
+        self.daq_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)), 
+                                                     self.earliest_time_int_gpu, 
+                                                     self.earliest_time_gpu, 
+                                                     block=(self.nblocks,1,1), 
+                                                     grid=(len(self.earliest_time_int_gpu)//self.nblocks+1,1))
+        #self.context.synchronize()
+        
+
+    def get_hits(self):
+        return self.earliest_time_gpu.get()
+
+    def shutdown(self):
+        self.context.pop()
author	Stan Seibert <stan@mtrr.org>	2011-08-04 11:53:30 -0400
committer	Stan Seibert <stan@mtrr.org>	2011-08-04 11:53:30 -0400
commit	9b8a7902e01bea0f87154857163624b8da040ec5 (patch)
tree	4e307be8fcd9e58ace564742552cd4ba3785557d
parent	cf87548f09607927d49defa5c1bbb3e4fc877286 (diff)
download	chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.gz chroma-9b8a7902e01bea0f87154857163624b8da040ec5.tar.bz2 chroma-9b8a7902e01bea0f87154857163624b8da040ec5.zip