diff options
Diffstat (limited to 'gputhread.py')
-rw-r--r-- | gputhread.py | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/gputhread.py b/gputhread.py new file mode 100644 index 0000000..8258f63 --- /dev/null +++ b/gputhread.py @@ -0,0 +1,144 @@ +import numpy as np +import time +import pycuda.driver as cuda +from pycuda.compiler import SourceModule +from pycuda import gpuarray +import threading +import Queue +import src + +class Job(object): + def __init__(self, positions, directions, wavelengths, polarizations, + times, states, last_hit_triangles, max_steps=100): + if positions.dtype is not gpuarray.vec.float3: + if len(positions.shape) != 2 or positions.shape[-1] != 3: + raise Exception('shape mismatch') + + self.positions = np.empty(positions.shape[0], gpuarray.vec.float3) + self.positions['x'] = positions[:,0] + self.positions['y'] = positions[:,1] + self.positions['z'] = positions[:,2] + else: + self.positions = positions + + if directions.dtype is not gpuarray.vec.float3: + if len(directions.shape) != 2 or directions.shape[-1] != 3: + raise Exception('shape mismatch') + + self.directions = \ + np.empty(directions.shape[0], gpuarray.vec.float3) + self.directions['x'] = directions[:,0] + self.directions['y'] = directions[:,1] + self.directions['z'] = directions[:,2] + else: + self.directions = directions + + if polarizations.dtype is not gpuarray.vec.float3: + if len(polarizations.shape) != 2 or polarizations.shape[-1] != 3: + raise Exception('shape mismatch') + + self.polarizations = \ + np.empty(polarizations.shape[0], gpuarray.vec.float3) + self.polarizations['x'] = polarizations[:,0] + self.polarizations['y'] = polarizations[:,1] + self.polarizations['z'] = polarizations[:,2] + else: + self.polarizations = polarizations + + self.wavelengths = np.asarray(wavelengths, dtype=np.float32) + self.times = np.asarray(times, dtype=np.float32) + self.states = np.asarray(states, dtype=np.int32) + self.last_hit_triangles = \ + np.asarray(last_hit_triangles, dtype=np.int32) + + self.max_steps = max_steps + +class GPUThread(threading.Thread): + def __init__(self, device_id, geometry, jobs, output, nblocks=64): + threading.Thread.__init__(self) + + self.device_id = device_id + self.geometry = geometry + self.jobs = jobs + self.output = output + self.nblocks = nblocks + self._stop = threading.Event() + + def stop(self): + self._stop.set() + + def stopped(self): + return self._stop.is_set() + + def run(self): + device = cuda.Device(self.device_id) + context = device.make_context() + module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) + propagate = module.get_function('propagate') + init_rng = module.get_function('init_rng') + texrefs = self.geometry.load(module) + + + daq_module = SourceModule(src.daq, options=['-I' + src.dir], + no_extern_c=True, cache_dir=False) + init_daq_rng = daq_module.get_function('init_daq_rng') + reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int') + run_daq = daq_module.get_function('run_daq') + convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float') + + earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32) + earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32) + + solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32)) + + init_rng(np.int32(100000), np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1)) + init_daq_rng(np.int32(100000), np.int32(10000+self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1)) + + while not self.stopped(): + try: + job = self.jobs.get(block=False, timeout=0.01) + except Queue.Empty: + continue + + positions_gpu = cuda.to_device(job.positions) + directions_gpu = cuda.to_device(job.directions) + polarizations_gpu = cuda.to_device(job.polarizations) + wavelengths_gpu = cuda.to_device(job.wavelengths) + times_gpu = cuda.to_device(job.times) + states_gpu = cuda.to_device(job.states) + last_hit_triangles_gpu = cuda.to_device(job.last_hit_triangles) + + nphotons = len(job.positions) + + t0 = time.time() + propagate(np.int32(nphotons), positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, states_gpu, last_hit_triangles_gpu, np.int32(self.geometry.node_map.size-1), np.int32(self.geometry.first_node), np.int32(job.max_steps), block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1)) + + reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, + block=(self.nblocks,1,1), + grid=(len(earliest_time_int_gpu)//self.nblocks+1,1)) + run_daq(np.int32(2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, states_gpu, + last_hit_triangles_gpu, solid_map_gpu, + np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, + block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1)) + convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, + earliest_time_gpu, + block=(self.nblocks,1,1), + grid=(len(earliest_time_int_gpu)//self.nblocks+1,1)) + + cuda.Context.synchronize() + elapsed = time.time() - t0 + + #print 'device %i; elapsed %f sec' % (self.device_id, elapsed) + cuda.memcpy_dtoh(job.positions, positions_gpu) + cuda.memcpy_dtoh(job.directions, directions_gpu) + cuda.memcpy_dtoh(job.wavelengths, wavelengths_gpu) + cuda.memcpy_dtoh(job.polarizations, polarizations_gpu) + cuda.memcpy_dtoh(job.times, times_gpu) + cuda.memcpy_dtoh(job.states, states_gpu) + cuda.memcpy_dtoh(job.last_hit_triangles, last_hit_triangles_gpu) + job.earliest_times = earliest_time_gpu.get() + + self.output.put(job) + self.jobs.task_done() + + context.pop() |