diff options
Diffstat (limited to 'gputhread.py')
-rw-r--r-- | gputhread.py | 95 |
1 file changed, 0 insertions, 95 deletions
import numpy as np
from copy import copy
import time
import pycuda.driver as cuda
from pycuda.characterize import sizeof
from pycuda.compiler import SourceModule
from pycuda import gpuarray
import threading
import Queue
import src

class GPUThread(threading.Thread):
    """Worker thread that owns one CUDA device and services photon-propagation jobs.

    Jobs are pulled from the `jobs` queue as `(position, nphotons)` tuples;
    for each job the thread generates `nphotons` photons at `position` with
    isotropic directions/polarizations and uniform wavelengths in [200, 800),
    propagates them through `geometry`, runs the DAQ kernels, and puts the
    resulting per-PMT earliest-hit-time array (host numpy array) on `output`.
    """

    def __init__(self, device_id, geometry, jobs, output, nblocks=64, max_nthreads=100000):
        """
        device_id     -- CUDA device ordinal this thread binds to.
        geometry      -- detector geometry; copied so each thread can load it
                         onto its own device independently.
        jobs          -- Queue of (position, nphotons) work items.
        output        -- Queue receiving earliest-time numpy arrays.
        nblocks       -- CUDA threads per block for all kernel launches.
        max_nthreads  -- upper bound on photons per job; sizes the RNG state pool.
        """
        threading.Thread.__init__(self)

        self.device_id = device_id
        self.geometry = copy(geometry)
        self.jobs = jobs
        self.output = output
        self.nblocks = nblocks
        self.max_nthreads = max_nthreads
        # NOTE(review): the name `_stop` collides with a Thread method in
        # Python 3; fine under Python 2 (which this file targets), but rename
        # to `_stop_event` if this code is ever ported.
        self._stop = threading.Event()

    def stop(self):
        """Request the worker loop to exit after its current job."""
        self._stop.set()

    def stopped(self):
        """Return True once stop() has been called."""
        return self._stop.is_set()

    def run(self):
        """Bind the device, compile kernels, then loop servicing jobs until stopped."""
        device = cuda.Device(self.device_id)
        context = device.make_context()
        # FIX: pop the context in a finally so an exception anywhere below
        # does not leak the CUDA context on this device.
        try:
            module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)

            propagate = module.get_function('propagate')
            fill_uniform = module.get_function('fill_uniform')
            fill_uniform_sphere = module.get_function('fill_uniform_sphere')

            # One curand state per potential photon thread; seeded with the
            # device id so different GPUs draw different streams.
            init_rng = module.get_function('init_rng')
            rng_states_gpu = cuda.mem_alloc(self.max_nthreads*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
            init_rng(np.int32(self.max_nthreads), rng_states_gpu, np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(self.max_nthreads//self.nblocks+1,1))

            # Upload geometry data to this device (semantics live in src/geometry).
            self.geometry.load(module)

            daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
            reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int')
            run_daq = daq_module.get_function('run_daq')
            convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float')

            # Earliest hit time per PMT channel; the int buffer holds a
            # sortable-integer encoding used for atomic min in the DAQ kernel.
            earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32)
            earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32)

            solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32))

            while not self.stopped():
                try:
                    # FIX: the original used block=False with a timeout, but
                    # Queue.get ignores `timeout` when block=False, turning this
                    # loop into a busy-wait. Blocking with the 0.1 s timeout
                    # keeps the thread responsive to stop() without spinning.
                    position, nphotons = self.jobs.get(block=True, timeout=0.1)
                except Queue.Empty:
                    continue

                t0 = time.time()

                gpu_kwargs = {'block': (self.nblocks,1,1), 'grid': (nphotons//self.nblocks+1,1)}

                # Initial photon state: point source, isotropic directions and
                # polarizations, flat wavelength spectrum on [200, 800) nm.
                positions_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3)
                positions_gpu.fill(position)
                directions_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3)
                fill_uniform_sphere(np.int32(nphotons), rng_states_gpu, directions_gpu, **gpu_kwargs)
                wavelengths_gpu = gpuarray.empty(nphotons, dtype=np.float32)
                fill_uniform(np.int32(nphotons), rng_states_gpu, wavelengths_gpu, np.float32(200), np.float32(800), **gpu_kwargs)
                polarizations_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3)
                fill_uniform_sphere(np.int32(nphotons), rng_states_gpu, polarizations_gpu, **gpu_kwargs)
                times_gpu = gpuarray.zeros(nphotons, dtype=np.float32)
                histories_gpu = gpuarray.zeros(nphotons, dtype=np.uint32)
                last_hit_triangles_gpu = gpuarray.empty(nphotons, dtype=np.int32)
                last_hit_triangles_gpu.fill(-1)  # -1 == no triangle hit yet

                max_steps = 10

                propagate(np.int32(nphotons), rng_states_gpu, positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, histories_gpu, last_hit_triangles_gpu, np.int32(max_steps), **gpu_kwargs)

                # Reset channel times to a sentinel (1e9 ns), record hits, then
                # decode the sortable-int times back to floats.
                reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, block=(self.nblocks,1,1), grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))

                run_daq(rng_states_gpu, np.uint32(0x1 << 2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, histories_gpu, last_hit_triangles_gpu, solid_map_gpu, np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))

                convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, earliest_time_gpu, block=(self.nblocks,1,1), grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
                cuda.Context.synchronize()
                elapsed = time.time() - t0

                #print 'device %i; elapsed %f sec' % (self.device_id, elapsed)

                self.output.put(earliest_time_gpu.get())
                self.jobs.task_done()
        finally:
            context.pop()