summary refs log tree commit diff
path: root/gputhread.py
diff options
context:
space:
mode:
Diffstat (limited to 'gputhread.py')
-rw-r--r-- gputhread.py 35
1 files changed, 31 insertions, 4 deletions
diff --git a/gputhread.py b/gputhread.py
index 6271607..8258f63 100644
--- a/gputhread.py
+++ b/gputhread.py
@@ -77,12 +77,26 @@ class GPUThread(threading.Thread):
propagate = module.get_function('propagate')
init_rng = module.get_function('init_rng')
texrefs = self.geometry.load(module)
+
- init_rng(np.int32(100000), np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
+ daq_module = SourceModule(src.daq, options=['-I' + src.dir],
+ no_extern_c=True, cache_dir=False)
+ init_daq_rng = daq_module.get_function('init_daq_rng')
+ reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int')
+ run_daq = daq_module.get_function('run_daq')
+ convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float')
+
+ earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32)
+ earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32)
+
+ solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32))
+
+ init_rng(np.int32(100000), np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
+ init_daq_rng(np.int32(100000), np.int32(10000+self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
while not self.stopped():
try:
- job = self.jobs.get(block=False, timeout=0.5)
+ job = self.jobs.get(block=False, timeout=0.01)
except Queue.Empty:
continue
@@ -93,16 +107,28 @@ class GPUThread(threading.Thread):
times_gpu = cuda.to_device(job.times)
states_gpu = cuda.to_device(job.states)
last_hit_triangles_gpu = cuda.to_device(job.last_hit_triangles)
-
+
nphotons = len(job.positions)
t0 = time.time()
propagate(np.int32(nphotons), positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, states_gpu, last_hit_triangles_gpu, np.int32(self.geometry.node_map.size-1), np.int32(self.geometry.first_node), np.int32(job.max_steps), block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))
+
+ reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+ block=(self.nblocks,1,1),
+ grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
+ run_daq(np.int32(2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, states_gpu,
+ last_hit_triangles_gpu, solid_map_gpu,
+ np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+ block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))
+ convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+ earliest_time_gpu,
+ block=(self.nblocks,1,1),
+ grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
+
cuda.Context.synchronize()
elapsed = time.time() - t0
#print 'device %i; elapsed %f sec' % (self.device_id, elapsed)
-
cuda.memcpy_dtoh(job.positions, positions_gpu)
cuda.memcpy_dtoh(job.directions, directions_gpu)
cuda.memcpy_dtoh(job.wavelengths, wavelengths_gpu)
@@ -110,6 +136,7 @@ class GPUThread(threading.Thread):
cuda.memcpy_dtoh(job.times, times_gpu)
cuda.memcpy_dtoh(job.states, states_gpu)
cuda.memcpy_dtoh(job.last_hit_triangles, last_hit_triangles_gpu)
+ job.earliest_times = earliest_time_gpu.get()
self.output.put(job)
self.jobs.task_done()