summary refs log tree commit diff
path: root/gputhread.py
diff options
context:
space:
mode:
Diffstat (limited to 'gputhread.py')
-rw-r--r-- gputhread.py 63
1 files changed, 63 insertions, 0 deletions
diff --git a/gputhread.py b/gputhread.py
new file mode 100644
index 0000000..fe05e4f
--- /dev/null
+++ b/gputhread.py
@@ -0,0 +1,63 @@
+import numpy as np
+import pycuda.driver as cuda
+from pycuda.compiler import SourceModule
+import threading
+import layout
+from Queue import Empty
+
+class Job(object):
+    """One unit of work for a GPUThread.
+
+    Both arguments are stored unchanged as the attributes ``origins`` and
+    ``directions``.
+    """
+
+    def __init__(self, origins, directions):
+        self.origins = origins
+        self.directions = directions
+
+class GPUThread(threading.Thread):
+    """Worker thread that runs the 'propagate' CUDA kernel on one device.
+
+    Jobs are taken from the ``jobs`` queue; for every job that completes,
+    one array of per-solid counts is put on the ``output`` queue.
+    """
+
+    def __init__(self, device_id, geometry, jobs, output):
+        """Store the worker's configuration; no CUDA call is made here.
+
+        device_id -- index passed to ``cuda.Device`` when the thread runs
+        geometry  -- object providing ``load(module)``, ``node_map``,
+                     ``first_node``, ``solid_index`` and ``solids``
+        jobs      -- queue of Job objects (``get``/``task_done`` are used)
+        output    -- queue receiving one count array per finished job
+        """
+        threading.Thread.__init__(self)
+
+        self.device_id = device_id
+        self.geometry = geometry
+        self.jobs = jobs
+        self.output = output
+        # Not named ``_stop``: on Python 3 (before 3.13) threading.Thread
+        # has a private ``_stop`` method, and shadowing it with an Event
+        # makes ``join()`` fail.
+        self._stop_event = threading.Event()
+
+    def stop(self):
+        """Ask the run loop to exit after its current iteration."""
+        self._stop_event.set()
+
+    def stopped(self):
+        """Return True once ``stop()`` has been called."""
+        return self._stop_event.is_set()
+
+    def run(self):
+        """Create a CUDA context, build the kernel and serve jobs until stopped.
+
+        The context is popped on every exit path, including when kernel
+        compilation or a job raises.
+        """
+        device = cuda.Device(self.device_id)
+        context = device.make_context()
+        try:
+            # Close the kernel source file as soon as it has been read.
+            with open(layout.source + '/kernel.cu') as source_file:
+                source = source_file.read()
+            module = SourceModule(source, options=['-I' + layout.source],
+                                  no_extern_c=True, cache_dir=False)
+            propagate = module.get_function('propagate')
+            texrefs = self.geometry.load(module)
+
+            while not self.stopped():
+                try:
+                    # The timeout lets the loop re-check the stop flag
+                    # while the queue is idle.
+                    job = self.jobs.get(timeout=2)
+                except Empty:
+                    continue
+
+                try:
+                    self.output.put(self._process(job, propagate, texrefs))
+                finally:
+                    # Always balance the get() above so that a failing job
+                    # cannot leave a caller of jobs.join() blocked forever.
+                    self.jobs.task_done()
+        finally:
+            context.pop()
+
+    def _process(self, job, propagate, texrefs):
+        """Run the kernel for one job and return its per-solid counts."""
+        count = job.origins.size
+        origins_gpu = cuda.to_device(job.origins)
+        directions_gpu = cuda.to_device(job.directions)
+
+        dest = np.empty(count, dtype=np.int32)
+        dest_gpu = cuda.to_device(dest)
+
+        # 64 threads per block; ``count // 64 + 1`` blocks give at least
+        # ``count`` threads in total.
+        propagate(np.int32(count), origins_gpu, directions_gpu,
+                  np.int32(self.geometry.node_map.size - 1),
+                  np.int32(self.geometry.first_node), dest_gpu,
+                  block=(64, 1, 1), grid=(count // 64 + 1, 1),
+                  texrefs=texrefs)
+        cuda.Context.synchronize()
+
+        cuda.memcpy_dtoh(dest, dest_gpu)
+
+        # Entries equal to -1 are dropped -- presumably the kernel's marker
+        # for "no triangle hit"; confirm against kernel.cu.
+        triangles = dest[dest != -1]
+
+        # np.bincount only extends to the largest index present, so copy
+        # it into an array that has one slot per solid.
+        bincount = np.zeros(len(self.geometry.solids))
+        gpu_bincount = np.bincount(self.geometry.solid_index[triangles])
+        bincount[:gpu_bincount.size] = gpu_bincount
+        return bincount