import threading

import numpy as np
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

import layout

try:
    from queue import Empty  # Python 3 module name
except ImportError:
    from Queue import Empty  # Python 2 module name


class Job(object):
    """A batch of rays to propagate: one origins array, one directions array."""

    def __init__(self, origins, directions):
        self.origins, self.directions = origins, directions


class GPUThread(threading.Thread):
    """Worker thread that owns one CUDA context and runs `propagate` jobs.

    Jobs are pulled from `jobs`; for each job a per-solid hit histogram is
    pushed onto `output`.  Call `stop()` to ask the thread to exit; it notices
    the request within the 2 second queue-poll timeout.
    """

    # Threads per block used when launching the kernel.
    BLOCK_SIZE = 64

    def __init__(self, device_id, geometry, jobs, output):
        """Store the device ordinal, geometry and the input/output queues.

        `geometry` must provide `load(module)`, `node_map`, `first_node`,
        `solids` and `solid_index`, all of which are read in `run()`.
        """
        threading.Thread.__init__(self)

        self.device_id = device_id
        self.geometry = geometry
        self.jobs = jobs
        self.output = output
        # Deliberately NOT named `_stop`: on Python 3 `threading.Thread` has a
        # private `_stop()` method that `join()` calls, and shadowing it with
        # an Event raises "TypeError: 'Event' object is not callable".
        self._stop_event = threading.Event()

    def stop(self):
        """Request that the worker loop exit after the current job/poll."""
        self._stop_event.set()

    def stopped(self):
        """Return True once `stop()` has been called."""
        return self._stop_event.is_set()

    def run(self):
        """Create the CUDA context, compile the kernel and serve jobs.

        The context is always popped on exit, including when compilation,
        geometry loading or a kernel launch raises.
        """
        device = cuda.Device(self.device_id)
        context = device.make_context()
        try:
            with open(layout.source + '/kernel.cu') as kernel_file:
                source = kernel_file.read()

            module = SourceModule(source, options=['-I' + layout.source],
                                  no_extern_c=True, cache_dir=False)
            propagate = module.get_function('propagate')
            texrefs = self.geometry.load(module)

            while not self.stopped():
                try:
                    # Time out periodically so a stop() request is noticed
                    # even when the queue stays empty.
                    job = self.jobs.get(timeout=2)
                except Empty:
                    continue

                self.output.put(self._process(job, propagate, texrefs))
                self.jobs.task_done()
        finally:
            context.pop()

    def _process(self, job, propagate, texrefs):
        """Run `propagate` on one job and return its per-solid hit counts."""
        # NOTE(review): `origins.size` is used as the ray count, which assumes
        # one array element per ray (e.g. a float3 record dtype) -- confirm.
        nrays = job.origins.size

        origins_gpu = cuda.to_device(job.origins)
        directions_gpu = cuda.to_device(job.directions)

        dest = np.empty(nrays, dtype=np.int32)
        dest_gpu = cuda.to_device(dest)

        propagate(np.int32(nrays), origins_gpu, directions_gpu,
                  np.int32(self.geometry.node_map.size - 1),
                  np.int32(self.geometry.first_node), dest_gpu,
                  block=(self.BLOCK_SIZE, 1, 1),
                  grid=(nrays // self.BLOCK_SIZE + 1, 1),
                  texrefs=texrefs)
        cuda.Context.synchronize()

        cuda.memcpy_dtoh(dest, dest_gpu)

        # Entries equal to -1 are dropped; presumably they mark rays that hit
        # no triangle -- verify against kernel.cu.
        triangles = dest[dest != -1]

        # np.bincount only extends to the largest index seen, so pad the
        # result with zeros out to one slot per solid.
        bincount = np.zeros(len(self.geometry.solids))
        gpu_bincount = np.bincount(self.geometry.solid_index[triangles])
        bincount[:gpu_bincount.size] = gpu_bincount
        return bincount