diff options
-rw-r--r-- | gputhread.py | 35 | ||||
-rw-r--r-- | src/__init__.py | 1 | ||||
-rw-r--r-- | src/daq.cu | 80 | ||||
-rw-r--r-- | threadtest.py | 42 |
4 files changed, 117 insertions, 41 deletions
diff --git a/gputhread.py b/gputhread.py index 6271607..8258f63 100644 --- a/gputhread.py +++ b/gputhread.py @@ -77,12 +77,26 @@ class GPUThread(threading.Thread): propagate = module.get_function('propagate') init_rng = module.get_function('init_rng') texrefs = self.geometry.load(module) + - init_rng(np.int32(100000), np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1)) + daq_module = SourceModule(src.daq, options=['-I' + src.dir], + no_extern_c=True, cache_dir=False) + init_daq_rng = daq_module.get_function('init_daq_rng') + reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int') + run_daq = daq_module.get_function('run_daq') + convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float') + + earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32) + earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32) + + solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32)) + + init_rng(np.int32(100000), np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1)) + init_daq_rng(np.int32(100000), np.int32(10000+self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1)) while not self.stopped(): try: - job = self.jobs.get(block=False, timeout=0.5) + job = self.jobs.get(block=False, timeout=0.01) except Queue.Empty: continue @@ -93,16 +107,28 @@ class GPUThread(threading.Thread): times_gpu = cuda.to_device(job.times) states_gpu = cuda.to_device(job.states) last_hit_triangles_gpu = cuda.to_device(job.last_hit_triangles) - + nphotons = len(job.positions) t0 = time.time() propagate(np.int32(nphotons), positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, states_gpu, last_hit_triangles_gpu, np.int32(self.geometry.node_map.size-1), np.int32(self.geometry.first_node), np.int32(job.max_steps), block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1)) + + reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, + block=(self.nblocks,1,1), + grid=(len(earliest_time_int_gpu)//self.nblocks+1,1)) + run_daq(np.int32(2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, states_gpu, + last_hit_triangles_gpu, solid_map_gpu, + np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, + block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1)) + convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, + earliest_time_gpu, + block=(self.nblocks,1,1), + grid=(len(earliest_time_int_gpu)//self.nblocks+1,1)) + cuda.Context.synchronize() elapsed = time.time() - t0 #print 'device %i; elapsed %f sec' % (self.device_id, elapsed) - cuda.memcpy_dtoh(job.positions, positions_gpu) cuda.memcpy_dtoh(job.directions, directions_gpu) cuda.memcpy_dtoh(job.wavelengths, wavelengths_gpu) @@ -110,6 +136,7 @@ class GPUThread(threading.Thread): cuda.memcpy_dtoh(job.times, times_gpu) cuda.memcpy_dtoh(job.states, states_gpu) cuda.memcpy_dtoh(job.last_hit_triangles, last_hit_triangles_gpu) + job.earliest_times = earliest_time_gpu.get() self.output.put(job) self.jobs.task_done() diff --git a/src/__init__.py b/src/__init__.py index d2958f1..865d612 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -3,3 +3,4 @@ import os dir = os.path.split(os.path.realpath(__file__))[0] kernel = open(dir + '/kernel.cu').read() +daq = open(dir + '/daq.cu').read() diff --git a/src/daq.cu b/src/daq.cu new file mode 100644 index 0000000..c79401c --- /dev/null +++ b/src/daq.cu @@ -0,0 +1,80 @@ +// -*-c++-*- +#include <curand_kernel.h> + +__device__ unsigned int float_to_sortable_int(float f) +{ + return __float_as_int(f); + //int i = __float_as_int(f); + //unsigned int mask = -(int)(i >> 31) | 0x80000000; + //return i ^ mask; +} + +__device__ float sortable_int_to_float(unsigned int i) +{ + return __int_as_float(i); + //unsigned int mask = ((i >> 31) - 1) | 0x80000000; + //return __int_as_float(i ^ mask); +} + + +__device__ curandState daq_rng_states[100000]; + +extern "C" { + + __global__ void init_daq_rng(int nthreads, + unsigned long long seed, unsigned long long offset) + { + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curand_init(seed, id, offset, daq_rng_states+id); + } + + __global__ void reset_earliest_time_int(float maxtime, + int ntime_ints, unsigned int *time_ints) + { + int id = threadIdx.x + blockDim.x * blockIdx.x; + if (id < ntime_ints) { + unsigned int maxtime_int = float_to_sortable_int(maxtime); + time_ints[id] = maxtime_int; + } + } + + __global__ void run_daq(int detection_state, float time_rms, + int nphotons, float *photon_times, int *photon_states, + int *last_hit_triangles, int *solid_map, + int nsolids, unsigned int *earliest_time_int) + { + int id = threadIdx.x + blockDim.x * blockIdx.x; + + curandState_t rng = daq_rng_states[id]; + + if (id < nphotons) { + int triangle_id = last_hit_triangles[id]; + + if (triangle_id > -1) { + int solid_id = solid_map[triangle_id]; + int state = photon_states[id]; + float time = photon_times[id];// + curand_normal(&rng) * time_rms; + unsigned int time_int = float_to_sortable_int(time); + if (solid_id < nsolids && state == detection_state) + atomicMin(earliest_time_int + solid_id, time_int); + } + } + + daq_rng_states[id] = rng; + } + + __global__ void convert_sortable_int_to_float(int n, + unsigned int *sortable_ints, + float *float_output) + { + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id < n) + float_output[id] = sortable_int_to_float(sortable_ints[id]); + } + +} // extern "C" diff --git a/threadtest.py b/threadtest.py index d7b9d7c..681b019 100644 --- a/threadtest.py +++ b/threadtest.py @@ -28,25 +28,9 @@ def generate_event(detector, position=(0,0,0), nphotons=5000): job = output.get() - last_hit_triangles = job.last_hit_triangles - solids = detector.solid_id[last_hit_triangles] - solids[last_hit_triangles == -1] = -1 - surface_absorbed = job.states == 2 - - for i in np.unique(job.states): - print 'state %2i %i' % (i, len(job.states[job.states == i])) - - event_times = [] - for i in detector.pmtids: - photons = np.logical_and(solids == i, surface_absorbed) - - hit_times = job.times[photons] - - if len(hit_times) > 0: - event_times.append((i, np.min(hit_times))) - + pmt_times = job.earliest_times[detector.pmtids] + event_times = [ (i, t) for i, t in zip(detector.pmtids, pmt_times) if t < 1e8 ] print '%i hit pmts' % len(event_times) - return event_times def likelihood(detector, event_times, position=(0,0,0), nphotons=5000, neval=100): @@ -54,31 +38,15 @@ def likelihood(detector, event_times, position=(0,0,0), nphotons=5000, neval=100 jobs.put(create_job(position, nphotons)) jobs.join() - t = {} + t = np.zeros(shape=(neval, max(detector.pmtids)+1), dtype=np.float32) for i in range(neval): job = output.get() - - last_hit_triangles = job.last_hit_triangles - solids = detector.solid_id[job.last_hit_triangles] - solids[last_hit_triangles == -1] = -1 - surface_absorbed = job.states == 2 - - for j in detector.pmtids: - pmt_photons = solids == j - photons = np.logical_and(pmt_photons, surface_absorbed) - - if j not in t: - t[j] = [] - - hit_times = job.times[photons] - - if len(hit_times) > 0: - t[j].append(np.min(hit_times)) + t[i] = job.earliest_times log_likelihood = ufloat((0,0)) for i, time in event_times: h = Histogram(100, (-0.5e-9, 99.5e-9)) - h.fill(t[i]) + h.fill(t[:,i]) if h.nentries > 0: h.normalize() |