summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gputhread.py35
-rw-r--r--src/__init__.py1
-rw-r--r--src/daq.cu80
-rw-r--r--threadtest.py42
4 files changed, 117 insertions, 41 deletions
diff --git a/gputhread.py b/gputhread.py
index 6271607..8258f63 100644
--- a/gputhread.py
+++ b/gputhread.py
@@ -77,12 +77,26 @@ class GPUThread(threading.Thread):
propagate = module.get_function('propagate')
init_rng = module.get_function('init_rng')
texrefs = self.geometry.load(module)
+
- init_rng(np.int32(100000), np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
+ daq_module = SourceModule(src.daq, options=['-I' + src.dir],
+ no_extern_c=True, cache_dir=False)
+ init_daq_rng = daq_module.get_function('init_daq_rng')
+ reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int')
+ run_daq = daq_module.get_function('run_daq')
+ convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float')
+
+ earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32)
+ earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32)
+
+ solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32))
+
+ init_rng(np.int32(100000), np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
+ init_daq_rng(np.int32(100000), np.int32(10000+self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(100000//self.nblocks+1,1))
while not self.stopped():
try:
- job = self.jobs.get(block=False, timeout=0.5)
+ job = self.jobs.get(block=False, timeout=0.01)
except Queue.Empty:
continue
@@ -93,16 +107,28 @@ class GPUThread(threading.Thread):
times_gpu = cuda.to_device(job.times)
states_gpu = cuda.to_device(job.states)
last_hit_triangles_gpu = cuda.to_device(job.last_hit_triangles)
-
+
nphotons = len(job.positions)
t0 = time.time()
propagate(np.int32(nphotons), positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, states_gpu, last_hit_triangles_gpu, np.int32(self.geometry.node_map.size-1), np.int32(self.geometry.first_node), np.int32(job.max_steps), block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))
+
+ reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+ block=(self.nblocks,1,1),
+ grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
+ run_daq(np.int32(2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, states_gpu,
+ last_hit_triangles_gpu, solid_map_gpu,
+ np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+ block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))
+ convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu,
+ earliest_time_gpu,
+ block=(self.nblocks,1,1),
+ grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
+
cuda.Context.synchronize()
elapsed = time.time() - t0
#print 'device %i; elapsed %f sec' % (self.device_id, elapsed)
-
cuda.memcpy_dtoh(job.positions, positions_gpu)
cuda.memcpy_dtoh(job.directions, directions_gpu)
cuda.memcpy_dtoh(job.wavelengths, wavelengths_gpu)
@@ -110,6 +136,7 @@ class GPUThread(threading.Thread):
cuda.memcpy_dtoh(job.times, times_gpu)
cuda.memcpy_dtoh(job.states, states_gpu)
cuda.memcpy_dtoh(job.last_hit_triangles, last_hit_triangles_gpu)
+ job.earliest_times = earliest_time_gpu.get()
self.output.put(job)
self.jobs.task_done()
diff --git a/src/__init__.py b/src/__init__.py
index d2958f1..865d612 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -3,3 +3,4 @@ import os
dir = os.path.split(os.path.realpath(__file__))[0]
kernel = open(dir + '/kernel.cu').read()
+daq = open(dir + '/daq.cu').read()
diff --git a/src/daq.cu b/src/daq.cu
new file mode 100644
index 0000000..c79401c
--- /dev/null
+++ b/src/daq.cu
@@ -0,0 +1,80 @@
+// -*-c++-*-
+#include <curand_kernel.h>
+
+__device__ unsigned int float_to_sortable_int(float f)
+{
+ return __float_as_int(f);
+ //int i = __float_as_int(f);
+ //unsigned int mask = -(int)(i >> 31) | 0x80000000;
+ //return i ^ mask;
+}
+
+__device__ float sortable_int_to_float(unsigned int i)
+{
+ return __int_as_float(i);
+ //unsigned int mask = ((i >> 31) - 1) | 0x80000000;
+ //return __int_as_float(i ^ mask);
+}
+
+
+__device__ curandState daq_rng_states[100000];
+
+extern "C" {
+
+ __global__ void init_daq_rng(int nthreads,
+ unsigned long long seed, unsigned long long offset)
+ {
+ int id = blockIdx.x*blockDim.x + threadIdx.x;
+
+ if (id >= nthreads)
+ return;
+
+ curand_init(seed, id, offset, daq_rng_states+id);
+ }
+
+ __global__ void reset_earliest_time_int(float maxtime,
+ int ntime_ints, unsigned int *time_ints)
+ {
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+ if (id < ntime_ints) {
+ unsigned int maxtime_int = float_to_sortable_int(maxtime);
+ time_ints[id] = maxtime_int;
+ }
+ }
+
+ __global__ void run_daq(int detection_state, float time_rms,
+ int nphotons, float *photon_times, int *photon_states,
+ int *last_hit_triangles, int *solid_map,
+ int nsolids, unsigned int *earliest_time_int)
+ {
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+
+ curandState_t rng = daq_rng_states[id];
+
+ if (id < nphotons) {
+ int triangle_id = last_hit_triangles[id];
+
+ if (triangle_id > -1) {
+ int solid_id = solid_map[triangle_id];
+ int state = photon_states[id];
+ float time = photon_times[id];// + curand_normal(&rng) * time_rms;
+ unsigned int time_int = float_to_sortable_int(time);
+ if (solid_id < nsolids && state == detection_state)
+ atomicMin(earliest_time_int + solid_id, time_int);
+ }
+ }
+
+ daq_rng_states[id] = rng;
+ }
+
+ __global__ void convert_sortable_int_to_float(int n,
+ unsigned int *sortable_ints,
+ float *float_output)
+ {
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+
+ if (id < n)
+ float_output[id] = sortable_int_to_float(sortable_ints[id]);
+ }
+
+} // extern "C"
diff --git a/threadtest.py b/threadtest.py
index d7b9d7c..681b019 100644
--- a/threadtest.py
+++ b/threadtest.py
@@ -28,25 +28,9 @@ def generate_event(detector, position=(0,0,0), nphotons=5000):
job = output.get()
- last_hit_triangles = job.last_hit_triangles
- solids = detector.solid_id[last_hit_triangles]
- solids[last_hit_triangles == -1] = -1
- surface_absorbed = job.states == 2
-
- for i in np.unique(job.states):
- print 'state %2i %i' % (i, len(job.states[job.states == i]))
-
- event_times = []
- for i in detector.pmtids:
- photons = np.logical_and(solids == i, surface_absorbed)
-
- hit_times = job.times[photons]
-
- if len(hit_times) > 0:
- event_times.append((i, np.min(hit_times)))
-
+ pmt_times = job.earliest_times[detector.pmtids]
+ event_times = [ (i, t) for i, t in zip(detector.pmtids, pmt_times) if t < 1e8 ]
print '%i hit pmts' % len(event_times)
-
return event_times
def likelihood(detector, event_times, position=(0,0,0), nphotons=5000, neval=100):
@@ -54,31 +38,15 @@ def likelihood(detector, event_times, position=(0,0,0), nphotons=5000, neval=100
jobs.put(create_job(position, nphotons))
jobs.join()
- t = {}
+ t = np.zeros(shape=(neval, max(detector.pmtids)+1), dtype=np.float32)
for i in range(neval):
job = output.get()
-
- last_hit_triangles = job.last_hit_triangles
- solids = detector.solid_id[job.last_hit_triangles]
- solids[last_hit_triangles == -1] = -1
- surface_absorbed = job.states == 2
-
- for j in detector.pmtids:
- pmt_photons = solids == j
- photons = np.logical_and(pmt_photons, surface_absorbed)
-
- if j not in t:
- t[j] = []
-
- hit_times = job.times[photons]
-
- if len(hit_times) > 0:
- t[j].append(np.min(hit_times))
+ t[i] = job.earliest_times
log_likelihood = ufloat((0,0))
for i, time in event_times:
h = Histogram(100, (-0.5e-9, 99.5e-9))
- h.fill(t[i])
+ h.fill(t[:,i])
if h.nentries > 0:
h.normalize()