summaryrefslogtreecommitdiff
path: root/gputhread.py
blob: 63124e9deee5bec36bcf3efdc42d59d14f146211 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
from copy import copy
import time
import pycuda.driver as cuda
from pycuda.characterize import sizeof
from pycuda.compiler import SourceModule
from pycuda import gpuarray
import threading
import Queue
import src

class GPUThread(threading.Thread):
    def __init__(self, device_id, geometry, jobs, output, nblocks=64, max_nthreads=100000):
        threading.Thread.__init__(self)

        self.device_id = device_id
        self.geometry = copy(geometry)
        self.jobs = jobs
        self.output = output
        self.nblocks = nblocks
        self.max_nthreads = max_nthreads
        self._stop = threading.Event()

    def stop(self):
        self._stop.set()

    def stopped(self):
        return self._stop.is_set()

    def run(self):
        device = cuda.Device(self.device_id)
        context = device.make_context()
        module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)

        propagate = module.get_function('propagate')
        fill_uniform = module.get_function('fill_uniform')
        fill_uniform_sphere = module.get_function('fill_uniform_sphere')

        init_rng = module.get_function('init_rng')
        rng_states_gpu = cuda.mem_alloc(self.max_nthreads*sizeof('curandStateXORWOW', '#include <curand_kernel.h>'))
        init_rng(np.int32(self.max_nthreads), rng_states_gpu, np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(self.max_nthreads//self.nblocks+1,1))

        self.geometry.load(module)

        daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False)
        reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int')
        run_daq = daq_module.get_function('run_daq')
        convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float')

        earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32)
        earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32)

        solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32))
        
        while not self.stopped():
            try:
                position, nphotons = self.jobs.get(block=False, timeout=0.1)
            except Queue.Empty:
                continue

            t0 = time.time()

            gpu_kwargs = {'block': (self.nblocks,1,1), 'grid': (nphotons//self.nblocks+1,1)}

            positions_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3)
            positions_gpu.fill(position)
            directions_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3)
            fill_uniform_sphere(np.int32(nphotons), rng_states_gpu, directions_gpu, **gpu_kwargs)
            wavelengths_gpu = gpuarray.empty(nphotons, dtype=np.float32)
            fill_uniform(np.int32(nphotons), rng_states_gpu, wavelengths_gpu, np.float32(200), np.float32(800), **gpu_kwargs)
            polarizations_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3)
            fill_uniform_sphere(np.int32(nphotons), rng_states_gpu, polarizations_gpu, **gpu_kwargs)
            times_gpu = gpuarray.zeros(nphotons, dtype=np.float32)
            histories_gpu = gpuarray.zeros(nphotons, dtype=np.uint32)
            last_hit_triangles_gpu = gpuarray.empty(nphotons, dtype=np.int32)
            last_hit_triangles_gpu.fill(-1)

            max_steps = 10

            propagate(np.int32(nphotons), rng_states_gpu, positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, histories_gpu, last_hit_triangles_gpu, np.int32(max_steps), **gpu_kwargs)

            reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, block=(self.nblocks,1,1), grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))

            run_daq(rng_states_gpu, np.uint32(0x1 << 2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, histories_gpu, last_hit_triangles_gpu, solid_map_gpu, np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1))

            convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, earliest_time_gpu, block=(self.nblocks,1,1), grid=(len(earliest_time_int_gpu)//self.nblocks+1,1))
            cuda.Context.synchronize()
            elapsed = time.time() - t0
            
            #print 'device %i; elapsed %f sec' % (self.device_id, elapsed)

            self.output.put(earliest_time_gpu.get())
            self.jobs.task_done()

        context.pop()
"p">; j < 3; j++) { if (!isclose(a[j], b[j], 1e-9, 0)) { sprintf(err, "a[%i] = %.5f, but expected %.5f", j, a[j], b[j]); return 1; } } } return 0; } int test_div(char *err) { int i, j; double a[3], b[3], c; init_genrand(0); for (i = 0; i < 100; i++) { b[0] = genrand_real2(); b[1] = genrand_real2(); b[2] = genrand_real2(); c = genrand_real2(); COPY(a,b); DIV(a,c); for (j = 0; j < 3; j++) { if (!isclose(a[j], b[j]/c, 1e-9, 0)) { sprintf(err, "a[%i] = %.5f, but expected %.5f", j, a[j], b[j]/c); return 1; } } } return 0; } int test_add(char *err) { int i; double a[3], b[3], c[3]; init_genrand(0); for (i = 0; i < 100; i++) { a[0] = genrand_real2(); a[1] = genrand_real2(); a[2] = genrand_real2(); b[0] = genrand_real2(); b[1] = genrand_real2(); b[2] = genrand_real2(); ADD(c,a,b); if (!isclose(c[0], a[0]+b[0], 1e-9, 0)) { sprintf(err, "a[0] + b[0] = %.5f, but expected %.5f", c[0], a[0]+b[0]); return 1; } if (!isclose(c[1], a[1]+b[1], 1e-9, 0)) { sprintf(err, "a[1] + b[1] = %.5f, but expected %.5f", c[1], a[1]+b[1]); return 1; } if (!isclose(c[2], a[2]+b[2], 1e-9, 0)) { sprintf(err, "a[2] + b[2] = %.5f, but expected %.5f", c[2], a[2]+b[2]); return 1; } } return 0; } int test_sub(char *err) { int i; double a[3], b[3], c[3]; init_genrand(0); for (i = 0; i < 100; i++) { a[0] = genrand_real2(); a[1] = genrand_real2(); a[2] = genrand_real2(); b[0] = genrand_real2(); b[1] = genrand_real2(); b[2] = genrand_real2(); SUB(c,a,b); if (!isclose(c[0], a[0]-b[0], 1e-9, 0)) { sprintf(err, "a[0] - b[0] = %.5f, but expected %.5f", c[0], a[0]-b[0]); return 1; } if (!isclose(c[1], a[1]-b[1], 1e-9, 0)) { sprintf(err, "a[1] - b[1] = %.5f, but expected %.5f", c[1], a[1]-b[1]); return 1; } if (!isclose(c[2], a[2]-b[2], 1e-9, 0)) { sprintf(err, "a[2] - b[2] = %.5f, but expected %.5f", c[2], a[2]-b[2]); return 1; } } return 0; } int test_normalize(char *err) { int i; double a[3], c; init_genrand(0); for (i = 0; i < 100; i++) { a[0] = genrand_real2(); a[1] = genrand_real2(); a[2] = genrand_real2(); normalize(a); c = NORM(a); if (!isclose(c, 1.0, 1e-9, 0)) { sprintf(err, "norm(a) = %.5f, but expected %.5f", c, 1.0); return 1; } } return 0; } int test_cross_product(char *err) { int i, j; double a[3], b[3], c[3], expected[3]; init_genrand(0); for (i = 0; i < 100; i++) { a[0] = genrand_real2(); a[1] = genrand_real2(); a[2] = genrand_real2(); b[0] = genrand_real2(); b[1] = genrand_real2(); b[2] = genrand_real2(); CROSS(c,a,b); expected[0] = a[1]*b[2] - a[2]*b[1]; expected[1] = a[2]*b[0] - a[0]*b[2]; expected[2] = a[0]*b[1] - a[1]*b[0]; for (j = 0; j < 3; j++) { if (!isclose(c[j], expected[j], 1e-9, 0)) { sprintf(err, "(a x b)[%i] = %.5f, but expected %.5f", j, c[j], expected[j]); return 1; } } } return 0; } int test_rotate(char *err) { int i; double a[3], b[3], c[3], expected[3]; a[0] = 1; a[1] = 0; a[2] = 0; b[0] = 0; b[1] = 0; b[2] = 1; rotate(c,a,b,M_PI/2); expected[0] = 0; expected[1] = 1; expected[2] = 0; for (i = 0; i < 3; i++) { if (!isclose(c[i], expected[i], 1e-9, 1e-9)) { sprintf(err, "rotate(a,b,pi/2)[%i] = %.5f, but expected %.5f", i, c[i], expected[i]); return 1; } } return 0; } struct tests { testFunction *test; char *name; } tests[] = { {test_dot, "test_dot"}, {test_norm, "test_norm"}, {test_copy, "test_copy"}, {test_div, "test_div"}, {test_add, "test_add"}, {test_sub, "test_sub"}, {test_normalize, "test_normalize"}, {test_cross_product, "test_cross_product"}, {test_rotate, "test_rotate"} }; int main(int argc, char **argv) { int i; char err[256]; int retval = 0; struct tests test; for (i = 0; i < LEN(tests); i++) { test = tests[i]; if (!test.test(err)) { printf("[\033[92mok\033[0m] %s\n", test.name); } else { printf("[\033[91mfail\033[0m] %s: %s\n", test.name, err); retval = 1; } } return retval; }