summary | refs | log | tree | commit | diff
path: root/gpu.py
diff options
context:
space:
mode:
author: Anthony LaTorre <tlatorre9@gmail.com> 2011-08-10 00:21:15 -0400
committer: Anthony LaTorre <tlatorre9@gmail.com> 2011-08-10 00:21:15 -0400
commit: 23b4bedf43f2ff120c1178c3445e39b8735030f3 (patch)
tree: 739b7e72a9e4bbdb4c26bc1a010bd908d7f69ec4 /gpu.py
parent: ea5cc8e4e0e9bd1d1a1e5e7140c0022a8bd2a59d (diff)
parent: 14eb2b598fc5c2f70f3f4692dc58f0206b07d728 (diff)
download: chroma-23b4bedf43f2ff120c1178c3445e39b8735030f3.tar.gz
chroma-23b4bedf43f2ff120c1178c3445e39b8735030f3.tar.bz2
chroma-23b4bedf43f2ff120c1178c3445e39b8735030f3.zip
merge heads
Diffstat (limited to 'gpu.py')
-rw-r--r-- gpu.py 12
1 file changed, 8 insertions, 4 deletions
diff --git a/gpu.py b/gpu.py
index 0f56ee4..e6b6856 100644
--- a/gpu.py
+++ b/gpu.py
@@ -72,13 +72,15 @@ class GPU(object):
device = cuda.Device(device_id)
self.context = device.make_context()
print 'device %s' % self.context.get_device().name()
- self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True)
+ cuda_options = ['-I' + src.dir, '--use_fast_math']
+
+ self.module = SourceModule(src.kernel, options=cuda_options, no_extern_c=True)
self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids'])
self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate'])
self.nthread_per_block = 64
self.max_blocks = 1024
- self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True)
+ self.daq_module = SourceModule(src.daq, options=cuda_options, no_extern_c=True)
self.daq_funcs = CUDAFuncs(self.daq_module,
['reset_earliest_time_int', 'run_daq',
'convert_sortable_int_to_float'])
@@ -257,7 +259,8 @@ class GPU(object):
block=(self.nthread_per_block,1,1),
grid=(blocks, 1))
- #self.context.synchronize()
+ if 'profile' in __builtins__:
+ self.context.synchronize()
def get_photons(self):
'''Returns a dictionary of current photon state information.
@@ -309,7 +312,8 @@ class GPU(object):
self.earliest_time_gpu,
block=(self.nthread_per_block,1,1),
grid=(len(self.earliest_time_int_gpu)//self.nthread_per_block+1,1))
- #self.context.synchronize()
+ if 'profile' in __builtins__:
+ self.context.synchronize()
def get_hits(self):