summary refs log tree commit diff
diff options
context:
space:
mode:
author Anthony LaTorre <tlatorre9@gmail.com> 2011-08-10 00:21:15 -0400
committer Anthony LaTorre <tlatorre9@gmail.com> 2011-08-10 00:21:15 -0400
commit 23b4bedf43f2ff120c1178c3445e39b8735030f3 (patch)
tree 739b7e72a9e4bbdb4c26bc1a010bd908d7f69ec4
parent ea5cc8e4e0e9bd1d1a1e5e7140c0022a8bd2a59d (diff)
parent 14eb2b598fc5c2f70f3f4692dc58f0206b07d728 (diff)
download chroma-23b4bedf43f2ff120c1178c3445e39b8735030f3.tar.gz
chroma-23b4bedf43f2ff120c1178c3445e39b8735030f3.tar.bz2
chroma-23b4bedf43f2ff120c1178c3445e39b8735030f3.zip
merge heads
-rw-r--r-- gpu.py 12
-rwxr-xr-x sim.py 8
2 files changed, 15 insertions, 5 deletions
diff --git a/gpu.py b/gpu.py
index 0f56ee4..e6b6856 100644
--- a/gpu.py
+++ b/gpu.py
@@ -72,13 +72,15 @@ class GPU(object):
device = cuda.Device(device_id)
self.context = device.make_context()
print 'device %s' % self.context.get_device().name()
- self.module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True)
+ cuda_options = ['-I' + src.dir, '--use_fast_math']
+
+ self.module = SourceModule(src.kernel, options=cuda_options, no_extern_c=True)
self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids'])
self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate'])
self.nthread_per_block = 64
self.max_blocks = 1024
- self.daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True)
+ self.daq_module = SourceModule(src.daq, options=cuda_options, no_extern_c=True)
self.daq_funcs = CUDAFuncs(self.daq_module,
['reset_earliest_time_int', 'run_daq',
'convert_sortable_int_to_float'])
@@ -257,7 +259,8 @@ class GPU(object):
block=(self.nthread_per_block,1,1),
grid=(blocks, 1))
- #self.context.synchronize()
+ if 'profile' in __builtins__:
+ self.context.synchronize()
def get_photons(self):
'''Returns a dictionary of current photon state information.
@@ -309,7 +312,8 @@ class GPU(object):
self.earliest_time_gpu,
block=(self.nthread_per_block,1,1),
grid=(len(self.earliest_time_int_gpu)//self.nthread_per_block+1,1))
- #self.context.synchronize()
+ if 'profile' in __builtins__:
+ self.context.synchronize()
def get_hits(self):
diff --git a/sim.py b/sim.py
index 0d775f4..34477f9 100755
--- a/sim.py
+++ b/sim.py
@@ -75,7 +75,13 @@ def write_event(T, ev, event_id, hits, photon_start=None, photon_stop=None):
root.fill_hits(ev, len(hits['t']), hits['t'], hits['q'], hits['history'])
T.Fill()
-#@profile
+# Allow profile decorator to exist, but do nothing if not running under kernprof
+try:
+ profile = profile
+except NameError:
+ profile = lambda x: x
+
+@profile
def main():
parser = optparse.OptionParser('%prog')
parser.add_option('-b', type='int', dest='nbits', default=10)