diff options
-rwxr-xr-x | benchmark.py | 47 | ||||
-rwxr-xr-x | camera.py | 120 | ||||
-rw-r--r-- | geometry.py | 4 | ||||
-rw-r--r-- | gpu.py | 413 | ||||
-rw-r--r-- | make.py | 4 | ||||
-rwxr-xr-x | sim.py | 15 | ||||
-rw-r--r-- | src/__init__.py | 7 | ||||
-rw-r--r-- | src/alpha.h | 141 | ||||
-rw-r--r-- | src/daq.cu | 298 | ||||
-rw-r--r-- | src/geometry.h | 74 | ||||
-rw-r--r-- | src/hybrid_render.cu | 202 | ||||
-rw-r--r-- | src/intersect.h | 69 | ||||
-rw-r--r-- | src/kernel.cu | 389 | ||||
-rw-r--r-- | src/linalg.h | 157 | ||||
-rw-r--r-- | src/materials.h | 68 | ||||
-rw-r--r-- | src/matrix.h | 312 | ||||
-rw-r--r-- | src/mesh.h | 273 | ||||
-rw-r--r-- | src/photon.h | 483 | ||||
-rw-r--r-- | src/propagate.cu | 100 | ||||
-rw-r--r-- | src/random.h | 85 | ||||
-rw-r--r-- | src/render.cu | 169 | ||||
-rw-r--r-- | src/rotate.h | 21 | ||||
-rw-r--r-- | src/sorting.h | 134 | ||||
-rw-r--r-- | src/tools.cu | 18 | ||||
-rw-r--r-- | src/transform.cu | 38 | ||||
-rw-r--r-- | stl.py | 2 | ||||
-rw-r--r-- | tests/test_pdf.py | 8 | ||||
-rw-r--r-- | tests/test_propagation.py | 12 |
28 files changed, 1927 insertions, 1736 deletions
diff --git a/benchmark.py b/benchmark.py index f839f61..31c145c 100755 --- a/benchmark.py +++ b/benchmark.py @@ -17,21 +17,21 @@ from chroma import optics # Generator processes need to fork BEFORE the GPU context is setup g4generator = generator.photon.G4ParallelGenerator(4, optics.water_wcsim) -def intersect(gpu_instance, number=100, nphotons=500000, nthreads_per_block=64, max_blocks=1024): +def intersect(gpu_geometry, number=100, nphotons=500000, nthreads_per_block=64, + max_blocks=1024): "Returns the average number of ray intersections per second." distances_gpu = ga.empty(nphotons, dtype=np.float32) + module = gpu.get_cu_module('mesh.h', options=('--use_fast_math',)) + gpu_funcs = gpu.GPUFuncs(module) + run_times = [] for i in tools.progress(range(number)): - pos = np.zeros((nphotons,3), dtype=np.float32) - dir = sample.uniform_sphere(nphotons) - - gpu_rays = gpu.GPURays(pos, dir) - - gpu_funcs = gpu.GPUFuncs(gpu_instance.module) + pos = ga.zeros(nphotons, dtype=ga.vec.float3) + dir = ga.to_gpu(gpu.to_float3(sample.uniform_sphere(nphotons))) t0 = time.time() - gpu_funcs.distance_to_mesh(np.int32(gpu_rays.pos.size), gpu_rays.pos, gpu_rays.dir, distances_gpu, block=(nthreads_per_block,1,1), grid=(gpu_rays.pos.size//nthreads_per_block+1,1)) + gpu_funcs.distance_to_mesh(np.int32(pos.size), pos, dir, gpu_geometry.gpudata, distances_gpu, block=(nthreads_per_block,1,1), grid=(pos.size//nthreads_per_block+1,1)) cuda.Context.get_current().synchronize() elapsed = time.time() - t0 @@ -39,7 +39,7 @@ def intersect(gpu_instance, number=100, nphotons=500000, nthreads_per_block=64, # first kernel call incurs some driver overhead run_times.append(elapsed) - return gpu_rays.pos.size/ufloat((np.mean(run_times),np.std(run_times))) + return nphotons/ufloat((np.mean(run_times),np.std(run_times))) def load_photons(number=100, nphotons=500000): """Returns the average number of photons moved to the GPU device memory @@ -64,7 +64,8 @@ def load_photons(number=100, nphotons=500000): return nphotons/ufloat((np.mean(run_times),np.std(run_times))) -def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, max_blocks=1024): +def propagate(gpu_geometry, number=10, nphotons=500000, nthreads_per_block=64, + max_blocks=1024): "Returns the average number of photons propagated on the GPU per second." rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) @@ -79,7 +80,8 @@ def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, m gpu_photons = gpu.GPUPhotons(photons) t0 = time.time() - gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block, + max_blocks) cuda.Context.get_current().synchronize() elapsed = time.time() - t0 @@ -90,7 +92,8 @@ def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, m return nphotons/ufloat((np.mean(run_times),np.std(run_times))) @tools.profile_if_possible -def pdf(gpu_instance, gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, nthreads_per_block=64, max_blocks=1024): +def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, + nthreads_per_block=64, max_blocks=1024): """ Returns the average number of 100 MeV events per second that can be histogrammed per second. @@ -118,14 +121,17 @@ def pdf(gpu_instance, gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, t0 = time.time() gpu_pdf.clear_pdf() - vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0), + (1,0,0), 100) vertex_iter = itertools.islice(vertex_gen, nevents) for ev in g4generator.generate_events(vertex_iter): for j in xrange(nreps): gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) - gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_photons.propagate(gpu_geometry, rng_states, + nthreads_per_block, max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, + nthreads_per_block, max_blocks) gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block) hitcount, pdf = gpu_pdf.get_pdfs() @@ -146,16 +152,19 @@ if __name__ == '__main__': lbne.build(bits=11) gpu_instance = gpu.GPU() - gpu_geometry = gpu.GPUGeometry(gpu_instance, lbne) + gpu_geometry = gpu.GPUGeometry(lbne) gpu_instance.print_mem_info() - print '%s ray intersections/sec.' % tools.ufloat_to_str(intersect(gpu_instance)) + print '%s ray intersections/sec.' % \ + tools.ufloat_to_str(intersect(gpu_geometry)) gc.collect() gpu_instance.print_mem_info() print '%s photons loaded/sec.' % tools.ufloat_to_str(load_photons()) gc.collect() gpu_instance.print_mem_info() - print '%s photons propagated/sec.' % tools.ufloat_to_str(propagate(gpu_instance)) + print '%s photons propagated/sec.' % \ + tools.ufloat_to_str(propagate(gpu_geometry)) gc.collect() gpu_instance.print_mem_info() - print '%s 100 MeV events histogrammed/s' % tools.ufloat_to_str(pdf(gpu_instance, gpu_geometry, max(lbne.pmtids))) + print '%s 100 MeV events histogrammed/s' % \ + tools.ufloat_to_str(pdf(gpu_geometry, max(lbne.pmtids))) @@ -102,17 +102,18 @@ def encode_movie(dir): print 'movie saved to %s.' % filename class Camera(Thread): - def __init__(self, geometry, size=(800,600), device_id=None, enable3d=False, green_magenta=False): + def __init__(self, geometry, size=(800,600), device_id=None, enable3d=False, green_magenta=False, alpha_depth=10): Thread.__init__(self) self.geometry = geometry self.device_id = device_id self.size = size self.enable3d = enable3d self.green_magenta = green_magenta + self.alpha_depth = alpha_depth self.unique_bvh_layers = np.unique(self.geometry.layers) self.currentlayer = None - self.bvh_layers = {self.currentlayer : self.geometry} + self.bvh_layers = {} try: import spnav as spnav_module @@ -123,15 +124,19 @@ class Camera(Thread): def init_gpu(self): self.gpu_instance = gpu.GPU(self.device_id) - self.gpu_geometry = gpu.GPUGeometry(self.gpu_instance, self.geometry) - self.gpu_funcs = gpu.GPUFuncs(self.gpu_instance.module) + self.gpu_geometry = gpu.GPUGeometry(self.geometry) + self.gpu_funcs = gpu.GPUFuncs(gpu.get_cu_module('mesh.h')) + self.hybrid_funcs = gpu.GPUFuncs(gpu.get_cu_module('hybrid_render.cu')) + + self.gpu_geometries = [self.gpu_geometry] self.width, self.height = self.size self.npixels = self.width*self.height pygame.init() - self.screen = pygame.display.set_mode(self.size) + self.window = pygame.display.set_mode(self.size) + self.screen = pygame.Surface(self.size, pygame.SRCALPHA) pygame.display.set_caption('') self.clock = pygame.time.Clock() @@ -182,9 +187,12 @@ class Camera(Thread): self.rays = gpu.GPURays(pos, dir) + self.distance_array = ga.empty(self.alpha_depth*self.rays.pos.size, dtype=np.float32) + self.index_array = ga.empty(self.alpha_depth*self.rays.pos.size, dtype=np.uint32) + self.n_array = ga.zeros(self.rays.pos.size, dtype=np.uint32) + self.pixels_gpu = ga.empty(self.npixels, dtype=np.int32) - self.alpha = True self.movie = False self.movie_index = 0 self.movie_dir = None @@ -218,7 +226,7 @@ class Camera(Thread): for wavelength, rgb_tuple in \ zip([685.0, 545.0, 445.0],[(1,0,0),(0,1,0),(0,0,1)]): for i in range(self.xyz_lookup1_gpu.size//(self.npixels)+1): - self.gpu_funcs.update_xyz_lookup(np.int32(self.npixels), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.npixels), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(wavelength), ga.vec.make_float3(*rgb_tuple), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.npixels//self.nblocks+1,1)) + self.hybrid_funcs.update_xyz_lookup(np.int32(self.npixels), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.npixels), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(wavelength), ga.vec.make_float3(*rgb_tuple), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), self.gpu_geometry.gpudata, block=(self.nblocks,1,1), grid=(self.npixels//self.nblocks+1,1)) self.nlookup_calls += 1 @@ -234,7 +242,7 @@ class Camera(Thread): def update_image_from_rays(self, image, rays): for wavelength, rgb_tuple in \ zip([685.0, 545.0, 445.0],[(1,0,0),(0,1,0),(0,0,1)]): - self.gpu_funcs.update_xyz_image(np.int32(rays.pos.size), self.rng_states_gpu, rays.pos, rays.dir, np.float32(wavelength), ga.vec.make_float3(*rgb_tuple), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, image, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(rays.pos.size//self.nblocks+1,1)) + self.hybrid_funcs.update_xyz_image(np.int32(rays.pos.size), self.rng_states_gpu, rays.pos, rays.dir, np.float32(wavelength), ga.vec.make_float3(*rgb_tuple), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, image, np.int32(self.nlookup_calls), np.int32(self.max_steps), self.gpu_geometry.gpudata, block=(self.nblocks,1,1), grid=(rays.pos.size//self.nblocks+1,1)) def update_image(self): if self.enable3d: @@ -247,10 +255,10 @@ class Camera(Thread): def process_image(self): if self.enable3d: - self.gpu_funcs.process_image(np.int32(self.pixels1_gpu.size), self.image1_gpu, self.pixels1_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels1_gpu.size)//self.nblocks+1,1)) - self.gpu_funcs.process_image(np.int32(self.pixels2_gpu.size), self.image2_gpu, self.pixels2_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels2_gpu.size)//self.nblocks+1,1)) + self.hybrid_funcs.process_image(np.int32(self.pixels1_gpu.size), self.image1_gpu, self.pixels1_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels1_gpu.size)//self.nblocks+1,1)) + self.hybrid_funcs.process_image(np.int32(self.pixels2_gpu.size), self.image2_gpu, self.pixels2_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels2_gpu.size)//self.nblocks+1,1)) else: - self.gpu_funcs.process_image(np.int32(self.pixels_gpu.size), self.image_gpu, self.pixels_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size)//self.nblocks+1,1)) + self.hybrid_funcs.process_image(np.int32(self.pixels_gpu.size), self.image_gpu, self.pixels_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size)//self.nblocks+1,1)) def screenshot(self, dir='', start=0): root, ext = 'screenshot', 'png' @@ -320,29 +328,28 @@ class Camera(Thread): self.update() - def update_pixels(self): + def update_pixels(self, gpu_geometry=None, keep_last_render=False): + if gpu_geometry is None: + gpu_geometry = self.gpu_geometry + if self.render: while self.nlookup_calls < 10: self.update_xyz_lookup(self.source_position) self.update_image() self.process_image() else: - if self.alpha: - if self.enable3d: - self.gpu_funcs.ray_trace_alpha(np.int32(self.rays1.pos.size), self.rays1.pos, self.rays1.dir, self.pixels1_gpu, block=(self.nblocks,1,1), grid=(self.rays2.pos.size//self.nblocks+1,1)) - self.gpu_funcs.ray_trace_alpha(np.int32(self.rays2.pos.size), self.rays2.pos, self.rays2.dir, self.pixels2_gpu, block=(self.nblocks,1,1), grid=(self.rays2.pos.size//self.nblocks+1,1)) - else: - self.gpu_funcs.ray_trace_alpha(np.int32(self.rays.pos.size), self.rays.pos, self.rays.dir, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.rays.pos.size//self.nblocks+1,1)) + if self.enable3d: + self.rays1.render(gpu_geometry, self.pixels1_gpu, + self.alpha_depth, keep_last_render) + self.rays2.render(gpu_geometry, self.pixels2_gpu, + self.alpha_depth, keep_last_render) else: - if self.enable3d: - self.gpu_funcs.ray_trace(np.int32(self.rays1.pos.size), self.rays1.pos, self.rays1.dir, self.pixels1_gpu, block=(self.nblocks,1,1), grid=(self.rays1.pos.size//self.nblocks+1,1)) - self.gpu_funcs.ray_trace(np.int32(self.rays2.pos.size), self.rays2.pos, self.rays2.dir, self.pixels2_gpu, block=(self.nblocks,1,1), grid=(self.rays2.pos.size//self.nblocks+1,1)) - else: - self.gpu_funcs.ray_trace(np.int32(self.rays.pos.size), self.rays.pos, self.rays.dir, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.rays.pos.size//self.nblocks+1,1)) + self.rays.render(gpu_geometry, self.pixels_gpu, + self.alpha_depth, keep_last_render) - def update(self): + def update_viewing_angle(self): if self.enable3d: - self.gpu_funcs.distance_to_mesh(np.int32(self.scope_rays.pos.size), self.scope_rays.pos, self.scope_rays.dir, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.scope_rays.pos.size//self.nblocks,1)) + self.gpu_funcs.distance_to_mesh(np.int32(self.scope_rays.pos.size), self.scope_rays.pos, self.scope_rays.dir, self.gpu_geometry.gpudata, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.scope_rays.pos.size//self.nblocks,1)) baseline = ga.min(self.distances_gpu).get().item() @@ -377,7 +384,16 @@ class Camera(Thread): self.viewing_angle = new_viewing_angle - self.update_pixels() + def update(self): + if self.enable3d: + self.update_viewing_angle() + + n = len(self.gpu_geometries) + for i, gpu_geometry in enumerate(self.gpu_geometries): + if i == 0: + self.update_pixels(gpu_geometry) + else: + self.update_pixels(gpu_geometry, keep_last_render=True) if self.enable3d: pixels1 = self.pixels1_gpu.get() @@ -387,12 +403,18 @@ class Camera(Thread): pixels = (pixels1 & 0x00ff00) | (pixels2 & 0xff00ff) else: pixels = (pixels1 & 0xff0000) | (pixels2 & 0x00ffff) + + alpha = ((0xff & (pixels1 >> 24)) + (0xff & (pixels2 >> 24)))/2 + + pixels |= (alpha << 24) else: pixels = self.pixels_gpu.get() pygame.surfarray.blit_array(self.screen, pixels.reshape(self.size)) if self.doom_mode: self.screen.blit(self.doom_hud, self.doom_rect) + self.window.fill(0) + self.window.blit(self.screen, (0,0)) pygame.display.flip() if self.movie: @@ -400,12 +422,17 @@ class Camera(Thread): self.movie_index += 1 def loadlayer(self, layer): - try: - layergeometry = self.bvh_layers[layer] - layergeometry.activate() - except KeyError: - layergeometry = build(bvh_mesh(self.geometry, layer),8) - self.bvh_layers[layer] = gpu.GPUGeometry(self.gpu, layergeometry) + if layer is None: + self.gpu_geometries = [self.gpu_geometry] + else: + try: + gpu_geometry = self.bvh_layers[layer] + except KeyError: + geometry = build(bvh_mesh(self.geometry, layer), 8) + gpu_geometry = gpu.GPUGeometry(geometry) + self.bvh_layers[layer] = gpu_geometry + + self.gpu_geometries = [self.gpu_geometry, gpu_geometry] self.update() @@ -484,6 +511,15 @@ class Camera(Thread): self.done = True return + elif event.key == K_EQUALS: + self.alpha_depth += 1 + self.update() + + elif event.key == K_MINUS: + if self.alpha_depth > 0: + self.alpha_depth -= 1 + self.update() + elif event.key == K_PAGEDOWN: if self.currentlayer is None: self.currentlayer = self.unique_bvh_layers[-1] @@ -517,10 +553,6 @@ class Camera(Thread): self.clear_image() self.update() - elif event.key == K_F8: - self.alpha = not self.alpha - self.update() - elif event.key == K_m: if self.movie: encode_movie(self.movie_dir) @@ -630,6 +662,20 @@ class EventViewer(Camera): Camera.__init__(self, geometry, **kwargs) self.rr = RootReader(filename) + def render_particle_track(self): + marker = Solid(make.cube(0.1), vacuum, vacuum) + + geometry = Geometry() + for pos in self.ev.photons_beg.pos[::100]: + geometry.add_solid(marker, displacement=pos) + + geometry.build(bits=11) + gpu_geometry = gpu.GPUGeometry(geometry) + + self.gpu_geometries = [self.gpu_geometry, gpu_geometry] + + self.update() + def color_hit_pmts(self): self.gpu_geometry.reset_colors() @@ -651,6 +697,7 @@ class EventViewer(Camera): pass else: self.color_hit_pmts() + self.render_particle_track() return elif event.key == K_PAGEDOWN: @@ -660,6 +707,7 @@ class EventViewer(Camera): pass else: self.color_hit_pmts() + self.render_particle_track() return Camera.process_event(self, event) diff --git a/geometry.py b/geometry.py index 1b1f004..5b89110 100644 --- a/geometry.py +++ b/geometry.py @@ -383,6 +383,8 @@ class Geometry(object): if unique_zvalues.size == 1: break + self.start_node = self.node_map.size - 1 + if use_cache: print >>sys.stderr, 'Writing BVH to cache directory...' @@ -391,7 +393,7 @@ class Geometry(object): with gzip.GzipFile(cache_path, 'wb', compresslevel=1) as f: data = {} - for key in ['lower_bounds', 'upper_bounds', 'node_map', 'node_map_end', 'layers', 'first_node']: + for key in ['lower_bounds', 'upper_bounds', 'node_map', 'node_map_end', 'layers', 'first_node', 'start_node']: data[key] = getattr(self, key) data['reorder'] = reorder pickle.dump(data, f, -1) @@ -8,7 +8,7 @@ import sys import pytools import pycuda.tools from pycuda.compiler import SourceModule -import pycuda.characterize +from pycuda import characterize import pycuda.driver as cuda from pycuda import gpuarray as ga @@ -20,24 +20,37 @@ from chroma import event cuda.init() -#@pycuda.tools.context_dependent_memoize +# standard nvcc options +cuda_options = ('--use_fast_math',)#, '--ptxas-options=-v'] + +@pycuda.tools.context_dependent_memoize def get_cu_module(name, options=None, include_source_directory=True): """Returns a pycuda.compiler.SourceModule object from a CUDA source file located in the chroma src directory at src/[name].cu.""" if options is None: options = [] + elif isinstance(options, tuple): + options = list(options) + else: + raise TypeError('`options` must be a tuple.') srcdir = dirname(chroma.src.__file__) if include_source_directory: options += ['-I' + srcdir] - with open('%s/%s.cu' % (srcdir, name)) as f: + with open('%s/%s' % (srcdir, name)) as f: source = f.read() return pycuda.compiler.SourceModule(source, options=options, no_extern_c=True) +def get_cu_source(name): + srcdir = dirname(chroma.src.__file__) + with open('%s/%s' % (srcdir, name)) as f: + source = f.read() + return source + class GPUFuncs(object): """Simple container class for GPU functions as attributes.""" def __init__(self, module): @@ -73,7 +86,7 @@ __global__ void init_rng(int nthreads, curandState *s, unsigned long long seed, def get_rng_states(size, seed=1): "Return `size` number of CUDA random number generator states." - rng_states = cuda.mem_alloc(size*pycuda.characterize.sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) + rng_states = cuda.mem_alloc(size*characterize.sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) module = pycuda.compiler.SourceModule(init_rng_src, no_extern_c=True) init_rng = module.get_function('init_rng') @@ -88,6 +101,12 @@ def to_float3(arr): arr = np.asarray(arr, order='c') return arr.astype(np.float32).view(ga.vec.float3)[:,0] +def to_uint3(arr): + "Returns a pycuda.gpuarray.vec.uint3 array from an (N,3) array." + if not arr.flags['C_CONTIGUOUS']: + arr = np.asarray(arr, order='c') + return arr.astype(np.uint32).view(ga.vec.uint3)[:,0] + def chunk_iterator(nelements, nthreads_per_block=64, max_blocks=1024): """Iterator that yields tuples with the values requried to process a long array in multiple kernel passes on the GPU. @@ -121,6 +140,11 @@ class GPUPhotons(object): self.last_hit_triangles = ga.to_gpu(photons.last_hit_triangles.astype(np.int32)) self.flags = ga.to_gpu(photons.flags.astype(np.uint32)) + #cuda_options = ('--use_fast_math', '-w')#, '--ptxas-options=-v'] + + module = get_cu_module('propagate.cu', options=cuda_options) + self.gpu_funcs = GPUFuncs(module) + def get(self): pos = self.pos.get().view(np.float32).reshape((len(self.pos),3)) dir = self.dir.get().view(np.float32).reshape((len(self.dir),3)) @@ -131,6 +155,50 @@ class GPUPhotons(object): flags = self.flags.get() return event.Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags) + def propagate(self, gpu_geometry, rng_states, nthreads_per_block=64, + max_blocks=1024, max_steps=10): + """Propagate photons on GPU to termination or max_steps, whichever + comes first. + + May be called repeatedly without reloading photon information if + single-stepping through photon history. + + ..warning:: + `rng_states` must have at least `nthreads_per_block`*`max_blocks` + number of curandStates. + """ + nphotons = self.pos.size + step = 0 + input_queue = np.zeros(shape=nphotons+1, dtype=np.uint32) + input_queue[1:] = np.arange(nphotons, dtype=np.uint32) + input_queue_gpu = ga.to_gpu(input_queue) + output_queue = np.zeros(shape=nphotons+1, dtype=np.uint32) + output_queue[0] = 1 + output_queue_gpu = ga.to_gpu(output_queue) + + while step < max_steps: + # Just finish the rest of the steps if the # of photons is low + if nphotons < nthreads_per_block * 16 * 8: + nsteps = max_steps - step + else: + nsteps = 1 + + for first_photon, photons_this_round, blocks in \ + chunk_iterator(nphotons, nthreads_per_block, max_blocks): + self.gpu_funcs.propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, self.pos, self.dir, self.wavelengths, self.pol, self.t, self.flags, self.last_hit_triangles, np.int32(nsteps), gpu_geometry.gpudata, block=(nthreads_per_block,1,1), grid=(blocks, 1)) + + step += nsteps + + if step < max_steps: + temp = input_queue_gpu + input_queue_gpu = output_queue_gpu + output_queue_gpu = temp + output_queue_gpu[:1].set(np.uint32(1)) + nphotons = input_queue_gpu[:1].get()[0] - 1 + + if ga.max(self.flags).get() & (1 << 31): + print >>sys.stderr, "WARNING: ABORTED PHOTONS" + class GPUChannels(object): def __init__(self, t, q, flags): self.t = t @@ -145,71 +213,89 @@ class GPUChannels(object): # enough hit time were hit. return event.Channels(t<1e8, t, q, self.flags.get()) -def propagate(gpu, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024, max_steps=10): - """Propagate photons on GPU to termination or max_steps, whichever - comes first. - - May be called repeatedly without reloading photon information if - single-stepping through photon history. - - ..warning:: - `rng_states` must have at least `nthreads_per_block`*`max_blocks` - number of curandStates. - """ - nphotons = gpuphotons.pos.size - step = 0 - input_queue = np.zeros(shape=nphotons+1, dtype=np.uint32) - input_queue[1:] = np.arange(nphotons, dtype=np.uint32) - input_queue_gpu = ga.to_gpu(input_queue) - output_queue = np.zeros(shape=nphotons+1, dtype=np.uint32) - output_queue[0] = 1 - output_queue_gpu = ga.to_gpu(output_queue) - - propagate = gpu.module.get_function('propagate') - - while step < max_steps: - # Just finish the rest of the steps if the # of photons is low - if nphotons < nthreads_per_block * 16 * 8: - nsteps = max_steps - step - else: - nsteps = 1 - - for first_photon, photons_this_round, blocks in \ - chunk_iterator(nphotons, nthreads_per_block, max_blocks): - propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, gpuphotons.pos, gpuphotons.dir, gpuphotons.wavelengths, gpuphotons.pol, gpuphotons.t, gpuphotons.flags, gpuphotons.last_hit_triangles, np.int32(nsteps), block=(nthreads_per_block,1,1), grid=(blocks, 1)) - - step += nsteps - - if step < max_steps: - temp = input_queue_gpu - input_queue_gpu = output_queue_gpu - output_queue_gpu = temp - output_queue_gpu[:1].set(np.uint32(1)) - nphotons = input_queue_gpu[:1].get()[0] - 1 - - if ga.max(gpuphotons.flags).get() & (1 << 31): - print >>sys.stderr, "WARNING: ABORTED PHOTONS" - class GPURays(object): - def __init__(self, pos, dir, nblocks=64): + """The GPURays class holds arrays of ray positions and directions + on the GPU that are used to render a geometry.""" + def __init__(self, pos, dir, max_alpha_depth=10, nblocks=64): self.pos = ga.to_gpu(to_float3(pos)) self.dir = ga.to_gpu(to_float3(dir)) + self.max_alpha_depth = max_alpha_depth + self.nblocks = nblocks - self.module = get_cu_module('transform') - self.gpu_funcs = GPUFuncs(self.module) + transform_module = get_cu_module('transform.cu', options=cuda_options) + self.transform_funcs = GPUFuncs(transform_module) + + render_module = get_cu_module('render.cu', options=cuda_options) + self.render_funcs = GPUFuncs(render_module) + + self.dx = ga.empty(max_alpha_depth*self.pos.size, dtype=np.float32) + self.color = ga.empty(self.dx.size, dtype=ga.vec.float4) + self.dxlen = ga.zeros(self.pos.size, dtype=np.uint32) def rotate(self, phi, n): - self.gpu_funcs.rotate(np.int32(self.pos.size), self.pos, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) - self.gpu_funcs.rotate(np.int32(self.dir.size), self.dir, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.dir.size//self.nblocks+1,1)) + "Rotate by an angle phi around the axis `n`." + self.transform_funcs.rotate(np.int32(self.pos.size), self.pos, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + self.transform_funcs.rotate(np.int32(self.dir.size), self.dir, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.dir.size//self.nblocks+1,1)) def rotate_around_point(self, phi, n, point): - self.gpu_funcs.rotate_around_point(np.int32(self.pos.size), self.pos, np.float32(phi), ga.vec.make_float3(*n), ga.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) - self.gpu_funcs.rotate(np.int32(self.dir.size), self.dir, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.dir.size//self.nblocks+1,1)) + """"Rotate by an angle phi around the axis `n` passing through + the point `point`.""" + self.transform_funcs.rotate_around_point(np.int32(self.pos.size), self.pos, np.float32(phi), ga.vec.make_float3(*n), ga.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + self.transform_funcs.rotate(np.int32(self.dir.size), self.dir, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.dir.size//self.nblocks+1,1)) def translate(self, v): - self.gpu_funcs.translate(np.int32(self.pos.size), self.pos, ga.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + "Translate the ray positions by the vector `v`." + self.transform_funcs.translate(np.int32(self.pos.size), self.pos, ga.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + + def render(self, gpu_geometry, pixels, alpha_depth=10, + keep_last_render=False): + """Render `gpu_geometry` and fill the GPU array `pixels` with pixel + colors.""" + if not keep_last_render: + self.dxlen.fill(0) + + if alpha_depth > self.max_alpha_depth: + raise Exception('alpha_depth > max_alpha_depth') + + if not isinstance(pixels, ga.GPUArray): + raise TypeError('`pixels` must be a %s instance.' % ga.GPUArray) + + if pixels.size != self.pos.size: + raise ValueError('`pixels`.size != number of rays') + + self.render_funcs.render(np.int32(self.pos.size), self.pos, self.dir, gpu_geometry.gpudata, np.uint32(alpha_depth), pixels, self.dx, self.dxlen, self.color, block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + + def snapshot(self, gpu_geometry, alpha_depth=10): + "Render `gpu_geometry` and return a numpy array of pixel colors." + pixels = ga.empty(self.pos.size, dtype=np.uint32) + self.render(gpu_geometry, pixels, alpha_depth) + return pixels.get() + +def make_gpu_struct(size, members): + struct = cuda.mem_alloc(size) + + i = 0 + for member in members: + if isinstance(member, ga.GPUArray): + member = member.gpudata + + if isinstance(member, cuda.DeviceAllocation): + if i % 8: + raise Exception('cannot align 64-bit pointer. ' + 'arrange struct member variables in order of ' + 'decreasing size.') + + cuda.memcpy_htod(int(struct)+i, np.intp(int(member))) + i += 8 + elif np.isscalar(member): + cuda.memcpy_htod(int(struct)+i, member) + i += member.nbytes + else: + raise TypeError('expected a GPU device pointer or scalar type.') + + return struct def format_size(size): if size < 1e3: @@ -226,96 +312,142 @@ def format_array(name, array): (name, format_size(len(array)), format_size(array.nbytes)) class GPUGeometry(object): - def __init__(self, gpu, geometry, load=True, activate=True, print_usage=True): - self.geometry = geometry + def __init__(self, geometry, wavelengths=None, print_usage=True): + if wavelengths is None: + wavelengths = standard_wavelengths + + try: + wavelength_step = np.unique(np.diff(wavelengths)).item() + except ValueError: + raise ValueError('wavelengths must be equally spaced apart.') - self.module = gpu.module - self.gpu_funcs = GPUFuncs(gpu.module) + geometry_source = get_cu_source('geometry.h') + material_struct_size = characterize.sizeof('Material', geometry_source) + surface_struct_size = characterize.sizeof('Surface', geometry_source) + geometry_struct_size = characterize.sizeof('Geometry', geometry_source) - if load: - self.load(activate, print_usage) + self.material_data = [] + self.material_ptrs = [] - def load(self, activate=True, print_usage=True): - self.gpu_funcs.set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) + def interp_material_property(wavelengths, property): + # note that it is essential that the material properties be + # interpolated linearly. this fact is used in the propagation + # code to guarantee that probabilities still sum to one. + return np.interp(wavelengths, property[:,0], property[:,1]).astype(np.float32) - self.materials = [] - for i in range(len(self.geometry.unique_materials)): - material = copy(self.geometry.unique_materials[i]) + for i in range(len(geometry.unique_materials)): + material = geometry.unique_materials[i] if material is None: raise Exception('one or more triangles is missing a material.') - material.refractive_index_gpu = ga.to_gpu(np.interp(standard_wavelengths, material.refractive_index[:,0], material.refractive_index[:,1]).astype(np.float32)) - material.absorption_length_gpu = ga.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32)) - material.scattering_length_gpu = ga.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32)) + refractive_index = interp_material_property(wavelengths, material.refractive_index) + refractive_index_gpu = ga.to_gpu(refractive_index) + absorption_length = interp_material_property(wavelengths, material.absorption_length) + absorption_length_gpu = ga.to_gpu(absorption_length) + scattering_length = interp_material_property(wavelengths, material.scattering_length) + scattering_length_gpu = ga.to_gpu(scattering_length) - self.gpu_funcs.set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) + self.material_data.append(refractive_index_gpu) + self.material_data.append(absorption_length_gpu) + self.material_data.append(scattering_length_gpu) - self.materials.append(material) + material_gpu = \ + make_gpu_struct(material_struct_size, + [refractive_index_gpu, absorption_length_gpu, + scattering_length_gpu, + np.uint32(len(wavelengths)), + np.float32(wavelength_step), + np.float32(wavelengths[0])]) - self.surfaces = [] - for i in range(len(self.geometry.unique_surfaces)): - surface = copy(self.geometry.unique_surfaces[i]) - - if surface is None: - continue + self.material_ptrs.append(material_gpu) - surface.detect_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.detect[:,0], surface.detect[:,1]).astype(np.float32)) - surface.absorb_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.absorb[:,0], surface.absorb[:,1]).astype(np.float32)) - surface.reflect_diffuse_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32)) - surface.reflect_specular_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32)) + self.material_pointer_array = \ + make_gpu_struct(8*len(self.material_ptrs), self.material_ptrs) - self.gpu_funcs.set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) + self.surface_data = [] + self.surface_ptrs = [] - self.surfaces.append(surface) + for i in range(len(geometry.unique_surfaces)): + surface = geometry.unique_surfaces[i] - self.vertices_gpu = ga.to_gpu(to_float3(self.geometry.mesh.vertices)) - - triangles = \ - np.empty(len(self.geometry.mesh.triangles), dtype=ga.vec.uint4) - triangles['x'] = self.geometry.mesh.triangles[:,0] - triangles['y'] = self.geometry.mesh.triangles[:,1] - triangles['z'] = self.geometry.mesh.triangles[:,2] - triangles['w'] = ((self.geometry.material1_index & 0xff) << 24) | ((self.geometry.material2_index & 0xff) << 16) | ((self.geometry.surface_index & 0xff) << 8) - self.triangles_gpu = ga.to_gpu(triangles) - - self.lower_bounds_gpu = ga.to_gpu(to_float3(self.geometry.lower_bounds)) - - self.upper_bounds_gpu = ga.to_gpu(to_float3(self.geometry.upper_bounds)) + if surface is None: + # need something to copy to the surface array struct + # that is the same size as a 64-bit pointer. + # this pointer will never be used by the simulation. + self.surface_ptrs.append(np.uint64(0)) + continue - self.colors_gpu = ga.to_gpu(self.geometry.colors.astype(np.uint32)) - self.node_map_gpu = ga.to_gpu(self.geometry.node_map.astype(np.uint32)) - self.node_map_end_gpu = ga.to_gpu(self.geometry.node_map_end.astype(np.uint32)) - self.solid_id_map_gpu = ga.to_gpu(self.geometry.solid_id.astype(np.uint32)) + detect = interp_material_property(wavelengths, surface.detect) + detect_gpu = ga.to_gpu(detect) + absorb = interp_material_property(wavelengths, surface.absorb) + absorb_gpu = ga.to_gpu(absorb) + reflect_diffuse = interp_material_property(wavelengths, surface.reflect_diffuse) + reflect_diffuse_gpu = ga.to_gpu(reflect_diffuse) + reflect_specular = interp_material_property(wavelengths, surface.reflect_specular) + reflect_specular_gpu = ga.to_gpu(reflect_specular) + + self.surface_data.append(detect_gpu) + self.surface_data.append(absorb_gpu) + self.surface_data.append(reflect_diffuse_gpu) + self.surface_data.append(reflect_specular_gpu) + + surface_gpu = \ + make_gpu_struct(surface_struct_size, + [detect_gpu, absorb_gpu, + reflect_diffuse_gpu, + reflect_specular_gpu, + np.uint32(len(wavelengths)), + np.float32(wavelength_step), + np.float32(wavelengths[0])]) + + self.surface_ptrs.append(surface_gpu) + + self.surface_pointer_array = \ + make_gpu_struct(8*len(self.surface_ptrs), self.surface_ptrs) + + self.vertices = ga.to_gpu(to_float3(geometry.mesh.vertices)) + self.triangles = ga.to_gpu(to_uint3(geometry.mesh.triangles)) + + material_codes = (((geometry.material1_index & 0xff) << 24) | + ((geometry.material2_index & 0xff) << 16) | + ((geometry.surface_index & 0xff) << 8)).astype(np.uint32) + + self.material_codes = ga.to_gpu(material_codes) + + self.lower_bounds = ga.to_gpu(to_float3(geometry.lower_bounds)) + self.upper_bounds = ga.to_gpu(to_float3(geometry.upper_bounds)) + self.colors = ga.to_gpu(geometry.colors.astype(np.uint32)) + self.node_map = ga.to_gpu(geometry.node_map.astype(np.uint32)) + self.node_map_end = ga.to_gpu(geometry.node_map_end.astype(np.uint32)) + self.solid_id_map = ga.to_gpu(geometry.solid_id.astype(np.uint32)) + + self.gpudata = make_gpu_struct(geometry_struct_size, + [self.vertices, self.triangles, + self.material_codes, + self.colors, self.lower_bounds, + self.upper_bounds, self.node_map, + self.node_map_end, + self.material_pointer_array, + self.surface_pointer_array, + np.uint32(geometry.start_node), + np.uint32(geometry.first_node)]) - self.node_map_tex = self.module.get_texref('node_map') - self.node_map_end_tex = self.module.get_texref('node_map_end') + self.geometry = geometry if print_usage: self.print_device_usage() - if activate: - self.activate() - - def activate(self): - self.gpu_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(self.geometry.node_map.size-1), np.uint32(self.geometry.first_node), self.lower_bounds_gpu, self.upper_bounds_gpu, block=(1,1,1), grid=(1,1)) - - self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes) - self.node_map_end_tex.set_address(self.node_map_end_gpu.gpudata, self.node_map_end_gpu.nbytes) - - self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - self.node_map_end_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - def print_device_usage(self): print 'device usage:' print '-'*10 - print format_array('vertices', self.vertices_gpu) - print format_array('triangles', self.triangles_gpu) - print format_array('lower_bounds', self.lower_bounds_gpu) - print format_array('upper_bounds', self.upper_bounds_gpu) - print format_array('node_map', self.node_map_gpu) - print format_array('node_map_end', self.node_map_end_gpu) - print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_map_end_gpu.nbytes)) + print format_array('vertices', self.vertices) + print format_array('triangles', self.triangles) + print format_array('lower_bounds', self.lower_bounds) + print format_array('upper_bounds', self.upper_bounds) + print format_array('node_map', self.node_map) + print format_array('node_map_end', self.node_map_end) + print '%-15s %6s %6s' % ('total', '', format_size(self.vertices.nbytes + self.triangles.nbytes + self.lower_bounds.nbytes + self.upper_bounds.nbytes + self.node_map.nbytes + self.node_map_end.nbytes)) print '-'*10 free, total = cuda.mem_get_info() print '%-15s %6s %6s' % ('device total', '', format_size(total)) @@ -324,21 +456,24 @@ class GPUGeometry(object): print def reset_colors(self): - self.colors_gpu.set_async(self.geometry.colors.astype(np.uint32)) + self.colors.set_async(self.geometry.colors.astype(np.uint32)) - def color_solids(self, solid_hit, colors): + def color_solids(self, solid_hit, colors, nblocks_per_thread=64, + max_blocks=1024): solid_hit_gpu = ga.to_gpu(np.array(solid_hit, dtype=np.bool)) solid_colors_gpu = ga.to_gpu(np.array(colors, dtype=np.uint32)) + module = get_cu_module('mesh.h', options=cuda_options) + color_solids = module.get_function('color_solids') + for first_triangle, triangles_this_round, blocks in \ - chunk_iterator(self.triangles_gpu.size): - self.gpu_funcs.color_solids(np.int32(first_triangle), - np.int32(triangles_this_round), - self.solid_id_map_gpu, - solid_hit_gpu, - solid_colors_gpu, - block=(64,1,1), - grid=(blocks,1)) + chunk_iterator(self.triangles.size, nblocks_per_thread, + max_blocks): + color_solids(np.int32(first_triangle), + np.int32(triangles_this_round), self.solid_id_map, + solid_hit_gpu, solid_colors_gpu, self.gpudata, + block=(nblocks_per_thread,1,1), + grid=(blocks,1)) class GPUDaq(object): def __init__(self, gpu_geometry, max_pmt_id, pmt_rms=1.2e-9): @@ -347,9 +482,10 @@ class GPUDaq(object): self.channel_history_gpu = ga.zeros_like(self.earliest_time_int_gpu) self.channel_q_gpu = ga.zeros_like(self.earliest_time_int_gpu) self.daq_pmt_rms = pmt_rms - self.solid_id_map_gpu = gpu_geometry.solid_id_map_gpu + self.solid_id_map_gpu = gpu_geometry.solid_id_map - self.module = get_cu_module('daq', include_source_directory=False) + self.module = get_cu_module('daq.cu', options=cuda_options, + include_source_directory=False) self.gpu_funcs = GPUFuncs(self.module) def acquire(self, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024): @@ -369,7 +505,8 @@ class GPUDaq(object): class GPUPDF(object): def __init__(self): - self.module = get_cu_module('daq') + self.module = get_cu_module('daq.cu', options=cuda_options, + include_source_directory=False) self.gpu_funcs = GPUFuncs(self.module) def setup_pdf(self, max_pmt_id, tbins, trange, qbins, qrange): @@ -556,10 +693,6 @@ class GPU(object): self.context.set_cache_config(cuda.func_cache.PREFER_L1) - cuda_options = ['--use_fast_math']#, '--ptxas-options=-v'] - - self.module = get_cu_module('kernel', options=cuda_options) - def print_mem_info(self): free, total = cuda.mem_get_info() @@ -72,12 +72,12 @@ def box(dx, dy, dz, center=(0,0,0)): "Return a box with linear dimensions `dx`, `dy`, and `dz`." return linear_extrude([-dx/2.0,dx/2.0,dx/2.0,-dx/2.0],[-dy/2.0,-dy/2.0,dy/2.0,dy/2.0],height=dz,center=center) -def cube(size=1, height=None): +def cube(size=1, height=None, center=(0,0,0)): "Return a cube mesh whose sides have length `size`." if height is None: height = size - return linear_extrude([-size/2.0,size/2.0,size/2.0,-size/2.0],[-size/2.0,-size/2.0,size/2.0,size/2.0], height=size) + return linear_extrude([-size/2.0,size/2.0,size/2.0,-size/2.0],[-size/2.0,-size/2.0,size/2.0,size/2.0], height=size, center=center) def cylinder(radius=1, height=2, radius2=None, nsteps=64): """ @@ -48,10 +48,7 @@ class Simulation(object): detector.build(bits=bvh_bits, use_cache=use_cache) self.gpu = gpu.GPU(cuda_device) - - # geometry is loaded onto gpu by default - self.gpu_geometry = gpu.GPUGeometry(self.gpu, detector) - + self.gpu_geometry = gpu.GPUGeometry(detector) self.gpu_daq = gpu.GPUDaq(self.gpu_geometry, max(self.detector.pmtids)) self.gpu_pdf = gpu.GPUPDF() @@ -77,7 +74,7 @@ class Simulation(object): for ev in iterable: gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu.propagate(self.gpu, gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks, max_steps=max_steps) + gpu_photons.propagate(self.gpu_geometry, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks, max_steps=max_steps) ev.nphotons = len(ev.photons_beg.pos) @@ -114,7 +111,9 @@ class Simulation(object): for ev in iterable: gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu.propagate(self.gpu, gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_photons.propagate(self.gpu_geometry, self.rng_states, + nthreads_per_block=self.nthreads_per_block, + max_blocks=self.max_blocks) gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) self.gpu_pdf.add_hits_to_pdf(gpu_channels) @@ -143,7 +142,9 @@ class Simulation(object): for ev in iterable: gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu.propagate(self.gpu, gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_photons.propagate(self.gpu_geometry, self.rng_states, + nthreads_per_block=self.nthreads_per_block, + max_blocks=self.max_blocks) gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) self.gpu_pdf.accumulate_pdf_eval(gpu_channels) diff --git a/src/__init__.py b/src/__init__.py index f2f5215..e69de29 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,7 +0,0 @@ -from os.path import dirname - -with open(dirname(__file__) + '/kernel.cu') as f: - kernel = f.read() - -with open(dirname(__file__) + '/daq.cu') as f: - daq = f.read() diff --git a/src/alpha.h b/src/alpha.h deleted file mode 100644 index ac75834..0000000 --- a/src/alpha.h +++ /dev/null @@ -1,141 +0,0 @@ -#ifndef __ALPHA_H__ -#define __ALPHA_H__ - -#include "linalg.h" -#include "intersect.h" -#include "mesh.h" -#include "sorting.h" - -#include "stdio.h" - -#define ALPHA_DEPTH 10 - -__device__ int get_color_alpha(const float3 &origin, const float3& direction) -{ - float distance; - - if (!intersect_node(origin, direction, g_start_node, -1.0f)) - return 0; - - unsigned int stack[STACK_SIZE]; - - unsigned int *head = &stack[0]; - unsigned int *node = &stack[1]; - unsigned int *tail = &stack[STACK_SIZE-1]; - *node = g_start_node; - - unsigned int i; - - float distances[ALPHA_DEPTH]; - unsigned int indices[ALPHA_DEPTH]; - unsigned int n=0; - - do - { - unsigned int first_child = tex1Dfetch(node_map, *node); - unsigned int stop = tex1Dfetch(node_map_end, *node); - - while (*node >= g_first_node && stop == first_child+1) - { - *node = first_child; - first_child = tex1Dfetch(node_map, *node); - stop = tex1Dfetch(node_map_end, *node); - } - - if (*node >= g_first_node) - { - for (i=first_child; i < stop; i++) - { - if (intersect_node(origin, direction, i, -1.0f)) - { - *node = i; - node++; - } - } - - node--; - } - else // node is a leaf - { - for (i=first_child; i < stop; i++) - { - uint4 triangle_data = g_triangles[i]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - if (intersect_triangle(origin, direction, v0, v1, v2, distance)) - { - if (n < 1) - { - distances[0] = distance; - indices[0] = i; - } - else - { - unsigned long j = searchsorted(n, distances, distance); - - if (j <= ALPHA_DEPTH-1) - { - insert(ALPHA_DEPTH, distances, j, distance); - insert(ALPHA_DEPTH, indices, j, i); - } - } - - if (n < ALPHA_DEPTH) - n++; - } - - } // triangle loop - - node--; - - } // node is a leaf - - } // while loop - while (node != head); - - if (n < 1) - return 0; - - float scale = 1.0f; - float fr = 0.0f; - float fg = 0.0f; - float fb = 0.0f; - unsigned int index; - for (i=0; i < n; i++) - { - index = indices[i]; - - uint4 triangle_data = g_triangles[index]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - float cos_theta = dot(normalize(cross(v1-v0,v2-v1)),-direction); - - if (cos_theta < 0.0f) - cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction); - - unsigned int r0 = 0xff & (g_colors[index] >> 16); - unsigned int g0 = 0xff & (g_colors[index] >> 8); - unsigned int b0 = 0xff & g_colors[index]; - - float alpha = (255 - (0xff & (g_colors[index] >> 24)))/255.0f; - - fr += r0*scale*cos_theta*alpha; - fg += g0*scale*cos_theta*alpha; - fb += b0*scale*cos_theta*alpha; - - scale *= (1.0f-alpha); - } - unsigned int r = floorf(fr); - unsigned int g = floorf(fg); - unsigned int b = floorf(fb); - - return r << 16 | g << 8 | b; -} - -#endif @@ -1,208 +1,198 @@ // -*-c++-*- #include <curand_kernel.h> -__device__ unsigned int float_to_sortable_int(float f) +__device__ unsigned int +float_to_sortable_int(float f) { - return __float_as_int(f); - //int i = __float_as_int(f); - //unsigned int mask = -(int)(i >> 31) | 0x80000000; - //return i ^ mask; + return __float_as_int(f); + //int i = __float_as_int(f); + //unsigned int mask = -(int)(i >> 31) | 0x80000000; + //return i ^ mask; } -__device__ float sortable_int_to_float(unsigned int i) +__device__ float +sortable_int_to_float(unsigned int i) { - return __int_as_float(i); - //unsigned int mask = ((i >> 31) - 1) | 0x80000000; - //return __int_as_float(i ^ mask); + return __int_as_float(i); + //unsigned int mask = ((i >> 31) - 1) | 0x80000000; + //return __int_as_float(i ^ mask); } extern "C" { -__global__ void reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints) +__global__ void +reset_earliest_time_int(float maxtime, int ntime_ints, unsigned int *time_ints) { - int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id < ntime_ints) { - unsigned int maxtime_int = float_to_sortable_int(maxtime); - time_ints[id] = maxtime_int; - } + int id = threadIdx.x + blockDim.x * blockIdx.x; + if (id < ntime_ints) { + unsigned int maxtime_int = float_to_sortable_int(maxtime); + time_ints[id] = maxtime_int; + } } -__global__ void run_daq(curandState *s, unsigned int detection_state, - float time_rms, - int first_photon, - int nphotons, float *photon_times, - unsigned int *photon_histories, - int *last_hit_triangles, int *solid_map, - int nsolids, unsigned int *earliest_time_int, - unsigned int *channel_q, - unsigned int *channel_histories) +__global__ void +run_daq(curandState *s, unsigned int detection_state, float time_rms, + int first_photon, int nphotons, float *photon_times, + unsigned int *photon_histories, int *last_hit_triangles, + int *solid_map, int nsolids, unsigned int *earliest_time_int, + unsigned int *channel_q, unsigned int *channel_histories) { - int id = threadIdx.x + blockDim.x * blockIdx.x; + int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id < nphotons) - { - curandState rng = s[id]; - int photon_id = id + first_photon; - int triangle_id = last_hit_triangles[photon_id]; + if (id < nphotons) { + curandState rng = s[id]; + int photon_id = id + first_photon; + int triangle_id = last_hit_triangles[photon_id]; - if (triangle_id > -1) - { - int solid_id = solid_map[triangle_id]; - unsigned int history = photon_histories[photon_id]; - - if (solid_id < nsolids && (history & detection_state)) - { - float time = photon_times[photon_id] + curand_normal(&rng) * time_rms; - unsigned int time_int = float_to_sortable_int(time); - - atomicMin(earliest_time_int + solid_id, time_int); - atomicAdd(channel_q + solid_id, 1); - atomicOr(channel_histories + solid_id, history); - } - - } + if (triangle_id > -1) { + int solid_id = solid_map[triangle_id]; + unsigned int history = photon_histories[photon_id]; - s[id] = rng; + if (solid_id < nsolids && (history & detection_state)) { + float time = photon_times[photon_id] + curand_normal(&rng) * time_rms; + unsigned int time_int = float_to_sortable_int(time); + + atomicMin(earliest_time_int + solid_id, time_int); + atomicAdd(channel_q + solid_id, 1); + atomicOr(channel_histories + solid_id, history); + } } + s[id] = rng; + + } + } -__global__ void convert_sortable_int_to_float(int n, - unsigned int *sortable_ints, - float *float_output) +__global__ void +convert_sortable_int_to_float(int n, unsigned int *sortable_ints, + float *float_output) { - int id = threadIdx.x + blockDim.x * blockIdx.x; + int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id < n) - float_output[id] = sortable_int_to_float(sortable_ints[id]); + if (id < n) + float_output[id] = sortable_int_to_float(sortable_ints[id]); } -__global__ void bin_hits(int nchannels, - unsigned int *channel_q, float *channel_time, - unsigned int *hitcount, - int tbins, float tmin, float tmax, - int qbins, float qmin, float qmax, - unsigned int *pdf) +__global__ void +bin_hits(int nchannels, unsigned int *channel_q, float *channel_time, + unsigned int *hitcount, int tbins, float tmin, float tmax, int qbins, + float qmin, float qmax, unsigned int *pdf) { - int id = threadIdx.x + blockDim.x * blockIdx.x; + int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id >= nchannels) - return; + if (id >= nchannels) + return; - unsigned int q = channel_q[id]; - float t = channel_time[id]; + unsigned int q = channel_q[id]; + float t = channel_time[id]; - if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) { - hitcount[id] += 1; + if (t < 1e8 && t >= tmin && t < tmax && q >= qmin && q < qmax) { + hitcount[id] += 1; - int tbin = (t - tmin) / (tmax - tmin) * tbins; - int qbin = (q - qmin) / (qmax - qmin) * qbins; + int tbin = (t - tmin) / (tmax - tmin) * tbins; + int qbin = (q - qmin) / (qmax - qmin) * qbins; - // row major order (channel, t, q) - int bin = id * (tbins * qbins) + tbin * qbins + qbin; - pdf[bin] += 1; - } + // row major order (channel, t, q) + int bin = id * (tbins * qbins) + tbin * qbins + qbin; + pdf[bin] += 1; + } } -__global__ void accumulate_pdf_eval(int time_only, - int nchannels, - unsigned int *event_hit, - float *event_time, - float *event_charge, - float *mc_time, - unsigned int *mc_charge, // quirk of DAQ! - unsigned int *hitcount, - unsigned int *bincount, - float min_twidth, float tmin, float tmax, - float min_qwidth, float qmin, float qmax, - float *nearest_mc, - int min_bin_content) +__global__ void +accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit, + float *event_time, float *event_charge, float *mc_time, + unsigned int *mc_charge, // quirk of DAQ! + unsigned int *hitcount, unsigned int *bincount, + float min_twidth, float tmin, float tmax, + float min_qwidth, float qmin, float qmax, + float *nearest_mc, int min_bin_content) { - int id = threadIdx.x + blockDim.x * blockIdx.x; + int id = threadIdx.x + blockDim.x * blockIdx.x; - if (id >= nchannels) - return; + if (id >= nchannels) + return; - // Was this channel hit in the Monte Carlo? - float channel_mc_time = mc_time[id]; - if (channel_mc_time >= 1e8) - return; // Nothing else to do + // Was this channel hit in the Monte Carlo? + float channel_mc_time = mc_time[id]; + if (channel_mc_time >= 1e8) + return; // Nothing else to do - // Is this channel inside the range of the PDF? - float distance; - int channel_bincount = bincount[id]; - if (time_only) { - if (channel_mc_time < tmin || channel_mc_time > tmax) - return; // Nothing else to do + // Is this channel inside the range of the PDF? + float distance; + int channel_bincount = bincount[id]; + if (time_only) { + if (channel_mc_time < tmin || channel_mc_time > tmax) + return; // Nothing else to do - // This MC information is contained in the PDF - hitcount[id] += 1; + // This MC information is contained in the PDF + hitcount[id] += 1; - // Was this channel hit in the event-of-interest? - int channel_event_hit = event_hit[id]; - if (!channel_event_hit) - return; // No need to update PDF value for unhit channel + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel - // Are we inside the minimum size bin? - float channel_event_time = event_time[id]; - distance = fabsf(channel_mc_time - channel_event_time); - if (distance < min_twidth/2.0) - bincount[id] = channel_bincount + 1; - - } else { // time and charge PDF - float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer + // Are we inside the minimum size bin? + float channel_event_time = event_time[id]; + distance = fabsf(channel_mc_time - channel_event_time); + if (distance < min_twidth/2.0) + bincount[id] = channel_bincount + 1; + + } + else { // time and charge PDF + float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer - if (channel_mc_time < tmin || channel_mc_time > tmax || - channel_mc_charge < qmin || channel_mc_charge > qmax) - return; // Nothing else to do + if (channel_mc_time < tmin || channel_mc_time > tmax || + channel_mc_charge < qmin || channel_mc_charge > qmax) + return; // Nothing else to do - // This MC information is contained in the PDF - hitcount[id] += 1; + // This MC information is contained in the PDF + hitcount[id] += 1; - // Was this channel hit in the event-of-interest? - int channel_event_hit = event_hit[id]; - if (!channel_event_hit) - return; // No need to update PDF value for unhit channel + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel - // Are we inside the minimum size bin? - float channel_event_time = event_time[id]; - float channel_event_charge = event_charge[id]; - float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0; - float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0; - distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance); - - if (distance < 1.0f) - bincount[id] = channel_bincount + 1; - } - - // Do we need to keep updating the nearest_mc list? - if (channel_bincount + 1 >= min_bin_content) - return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value - - // insertion sort the distance into the array of nearest MC points - int offset = min_bin_content * id; - int entry = min_bin_content - 1; + // Are we inside the minimum size bin? + float channel_event_time = event_time[id]; + float channel_event_charge = event_charge[id]; + float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0; + float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0; + distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance); + + if (distance < 1.0f) + bincount[id] = channel_bincount + 1; + } + + // Do we need to keep updating the nearest_mc list? + if (channel_bincount + 1 >= min_bin_content) + return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value + + // insertion sort the distance into the array of nearest MC points + int offset = min_bin_content * id; + int entry = min_bin_content - 1; - // If last entry less than new entry, nothing to update - if (distance > nearest_mc[offset + entry]) - return; - - // Find where to insert the new entry while sliding the rest - // to the right + // If last entry less than new entry, nothing to update + if (distance > nearest_mc[offset + entry]) + return; + + // Find where to insert the new entry while sliding the rest + // to the right + entry--; + while (entry >= 0) { + if (nearest_mc[offset+entry] >= distance) + nearest_mc[offset+entry+1] = nearest_mc[offset+entry]; + else + break; entry--; - while (entry >= 0) { - if (nearest_mc[offset+entry] >= distance) - nearest_mc[offset+entry+1] = nearest_mc[offset+entry]; - else - break; - entry--; - } + } - nearest_mc[offset+entry+1] = distance; + nearest_mc[offset+entry+1] = distance; } - } // extern "C" diff --git a/src/geometry.h b/src/geometry.h new file mode 100644 index 0000000..2b5eacb --- /dev/null +++ b/src/geometry.h @@ -0,0 +1,74 @@ +#ifndef __GEOMETRY_H__ +#define __GEOMETRY_H__ + +struct Material +{ + float *refractive_index; + float *absorption_length; + float *scattering_length; + unsigned int n; + float step; + float wavelength0; +}; + +struct Surface +{ + float *detect; + float *absorb; + float *reflect_diffuse; + float *reflect_specular; + unsigned int n; + float step; + float wavelength0; +}; + +struct Triangle +{ + float3 v0, v1, v2; +}; + +struct Geometry +{ + float3 *vertices; + uint3 *triangles; + unsigned int *material_codes; + unsigned int *colors; + float3 *lower_bounds; + float3 *upper_bounds; + unsigned int *node_map; + unsigned int *node_map_end; + Material **materials; + Surface **surfaces; + unsigned int start_node; + unsigned int first_node; +}; + +__device__ Triangle +get_triangle(Geometry *geometry, const unsigned int &i) +{ + uint3 triangle_data = geometry->triangles[i]; + + Triangle triangle; + triangle.v0 = geometry->vertices[triangle_data.x]; + triangle.v1 = geometry->vertices[triangle_data.y]; + triangle.v2 = geometry->vertices[triangle_data.z]; + + return triangle; +} + +template <class T> +__device__ float +interp_property(T *m, const float &x, const float *fp) +{ + if (x < m->wavelength0) + return fp[0]; + + if (x > (m->wavelength0 + (m->n-1)*m->step)) + return fp[m->n-1]; + + int jl = (x-m->wavelength0)/m->step; + + return fp[jl] + (x-(m->wavelength0 + jl*m->step))*(fp[jl+1]-fp[jl])/m->step; +} + +#endif diff --git a/src/hybrid_render.cu b/src/hybrid_render.cu new file mode 100644 index 0000000..29edefa --- /dev/null +++ b/src/hybrid_render.cu @@ -0,0 +1,202 @@ +//-*-c-*- +#include <math_constants.h> +#include <curand_kernel.h> + +#include "linalg.h" +#include "matrix.h" +#include "rotate.h" +#include "mesh.h" +#include "geometry.h" +#include "photon.h" + +__device__ void +fAtomicAdd(float *addr, float data) +{ + while (data) + data = atomicExch(addr, data+atomicExch(addr, 0.0f)); +} + +__device__ void +to_diffuse(Photon &p, State &s, Geometry *g, curandState &rng, int max_steps) +{ + int steps = 0; + while (steps < max_steps) { + steps++; + + int command; + + fill_state(s, p, g); + + if (p.last_hit_triangle == -1) + break; + + command = propagate_to_boundary(p, s, rng); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + + if (s.surface_index != -1) { + command = propagate_at_surface(p, s, rng, g); + + if (p.history & REFLECT_DIFFUSE) + break; + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + } + + propagate_at_boundary(p, s, rng); + + } // while (steps < max_steps) + +} // to_diffuse + +extern "C" +{ + +__global__ void +update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, + curandState *rng_states, float wavelength, float3 xyz, + float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps, + Geometry *g) +{ + int kernel_id = blockIdx.x*blockDim.x + threadIdx.x; + int id = kernel_id + offset; + + if (kernel_id >= nthreads || id >= total_threads) + return; + + curandState rng = rng_states[kernel_id]; + + Triangle t = get_triangle(g, id); + + float a = curand_uniform(&rng); + float b = uniform(&rng, 0.0f, (1.0f - a)); + float c = 1.0f - a - b; + + float3 direction = a*t.v0 + b*t.v1 + c*t.v2 - position; + direction /= norm(direction); + + float distance; + int triangle_index = intersect_mesh(position, direction, g, distance); + + if (triangle_index != id) { + rng_states[kernel_id] = rng; + return; + } + + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + float3 surface_normal = normalize(cross(v01,v12)); + + float cos_theta = dot(surface_normal,-direction); + + if (cos_theta < 0.0f) + cos_theta = dot(-surface_normal,-direction); + + Photon p; + p.position = position; + p.direction = direction; + p.wavelength = wavelength; + p.polarization = uniform_sphere(&rng); + p.last_hit_triangle = -1; + p.time = 0; + p.history = 0; + + State s; + to_diffuse(p, s, g, rng, max_steps); + + if (p.history & REFLECT_DIFFUSE) { + if (s.inside_to_outside) { + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x); + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y); + fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z); + } + else { + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x); + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y); + fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z); + } + } + + rng_states[kernel_id] = rng; + +} // update_xyz_lookup + +__global__ void +update_xyz_image(int nthreads, curandState *rng_states, float3 *positions, + float3 *directions, float wavelength, float3 xyz, + float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image, + int nlookup_calls, int max_steps, Geometry *g) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curandState rng = rng_states[id]; + + Photon p; + p.position = positions[id]; + p.direction = directions[id]; + p.direction /= norm(p.direction); + p.wavelength = wavelength; + p.polarization = uniform_sphere(&rng); + p.last_hit_triangle = -1; + p.time = 0; + p.history = 0; + + State s; + to_diffuse(p, s, g, rng, max_steps); + + if (p.history & REFLECT_DIFFUSE) { + if (s.inside_to_outside) + image[id] += xyz*xyz_lookup1[p.last_hit_triangle]/nlookup_calls; + else + image[id] += xyz*xyz_lookup2[p.last_hit_triangle]/nlookup_calls; + } + + rng_states[id] = rng; + +} // update_xyz_image + +__global__ void +process_image(int nthreads, float3 *image, unsigned int *pixels, int nimages) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + float3 rgb = image[id]/nimages; + + if (rgb.x < 0.0f) + rgb.x = 0.0f; + if (rgb.y < 0.0f) + rgb.y = 0.0f; + if (rgb.z < 0.0f) + rgb.z = 0.0f; + + if (rgb.x > 1.0f) + rgb.x = 1.0f; + if (rgb.y > 1.0f) + rgb.y = 1.0f; + if (rgb.z > 1.0f) + rgb.z = 1.0f; + + unsigned int r = floorf(rgb.x*255.0f); + unsigned int g = floorf(rgb.y*255.0f); + unsigned int b = floorf(rgb.z*255.0f); + + pixels[id] = 255 << 24 | r << 16 | g << 8 | b; + +} // process_image + +} // extern "c" diff --git a/src/intersect.h b/src/intersect.h index 26e1d7e..978bde8 100644 --- a/src/intersect.h +++ b/src/intersect.h @@ -3,30 +3,32 @@ #ifndef __INTERSECT_H__ #define __INTERSECT_H__ -#include <math_constants.h> - #include "linalg.h" #include "matrix.h" -#include "rotate.h" +#include "geometry.h" #define EPSILON 0.0f -/* Test the intersection between a ray starting from `origin` traveling in the - direction `direction` and a triangle defined by the vertices `v0`, `v1`, and - `v2`. If the ray intersects the triangle, set `distance` to the distance - between `origin` and the intersection and return true, else return false. - - `direction` must be normalized. */ -__device__ bool intersect_triangle(const float3 &origin, const float3 &direction, const float3 &v0, const float3 &v1, const float3 &v2, float &distance) +/* Tests the intersection between a ray and a triangle. + If the ray intersects the triangle, set `distance` to the distance from + `origin` to the intersection and return true, else return false. + `direction` must be normalized to one. */ +__device__ bool +intersect_triangle(const float3 &origin, const float3 &direction, + const Triangle &triangle, float &distance) { - Matrix m = make_matrix(v1-v0, v2-v0, -direction); + float3 m1 = triangle.v1-triangle.v0; + float3 m2 = triangle.v2-triangle.v0; + float3 m3 = -direction; + + Matrix m = make_matrix(m1, m2, m3); float determinant = det(m); if (determinant == 0.0f) return false; - float3 b = origin-v0; + float3 b = origin-triangle.v0; float u1 = ((m.a11*m.a22 - m.a12*m.a21)*b.x + (m.a02*m.a21 - m.a01*m.a22)*b.y + @@ -54,38 +56,17 @@ __device__ bool intersect_triangle(const float3 &origin, const float3 &direction return true; } -/* Return the 32 bit color associated with the intersection between a ray - starting from `origin` traveling in the direction `direction` and the - plane defined by the points `v0`, `v1`, and `v2` using the cosine of the - angle between the ray and the plane normal to determine the brightness. - - `direction` must be normalized. */ -__device__ unsigned int get_color(const float3 &direction, const float3 &v0, const float3& v1, const float3 &v2, unsigned int base_color=0xFFFFFF) -{ - float scale = dot(normalize(cross(v1-v0,v2-v1)),-direction); - - if (scale < 0.0f) - { - base_color = 0xff0000; - scale = dot(-normalize(cross(v1-v0,v2-v1)),-direction); - } - - unsigned int r = 0xFF & (base_color >> 16); - unsigned int g = 0xFF & (base_color >> 8); - unsigned int b = 0xFF & base_color; - - r = floorf(r*scale); - g = floorf(g*scale); - b = floorf(b*scale); - - return r << 16 | g << 8 | b; -} - -/* Test the intersection between a ray starting from `origin` traveling in the - direction `direction` and the axis-aligned box defined by the opposite - vertices `lower_bound` and `upper_bound`. If the ray intersects the box - return True, else return False. */ -__device__ bool intersect_box(const float3 &origin, const float3 &direction, const float3 &lower_bound, const float3 &upper_bound, float& distance_to_box) +/* Tests the intersection between a ray and an axis-aligned box defined by + an upper and lower bound. If the ray intersects the box, set + `distance_to_box` to the distance from `origin` to the intersection and + return true, else return false. `direction` must be normalized to one. + + Source: "An Efficient and Robust Ray-Box Intersection Algorithm." + by Williams, et. al. */ +__device__ bool +intersect_box(const float3 &origin, const float3 &direction, + const float3 &lower_bound, const float3 &upper_bound, + float& distance_to_box) { float kmin, kmax, kymin, kymax, kzmin, kzmax; diff --git a/src/kernel.cu b/src/kernel.cu deleted file mode 100644 index 4418305..0000000 --- a/src/kernel.cu +++ /dev/null @@ -1,389 +0,0 @@ -//-*-c-*- -#include <math_constants.h> -#include <curand_kernel.h> - -#include "linalg.h" -#include "matrix.h" -#include "rotate.h" -#include "mesh.h" -#include "photon.h" -#include "alpha.h" - -__device__ void fAtomicAdd(float *addr, float data) -{ - while (data) - { - data = atomicExch(addr, data+atomicExch(addr, 0.0f)); - } -} - -__device__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max_steps) -{ - int steps = 0; - while (steps < max_steps) - { - steps++; - - int command; - - fill_state(s, p); - - if (p.last_hit_triangle == -1) - break; - - command = propagate_to_boundary(p, s, rng); - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - - if (s.surface_index != -1) - { - command = propagate_at_surface(p, s, rng); - - if (p.history & REFLECT_DIFFUSE) - break; - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - } - - propagate_at_boundary(p, s, rng); - - } // while (steps < max_steps) - -} // to_diffuse - -extern "C" -{ - -__global__ void update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, curandState *rng_states, float wavelength, float3 xyz, float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps) -{ - int kernel_id = blockIdx.x*blockDim.x + threadIdx.x; - int id = kernel_id + offset; - - if (kernel_id >= nthreads || id >= total_threads) - return; - - curandState rng = rng_states[kernel_id]; - - uint4 triangle_data = g_triangles[id]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - float a = curand_uniform(&rng); - float b = uniform(&rng, 0.0f, (1.0f - a)); - float c = 1.0f - a - b; - - float3 direction = a*v0 + b*v1 + c*v2 - position; - direction /= norm(direction); - - float distance; - int triangle_index = intersect_mesh(position, direction, distance); - - if (triangle_index != id) - { - rng_states[kernel_id] = rng; - return; - } - - float cos_theta = dot(normalize(cross(v1-v0,v2-v1)),-direction); - - if (cos_theta < 0.0f) - cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction); - - Photon p; - p.position = position; - p.direction = direction; - p.wavelength = wavelength; - p.polarization = uniform_sphere(&rng); - p.last_hit_triangle = -1; - p.time = 0; - p.history = 0; - - State s; - to_diffuse(p, s, rng, max_steps); - - if (p.history & REFLECT_DIFFUSE) - { - if (s.inside_to_outside) - { - fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].x, cos_theta*xyz.x); - fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].y, cos_theta*xyz.y); - fAtomicAdd(&xyz_lookup1[p.last_hit_triangle].z, cos_theta*xyz.z); - } - else - { - fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].x, cos_theta*xyz.x); - fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].y, cos_theta*xyz.y); - fAtomicAdd(&xyz_lookup2[p.last_hit_triangle].z, cos_theta*xyz.z); - } - } - - rng_states[kernel_id] = rng; - -} // update_xyz_lookup - -__global__ void update_xyz_image(int nthreads, curandState *rng_states, float3 *positions, float3 *directions, float wavelength, float3 xyz, float3 *xyz_lookup1, float3 *xyz_lookup2, float3 *image, int nlookup_calls, int max_steps) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - curandState rng = rng_states[id]; - - Photon p; - p.position = positions[id]; - p.direction = directions[id]; - p.direction /= norm(p.direction); - p.wavelength = wavelength; - p.polarization = uniform_sphere(&rng); - p.last_hit_triangle = -1; - p.time = 0; - p.history = 0; - - State s; - to_diffuse(p, s, rng, max_steps); - - if (p.history & REFLECT_DIFFUSE) - { - if (s.inside_to_outside) - { - image[id].x += xyz.x*xyz_lookup1[p.last_hit_triangle].x/nlookup_calls; - image[id].y += xyz.y*xyz_lookup1[p.last_hit_triangle].y/nlookup_calls; - image[id].z += xyz.z*xyz_lookup1[p.last_hit_triangle].z/nlookup_calls; - } - else - { - image[id].x += xyz.x*xyz_lookup2[p.last_hit_triangle].x/nlookup_calls; - image[id].y += xyz.y*xyz_lookup2[p.last_hit_triangle].y/nlookup_calls; - image[id].z += xyz.z*xyz_lookup2[p.last_hit_triangle].z/nlookup_calls; - } - } - - rng_states[id] = rng; - -} // update_xyz_image - -__global__ void process_image(int nthreads, float3 *image, int *pixels, int nimages) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - float3 rgb = image[id]/nimages; - - if (rgb.x < 0.0f) - rgb.x = 0.0f; - if (rgb.y < 0.0f) - rgb.y = 0.0f; - if (rgb.z < 0.0f) - rgb.z = 0.0f; - - if (rgb.x > 1.0f) - rgb.x = 1.0f; - if (rgb.y > 1.0f) - rgb.y = 1.0f; - if (rgb.z > 1.0f) - rgb.z = 1.0f; - - unsigned int r = floorf(rgb.x*255.0f); - unsigned int g = floorf(rgb.y*255.0f); - unsigned int b = floorf(rgb.z*255.0f); - - pixels[id] = r << 16 | g << 8 | b; - -} // process_image - -__global__ void distance_to_mesh(int nthreads, float3 *positions, float3 *directions, float *distances) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - float3 position = positions[id]; - float3 direction = directions[id]; - direction /= norm(direction); - - float distance; - - int triangle_index = intersect_mesh(position, direction, distance); - - if (triangle_index == -1) - distances[id] = 1e9; - else - distances[id] = distance; - -} // distance_to_mesh - -__global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - float3 position = positions[id]; - float3 direction = directions[id]; - direction /= norm(direction); - - float distance; - - int triangle_index = intersect_mesh(position, direction, distance); - - if (triangle_index == -1) - { - pixels[id] = 0; - } - else - { - uint4 triangle_data = g_triangles[triangle_index]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - pixels[id] = get_color(direction, v0, v1, v2, g_colors[triangle_index]); - } - -} // ray_trace - -/* Trace the rays starting at `positions` traveling in the direction - `directions` to their intersection with the global mesh. If the ray - intersects the mesh set the pixel associated with the ray to a 32 bit - color whose brightness is determined by the cosine of the angle between - the ray and the normal of the triangle it intersected, else set the pixel - to 0. */ -__global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directions, int *pixels) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - float3 position = positions[id]; - float3 direction = directions[id]; - direction /= norm(direction); - - bool hit; - float distance; - - pixels[id] = get_color_alpha(position, direction); - -} // ray_trace - -__global__ void swap(int *values, int nswap, int *offset_a, int *offset_b) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id < nswap) { - int a = offset_a[id]; - int b = offset_b[id]; - - int tmp = values[a]; - values[a] = values[b]; - values[b] = tmp; - } -} - -__global__ void propagate(int first_photon, int nthreads, - unsigned int *input_queue, - unsigned int *output_queue, - curandState *rng_states, - float3 *positions, float3 *directions, float *wavelengths, float3 *polarizations, float *times, - unsigned int *histories, int *last_hit_triangles, int max_steps) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - curandState rng = rng_states[id]; - - int photon_id = input_queue[first_photon + id]; - - Photon p; - p.position = positions[photon_id]; - p.direction = directions[photon_id]; - p.direction /= norm(p.direction); - p.polarization = polarizations[photon_id]; - p.polarization /= norm(p.polarization); - p.wavelength = wavelengths[photon_id]; - p.time = times[photon_id]; - p.last_hit_triangle = last_hit_triangles[photon_id]; - p.history = histories[photon_id]; - - if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) - return; - - State s; - - int steps = 0; - while (steps < max_steps) - { - steps++; - - int command; - - // check for NaN and fail - if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z)) - { - p.history |= NO_HIT | NAN_ABORT; - break; - } - - fill_state(s, p); - - if (p.last_hit_triangle == -1) - break; - - command = propagate_to_boundary(p, s, rng); - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - - if (s.surface_index != -1) - { - command = propagate_at_surface(p, s, rng); - - if (command == BREAK) - break; - - if (command == CONTINUE) - continue; - } - - propagate_at_boundary(p, s, rng); - - } // while (steps < max_steps) - - rng_states[id] = rng; - positions[photon_id] = p.position; - directions[photon_id] = p.direction; - polarizations[photon_id] = p.polarization; - wavelengths[photon_id] = p.wavelength; - times[photon_id] = p.time; - histories[photon_id] = p.history; - last_hit_triangles[photon_id] = p.last_hit_triangle; - - // Not done, put photon in output queue - if ( (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) { - int out_idx = atomicAdd(output_queue, 1); - output_queue[out_idx] = photon_id; - } -} // propagate - -} // extern "c" diff --git a/src/linalg.h b/src/linalg.h index fe6b1b2..35b2423 100644 --- a/src/linalg.h +++ b/src/linalg.h @@ -1,121 +1,170 @@ #ifndef __LINALG_H__ #define __LINALG_H__ -__device__ __host__ float3 operator- (const float3 &a) +__device__ float3 +operator- (const float3 &a) { - return make_float3(-a.x, -a.y, -a.z); + return make_float3(-a.x, -a.y, -a.z); } -__device__ __host__ float3 operator+ (const float3 &a, const float3 &b) +__device__ float3 +operator* (const float3 &a, const float3 &b) { - return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); + return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); } -__device__ __host__ void operator+= (float3 &a, const float3 &b) +__device__ float3 +operator/ (const float3 &a, const float3 &b) { - a.x += b.x; - a.y += b.y; - a.z += b.z; + return make_float3(a.x/b.x, a.y/b.y, a.z/b.z); } -__device__ __host__ float3 operator- (const float3 &a, const float3 &b) +__device__ void +operator*= (float3 &a, const float3 &b) { - return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; } -__device__ __host__ void operator-= (float3 &a, const float3 &b) +__device__ void +operator/= (float3 &a, const float3 &b) { - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; + a.x /= b.x; + a.y /= b.y; + a.z /= b.z; } -__device__ __host__ float3 operator+ (const float3 &a, const float &c) +__device__ float3 +operator+ (const float3 &a, const float3 &b) { - return make_float3(a.x+c, a.y+c, a.z+c); + return make_float3(a.x+b.x, a.y+b.y, a.z+b.z); } -__device__ __host__ void operator+= (float3 &a, const float &c) +__device__ void +operator+= (float3 &a, const float3 &b) { - a.x += c; - a.y += c; - a.z += c; + a.x += b.x; + a.y += b.y; + a.z += b.z; } -__device__ __host__ float3 operator+ (const float &c, const float3 &a) +__device__ float3 +operator- (const float3 &a, const float3 &b) { - return make_float3(c+a.x, c+a.y, c+a.z); + return make_float3(a.x-b.x, a.y-b.y, a.z-b.z); } -__device__ __host__ float3 operator- (const float3 &a, const float &c) +__device__ void +operator-= (float3 &a, const float3 &b) { - return make_float3(a.x-c, a.y-c, a.z-c); + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; } -__device__ __host__ void operator-= (float3 &a, const float &c) +__device__ float3 +operator+ (const float3 &a, const float &c) { - a.x -= c; - a.y -= c; - a.z -= c; + return make_float3(a.x+c, a.y+c, a.z+c); } -__device__ __host__ float3 operator- (const float &c, const float3& a) +__device__ void +operator+= (float3 &a, const float &c) { - return make_float3(c-a.x, c-a.y, c-a.z); + a.x += c; + a.y += c; + a.z += c; } -__device__ __host__ float3 operator* (const float3 &a, const float &c) +__device__ float3 +operator+ (const float &c, const float3 &a) { - return make_float3(a.x*c, a.y*c, a.z*c); + return make_float3(c+a.x, c+a.y, c+a.z); } -__device__ __host__ void operator*= (float3 &a, const float &c) +__device__ float3 +operator- (const float3 &a, const float &c) { - a.x *= c; - a.y *= c; - a.z *= c; + return make_float3(a.x-c, a.y-c, a.z-c); } -__device__ __host__ float3 operator* (const float &c, const float3& a) +__device__ void +operator-= (float3 &a, const float &c) { - return make_float3(c*a.x, c*a.y, c*a.z); + a.x -= c; + a.y -= c; + a.z -= c; } -__device__ __host__ float3 operator/ (const float3 &a, const float &c) +__device__ float3 +operator- (const float &c, const float3& a) { - return make_float3(a.x/c, a.y/c, a.z/c); + return make_float3(c-a.x, c-a.y, c-a.z); } -__device__ __host__ void operator/= (float3 &a, const float &c) +__device__ float3 +operator* (const float3 &a, const float &c) { - a.x /= c; - a.y /= c; - a.z /= c; + return make_float3(a.x*c, a.y*c, a.z*c); } -__device__ __host__ float3 operator/ (const float &c, const float3 &a) +__device__ void +operator*= (float3 &a, const float &c) { - return make_float3(c/a.x, c/a.y, c/a.z); + a.x *= c; + a.y *= c; + a.z *= c; } -__device__ __host__ float dot(const float3 &a, const float3 &b) +__device__ float3 +operator* (const float &c, const float3& a) { - return a.x*b.x + a.y*b.y + a.z*b.z; + return make_float3(c*a.x, c*a.y, c*a.z); } -__device__ __host__ float3 cross(const float3 &a, const float3 &b) +__device__ float3 +operator/ (const float3 &a, const float &c) { - return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); + return make_float3(a.x/c, a.y/c, a.z/c); } -__device__ __host__ float norm(const float3 &a) +__device__ void +operator/= (float3 &a, const float &c) { - return sqrtf(dot(a,a)); + a.x /= c; + a.y /= c; + a.z /= c; } -__device__ __host__ float3 normalize(const float3 &a) +__device__ float3 +operator/ (const float &c, const float3 &a) { - return a/norm(a); + return make_float3(c/a.x, c/a.y, c/a.z); +} + +__device__ float +dot(const float3 &a, const float3 &b) +{ + return a.x*b.x + a.y*b.y + a.z*b.z; +} + +__device__ float3 +cross(const float3 &a, const float3 &b) +{ + return make_float3(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); +} + +__device__ float +norm(const float3 &a) +{ + return sqrtf(dot(a,a)); +} + +__device__ float3 +normalize(const float3 &a) +{ + return a/norm(a); } #endif diff --git a/src/materials.h b/src/materials.h deleted file mode 100644 index 2355d24..0000000 --- a/src/materials.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef __MATERIALS_H__ -#define __MATERIALS_H__ - -__device__ float min_wavelength; -__device__ float max_wavelength; -__device__ float wavelength_step; -__device__ unsigned int wavelength_size; - -struct Material -{ - float *refractive_index; - float *absorption_length; - float *scattering_length; -}; - -struct Surface -{ - float *detect; - float *absorb; - float *reflect_diffuse; - float *reflect_specular; -}; - -__device__ Material materials[20]; -__device__ Surface surfaces[20]; - -__device__ float interp_property(const float &x, const float *fp) -{ - if (x < min_wavelength) - return fp[0]; - - if (x > max_wavelength) - return fp[wavelength_size-1]; - - int jl = (x-min_wavelength)/wavelength_step; - - return fp[jl] + (x-(min_wavelength + jl*wavelength_step))*(fp[jl+1]-fp[jl])/wavelength_step; -} - -extern "C" -{ - -__global__ void set_wavelength_range(float _min_wavelength, float _max_wavelength, float _wavelength_step, unsigned int _wavelength_size) -{ - min_wavelength = _min_wavelength; - max_wavelength = _max_wavelength; - wavelength_step = _wavelength_step; - wavelength_size = _wavelength_size; -} - -__global__ void set_material(int material_index, float *refractive_index, float *absorption_length, float *scattering_length) -{ - materials[material_index].refractive_index = refractive_index; - materials[material_index].absorption_length = absorption_length; - materials[material_index].scattering_length = scattering_length; -} - -__global__ void set_surface(int surface_index, float *detect, float *absorb, float *reflect_diffuse, float *reflect_specular) -{ - surfaces[surface_index].detect = detect; - surfaces[surface_index].absorb = absorb; - surfaces[surface_index].reflect_diffuse = reflect_diffuse; - surfaces[surface_index].reflect_specular = reflect_specular; -} - -} // extern "c" - -#endif diff --git a/src/matrix.h b/src/matrix.h index a3e480b..0a66e58 100644 --- a/src/matrix.h +++ b/src/matrix.h @@ -3,221 +3,247 @@ struct Matrix { - float a00, a01, a02, a10, a11, a12, a20, a21, a22; + float a00, a01, a02, a10, a11, a12, a20, a21, a22; }; -__device__ __host__ Matrix make_matrix(float a00, float a01, float a02, - float a10, float a11, float a12, - float a20, float a21, float a22) +__device__ Matrix +make_matrix(float a00, float a01, float a02, + float a10, float a11, float a12, + float a20, float a21, float a22) { - Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22}; - return m; + Matrix m = {a00, a01, a02, a10, a11, a12, a20, a21, a22}; + return m; } -__device__ __host__ Matrix make_matrix(const float3 &u1, const float3 &u2, const float3 &u3) +__device__ Matrix +make_matrix(const float3 &u1, const float3 &u2, const float3 &u3) { - Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z}; - return m; + Matrix m = {u1.x, u2.x, u3.x, u1.y, u2.y, u3.y, u1.z, u2.z, u3.z}; + return m; } -__device__ __host__ Matrix operator- (const Matrix &m) +__device__ Matrix +operator- (const Matrix &m) { - return make_matrix(-m.a00, -m.a01, -m.a02, - -m.a10, -m.a11, -m.a12, - -m.a20, -m.a21, -m.a22); + return make_matrix(-m.a00, -m.a01, -m.a02, + -m.a10, -m.a11, -m.a12, + -m.a20, -m.a21, -m.a22); } -__device__ __host__ float3 operator* (const Matrix &m, const float3 &a) +__device__ float3 +operator* (const Matrix &m, const float3 &a) { - return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z, - m.a10*a.x + m.a11*a.y + m.a12*a.z, - m.a20*a.x + m.a21*a.y + m.a22*a.z); + return make_float3(m.a00*a.x + m.a01*a.y + m.a02*a.z, + m.a10*a.x + m.a11*a.y + m.a12*a.z, + m.a20*a.x + m.a21*a.y + m.a22*a.z); } -__device__ __host__ Matrix operator+ (const Matrix &m, const Matrix &n) +__device__ Matrix +operator+ (const Matrix &m, const Matrix &n) { - return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02, - m.a10+n.a10, m.a11+n.a11, m.a12+n.a12, - m.a20+n.a20, m.a21+n.a21, m.a22+n.a22); + return make_matrix(m.a00+n.a00, m.a01+n.a01, m.a02+n.a02, + m.a10+n.a10, m.a11+n.a11, m.a12+n.a12, + m.a20+n.a20, m.a21+n.a21, m.a22+n.a22); } -__device__ __host__ void operator+= (Matrix &m, const Matrix &n) +__device__ void +operator+= (Matrix &m, const Matrix &n) { - m.a00 += n.a00; - m.a01 += n.a01; - m.a02 += n.a02; - m.a10 += n.a10; - m.a11 += n.a11; - m.a12 += n.a12; - m.a20 += n.a20; - m.a21 += n.a21; - m.a22 += n.a22; + m.a00 += n.a00; + m.a01 += n.a01; + m.a02 += n.a02; + m.a10 += n.a10; + m.a11 += n.a11; + m.a12 += n.a12; + m.a20 += n.a20; + m.a21 += n.a21; + m.a22 += n.a22; } -__device__ __host__ Matrix operator- (const Matrix &m, const Matrix &n) +__device__ Matrix +operator- (const Matrix &m, const Matrix &n) { - return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02, - m.a10-n.a10, m.a11-n.a11, m.a12-n.a12, - m.a20-n.a20, m.a21-n.a21, m.a22-n.a22); + return make_matrix(m.a00-n.a00, m.a01-n.a01, m.a02-n.a02, + m.a10-n.a10, m.a11-n.a11, m.a12-n.a12, + m.a20-n.a20, m.a21-n.a21, m.a22-n.a22); } -__device__ __host__ void operator-= (Matrix &m, const Matrix& n) +__device__ void +operator-= (Matrix &m, const Matrix& n) { - m.a00 -= n.a00; - m.a01 -= n.a01; - m.a02 -= n.a02; - m.a10 -= n.a10; - m.a11 -= n.a11; - m.a12 -= n.a12; - m.a20 -= n.a20; - m.a21 -= n.a21; - m.a22 -= n.a22; + m.a00 -= n.a00; + m.a01 -= n.a01; + m.a02 -= n.a02; + m.a10 -= n.a10; + m.a11 -= n.a11; + m.a12 -= n.a12; + m.a20 -= n.a20; + m.a21 -= n.a21; + m.a22 -= n.a22; } -__device__ __host__ Matrix operator* (const Matrix &m, const Matrix &n) +__device__ Matrix +operator* (const Matrix &m, const Matrix &n) { - return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20, - m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21, - m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22, - m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20, - m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21, - m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22, - m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20, - m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21, - m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22); + return make_matrix(m.a00*n.a00 + m.a01*n.a10 + m.a02*n.a20, + m.a00*n.a01 + m.a01*n.a11 + m.a02*n.a21, + m.a00*n.a02 + m.a01*n.a12 + m.a02*n.a22, + m.a10*n.a00 + m.a11*n.a10 + m.a12*n.a20, + m.a10*n.a01 + m.a11*n.a11 + m.a12*n.a21, + m.a10*n.a02 + m.a11*n.a12 + m.a12*n.a22, + m.a20*n.a00 + m.a21*n.a10 + m.a22*n.a20, + m.a20*n.a01 + m.a21*n.a11 + m.a22*n.a21, + m.a20*n.a02 + m.a21*n.a12 + m.a22*n.a22); } -__device__ __host__ Matrix operator+ (const Matrix &m, const float &c) +__device__ Matrix +operator+ (const Matrix &m, const float &c) { - return make_matrix(m.a00+c, m.a01+c, m.a02+c, - m.a10+c, m.a11+c, m.a12+c, - m.a20+c, m.a21+c, m.a22+c); + return make_matrix(m.a00+c, m.a01+c, m.a02+c, + m.a10+c, m.a11+c, m.a12+c, + m.a20+c, m.a21+c, m.a22+c); } -__device__ __host__ void operator+= (Matrix &m, const float &c) +__device__ void +operator+= (Matrix &m, const float &c) { - m.a00 += c; - m.a01 += c; - m.a02 += c; - m.a10 += c; - m.a11 += c; - m.a12 += c; - m.a20 += c; - m.a21 += c; - m.a22 += c; + m.a00 += c; + m.a01 += c; + m.a02 += c; + m.a10 += c; + m.a11 += c; + m.a12 += c; + m.a20 += c; + m.a21 += c; + m.a22 += c; } -__device__ __host__ Matrix operator+ (const float &c, const Matrix &m) +__device__ Matrix +operator+ (const float &c, const Matrix &m) { - return make_matrix(c+m.a00, c+m.a01, c+m.a02, - c+m.a10, c+m.a11, c+m.a12, - c+m.a20, c+m.a21, c+m.a22); + return make_matrix(c+m.a00, c+m.a01, c+m.a02, + c+m.a10, c+m.a11, c+m.a12, + c+m.a20, c+m.a21, c+m.a22); } -__device__ __host__ Matrix operator- (const Matrix &m, const float &c) +__device__ Matrix +operator- (const Matrix &m, const float &c) { - return make_matrix(m.a00-c, m.a01-c, m.a02-c, - m.a10-c, m.a11-c, m.a12-c, - m.a20-c, m.a21-c, m.a22-c); + return make_matrix(m.a00-c, m.a01-c, m.a02-c, + m.a10-c, m.a11-c, m.a12-c, + m.a20-c, m.a21-c, m.a22-c); } -__device__ __host__ void operator-= (Matrix &m, const float &c) +__device__ void +operator-= (Matrix &m, const float &c) { - m.a00 -= c; - m.a01 -= c; - m.a02 -= c; - m.a10 -= c; - m.a11 -= c; - m.a12 -= c; - m.a20 -= c; - m.a21 -= c; - m.a22 -= c; + m.a00 -= c; + m.a01 -= c; + m.a02 -= c; + m.a10 -= c; + m.a11 -= c; + m.a12 -= c; + m.a20 -= c; + m.a21 -= c; + m.a22 -= c; } -__device__ __host__ Matrix operator- (const float &c, const Matrix &m) +__device__ Matrix +operator- (const float &c, const Matrix &m) { - return make_matrix(c-m.a00, c-m.a01, c-m.a02, - c-m.a10, c-m.a11, c-m.a12, - c-m.a20, c-m.a21, c-m.a22); + return make_matrix(c-m.a00, c-m.a01, c-m.a02, + c-m.a10, c-m.a11, c-m.a12, + c-m.a20, c-m.a21, c-m.a22); } -__device__ __host__ Matrix operator* (const Matrix &m, const float &c) +__device__ Matrix +operator* (const Matrix &m, const float &c) { - return make_matrix(m.a00*c, m.a01*c, m.a02*c, - m.a10*c, m.a11*c, m.a12*c, - m.a20*c, m.a21*c, m.a22*c); + return make_matrix(m.a00*c, m.a01*c, m.a02*c, + m.a10*c, m.a11*c, m.a12*c, + m.a20*c, m.a21*c, m.a22*c); } -__device__ __host__ void operator*= (Matrix &m, const float &c) +__device__ void +operator*= (Matrix &m, const float &c) { - m.a00 *= c; - m.a01 *= c; - m.a02 *= c; - m.a10 *= c; - m.a11 *= c; - m.a12 *= c; - m.a20 *= c; - m.a21 *= c; - m.a22 *= c; + m.a00 *= c; + m.a01 *= c; + m.a02 *= c; + m.a10 *= c; + m.a11 *= c; + m.a12 *= c; + m.a20 *= c; + m.a21 *= c; + m.a22 *= c; } -__device__ __host__ Matrix operator* (const float &c, const Matrix &m) +__device__ Matrix +operator* (const float &c, const Matrix &m) { - return make_matrix(c*m.a00, c*m.a01, c*m.a02, - c*m.a10, c*m.a11, c*m.a12, - c*m.a20, c*m.a21, c*m.a22); + return make_matrix(c*m.a00, c*m.a01, c*m.a02, + c*m.a10, c*m.a11, c*m.a12, + c*m.a20, c*m.a21, c*m.a22); } -__device__ __host__ Matrix operator/ (const Matrix &m, const float &c) +__device__ Matrix +operator/ (const Matrix &m, const float &c) { - return make_matrix(m.a00/c, m.a01/c, m.a02/c, - m.a10/c, m.a11/c, m.a12/c, - m.a20/c, m.a21/c, m.a22/c); + return make_matrix(m.a00/c, m.a01/c, m.a02/c, + m.a10/c, m.a11/c, m.a12/c, + m.a20/c, m.a21/c, m.a22/c); } -__device__ __host__ void operator/= (Matrix &m, const float &c) +__device__ void +operator/= (Matrix &m, const float &c) { - m.a00 /= c; - m.a01 /= c; - m.a02 /= c; - m.a10 /= c; - m.a11 /= c; - m.a12 /= c; - m.a20 /= c; - m.a21 /= c; - m.a22 /= c; + m.a00 /= c; + m.a01 /= c; + m.a02 /= c; + m.a10 /= c; + m.a11 /= c; + m.a12 /= c; + m.a20 /= c; + m.a21 /= c; + m.a22 /= c; } -__device__ __host__ Matrix operator/ (const float &c, const Matrix &m) +__device__ Matrix +operator/ (const float &c, const Matrix &m) { - return make_matrix(c/m.a00, c/m.a01, c/m.a02, - c/m.a10, c/m.a11, c/m.a12, - c/m.a20, c/m.a21, c/m.a22); + return make_matrix(c/m.a00, c/m.a01, c/m.a02, + c/m.a10, c/m.a11, c/m.a12, + c/m.a20, c/m.a21, c/m.a22); } -__device__ __host__ float det(const Matrix &m) +__device__ float +det(const Matrix &m) { - return m.a00*(m.a11*m.a22 - m.a12*m.a21) - m.a10*(m.a01*m.a22 - m.a02*m.a21) + m.a20*(m.a01*m.a12 - m.a02*m.a11); + return m.a00*(m.a11*m.a22 - m.a12*m.a21) - + m.a10*(m.a01*m.a22 - m.a02*m.a21) + + m.a20*(m.a01*m.a12 - m.a02*m.a11); } -__device__ __host__ Matrix inv(const Matrix &m) +__device__ Matrix +inv(const Matrix &m) { - return make_matrix(m.a11*m.a22 - m.a12*m.a21, - m.a02*m.a21 - m.a01*m.a22, - m.a01*m.a12 - m.a02*m.a11, - m.a12*m.a20 - m.a10*m.a22, - m.a00*m.a22 - m.a02*m.a20, - m.a02*m.a10 - m.a00*m.a12, - m.a10*m.a21 - m.a11*m.a20, - m.a01*m.a20 - m.a00*m.a21, - m.a00*m.a11 - m.a01*m.a10)/det(m); + return make_matrix(m.a11*m.a22 - m.a12*m.a21, + m.a02*m.a21 - m.a01*m.a22, + m.a01*m.a12 - m.a02*m.a11, + m.a12*m.a20 - m.a10*m.a22, + m.a00*m.a22 - m.a02*m.a20, + m.a02*m.a10 - m.a00*m.a12, + m.a10*m.a21 - m.a11*m.a20, + m.a01*m.a20 - m.a00*m.a21, + m.a00*m.a11 - m.a01*m.a10)/det(m); } -__device__ __host__ Matrix outer(const float3 &a, const float3 &b) +__device__ Matrix +outer(const float3 &a, const float3 &b) { - return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z, - a.y*b.x, a.y*b.y, a.y*b.z, - a.z*b.x, a.z*b.y, a.z*b.z); + return make_matrix(a.x*b.x, a.x*b.y, a.x*b.z, + a.y*b.x, a.y*b.y, a.y*b.z, + a.z*b.x, a.z*b.y, a.z*b.z); } #endif @@ -2,187 +2,164 @@ #define __MESH_H__ #include "intersect.h" +#include "geometry.h" #define STACK_SIZE 500 -/* flattened triangle mesh */ -__device__ float3 *g_vertices; -__device__ uint4 *g_triangles; -__device__ unsigned int *g_colors; -__device__ unsigned int g_start_node; -__device__ unsigned int g_first_node; - -/* lower/upper bounds for the bounding box associated with each node/leaf */ -__device__ float3 *g_lower_bounds; -__device__ float3 *g_upper_bounds; - -/* map to child node/triangle indices */ -texture<unsigned int, 1, cudaReadModeElementType> node_map; -texture<unsigned int, 1, cudaReadModeElementType> node_map_end; - -__device__ float3 make_float3(const float4 &a) +/* Tests the intersection between a ray and a node in the bounding volume + hierarchy. If the ray intersects the bounding volume and `min_distance` + is less than zero or the distance from `origin` to the intersection is + less than `min_distance`, return true, else return false. */ +__device__ bool +intersect_node(const float3 &origin, const float3 &direction, + Geometry *geometry, const int &i, const float &min_distance) { - return make_float3(a.x, a.y, a.z); -} + /* assigning these to local variables is faster for some reason */ + float3 lower_bound = geometry->lower_bounds[i]; + float3 upper_bound = geometry->upper_bounds[i]; -__device__ int convert(int c) -{ - if (c & 0x80) - return (0xFFFFFF00 | c); - else - return c; -} + float distance_to_box; -/* Test the intersection between a ray starting at `origin` traveling in the - direction `direction` and the bounding box around node `i`. If the ray - intersects the bounding box return true, else return false. */ -__device__ bool intersect_node(const float3 &origin, const float3 &direction, const int &i, const float &min_distance) -{ - float3 lower_bound = g_lower_bounds[i]; - float3 upper_bound = g_upper_bounds[i]; + if (intersect_box(origin, direction, lower_bound, upper_bound, + distance_to_box)) { + if (min_distance < 0.0f) + return true; - float distance_to_box; + if (distance_to_box > min_distance) + return false; - if (intersect_box(origin, direction, lower_bound, upper_bound, distance_to_box)) - { - if (min_distance < 0.0f) - return true; + return true; + } + else { + return false; + } - if (distance_to_box > min_distance) - return false; - - return true; - } - else - { - return false; - } - -} // intersect_node +} -/* Find the intersection between a ray starting at `origin` traveling in the - direction `direction` and the global mesh texture. If the ray intersects - the texture return the index of the triangle which the ray intersected, - else return -1. */ -__device__ int intersect_mesh(const float3 &origin, const float3& direction, float &min_distance, int last_hit_triangle = -1) +/* Finds the intersection between a ray and `geometry`. If the ray does + intersect the mesh and the index of the intersected triangle is not equal + to `last_hit_triangle`, set `min_distance` to the distance from `origin` to + the intersection and return the index of the triangle which the ray + intersected, else return -1. */ +__device__ int +intersect_mesh(const float3 &origin, const float3& direction, + Geometry *geometry, float &min_distance, + int last_hit_triangle = -1) { - int triangle_index = -1; + int triangle_index = -1; - float distance; - min_distance = -1.0f; + float distance; + min_distance = -1.0f; - if (!intersect_node(origin, direction, g_start_node, min_distance)) - return -1; + if (!intersect_node(origin, direction, geometry, geometry->start_node, + min_distance)) + return -1; - unsigned int stack[STACK_SIZE]; + unsigned int stack[STACK_SIZE]; - unsigned int *head = &stack[0]; - unsigned int *node = &stack[1]; - unsigned int *tail = &stack[STACK_SIZE-1]; - *node = g_start_node; + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; + *node = geometry->start_node; - unsigned int i; + unsigned int i; - do - { - unsigned int first_child = tex1Dfetch(node_map, *node); - unsigned int stop = tex1Dfetch(node_map_end, *node); + do + { + unsigned int first_child = geometry->node_map[*node]; + unsigned int stop = geometry->node_map_end[*node]; - while (*node >= g_first_node && stop == first_child+1) - { - *node = first_child; - first_child = tex1Dfetch(node_map, *node); - stop = tex1Dfetch(node_map_end, *node); - } + while (*node >= geometry->first_node && stop == first_child+1) { + *node = first_child; + first_child = geometry->node_map[*node]; + stop = geometry->node_map_end[*node]; + } - if (*node >= g_first_node) - { - for (i=first_child; i < stop; i++) - { - if (intersect_node(origin, direction, i, min_distance)) - { - *node = i; - node++; - } - } - - node--; + if (*node >= geometry->first_node) { + for (i=first_child; i < stop; i++) { + if (intersect_node(origin, direction, geometry, i, + min_distance)) { + *node = i; + node++; + } + } + + node--; + } + else { + // node is a leaf + for (i=first_child; i < stop; i++) { + if (last_hit_triangle == i) + continue; + + Triangle triangle = get_triangle(geometry, i); + + if (intersect_triangle(origin, direction, triangle, + distance)) { + if (triangle_index == -1) { + triangle_index = i; + min_distance = distance; + continue; + } + + if (distance < min_distance) { + triangle_index = i; + min_distance = distance; + } } - else // node is a leaf - { - for (i=first_child; i < stop; i++) - { - if (last_hit_triangle == i) - continue; - - uint4 triangle_data = g_triangles[i]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - if (intersect_triangle(origin, direction, v0, v1, v2, distance)) - { - if (triangle_index == -1) - { - triangle_index = i; - min_distance = distance; - continue; - } - - if (distance < min_distance) - { - triangle_index = i; - min_distance = distance; - } - } - } // triangle loop - - node--; - - } // node is a leaf - - } // while loop - while (node != head); - - return triangle_index; + } // triangle loop + + node--; + + } // node is a leaf + + } // while loop + while (node != head); + + return triangle_index; } extern "C" { -__global__ void set_global_mesh_variables(uint4 *triangles, float3 *vertices, unsigned int *colors, unsigned int start_node, unsigned int first_node, float3 *lower_bounds, float3 *upper_bounds) +__global__ void +distance_to_mesh(int nthreads, float3 *positions, float3 *directions, + Geometry *g, float *distances) { - g_triangles = triangles; - g_vertices = vertices; - g_colors = colors; - g_start_node = start_node; - g_first_node = first_node; - g_lower_bounds = lower_bounds; - g_upper_bounds = upper_bounds; -} + int id = blockIdx.x*blockDim.x + threadIdx.x; -__global__ void set_colors(unsigned int *colors) -{ - g_colors = colors; + if (id >= nthreads) + return; + + float3 position = positions[id]; + float3 direction = directions[id]; + direction /= norm(direction); + + float distance; + + int triangle_index = intersect_mesh(position, direction, g, distance); + + if (triangle_index == -1) + distances[id] = 1e9; + else + distances[id] = distance; } -__global__ void color_solids(int first_triangle, int nthreads, - int *solid_id_map, - bool *solid_hit, - unsigned int *solid_colors) +__global__ void +color_solids(int first_triangle, int nthreads, int *solid_id_map, + bool *solid_hit, unsigned int *solid_colors, Geometry *geometry) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - int triangle_id = first_triangle + id; - int solid_id = solid_id_map[triangle_id]; - if (solid_hit[solid_id]) - g_colors[triangle_id] = solid_colors[solid_id]; + int triangle_id = first_triangle + id; + int solid_id = solid_id_map[triangle_id]; + if (solid_hit[solid_id]) + geometry->colors[triangle_id] = solid_colors[solid_id]; } -} // extern "c" +} // extern "C" #endif diff --git a/src/photon.h b/src/photon.h index e781864..8b7e588 100644 --- a/src/photon.h +++ b/src/photon.h @@ -3,309 +3,322 @@ #include "stdio.h" #include "linalg.h" -#include "materials.h" #include "rotate.h" #include "random.h" #include "physical_constants.h" #include "mesh.h" +#include "geometry.h" struct Photon { - float3 position; - float3 direction; - float3 polarization; - float wavelength; - float time; + float3 position; + float3 direction; + float3 polarization; + float wavelength; + float time; - unsigned int history; + unsigned int history; - int last_hit_triangle; + int last_hit_triangle; }; struct State { - bool inside_to_outside; + bool inside_to_outside; - float3 surface_normal; + float3 surface_normal; - float refractive_index1, refractive_index2; - float absorption_length; - float scattering_length; + float refractive_index1, refractive_index2; + float absorption_length; + float scattering_length; - int surface_index; + int surface_index; - float distance_to_boundary; + float distance_to_boundary; }; enum { - NO_HIT = 0x1 << 0, - BULK_ABSORB = 0x1 << 1, - SURFACE_DETECT = 0x1 << 2, - SURFACE_ABSORB = 0x1 << 3, - RAYLEIGH_SCATTER = 0x1 << 4, - REFLECT_DIFFUSE = 0x1 << 5, - REFLECT_SPECULAR = 0x1 << 6, - NAN_ABORT = 0x1 << 31 + NO_HIT = 0x1 << 0, + BULK_ABSORB = 0x1 << 1, + SURFACE_DETECT = 0x1 << 2, + SURFACE_ABSORB = 0x1 << 3, + RAYLEIGH_SCATTER = 0x1 << 4, + REFLECT_DIFFUSE = 0x1 << 5, + REFLECT_SPECULAR = 0x1 << 6, + NAN_ABORT = 0x1 << 31 }; // processes enum {BREAK, CONTINUE, PASS}; // return value from propagate_to_boundary -__device__ float get_theta(const float3 &a, const float3 &b) +__device__ int +convert(int c) { - return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b)))); + if (c & 0x80) + return (0xFFFFFF00 | c); + else + return c; } -__device__ void fill_state(State &s, Photon &p) +__device__ float +get_theta(const float3 &a, const float3 &b) { - p.last_hit_triangle = intersect_mesh(p.position, p.direction, s.distance_to_boundary, p.last_hit_triangle); - - if (p.last_hit_triangle == -1) - { - p.history |= NO_HIT; - return; - } - - uint4 triangle_data = g_triangles[p.last_hit_triangle]; - - float3 v0 = g_vertices[triangle_data.x]; - float3 v1 = g_vertices[triangle_data.y]; - float3 v2 = g_vertices[triangle_data.z]; - - int inner_material_index = convert(0xFF & (triangle_data.w >> 24)); - int outer_material_index = convert(0xFF & (triangle_data.w >> 16)); - s.surface_index = convert(0xFF & (triangle_data.w >> 8)); - - s.surface_normal = cross(v1-v0, v2-v1); - s.surface_normal /= norm(s.surface_normal); - - Material material1, material2; - if (dot(s.surface_normal,-p.direction) > 0.0f) - { - // outside to inside - material1 = materials[outer_material_index]; - material2 = materials[inner_material_index]; - - s.inside_to_outside = false; - } - else - { - // inside to outside - material1 = materials[inner_material_index]; - material2 = materials[outer_material_index]; - s.surface_normal = -s.surface_normal; - - s.inside_to_outside = true; - } + return acosf(fmaxf(-1.0f,fminf(1.0f,dot(a,b)))); +} - s.refractive_index1 = interp_property(p.wavelength, material1.refractive_index); - s.refractive_index2 = interp_property(p.wavelength, material2.refractive_index); - s.absorption_length = interp_property(p.wavelength, material1.absorption_length); - s.scattering_length = interp_property(p.wavelength, material1.scattering_length); +__device__ void +fill_state(State &s, Photon &p, Geometry *g) +{ + p.last_hit_triangle = intersect_mesh(p.position, p.direction, g, + s.distance_to_boundary, + p.last_hit_triangle); + + if (p.last_hit_triangle == -1) { + p.history |= NO_HIT; + return; + } + + Triangle t = get_triangle(g, p.last_hit_triangle); + + unsigned int material_code = g->material_codes[p.last_hit_triangle]; + + int inner_material_index = convert(0xFF & (material_code >> 24)); + int outer_material_index = convert(0xFF & (material_code >> 16)); + s.surface_index = convert(0xFF & (material_code >> 8)); + + float3 v01 = t.v1 - t.v0; + float3 v12 = t.v2 - t.v1; + + s.surface_normal = normalize(cross(v01, v12)); + + Material *material1, *material2; + if (dot(s.surface_normal,-p.direction) > 0.0f) { + // outside to inside + material1 = g->materials[outer_material_index]; + material2 = g->materials[inner_material_index]; + + s.inside_to_outside = false; + } + else { + // inside to outside + material1 = g->materials[inner_material_index]; + material2 = g->materials[outer_material_index]; + s.surface_normal = -s.surface_normal; + + s.inside_to_outside = true; + } + + s.refractive_index1 = interp_property(material1, p.wavelength, + material1->refractive_index); + s.refractive_index2 = interp_property(material2, p.wavelength, + material2->refractive_index); + s.absorption_length = interp_property(material1, p.wavelength, + material1->absorption_length); + s.scattering_length = interp_property(material1, p.wavelength, + material1->scattering_length); } // fill_state -__device__ float3 pick_new_direction(float3 axis, float theta, float phi) +__device__ float3 +pick_new_direction(float3 axis, float theta, float phi) { - // Taken from SNOMAN rayscatter.for - float cos_theta = cosf(theta); - float sin_theta = sinf(theta); - float cos_phi = cosf(phi); - float sin_phi = sinf(phi); + // Taken from SNOMAN rayscatter.for + float cos_theta = cosf(theta); + float sin_theta = sinf(theta); + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); - float sin_axis_theta = sqrt(1.0f - axis.z*axis.z); - float cos_axis_phi, sin_axis_phi; + float sin_axis_theta = sqrt(1.0f - axis.z*axis.z); + float cos_axis_phi, sin_axis_phi; - if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) { - cos_axis_phi = 1.0f; - sin_axis_phi = 0.0f; - } else { - cos_axis_phi = axis.x / sin_axis_theta; - sin_axis_phi = axis.y / sin_axis_theta; - } - - return make_float3(cos_theta*axis.x + sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi), - cos_theta*axis.y + sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi), - cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta); + if (isnan(sin_axis_theta) || sin_axis_theta < 0.00001f) { + cos_axis_phi = 1.0f; + sin_axis_phi = 0.0f; + } + else { + cos_axis_phi = axis.x / sin_axis_theta; + sin_axis_phi = axis.y / sin_axis_theta; + } + + float dirx = cos_theta*axis.x + + sin_theta*(axis.z*cos_phi*cos_axis_phi - sin_phi*sin_axis_phi); + float diry = cos_theta*axis.y + + sin_theta*(cos_phi*axis.z*sin_axis_phi - sin_phi*cos_axis_phi); + float dirz = cos_theta*axis.z - sin_theta*cos_phi*sin_axis_theta; + + return make_float3(dirx, diry, dirz); } -__device__ void rayleigh_scatter(Photon &p, curandState &rng) +__device__ void +rayleigh_scatter(Photon &p, curandState &rng) { - float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f); - if (cos_theta > 1.0f) - cos_theta = 1.0f; - else if (cos_theta < -1.0f) - cos_theta = -1.0f; - - float theta = acosf(cos_theta); - float phi = uniform(&rng, 0.0f, 2.0f * PI); - - p.direction = pick_new_direction(p.polarization, theta, phi); - - if (1.0f - fabsf(cos_theta) < 1e-6f) { - p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi); - } else { - // linear combination of old polarization and new direction - p.polarization = p.polarization - cos_theta * p.direction; - } - - p.direction /= norm(p.direction); - p.polarization /= norm(p.polarization); + float cos_theta = 2.0f*cosf((acosf(1.0f - 2.0f*curand_uniform(&rng))-2*PI)/3.0f); + if (cos_theta > 1.0f) + cos_theta = 1.0f; + else if (cos_theta < -1.0f) + cos_theta = -1.0f; + + float theta = acosf(cos_theta); + float phi = uniform(&rng, 0.0f, 2.0f * PI); + + p.direction = pick_new_direction(p.polarization, theta, phi); + + if (1.0f - fabsf(cos_theta) < 1e-6f) { + p.polarization = pick_new_direction(p.polarization, PI/2.0f, phi); + } + else { + // linear combination of old polarization and new direction + p.polarization = p.polarization - cos_theta * p.direction; + } + + p.direction /= norm(p.direction); + p.polarization /= norm(p.polarization); } // scatter __device__ int propagate_to_boundary(Photon &p, State &s, curandState &rng) { - float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); - float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); + float absorption_distance = -s.absorption_length*logf(curand_uniform(&rng)); + float scattering_distance = -s.scattering_length*logf(curand_uniform(&rng)); - if (absorption_distance <= scattering_distance) - { - if (absorption_distance <= s.distance_to_boundary) - { - p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); - p.position += absorption_distance*p.direction; - p.history |= BULK_ABSORB; + if (absorption_distance <= scattering_distance) { + if (absorption_distance <= s.distance_to_boundary) { + p.time += absorption_distance/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += absorption_distance*p.direction; + p.history |= BULK_ABSORB; - p.last_hit_triangle = -1; + p.last_hit_triangle = -1; - return BREAK; - } // photon is absorbed in material1 - } - else - { - if (scattering_distance <= s.distance_to_boundary) - { - p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1); - p.position += scattering_distance*p.direction; + return BREAK; + } // photon is absorbed in material1 + } + else { + if (scattering_distance <= s.distance_to_boundary) { + p.time += scattering_distance/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += scattering_distance*p.direction; - rayleigh_scatter(p, rng); + rayleigh_scatter(p, rng); - p.history |= RAYLEIGH_SCATTER; + p.history |= RAYLEIGH_SCATTER; - p.last_hit_triangle = -1; + p.last_hit_triangle = -1; - return CONTINUE; - } // photon is scattered in material1 - } // if scattering_distance < absorption_distance + return CONTINUE; + } // photon is scattered in material1 + } // if scattering_distance < absorption_distance - p.position += s.distance_to_boundary*p.direction; - p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1); + p.position += s.distance_to_boundary*p.direction; + p.time += s.distance_to_boundary/(SPEED_OF_LIGHT/s.refractive_index1); - return PASS; + return PASS; } // propagate_to_boundary -__device__ void propagate_at_boundary(Photon &p, State &s, curandState &rng) +__device__ void +propagate_at_boundary(Photon &p, State &s, curandState &rng) { - float incident_angle = get_theta(s.surface_normal,-p.direction); - float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2); - - float3 incident_plane_normal = cross(p.direction, s.surface_normal); - float incident_plane_normal_length = norm(incident_plane_normal); - - // Photons at normal incidence do not have a unique plane of incidence, - // so we have to pick the plane normal to be the polarization vector - // to get the correct logic below - if (incident_plane_normal_length < 1e-6f) - incident_plane_normal = p.polarization; - else - incident_plane_normal /= incident_plane_normal_length; - - float normal_coefficient = dot(p.polarization, incident_plane_normal); - float normal_probability = normal_coefficient*normal_coefficient; - - float reflection_coefficient; - if (curand_uniform(&rng) < normal_probability) - { - reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle); - - if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) - { - p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + float incident_angle = get_theta(s.surface_normal,-p.direction); + float refracted_angle = asinf(sinf(incident_angle)*s.refractive_index1/s.refractive_index2); + + float3 incident_plane_normal = cross(p.direction, s.surface_normal); + float incident_plane_normal_length = norm(incident_plane_normal); + + // Photons at normal incidence do not have a unique plane of incidence, + // so we have to pick the plane normal to be the polarization vector + // to get the correct logic below + if (incident_plane_normal_length < 1e-6f) + incident_plane_normal = p.polarization; + else + incident_plane_normal /= incident_plane_normal_length; + + float normal_coefficient = dot(p.polarization, incident_plane_normal); + float normal_probability = normal_coefficient*normal_coefficient; + + float reflection_coefficient; + if (curand_uniform(&rng) < normal_probability) { + // photon polarization normal to plane of incidence + reflection_coefficient = -sinf(incident_angle-refracted_angle)/sinf(incident_angle+refracted_angle); + + if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { + p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); - p.history |= REFLECT_SPECULAR; - } - else - { - p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); - } - - p.polarization = incident_plane_normal; - } // photon polarization normal to plane of incidence - else - { - reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle); - - if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) - { - p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); - - p.history |= REFLECT_SPECULAR; - } - else - { - p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); - } - - p.polarization = cross(incident_plane_normal, p.direction); - p.polarization /= norm(p.polarization); - } // photon polarization parallel to plane of incidence - -} // propagate_at_boundary - -__device__ int propagate_at_surface(Photon &p, State &s, curandState &rng) -{ - Surface surface = surfaces[s.surface_index]; - - float detect = interp_property(p.wavelength, surface.detect); - float absorb = interp_property(p.wavelength, surface.absorb); - float reflect_diffuse = interp_property(p.wavelength, surface.reflect_diffuse); - float reflect_specular = interp_property(p.wavelength, surface.reflect_specular); - - // since the surface properties are interpolated linearly, we are - // guaranteed that they still sum to 1.0. + p.history |= REFLECT_SPECULAR; + } + else { + p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); + } - float uniform_sample = curand_uniform(&rng); + p.polarization = incident_plane_normal; + } + else { + // photon polarization parallel to plane of incidence + reflection_coefficient = tanf(incident_angle-refracted_angle)/tanf(incident_angle+refracted_angle); - if (uniform_sample < absorb) - { - p.history |= SURFACE_ABSORB; - return BREAK; + if ((curand_uniform(&rng) < reflection_coefficient*reflection_coefficient) || isnan(refracted_angle)) { + p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + + p.history |= REFLECT_SPECULAR; } - else if (uniform_sample < absorb + detect) - { - p.history |= SURFACE_DETECT; - return BREAK; + else { + p.direction = rotate(s.surface_normal, PI-refracted_angle, incident_plane_normal); } - else if (uniform_sample < absorb + detect + reflect_diffuse) - { - // diffusely reflect - p.direction = uniform_sphere(&rng); - if (dot(p.direction, s.surface_normal) < 0.0f) - p.direction = -p.direction; + p.polarization = cross(incident_plane_normal, p.direction); + p.polarization /= norm(p.polarization); + } - // randomize polarization? - p.polarization = cross(uniform_sphere(&rng), p.direction); - p.polarization /= norm(p.polarization); +} // propagate_at_boundary + +__device__ int +propagate_at_surface(Photon &p, State &s, curandState &rng, Geometry *geometry) +{ + Surface *surface = geometry->surfaces[s.surface_index]; + + /* since the surface properties are interpolated linearly, we are + guaranteed that they still sum to 1.0. */ + + float detect = interp_property(surface, p.wavelength, surface->detect); + float absorb = interp_property(surface, p.wavelength, surface->absorb); + float reflect_diffuse = interp_property(surface, p.wavelength, surface->reflect_diffuse); + float reflect_specular = interp_property(surface, p.wavelength, surface->reflect_specular); + + float uniform_sample = curand_uniform(&rng); + + if (uniform_sample < absorb) { + p.history |= SURFACE_ABSORB; + return BREAK; + } + else if (uniform_sample < absorb + detect) { + p.history |= SURFACE_DETECT; + return BREAK; + } + else if (uniform_sample < absorb + detect + reflect_diffuse) { + // diffusely reflect + p.direction = uniform_sphere(&rng); + + if (dot(p.direction, s.surface_normal) < 0.0f) + p.direction = -p.direction; + + // randomize polarization? + p.polarization = cross(uniform_sphere(&rng), p.direction); + p.polarization /= norm(p.polarization); - p.history |= REFLECT_DIFFUSE; + p.history |= REFLECT_DIFFUSE; - return CONTINUE; - } - else - { - // specularly reflect - float incident_angle = get_theta(s.surface_normal,-p.direction); - float3 incident_plane_normal = cross(p.direction, s.surface_normal); - incident_plane_normal /= norm(incident_plane_normal); + return CONTINUE; + } + else { + // specularly reflect + float incident_angle = get_theta(s.surface_normal,-p.direction); + float3 incident_plane_normal = cross(p.direction, s.surface_normal); + incident_plane_normal /= norm(incident_plane_normal); - p.direction = rotate(s.surface_normal, incident_angle, incident_plane_normal); + p.direction = rotate(s.surface_normal, incident_angle, + incident_plane_normal); - p.history |= REFLECT_SPECULAR; + p.history |= REFLECT_SPECULAR; - return CONTINUE; - } + return CONTINUE; + } } // propagate_at_surface diff --git a/src/propagate.cu b/src/propagate.cu new file mode 100644 index 0000000..2d5183e --- /dev/null +++ b/src/propagate.cu @@ -0,0 +1,100 @@ +//-*-c-*- + +#include "linalg.h" +#include "mesh.h" +#include "geometry.h" +#include "photon.h" + +extern "C" +{ + +__global__ void +propagate(int first_photon, int nthreads, unsigned int *input_queue, + unsigned int *output_queue, curandState *rng_states, + float3 *positions, float3 *directions, + float *wavelengths, float3 *polarizations, + float *times, unsigned int *histories, + int *last_hit_triangles, int max_steps, + Geometry *geometry) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curandState rng = rng_states[id]; + + int photon_id = input_queue[first_photon + id]; + + Photon p; + p.position = positions[photon_id]; + p.direction = directions[photon_id]; + p.direction /= norm(p.direction); + p.polarization = polarizations[photon_id]; + p.polarization /= norm(p.polarization); + p.wavelength = wavelengths[photon_id]; + p.time = times[photon_id]; + p.last_hit_triangle = last_hit_triangles[photon_id]; + p.history = histories[photon_id]; + + if (p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) + return; + + State s; + + int steps = 0; + while (steps < max_steps) { + steps++; + + int command; + + // check for NaN and fail + if (isnan(p.direction.x*p.direction.y*p.direction.z*p.position.x*p.position.y*p.position.z)) { + p.history |= NO_HIT | NAN_ABORT; + break; + } + + fill_state(s, p, geometry); + + if (p.last_hit_triangle == -1) + break; + + command = propagate_to_boundary(p, s, rng); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + + if (s.surface_index != -1) { + command = propagate_at_surface(p, s, rng, geometry); + + if (command == BREAK) + break; + + if (command == CONTINUE) + continue; + } + + propagate_at_boundary(p, s, rng); + + } // while (steps < max_steps) + + rng_states[id] = rng; + positions[photon_id] = p.position; + directions[photon_id] = p.direction; + polarizations[photon_id] = p.polarization; + wavelengths[photon_id] = p.wavelength; + times[photon_id] = p.time; + histories[photon_id] = p.history; + last_hit_triangles[photon_id] = p.last_hit_triangle; + + // Not done, put photon in output queue + if ((p.history & (NO_HIT | BULK_ABSORB | SURFACE_DETECT | SURFACE_ABSORB)) == 0) { + int out_idx = atomicAdd(output_queue, 1); + output_queue[out_idx] = photon_id; + } +} // propagate + +} // extern "C" diff --git a/src/random.h b/src/random.h index 6abc415..e9d2e75 100644 --- a/src/random.h +++ b/src/random.h @@ -5,76 +5,81 @@ #include "physical_constants.h" -__device__ float uniform(curandState *s, const float &low, const float &high) +__device__ float +uniform(curandState *s, const float &low, const float &high) { - return low + curand_uniform(s)*(high-low); + return low + curand_uniform(s)*(high-low); } -__device__ float3 uniform_sphere(curandState *s) +__device__ float3 +uniform_sphere(curandState *s) { - float theta = uniform(s, 0.0f, 2*PI); - float u = uniform(s, -1.0f, 1.0f); - float c = sqrtf(1.0f-u*u); + float theta = uniform(s, 0.0f, 2*PI); + float u = uniform(s, -1.0f, 1.0f); + float c = sqrtf(1.0f-u*u); - return make_float3(c*cosf(theta), c*sinf(theta), u); + return make_float3(c*cosf(theta), c*sinf(theta), u); } // Draw a random sample given a cumulative distribution function // Assumptions: ncdf >= 2, cdf_y[0] is 0.0, and cdf_y[ncdf-1] is 1.0 -__device__ float sample_cdf(curandState *rng, int ncdf, - float *cdf_x, float *cdf_y) +__device__ float +sample_cdf(curandState *rng, int ncdf, float *cdf_x, float *cdf_y) { - float u = curand_uniform(rng); - - // Find u in cdf_y with binary search - // list must contain at least 2 elements: 0.0 and 1.0 - int lower=0; - int upper=ncdf-1; - while(lower < upper-1) { - int half = (lower+upper) / 2; - if (u < cdf_y[half]) - upper = half; - else - lower = half; - } + float u = curand_uniform(rng); + + // Find u in cdf_y with binary search + // list must contain at least 2 elements: 0.0 and 1.0 + int lower=0; + int upper=ncdf-1; + while(lower < upper-1) { + int half = (lower+upper) / 2; + if (u < cdf_y[half]) + upper = half; + else + lower = half; + } - float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); - return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac); + float frac = (u - cdf_y[lower]) / (cdf_y[upper] - cdf_y[lower]); + return cdf_x[lower] * frac + cdf_x[upper] * (1.0f - frac); } - extern "C" { -__global__ void init_rng(int nthreads, curandState *s, unsigned long long seed, unsigned long long offset) +__global__ void +init_rng(int nthreads, curandState *s, unsigned long long seed, + unsigned long long offset) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - curand_init(seed, id, offset, &s[id]); + curand_init(seed, id, offset, &s[id]); } -__global__ void fill_uniform(int nthreads, curandState *s, float *a, float low, float high) +__global__ void +fill_uniform(int nthreads, curandState *s, float *a, float low, float high) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - a[id] = uniform(&s[id], low, high); + a[id] = uniform(&s[id], low, high); } -__global__ void fill_uniform_sphere(int nthreads, curandState *s, float3 *a) +__global__ void +fill_uniform_sphere(int nthreads, curandState *s, float3 *a) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - a[id] = uniform_sphere(&s[id]); + a[id] = uniform_sphere(&s[id]); } } // extern "c" diff --git a/src/render.cu b/src/render.cu new file mode 100644 index 0000000..e5b4ac5 --- /dev/null +++ b/src/render.cu @@ -0,0 +1,169 @@ +//-*-c-*- + +#include "linalg.h" +#include "intersect.h" +#include "mesh.h" +#include "sorting.h" +#include "geometry.h" + +#include "stdio.h" + +__device__ float4 +get_color(const float3 &direction, const Triangle &triangle, unsigned int rgba) +{ + float3 v01 = triangle.v1 - triangle.v0; + float3 v12 = triangle.v2 - triangle.v1; + + float3 surface_normal = normalize(cross(v01,v12)); + + float cos_theta = dot(surface_normal,-direction); + + if (cos_theta < 0.0f) + cos_theta = dot(-surface_normal,-direction); + + unsigned int a0 = 0xff & (rgba >> 24); + unsigned int r0 = 0xff & (rgba >> 16); + unsigned int g0 = 0xff & (rgba >> 8); + unsigned int b0 = 0xff & rgba; + + float alpha = (255 - a0)/255.0f; + + return make_float4(r0*cos_theta, g0*cos_theta, b0*cos_theta, alpha); +} + +extern "C" +{ + +__global__ void +render(int nthreads, float3 *_origin, float3 *_direction, Geometry *geometry, + unsigned int alpha_depth, unsigned int *pixels, float *_dx, + unsigned int *dxlen, float4 *_color) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + float3 origin = _origin[id]; + float3 direction = _direction[id]; + unsigned int n = dxlen[id]; + + float distance; + + if (n < 1 && !intersect_node(origin, direction, geometry, + geometry->start_node, -1.0f)) { + pixels[id] = 0; + return; + } + + unsigned int stack[STACK_SIZE]; + + unsigned int *head = &stack[0]; + unsigned int *node = &stack[1]; + unsigned int *tail = &stack[STACK_SIZE-1]; + *node = geometry->start_node; + + float *dx = _dx + id*alpha_depth; + float4 *color_a = _color + id*alpha_depth; + + unsigned int i; + + do { + unsigned int first_child = geometry->node_map[*node]; + unsigned int stop = geometry->node_map_end[*node]; + + while (*node >= geometry->first_node && stop == first_child+1) { + *node = first_child; + first_child = geometry->node_map[*node]; + stop = geometry->node_map_end[*node]; + } + + if (*node >= geometry->first_node) { + for (i=first_child; i < stop; i++) { + if (intersect_node(origin, direction, geometry, i, -1.0f)) { + *node = i; + node++; + } + } + + node--; + } + else { + // node is a leaf + for (i=first_child; i < stop; i++) { + Triangle triangle = get_triangle(geometry, i); + + if (intersect_triangle(origin, direction, triangle, + distance)) { + if (n < 1) { + dx[0] = distance; + + unsigned int rgba = geometry->colors[i]; + float4 color = get_color(direction, triangle, rgba); + + color_a[0] = color; + } + else { + unsigned long j = searchsorted(n, dx, distance); + + if (j <= alpha_depth-1) { + insert(alpha_depth, dx, j, distance); + + unsigned int rgba = geometry->colors[i]; + float4 color = get_color(direction, triangle, + rgba); + + insert(alpha_depth, color_a, j, color); + } + } + + if (n < alpha_depth) + n++; + } + + } // triangle loop + + node--; + + } // node is a leaf + + } // while loop + while (node != head); + + if (n < 1) { + pixels[id] = 0; + return; + } + + dxlen[id] = n; + + float scale = 1.0f; + float fr = 0.0f; + float fg = 0.0f; + float fb = 0.0f; + for (i=0; i < n; i++) { + float alpha; + if (i < alpha_depth-1) + alpha = color_a[i].w; + else + alpha = 1.0; + + fr += scale*color_a[i].x*alpha; + fg += scale*color_a[i].y*alpha; + fb += scale*color_a[i].z*alpha; + + scale *= (1.0f-alpha); + } + unsigned int a; + if (n < alpha_depth) + a = floorf(255*(1.0f-scale)); + else + a = 255; + unsigned int r = floorf(fr); + unsigned int g = floorf(fg); + unsigned int b = floorf(fb); + + pixels[id] = a << 24 | r << 16 | g << 8 | b; +} + +} // extern "C" diff --git a/src/rotate.h b/src/rotate.h index 7fc6f7e..15f8037 100644 --- a/src/rotate.h +++ b/src/rotate.h @@ -6,21 +6,22 @@ __device__ const Matrix IDENTITY_MATRIX = {1,0,0,0,1,0,0,0,1}; -__device__ __host__ Matrix make_rotation_matrix(float phi, const float3 &n) +__device__ Matrix +make_rotation_matrix(float phi, const float3 &n) { - /* rotate points counterclockwise, when looking towards +infinity, - through an angle `phi` about the axis `n`. */ + float cos_phi = cosf(phi); + float sin_phi = sinf(phi); - float cos_phi = cosf(phi); - float sin_phi = sinf(phi); - - return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) + - sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0); + return IDENTITY_MATRIX*cos_phi + (1-cos_phi)*outer(n,n) + + sin_phi*make_matrix(0,n.z,-n.y,-n.z,0,n.x,n.y,-n.x,0); } -__device__ __host__ float3 rotate(const float3 &a, float phi, const float3 &n) +/* rotate points counterclockwise, when looking towards +infinity, + through an angle `phi` about the axis `n`. */ +__device__ float3 +rotate(const float3 &a, float phi, const float3 &n) { - return make_rotation_matrix(phi,n)*a; + return make_rotation_matrix(phi,n)*a; } #endif diff --git a/src/sorting.h b/src/sorting.h index ec16bbe..03eabc6 100644 --- a/src/sorting.h +++ b/src/sorting.h @@ -1,103 +1,105 @@ template <class T> -__device__ void swap(T &a, T &b) +__device__ void +swap(T &a, T &b) { - T tmp = a; - a = b; - b = tmp; + T tmp = a; + a = b; + b = tmp; } template <class T> -__device__ void reverse(int n, T *a) +__device__ void +reverse(int n, T *a) { - for (int i=0; i < n/2; i++) - swap(a[i],a[n-1-i]); + for (int i=0; i < n/2; i++) + swap(a[i],a[n-1-i]); } template <class T> -__device__ void piksrt(int n, T *arr) +__device__ void +piksrt(int n, T *arr) { - int i,j; - T a; + int i,j; + T a; - for (j=1; j < n; j++) - { - a = arr[j]; - i = j-1; - while (i >= 0 && arr[i] > a) - { - arr[i+1] = arr[i]; - i--; - } - arr[i+1] = a; + for (j=1; j < n; j++) { + a = arr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) { + arr[i+1] = arr[i]; + i--; } + arr[i+1] = a; + } } template <class T, class U> -__device__ void piksrt2(int n, T *arr, U *brr) +__device__ void +piksrt2(int n, T *arr, U *brr) { - int i,j; - T a; - U b; + int i,j; + T a; + U b; - for (j=1; j < n; j++) - { - a = arr[j]; - b = brr[j]; - i = j-1; - while (i >= 0 && arr[i] > a) - { - arr[i+1] = arr[i]; - brr[i+1] = brr[i]; - i--; - } - arr[i+1] = a; - brr[i+1] = b; + for (j=1; j < n; j++) { + a = arr[j]; + b = brr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) { + arr[i+1] = arr[i]; + brr[i+1] = brr[i]; + i--; } + arr[i+1] = a; + brr[i+1] = b; + } } template <class T> -__device__ unsigned long searchsorted(unsigned long n, T *arr, const T &x) +__device__ unsigned long +searchsorted(unsigned long n, T *arr, const T &x) { - unsigned long ju,jm,jl; - int ascnd; + unsigned long ju,jm,jl; + int ascnd; - jl = 0; - ju = n; + jl = 0; + ju = n; - ascnd = (arr[n-1] > arr[0]); + ascnd = (arr[n-1] > arr[0]); - while (ju-jl > 1) - { - jm = (ju+jl) >> 1; + while (ju-jl > 1) { + jm = (ju+jl) >> 1; - if (x >= arr[jm] == ascnd) - jl = jm; - else - ju = jm; - } - - if (x <= arr[0]) - return 0; - else if (x == arr[n-1]) - return n-1; + if (x >= arr[jm] == ascnd) + jl = jm; else - return ju; + ju = jm; + } + + if (x <= arr[0]) + return 0; + else if (x == arr[n-1]) + return n-1; + else + return ju; } template <class T> -__device__ void insert(unsigned long n, T *arr, unsigned long i, const T &x) +__device__ void +insert(unsigned long n, T *arr, unsigned long i, const T &x) { - unsigned long j; - for (j=n-1; j > i; j--) - arr[j] = arr[j-1]; - arr[i] = x; + unsigned long j; + for (j=n-1; j > i; j--) + arr[j] = arr[j-1]; + arr[i] = x; } template <class T> -__device__ void add_sorted(unsigned long n, T *arr, const T &x) +__device__ void +add_sorted(unsigned long n, T *arr, const T &x) { - unsigned long i = searchsorted(n, arr, x); + unsigned long i = searchsorted(n, arr, x); - if (i < n) - insert(n, arr, i, x); + if (i < n) + insert(n, arr, i, x); } diff --git a/src/tools.cu b/src/tools.cu index 3d3fed7..80d06ec 100644 --- a/src/tools.cu +++ b/src/tools.cu @@ -3,15 +3,19 @@ extern "C" { -__global__ void interleave(int nthreads, unsigned long long *x, unsigned long long *y, unsigned long long *z, int bits, unsigned long long *dest) +__global__ void +interleave(int nthreads, unsigned long long *x, unsigned long long *y, + unsigned long long *z, int bits, unsigned long long *dest) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - for (int i=0; i < bits; i++) - dest[id] |= (x[id] & 1 << i) << (2*i) | (y[id] & 1 << i) << (2*i+1) | (z[id] & 1 << i) << (2*i+2); + for (int i=0; i < bits; i++) + dest[id] |= (x[id] & 1 << i) << (2*i) | + (y[id] & 1 << i) << (2*i+1) | + (z[id] & 1 << i) << (2*i+2); } -} +} // extern "C" diff --git a/src/transform.cu b/src/transform.cu index 57bd509..1f4405e 100644 --- a/src/transform.cu +++ b/src/transform.cu @@ -7,41 +7,45 @@ extern "C" { /* Translate the points `a` by the vector `v` */ -__global__ void translate(int nthreads, float3 *a, float3 v) +__global__ void +translate(int nthreads, float3 *a, float3 v) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - a[id] += v; + a[id] += v; } /* Rotate the points `a` through an angle `phi` counter-clockwise about the axis `axis` (when looking towards +infinity). */ -__global__ void rotate(int nthreads, float3 *a, float phi, float3 axis) +__global__ void +rotate(int nthreads, float3 *a, float phi, float3 axis) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - a[id] = rotate(a[id], phi, axis); + a[id] = rotate(a[id], phi, axis); } /* Rotate the points `a` through an angle `phi` counter-clockwise (when looking towards +infinity along `axis`) about the axis defined by the point `point` and the vector `axis` . */ -__global__ void rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, float3 point) +__global__ void +rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, + float3 point) { - int id = blockIdx.x*blockDim.x + threadIdx.x; + int id = blockIdx.x*blockDim.x + threadIdx.x; - if (id >= nthreads) - return; + if (id >= nthreads) + return; - a[id] -= point; - a[id] = rotate(a[id], phi, axis); - a[id] += point; + a[id] -= point; + a[id] = rotate(a[id], phi, axis); + a[id] += point; } } // extern "c" @@ -5,7 +5,7 @@ from chroma.geometry import Mesh import bz2 def mesh_from_stl(filename): - "Return a mesh from an stl file." + "Returns a `chroma.geometry.Mesh` from an STL file." if filename.endswith('.bz2'): f = bz2.BZ2File(filename) else: diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 791f119..c7c9394 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -20,7 +20,7 @@ class TestPDF(unittest.TestCase): g4generator = G4ParallelGenerator(1, water_wcsim) gpu_instance = gpu.GPU(0) - gpu_geometry = gpu.GPUGeometry(gpu_instance, self.detector) + gpu_geometry = gpu.GPUGeometry(self.detector) nthreads_per_block, max_blocks = 64, 1024 @@ -34,8 +34,10 @@ class TestPDF(unittest.TestCase): for ev in g4generator.generate_events(itertools.islice(self.vertex_gen, 10)): gpu_photons = gpu.GPUPhotons(ev.photons_beg) - gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) - gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block, + max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, + nthreads_per_block, max_blocks) gpu_pdf.add_hits_to_pdf(gpu_channels) hitcount, pdf = gpu_pdf.get_pdfs() diff --git a/tests/test_propagation.py b/tests/test_propagation.py index bf886bd..8f9d084 100644 --- a/tests/test_propagation.py +++ b/tests/test_propagation.py @@ -34,10 +34,12 @@ class TestPropagation(unittest.TestCase): wavelengths = np.empty(nphotons, np.float32) wavelengths.fill(400.0) - photons = Photons(pos=pos, dir=dir, pol=pol, t=t, wavelengths=wavelengths) + photons = Photons(pos=pos, dir=dir, pol=pol, t=t, + wavelengths=wavelengths) # First make one step to check for strangeness - photons_end = sim.simulate([photons], keep_photons_end=True, max_steps=1).next().photons_end + photons_end = sim.simulate([photons], keep_photons_end=True, + max_steps=1).next().photons_end self.assertFalse(np.isnan(photons_end.pos).any()) self.assertFalse(np.isnan(photons_end.dir).any()) @@ -46,8 +48,10 @@ class TestPropagation(unittest.TestCase): self.assertFalse(np.isnan(photons_end.wavelengths).any()) # Now let it run the usual ten steps - photons_end = sim.simulate([photons], keep_photons_end=True, max_steps=10).next().photons_end + photons_end = sim.simulate([photons], keep_photons_end=True, + max_steps=10).next().photons_end aborted = (photons_end.flags & (1 << 31)) > 0 - print 'aborted photons: %1.1f' % (float(np.count_nonzero(aborted)) / nphotons) + print 'aborted photons: %1.1f' % \ + (float(np.count_nonzero(aborted)) / nphotons) self.assertFalse(aborted.any()) |