-rwxr-xr-x  benchmark.py                    |  205
-rwxr-xr-x  camera.py                       |  312
-rw-r--r--  detectors/lbne.py               |   10
-rw-r--r--  detectors/miniclean.py          |    4
-rw-r--r--  event.py                        |  102
-rw-r--r--  fileio/root.C                   |  209
-rw-r--r--  fileio/root.py                  |  186
-rw-r--r--  generator/g4gen.py              |   96
-rw-r--r--  generator/photon.py             |   52
-rw-r--r--  generator/vertex.py             |   75
-rw-r--r--  geometry.py                     |    3
-rw-r--r--  gpu.py                          |  655
-rw-r--r--  gputhread.py                    |   95
-rw-r--r--  itertoolset.py                  |   51
-rw-r--r--  likelihood.py                   |   54
-rw-r--r--  optics.py                       |    2
-rw-r--r--  project.py                      |   28
-rwxr-xr-x  sim.py                          |  283
-rw-r--r--  src/alpha.h                     |   89
-rw-r--r--  src/kernel.cu                   |   60
-rw-r--r--  src/sorting.h                   |  103
-rw-r--r--  src/transform.cu                |   47
-rw-r--r--  tests/test_fileio.py            |   66
-rw-r--r--  tests/test_generator_photon.py  |   21
-rw-r--r--  tests/test_generator_vertex.py  |   16
-rw-r--r--  tests/test_pdf.py               |   40
-rw-r--r--  tests/test_propagation.py       |   42
-rw-r--r--  tests/test_rayleigh.py          |   36
-rw-r--r--  threadtest.py                   |   94
-rw-r--r--  tools.py                        |   18
30 files changed, 1570 insertions, 1484 deletions
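
The benchmark.py diff below replaces the monolithic chroma.gpu.GPU interface (load_geometry, load_photons, propagate methods) with per-component wrappers (GPUGeometry, GPUPhotons, GPURays, GPUDaq, GPUPDF) plus free functions such as gpu.propagate and gpu.get_rng_states. The following minimal sketch of the new calling sequence is assembled only from calls that appear in this diff; the photon count and launch parameters are illustrative values, not project defaults.

    import numpy as np
    from chroma import gpu, event, sample, detectors

    # Geometry is built as before, then uploaded through the new GPUGeometry wrapper.
    lbne = detectors.build_lbne_200kton()
    lbne.build(bits=11)
    gpu_instance = gpu.GPU()
    gpu_geometry = gpu.GPUGeometry(gpu_instance, lbne)

    # Photons are plain arrays wrapped by GPUPhotons; polarization is now required.
    nphotons = 100000  # illustrative
    pos = np.zeros((nphotons, 3))
    dir = sample.uniform_sphere(nphotons)
    pol = np.cross(sample.uniform_sphere(nphotons), dir)
    pol /= np.apply_along_axis(np.linalg.norm, 1, pol)[:, np.newaxis]
    wavelengths = np.random.uniform(400, 800, size=nphotons)
    gpu_photons = gpu.GPUPhotons(event.Photons(pos, dir, pol, wavelengths))

    # Propagation is a free function taking explicit RNG states and launch sizes.
    nthreads_per_block, max_blocks = 64, 1024
    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)
    gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks)
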
diff --git a/benchmark.py b/benchmark.py index 4c46a1f..f839f61 100755 --- a/benchmark.py +++ b/benchmark.py @@ -1,95 +1,61 @@ #!/usr/bin/env python import numpy as np +import pycuda.driver as cuda from pycuda import gpuarray as ga import time from uncertainties import ufloat import sys - -from chroma.gpu import GPU, to_float3 -from chroma.camera import get_rays -from chroma.event import Photons -from chroma.sample import uniform_sphere -from chroma.generator.photon import G4ParallelGenerator -from chroma.optics import water_wcsim -from chroma.generator.vertex import constant_particle_gun -from chroma.tools import profile_if_possible +import itertools + +from chroma import gpu +from chroma import camera +from chroma import event +from chroma import sample +from chroma import generator +from chroma import tools +from chroma import optics # Generator processes need to fork BEFORE the GPU context is setup -g4generator = G4ParallelGenerator(4, water_wcsim) - -def progress(seq): - "Print progress while iterating over `seq`." - n = len(seq) - print '[' + ' '*21 + ']\r[', - sys.stdout.flush() - for i, item in enumerate(seq): - if i % (n//10) == 0: - print '.', - sys.stdout.flush() - yield item - print ']' - sys.stdout.flush() - -def ray_trace(gpu, number=1000): - """ - Return the number of mean and standard deviation of the number of ray - intersections per second as a ufloat for the geometry loaded onto `gpu`. - - .. note:: - The rays are thrown from a camera sitting *outside* of the geometry. +g4generator = generator.photon.G4ParallelGenerator(4, optics.water_wcsim) - Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. - - number, int - The number of kernel calls to average. - """ - lb, ub = gpu.geometry.mesh.get_bounds() - scale = np.linalg.norm(ub-lb) - point = [0,scale,(lb[2]+ub[2])/2] +def intersect(gpu_instance, number=100, nphotons=500000, nthreads_per_block=64, max_blocks=1024): + "Returns the average number of ray intersections per second." + distances_gpu = ga.empty(nphotons, dtype=np.float32) - size = (800,600) - width, height = size + run_times = [] + for i in tools.progress(range(number)): + pos = np.zeros((nphotons,3), dtype=np.float32) + dir = sample.uniform_sphere(nphotons) - origins, directions = get_rays(point, size, 0.035, focal_length=0.018) + gpu_rays = gpu.GPURays(pos, dir) - origins_gpu = ga.to_gpu(to_float3(origins)) - directions_gpu = ga.to_gpu(to_float3(directions)) - pixels_gpu = ga.zeros(width*height, dtype=np.int32) + gpu_funcs = gpu.GPUFuncs(gpu_instance.module) - run_times = [] - for i in progress(range(number)): t0 = time.time() - gpu.kernels.ray_trace(np.int32(pixels_gpu.size), origins_gpu, directions_gpu, pixels_gpu, block=(gpu.nthreads_per_block,1,1), grid=(pixels_gpu.size//gpu.nthreads_per_block+1,1)) - gpu.context.synchronize() + gpu_funcs.distance_to_mesh(np.int32(gpu_rays.pos.size), gpu_rays.pos, gpu_rays.dir, distances_gpu, block=(nthreads_per_block,1,1), grid=(gpu_rays.pos.size//nthreads_per_block+1,1)) + cuda.Context.get_current().synchronize() elapsed = time.time() - t0 if i > 0: # first kernel call incurs some driver overhead run_times.append(elapsed) - return pixels_gpu.size/ufloat((np.mean(run_times),np.std(run_times))) + return gpu_rays.pos.size/ufloat((np.mean(run_times),np.std(run_times))) -def load_photons(gpu, number=10, nphotons=500000): - """ - Return the mean and standard deviation of the number of photons loaded - onto `gpu` per second. 
- - Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. - - number, int - The number of loads to average - - nphotons, int - The number of photons to load per trial - """ - gpu.setup_propagate() - photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons)) +def load_photons(number=100, nphotons=500000): + """Returns the average number of photons moved to the GPU device memory + per second.""" + pos = np.zeros((nphotons,3)) + dir = sample.uniform_sphere(nphotons) + pol = np.cross(sample.uniform_sphere(nphotons), dir) + pol /= np.apply_along_axis(np.linalg.norm,1,pol)[:,np.newaxis] + wavelengths = np.random.uniform(400,800,size=nphotons) + photons = event.Photons(pos, dir, pol, wavelengths) run_times = [] - for i in progress(range(number)): + for i in tools.progress(range(number)): t0 = time.time() - gpu.load_photons(photons) - gpu.context.synchronize() + gpu_photons = gpu.GPUPhotons(photons) + cuda.Context.get_current().synchronize() elapsed = time.time() - t0 if i > 0: @@ -98,28 +64,23 @@ def load_photons(gpu, number=10, nphotons=500000): return nphotons/ufloat((np.mean(run_times),np.std(run_times))) -def propagate(gpu, number=10, nphotons=500000): - """ - Return the mean and standard deviation of the number of photons propagated - per second as a ufloat for the geometry loaded onto `gpu`. - - Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. - - number, int - The number of kernel calls to average. - - nphotons, int - The number of photons to propagate per kernel call. - """ - gpu.setup_propagate() +def propagate(gpu_instance, number=10, nphotons=500000, nthreads_per_block=64, max_blocks=1024): + "Returns the average number of photons propagated on the GPU per second." + rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) run_times = [] - for i in progress(range(number)): - photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons)) - gpu.load_photons(photons) + for i in tools.progress(range(number)): + pos = np.zeros((nphotons,3)) + dir = sample.uniform_sphere(nphotons) + pol = np.cross(sample.uniform_sphere(nphotons), dir) + pol /= np.apply_along_axis(np.linalg.norm,1,pol)[:,np.newaxis] + wavelengths = np.random.uniform(400,800,size=nphotons) + photons = event.Photons(pos, dir, pol, wavelengths) + gpu_photons = gpu.GPUPhotons(photons) + t0 = time.time() - gpu.propagate() - gpu.context.synchronize() + gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + cuda.Context.get_current().synchronize() elapsed = time.time() - t0 if i > 0: @@ -128,16 +89,15 @@ def propagate(gpu, number=10, nphotons=500000): return nphotons/ufloat((np.mean(run_times),np.std(run_times))) -@profile_if_possible -def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): +@tools.profile_if_possible +def pdf(gpu_instance, gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=1, nthreads_per_block=64, max_blocks=1024): """ - Return the mean and standard deviation of the number of 100 MeV - events per second that can be histogrammed a ufloat for the - geometry loaded onto `gpu`. + Returns the average number of 100 MeV events per second that can be + histogrammed per second. Args: - - gpu, chroma.gpu.GPU - The GPU object with a geometry already loaded. + - gpu_instance, chroma.gpu.GPU + The GPU instance passed to the GPUGeometry constructor. 
- max_pmt_id, int The channel number of the highest PMT - npdfs, int @@ -147,27 +107,28 @@ def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): - nreps, int The number of times to propagate each event and add to PDF """ - gpu.setup_propagate() - - run_times = [] + rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) - gpu.setup_propagate() - gpu.setup_daq(max_pmt_id) - gpu.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + gpu_daq = gpu.GPUDaq(gpu_geometry, max_pmt_id) + gpu_pdf = gpu.GPUPDF() + gpu_pdf.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - for i in progress(range(npdfs)): + run_times = [] + for i in tools.progress(range(npdfs)): t0 = time.time() - gpu.clear_pdf() + gpu_pdf.clear_pdf() + + vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) + vertex_iter = itertools.islice(vertex_gen, nevents) - for ev in g4generator.generate_events(nevents, vertex_gen): + for ev in g4generator.generate_events(vertex_iter): for j in xrange(nreps): - gpu.load_photons(ev.photon_start) - gpu.propagate() - gpu.run_daq() - gpu.add_hits_to_pdf() + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block) - hitcount, pdf = gpu.get_pdfs() + hitcount, pdf = gpu_pdf.get_pdfs() elapsed = time.time() - t0 @@ -178,15 +139,23 @@ def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): return nevents*nreps/ufloat((np.mean(run_times),np.std(run_times))) if __name__ == '__main__': - from chroma.detectors import build_lbne_200kton, build_minilbne + from chroma import detectors + import gc - lbne = build_lbne_200kton() + lbne = detectors.build_lbne_200kton() lbne.build(bits=11) - gpu = GPU() - gpu.load_geometry(lbne, print_usage=False) + gpu_instance = gpu.GPU() + gpu_geometry = gpu.GPUGeometry(gpu_instance, lbne) - print '%s track steps/s' % ray_trace(gpu) - print '%s loaded photons/s' % load_photons(gpu) - print '%s propagated photons/s' % propagate(gpu) - print '%s histogrammed 100 MeV events/s' % pdf(gpu, max(lbne.pmtids)) + gpu_instance.print_mem_info() + print '%s ray intersections/sec.' % tools.ufloat_to_str(intersect(gpu_instance)) + gc.collect() + gpu_instance.print_mem_info() + print '%s photons loaded/sec.' % tools.ufloat_to_str(load_photons()) + gc.collect() + gpu_instance.print_mem_info() + print '%s photons propagated/sec.' % tools.ufloat_to_str(propagate(gpu_instance)) + gc.collect() + gpu_instance.print_mem_info() + print '%s 100 MeV events histogrammed/s' % tools.ufloat_to_str(pdf(gpu_instance, gpu_geometry, max(lbne.pmtids))) @@ -9,15 +9,17 @@ from chroma.geometry import Mesh, Solid, Geometry import chroma.make as make import matplotlib.pyplot as plt -from chroma.gpu import GPU, CUDAFuncs +from chroma import gpu from chroma.tools import timeit from chroma.transform import rotate from chroma.optics import vacuum, lambertian_surface +import chroma.project as project +from chroma.fileio.root import RootReader import pygame from pygame.locals import * -from pycuda import gpuarray +from pycuda import gpuarray as ga from pycuda.characterize import sizeof import pycuda.driver as cuda @@ -98,28 +100,14 @@ def encode_movie(dir): print 'movie saved to %s.' 
% filename -def get_rays(position, size = (800, 600), width = 0.035, focal_length=0.018): - height = width*(size[1]/float(size[0])) - - x = np.linspace(-width/2, width/2, size[0]) - z = np.linspace(-height/2, height/2, size[1]) - - grid = np.array(tuple(product(x,[0],z))) - - grid += (0,focal_length,0) - focal_point = np.zeros(3) - - grid += position - focal_point += position - - return grid, focal_point-grid - class Camera(Thread): - def __init__(self, geometry, size=(800,600), device_id=None): + def __init__(self, geometry, size=(800,600), device_id=None, enable3d=False, green_magenta=False): Thread.__init__(self) self.geometry = geometry self.device_id = device_id self.size = size + self.enable3d = enable3d + self.green_magenta = green_magenta self.unique_bvh_layers = np.unique(self.geometry.layers) self.currentlayer = None @@ -133,11 +121,14 @@ class Camera(Thread): self.spnav = False def init_gpu(self): - self.gpu = GPU(self.device_id) - self.gpu.load_geometry(geometry) + self.gpu_instance = gpu.GPU(self.device_id) + self.gpu_geometry = gpu.GPUGeometry(self.gpu_instance, geometry) + self.gpu_funcs = gpu.GPUFuncs(self.gpu_instance.module) self.width, self.height = size + self.npixels = self.width*self.height + pygame.init() self.screen = pygame.display.set_mode(size) pygame.display.set_caption('') @@ -159,15 +150,38 @@ class Camera(Thread): self.nblocks = 64 - self.point = np.array([0, self.scale*1.0, (lower_bound[2]+upper_bound[2])/2]) - self.axis1 = np.array([1,0,0], float) - self.axis2 = np.array([0,0,1], float) + self.point = np.array([0, -self.scale*1.0, (lower_bound[2]+upper_bound[2])/2]) + self.axis1 = np.array([0,0,1], float) + self.axis2 = np.array([1,0,0], float) + + self.film_width = 0.035 + + if self.enable3d: + self.point1 = self.point-(self.scale/60,0,0) + self.point2 = self.point+(self.scale/60,0,0) + + self.viewing_angle = 0.0 - origins, directions = get_rays(self.point, self.size) + pos1, dir1 = project.from_film(self.point1, size=self.size, width=self.film_width) + pos2, dir2 = project.from_film(self.point2, size=self.size, width=self.film_width) - self.origins_gpu = gpuarray.to_gpu(origins.astype(np.float32).view(gpuarray.vec.float3)) - self.directions_gpu = gpuarray.to_gpu(directions.astype(np.float32).view(gpuarray.vec.float3)) - self.pixels_gpu = gpuarray.zeros(self.width*self.height, dtype=np.int32) + self.rays1 = gpu.GPURays(pos1, dir1) + self.rays2 = gpu.GPURays(pos2, dir2) + + scope_pos, scope_dir = project.from_film(self.point, size=np.array(self.size)/4.0, width=self.film_width/4.0) + + self.scope_rays = gpu.GPURays(scope_pos, scope_dir) + + self.pixels1_gpu = ga.empty(self.width*self.height, dtype=np.int32) + self.pixels2_gpu = ga.empty(self.width*self.height, dtype=np.int32) + + self.distances_gpu = ga.empty(self.scope_rays.pos.size, dtype=np.float32) + else: + pos, dir = project.from_film(self.point, size=self.size, width=self.film_width) + + self.rays = gpu.GPURays(pos, dir) + + self.pixels_gpu = ga.empty(self.npixels, dtype=np.int32) self.alpha = True self.movie = False @@ -177,12 +191,15 @@ class Camera(Thread): @timeit def initialize_render(self): - self.rng_states_gpu = cuda.mem_alloc(self.width*self.height*sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) - self.gpu.kernels.init_rng(np.int32(self.width*self.height), self.rng_states_gpu, np.int32(0), np.int32(0), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) - self.xyz_lookup1_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3) - 
self.xyz_lookup2_gpu = gpuarray.zeros(len(self.geometry.mesh.triangles), dtype=gpuarray.vec.float3) - self.image_gpu = gpuarray.zeros(self.width*self.height, dtype=gpuarray.vec.float3) - self.gpu.context.synchronize() + self.rng_states_gpu = gpu.get_rng_states(self.npixels) + self.xyz_lookup1_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3) + self.xyz_lookup2_gpu = ga.zeros(len(self.geometry.mesh.triangles), dtype=ga.vec.float3) + + if self.enable3d: + self.image1_gpu = ga.zeros(self.npixels, dtype=ga.vec.float3) + self.image2_gpu = ga.zeros(self.npixels, dtype=ga.vec.float3) + else: + self.image_gpu = ga.zeros(self.npixels, dtype=ga.vec.float3) self.source_position = self.point @@ -191,39 +208,48 @@ class Camera(Thread): self.max_steps = 10 def clear_xyz_lookup(self): - self.xyz_lookup1_gpu.fill(gpuarray.vec.make_float3(0.0,0.0,0.0)) - self.xyz_lookup2_gpu.fill(gpuarray.vec.make_float3(0.0,0.0,0.0)) + self.xyz_lookup1_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0)) + self.xyz_lookup2_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0)) self.nlookup_calls = 0 def update_xyz_lookup(self, source_position): - for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1): - self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), gpuarray.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(685.0), gpuarray.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) - - for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1): - self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), gpuarray.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(545.0), gpuarray.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) - - for i in range(self.xyz_lookup1_gpu.size//(self.width*self.height)+1): - self.gpu.kernels.update_xyz_lookup(np.int32(self.width*self.height), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.width*self.height), gpuarray.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(445.0), gpuarray.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + for wavelength, rgb_tuple in \ + zip([685.0, 545.0, 445.0],[(1,0,0),(0,1,0),(0,0,1)]): + for i in range(self.xyz_lookup1_gpu.size//(self.npixels)+1): + self.gpu_funcs.update_xyz_lookup(np.int32(self.npixels), np.int32(self.xyz_lookup1_gpu.size), np.int32(i*self.npixels), ga.vec.make_float3(*source_position), self.rng_states_gpu, np.float32(wavelength), ga.vec.make_float3(*rgb_tuple), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.npixels//self.nblocks+1,1)) self.nlookup_calls += 1 def clear_image(self): - self.image_gpu.fill(gpuarray.vec.make_float3(0.0,0.0,0.0)) + if self.enable3d: + self.image1_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0)) + self.image2_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0)) + else: + self.image_gpu.fill(ga.vec.make_float3(0.0,0.0,0.0)) self.nimages = 0 - def update_image(self): - self.gpu.kernels.update_xyz_image(np.int32(self.width*self.height), self.rng_states_gpu, 
self.origins_gpu, self.directions_gpu, np.float32(685.0), gpuarray.vec.make_float3(1.0,0.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) - - self.gpu.kernels.update_xyz_image(np.int32(self.width*self.height), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(545.0), gpuarray.vec.make_float3(0.0,1.0,0.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + def update_image_from_rays(self, image, rays): + for wavelength, rgb_tuple in \ + zip([685.0, 545.0, 445.0],[(1,0,0),(0,1,0),(0,0,1)]): + self.gpu_funcs.update_xyz_image(np.int32(rays.pos.size), self.rng_states_gpu, rays.pos, rays.dir, np.float32(wavelength), ga.vec.make_float3(*rgb_tuple), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, image, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(rays.pos.size//self.nblocks+1,1)) - self.gpu.kernels.update_xyz_image(np.int32(self.width*self.height), self.rng_states_gpu, self.origins_gpu, self.directions_gpu, np.float32(445.0), gpuarray.vec.make_float3(0.0,0.0,1.0), self.xyz_lookup1_gpu, self.xyz_lookup2_gpu, self.image_gpu, np.int32(self.nlookup_calls), np.int32(self.max_steps), block=(self.nblocks,1,1), grid=(self.width*self.height//self.nblocks+1,1)) + def update_image(self): + if self.enable3d: + self.update_image_from_rays(self.image1_gpu, self.rays1) + self.update_image_from_rays(self.image2_gpu, self.rays2) + else: + self.update_image_from_rays(self.image_gpu, self.rays) self.nimages += 1 def process_image(self): - self.gpu.kernels.process_image(np.int32(self.width*self.height), self.image_gpu, self.pixels_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.width*self.height)//self.nblocks+1,1)) + if self.enable3d: + self.gpu_funcs.process_image(np.int32(self.pixels1_gpu.size), self.image1_gpu, self.pixels1_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels1_gpu.size)//self.nblocks+1,1)) + self.gpu_funcs.process_image(np.int32(self.pixels2_gpu.size), self.image2_gpu, self.pixels2_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels2_gpu.size)//self.nblocks+1,1)) + else: + self.gpu_funcs.process_image(np.int32(self.pixels_gpu.size), self.image_gpu, self.pixels_gpu, np.int32(self.nimages), block=(self.nblocks,1,1), grid=((self.pixels_gpu.size)//self.nblocks+1,1)) def screenshot(self, dir='', start=0): root, ext = 'screenshot', 'png' @@ -238,8 +264,15 @@ class Camera(Thread): print 'image saved to %s' % filename def rotate(self, phi, n): - self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size), self.origins_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) - self.gpu.kernels.rotate(np.int32(self.pixels_gpu.size), self.directions_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + if self.enable3d: + self.rays1.rotate(phi, n) + self.rays2.rotate(phi, n) + self.scope_rays.rotate(phi, n) + + self.point1 = rotate(self.point1, phi, n) + self.point2 = rotate(self.point2, phi, n) + else: + self.rays.rotate(phi, n) self.point = rotate(self.point, phi, n) self.axis1 = rotate(self.axis1, phi, n) @@ -251,12 +284,16 @@ class Camera(Thread): self.update() 
def rotate_around_point(self, phi, n, point, redraw=True): - self.gpu.kernels.rotate_around_point(np.int32(self.origins_gpu.size), self.origins_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), gpuarray.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.origins_gpu.size//self.nblocks+1,1)) - self.gpu.kernels.rotate(np.int32(self.directions_gpu.size), self.directions_gpu, np.float32(phi), gpuarray.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.directions_gpu.size//self.nblocks+1,1)) - self.axis1 = rotate(self.axis1, phi, n) self.axis2 = rotate(self.axis2, phi, n) + if self.enable3d: + self.rays1.rotate_around_point(phi, n, point) + self.rays2.rotate_around_point(phi, n, point) + self.scope_rays.rotate_around_point(phi, n, point) + else: + self.rays.rotate_around_point(phi, n, point) + if redraw: if self.render: self.clear_image() @@ -264,17 +301,25 @@ class Camera(Thread): self.update() def translate(self, v, redraw=True): - self.gpu.kernels.translate(np.int32(self.pixels_gpu.size), self.origins_gpu, gpuarray.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks,1)) - self.point += v + if self.enable3d: + self.rays1.translate(v) + self.rays2.translate(v) + self.scope_rays.translate(v) + + self.point1 += v + self.point2 += v + else: + self.rays.translate(v) + if redraw: if self.render: self.clear_image() self.update() - def update(self): + def update_pixels(self): if self.render: while self.nlookup_calls < 10: self.update_xyz_lookup(self.source_position) @@ -282,11 +327,69 @@ class Camera(Thread): self.process_image() else: if self.alpha: - self.gpu.kernels.ray_trace_alpha(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + if self.enable3d: + self.gpu_funcs.ray_trace_alpha(np.int32(self.rays1.pos.size), self.rays1.pos, self.rays1.dir, self.pixels1_gpu, block=(self.nblocks,1,1), grid=(self.rays2.pos.size//self.nblocks+1,1)) + self.gpu_funcs.ray_trace_alpha(np.int32(self.rays2.pos.size), self.rays2.pos, self.rays2.dir, self.pixels2_gpu, block=(self.nblocks,1,1), grid=(self.rays2.pos.size//self.nblocks+1,1)) + else: + self.gpu_funcs.ray_trace_alpha(np.int32(self.rays.pos.size), self.rays.pos, self.rays.dir, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.rays.pos.size//self.nblocks+1,1)) + else: + if self.enable3d: + self.gpu_funcs.ray_trace(np.int32(self.rays1.pos.size), self.rays1.pos, self.rays1.dir, self.pixels1_gpu, block=(self.nblocks,1,1), grid=(self.rays1.pos.size//self.nblocks+1,1)) + self.gpu_funcs.ray_trace(np.int32(self.rays2.pos.size), self.rays2.pos, self.rays2.dir, self.pixels2_gpu, block=(self.nblocks,1,1), grid=(self.rays2.pos.size//self.nblocks+1,1)) + else: + self.gpu_funcs.ray_trace(np.int32(self.rays.pos.size), self.rays.pos, self.rays.dir, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.rays.pos.size//self.nblocks+1,1)) + + def update(self): + if self.enable3d: + self.gpu_funcs.distance_to_mesh(np.int32(self.scope_rays.pos.size), self.scope_rays.pos, self.scope_rays.dir, self.distances_gpu, block=(self.nblocks,1,1), grid=(self.scope_rays.pos.size//self.nblocks,1)) + + baseline = ga.min(self.distances_gpu).get().item() + + if baseline < 1e9: + d1 = self.point1 - self.point + v1 = d1/np.linalg.norm(d1) + v1 *= baseline/60 - np.linalg.norm(d1) + + self.rays1.translate(v1) + + self.point1 += v1 + + d2 = self.point2 - self.point + v2 = d2/np.linalg.norm(d2) + v2 *= baseline/60 - np.linalg.norm(d2) + 
+ self.rays2.translate(v2) + + self.point2 += v2 + + direction = np.cross(self.axis1,self.axis2) + direction /= np.linalg.norm(direction) + direction1 = self.point + direction*baseline - self.point1 + direction1 /= np.linalg.norm(direction1) + + new_viewing_angle = np.arccos(direction1.dot(direction)) + + phi = new_viewing_angle - self.viewing_angle + + self.rays1.rotate_around_point(phi, self.axis1, self.point1) + self.rays2.rotate_around_point(-phi, self.axis1, self.point2) + + self.viewing_angle = new_viewing_angle + + self.update_pixels() + + if self.enable3d: + pixels1 = self.pixels1_gpu.get() + pixels2 = self.pixels2_gpu.get() + + if self.green_magenta: + pixels = (pixels1 & 0x00ff00) | (pixels2 & 0xff00ff) else: - self.gpu.kernels.ray_trace(np.int32(self.pixels_gpu.size), self.origins_gpu, self.directions_gpu, self.pixels_gpu, block=(self.nblocks,1,1), grid=(self.pixels_gpu.size//self.nblocks+1,1)) + pixels = (pixels1 & 0xff0000) | (pixels2 & 0x00ffff) + else: + pixels = self.pixels_gpu.get() - pygame.surfarray.blit_array(self.screen, self.pixels_gpu.get().reshape(self.size)) + pygame.surfarray.blit_array(self.screen, pixels.reshape(self.size)) if self.doom_mode: self.screen.blit(self.doom_hud, self.doom_rect) pygame.display.flip() @@ -298,11 +401,11 @@ class Camera(Thread): def loadlayer(self, layer): try: layergeometry = self.bvh_layers[layer] + layergeometry.activate() except KeyError: layergeometry = build(bvh_mesh(self.geometry, layer),8) - self.bvh_layers[layer] = layergeometry + self.bvh_layers[layer] = gpu.GPUGeometry(self.gpu, layergeometry) - self.gpu.load_geometry(layergeometry) self.update() def process_event(self, event): @@ -331,15 +434,15 @@ class Camera(Thread): length = np.linalg.norm(movement) - mouse_direction = movement[0]*self.axis1 + movement[1]*self.axis2 + mouse_direction = movement[0]*self.axis2 - movement[1]*self.axis1 mouse_direction /= np.linalg.norm(mouse_direction) if pygame.key.get_mods() & (KMOD_LSHIFT | KMOD_RSHIFT): - v = mouse_direction*self.scale*length/float(self.width) + v = -mouse_direction*self.scale*length/float(self.width) self.translate(v) else: phi = np.float32(2*np.pi*length/float(self.width)) - n = rotate(mouse_direction, np.pi/2, -np.cross(self.axis1,self.axis2)) + n = rotate(mouse_direction, np.pi/2, np.cross(self.axis1,self.axis2)) if pygame.key.get_mods() & KMOD_LCTRL: self.rotate_around_point(phi, n, self.point) @@ -348,11 +451,11 @@ class Camera(Thread): elif event.type == KEYDOWN: if event.key == K_a: - v = self.scale*self.axis1/10.0 + v = self.scale*self.axis2/10.0 self.translate(v) elif event.key == K_d: - v = -self.scale*self.axis1/10.0 + v = -self.scale*self.axis2/10.0 self.translate(v) elif event.key == K_w: @@ -363,10 +466,6 @@ class Camera(Thread): v = -self.scale*np.cross(self.axis1,self.axis2)/10.0 self.translate(v) - elif event.key == K_SPACE: - v = self.scale*self.axis2/10.0 - self.translate(v) - elif event.key == K_F6: self.clear_xyz_lookup() self.clear_image() @@ -443,10 +542,10 @@ class Camera(Thread): else: accelerate_factor = 1.0 #print 'raw:', spnav_event - v1 = self.axis1 - v2 = self.axis2 - v3 = np.cross(self.axis1,self.axis2) - + v1 = self.axis2 + v2 = self.axis1 + v3 = -np.cross(self.axis1,self.axis2) + v = -v1*spnav_event.motion.x + v2*spnav_event.motion.y \ + v3*spnav_event.motion.z v *= self.scale / 5000.0 * accelerate_factor @@ -502,12 +601,10 @@ class Camera(Thread): self.spnav = False self.update() - + self.done = False self.clicked = False - #current_layer = 0 - while not self.done: self.clock.tick(20) 
@@ -525,53 +622,44 @@ class Camera(Thread): if self.spnav: self.spnav_module.spnav_close() - del self.gpu + del self.gpu_instance class EventViewer(Camera): def __init__(self, geometry, filename, **kwargs): Camera.__init__(self, geometry, **kwargs) - - import ROOT - - self.f = ROOT.TFile(filename) - self.T = self.f.Get('T') - self.T.GetEntry(0) - self.nsolids = geometry.solid_id.max() + 1 + self.rr = RootReader(filename) def color_hit_pmts(self): - self.gpu.reset_colors() - - hit = np.empty(self.nsolids, np.int32) - t = np.empty(self.nsolids, np.float32) - q = np.empty(self.nsolids, np.float32) - - # self.nsolids has a weird data type that PyROOT doesn't understand - self.T.ev.get_channels(int(self.nsolids), hit, t, q) + self.gpu_geometry.reset_colors() - # PyROOT prints warnings when we try to pass a bool array directly - # so we convert afterward - hit = hit.astype(np.bool) + hit = self.ev.channels.hit + t = self.ev.channels.t + q = self.ev.channels.q # Important: Compute range only with HIT channels solid_colors = map_to_color(q, range=(q[hit].min(),q[hit].max())) - self.gpu.color_solids(hit, solid_colors) + self.gpu_geometry.color_solids(hit, solid_colors) self.update() def process_event(self, event): if event.type == KEYDOWN: if event.key == K_PAGEUP: - entry = self.T.GetReadEntry() - if entry < self.T.GetEntries() - 1: - self.T.GetEntry(entry+1) + try: + self.ev = self.rr.next() + except StopIteration: + pass + else: self.color_hit_pmts() - return + return elif event.key == K_PAGEDOWN: - entry = self.T.GetReadEntry() - if entry > 0: - self.T.GetEntry(entry-1) + try: + self.ev = self.rr.prev() + except StopIteration: + pass + else: self.color_hit_pmts() - return + return Camera.process_event(self, event) @@ -589,6 +677,10 @@ if __name__ == '__main__': help='bits for z-ordering space axes', default=10) parser.add_option('-r', '--resolution', dest='resolution', help='specify resolution', default='1024,576') + parser.add_option('--3d', action='store_true', dest='enable3d', + help='enable 3d', default=False) + parser.add_option('--green', action='store_true', dest='green_magenta', + help='3d with green and magenta lenses', default=False) parser.add_option('-i', dest='io_file', default=None) options, args = parser.parse_args() @@ -619,9 +711,9 @@ if __name__ == '__main__': geometry = build(obj, options.bits) if options.io_file is not None: - camera = EventViewer(geometry, options.io_file, size=size) + camera = EventViewer(geometry, options.io_file, size=size, enable3d=options.enable3d, green_magenta=options.green_magenta) else: - camera = Camera(geometry, size) + camera = Camera(geometry, size, enable3d=options.enable3d, green_magenta=options.green_magenta) camera.start() camera.join() diff --git a/detectors/lbne.py b/detectors/lbne.py index ca13c2d..97d09e9 100644 --- a/detectors/lbne.py +++ b/detectors/lbne.py @@ -6,18 +6,18 @@ from chroma.transform import rotate, make_rotation_matrix from itertools import product import chroma.make as make -def build_lbne(radius, height, nstrings, pmts_per_string, endcap_spacing, physical_model=True): +def build_lbne(radius, height, nstrings, pmts_per_string, endcap_spacing, detector_material=water_wcsim, physical_model=True): if physical_model: - pmt = solids.build_12inch_pmt_with_lc(outer_material=water_wcsim) + pmt = solids.build_12inch_pmt_with_lc(outer_material=detector_material) else: - pmt = solids.build_12inch_pmt_shell(outer_material=water_wcsim) + pmt = solids.build_12inch_pmt_shell(outer_material=detector_material) - lbne = Geometry() + lbne 
= Geometry(detector_material) # outer cylinder cylinder_mesh = make.segmented_cylinder(radius, height+height/(pmts_per_string-1), nsteps=16*nstrings, nsegments=1200) cylinder_mesh.vertices = rotate(cylinder_mesh.vertices, np.pi/2, (-1,0,0)) - lbne.add_solid(Solid(cylinder_mesh, water_wcsim, vacuum, black_surface, 0xff0000ff)) + lbne.add_solid(Solid(cylinder_mesh, detector_material, vacuum, black_surface, 0xff0000ff)) lbne.pmtids = [] diff --git a/detectors/miniclean.py b/detectors/miniclean.py index b0d4955..92dd019 100644 --- a/detectors/miniclean.py +++ b/detectors/miniclean.py @@ -42,7 +42,7 @@ def build_miniclean(real_av=False): geo = Geometry() simple_iv = sphere(0.818) - geo.add_solid(Solid(simple_iv, liquid_argon, vacuum, color=0x33FF0000)) + geo.add_solid(Solid(simple_iv, liquid_argon, vacuum, color=0xffFF0000)) polygons = read_polygons(os.path.join(dir, 'miniclean_polygons.txt')) @@ -59,7 +59,7 @@ def build_miniclean(real_av=False): triangles = mesh.assemble(group=True) triangle_centroids = triangles.mean(axis=1) face = triangle_centroids[:,2] < 0.1 - colors[face] = 0x1100FF00 + colors[face] = 0xffffff polygon_types[polygon_id] = (mesh, colors) @@ -1,69 +1,73 @@ import numpy as np -from chroma.sample import uniform_sphere +class Vertex(object): + def __init__(self, particle_name, pos, dir, pol, ke, t0=0.0): + self.particle_name = particle_name + self.pos = pos + self.dir = dir + self.pol = pol + self.ke = ke + self.t0 = t0 class Photons(object): - def __init__(self, positions, directions, wavelengths, polarizations=None, times=None, last_hit_triangles=None, histories=None): - self.positions = positions - - nphotons = len(positions) - - assert len(directions) == nphotons - self.directions = directions - - assert len(wavelengths) == nphotons + def __init__(self, pos, dir, pol, wavelengths, t=None, last_hit_triangles=None, flags=None): + self.pos = pos + self.dir = dir + self.pol = pol self.wavelengths = wavelengths - if polarizations is not None: - self.polarizations = polarizations + if t is None: + self.t = np.zeros(len(pos), dtype=np.float32) else: - # polarizations are isotropic in plane perpendicular - # to the direction vectors - polarizations = np.cross(directions, uniform_sphere(nphotons)) - # normalize polarization vectors - polarizations /= np.tile(np.apply_along_axis(np.linalg.norm,1,polarizations),[3,1]).transpose() - self.polarizations = np.asarray(polarizations, order='C') + self.t = t - self.times = times - self.last_hit_triangles = last_hit_triangles - self.histories = histories + if last_hit_triangles is None: + self.last_hit_triangles = np.empty(len(pos), dtype=np.int32) + self.last_hit_triangles.fill(-1) + else: + self.last_hit_triangles = last_hit_triangles + + if flags is None: + self.flags = np.zeros(len(pos), dtype=np.uint32) + else: + self.flags = flags -def concatenate_photons(photons): - '''Merge a list of Photons objects into one long list of photons''' - return Photons(positions=np.concatenate([p.positions for p in photons]), - directions=np.concatenate([p.directions for p in photons]), - polarizations=np.concatenate([p.polarizations for p in photons]), - times=np.concatenate([p.times for p in photons]), - wavelengths=np.concatenate([p.wavelengths for p in photons])) + def __add__(self, other): + pos = np.concatenate((self.pos, other.pos)) + dir = np.concatenate((self.dir, other.dir)) + pol = np.concatenate((self.pol, other.pol)) + wavelengths = np.concatenate((self.wavelengths, other.wavelengths)) + t = np.concatenate((self.t, other.t)) + 
last_hit_triangles = np.concatenate((self.last_hit_triangles, other.last_hit_triangles)) + flags = np.concatenate((self.flags, other.flags)) + return Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags) class Channels(object): - def __init__(self, hit, t, q, histories=None): + def __init__(self, hit, t, q, flags=None): self.hit = hit self.t = t self.q = q - self.histories=histories + self.flags = flags def hit_channels(self): return self.hit.nonzero(), self.t[self.hit], self.q[self.hit] -class Subtrack(object): - def __init__(self, particle_name, position, direction, start_time, total_energy): - self.particle_name = particle_name - self.position = position - self.direction = direction - self.start_time = start_time - self.total_energy = total_energy - class Event(object): - def __init__(self, event_id, particle_name=None, gen_position=None, gen_direction=None, gen_total_energy=None, ): - self.event_id = event_id - self.particle_name = particle_name - self.gen_position = gen_position - self.gen_direction = gen_direction - self.gen_total_energy = gen_total_energy + def __init__(self, id=0, primary_vertex=None, vertices=None, photons_beg=None, photons_end=None, channels=None): + self.id = id + + self.nphotons = None + + self.primary_vertex = primary_vertex + + if vertices is not None: + if np.iterable(vertices): + self.vertices = vertices + else: + self.vertices = [vertices] + else: + self.vertices = [] - self.subtracks = [] - self.nphoton = 0 - self.photon_start = None - self.photon_stop = None - self.channels = None + self.photons_beg = photons_beg + self.photons_end = photons_end + self.channels = channels diff --git a/fileio/root.C b/fileio/root.C index 07e50a1..2b39b1b 100644 --- a/fileio/root.C +++ b/fileio/root.C @@ -3,166 +3,143 @@ #include <TTree.h> #include <string> +struct Vertex { + std::string particle_name; + TVector3 pos; + TVector3 dir; + TVector3 pol; + double ke; + double t0; + + ClassDef(Vertex, 1); +}; + struct Photon { - double time; - TVector3 position; - TVector3 direction; - TVector3 polarization; + double t; + TVector3 pos; + TVector3 dir; + TVector3 pol; double wavelength; // nm - unsigned int history; + unsigned int flag; int last_hit_triangle; ClassDef(Photon, 1); }; -struct Track { - std::string particle; - TVector3 position; - TVector3 direction; - double start_time; - double total_energy; +struct Channel { + Channel() : id(-1), t(-1e9), q(-1e9) { }; + int id; + double t; + double q; + unsigned int flag; - ClassDef(Track, 1); + ClassDef(Channel, 1); }; -struct MC { - std::string particle; - TVector3 gen_position; - TVector3 gen_direction; - double gen_total_energy; - - int nphoton; - - std::vector<Track> subtrack; - std::vector<Photon> photon_start; - std::vector<Photon> photon_stop; +struct Event { + int id; + unsigned int nhit; + unsigned int nchannels; - ClassDef(MC, 1); -}; + Vertex primary_vertex; -struct Channel { - Channel() : channel_id(-1), time(-9999.0), charge(-9999.0) { }; - int channel_id; - double time; - double charge; - unsigned int mc_history; + std::vector<Vertex> vertices; + std::vector<Photon> photons_beg; + std::vector<Photon> photons_end; + std::vector<Channel> channels; - ClassDef(Channel, 1); + ClassDef(Event, 1); }; - -struct Event { - int event_id; - MC mc; - int nhit; - int max_channel_id; - std::vector<Channel> channel; - ClassDef(Event, 1); +void fill_channels(Event *ev, unsigned int nhit, unsigned int *ids, float *t, + float *q, unsigned int *flags, unsigned int nchannels) +{ + ev->nhit = 0; + ev->nchannels = nchannels; 
+ ev->channels.resize(0); - // Populate arrays of length nentries with hit, time, and charge - // information, indexed by channel ID - void get_channels(unsigned int nentries, int *hit, float *time, - float *charge, unsigned int *mc_history=0) - { - for (unsigned int i=0; i < nentries; i++) { - hit[i] = 0; - time[i] = -1e9f; - charge[i] = -1e9f; - if (mc_history) - mc_history[i] = 0; - } + Channel ch; + unsigned int id; + for (unsigned int i=0; i < nhit; i++) { + ev->nhit++; + id = ids[i]; + ch.id = id; + ch.t = t[id]; + ch.q = q[id]; + ch.flag = flags[id]; + ev->channels.push_back(ch); + } +} + +void get_channels(Event *ev, int *hit, float *t, float *q, unsigned int *flags) +{ + for (unsigned int i=0; i < ev->nchannels; i++) { + hit[i] = 0; + t[i] = -1e9f; + q[i] = -1e9f; + flags[i] = 0; + } - for (unsigned int i=0; i < channel.size(); i++) { - unsigned int channel_id = channel[i].channel_id; + unsigned int id; + for (unsigned int i=0; i < ev->channels.size(); i++) { + id = ev->channels[i].id; - if (channel_id < nentries) { - hit[channel_id] = 1; - time[channel_id] = channel[i].time; - charge[channel_id] = channel[i].charge; - if (mc_history) - mc_history[channel_id] = channel[i].mc_history; - } + if (id < ev->nchannels) { + hit[id] = 1; + t[id] = ev->channels[i].t; + q[id] = ev->channels[i].q; + flags[id] = ev->channels[i].flag; } } +} -}; - -void get_photons(const std::vector<Photon> &photons, float *positions, - float *directions, float *polarizations, float *wavelengths, - float *times, unsigned int *histories, int *last_hit_triangles) +void get_photons(const std::vector<Photon> &photons, float *pos, float *dir, + float *pol, float *wavelengths, float *t, + int *last_hit_triangles, unsigned int *flags) { for (unsigned int i=0; i < photons.size(); i++) { const Photon &photon = photons[i]; - positions[3*i] = photon.position.X(); - positions[3*i+1] = photon.position.Y(); - positions[3*i+2] = photon.position.Z(); + pos[3*i] = photon.pos.X(); + pos[3*i+1] = photon.pos.Y(); + pos[3*i+2] = photon.pos.Z(); - directions[3*i] = photon.direction.X(); - directions[3*i+1] = photon.direction.Y(); - directions[3*i+2] = photon.direction.Z(); + dir[3*i] = photon.dir.X(); + dir[3*i+1] = photon.dir.Y(); + dir[3*i+2] = photon.dir.Z(); - polarizations[3*i] = photon.polarization.X(); - polarizations[3*i+1] = photon.polarization.Y(); - polarizations[3*i+2] = photon.polarization.Z(); + pol[3*i] = photon.pol.X(); + pol[3*i+1] = photon.pol.Y(); + pol[3*i+2] = photon.pol.Z(); wavelengths[i] = photon.wavelength; - times[i] = photon.time; - histories[i] = photon.history; + t[i] = photon.t; + flags[i] = photon.flag; last_hit_triangles[i] = photon.last_hit_triangle; } } - void fill_photons(std::vector<Photon> &photons, unsigned int nphotons, float *pos, float *dir, - float *pol, float *wavelength, float *t0, - unsigned int *histories=0, int *last_hit_triangle=0) + float *pol, float *wavelengths, float *t, + int *last_hit_triangles, unsigned int *flags) { photons.resize(nphotons); for (unsigned int i=0; i < nphotons; i++) { Photon &photon = photons[i]; - photon.time = t0[i]; - photon.position.SetXYZ(pos[3*i], pos[3*i + 1], pos[3*i + 2]); - photon.direction.SetXYZ(dir[3*i], dir[3*i + 1], dir[3*i + 2]); - photon.polarization.SetXYZ(pol[3*i], pol[3*i + 1], pol[3*i + 2]); - photon.wavelength = wavelength[i]; - if (histories) - photon.history = histories[i]; - else - photon.history = 0; - - if (last_hit_triangle) - photon.last_hit_triangle = last_hit_triangle[i]; - else - photon.last_hit_triangle = -1; - } -} - - -void 
fill_hits(Event *ev, unsigned int nchannels, float *time, - float *charge, unsigned int *history) -{ - ev->channel.resize(0); - ev->nhit = 0; - ev->max_channel_id = nchannels - 1; + photon.t = t[i]; + photon.pos.SetXYZ(pos[3*i], pos[3*i + 1], pos[3*i + 2]); + photon.dir.SetXYZ(dir[3*i], dir[3*i + 1], dir[3*i + 2]); + photon.pol.SetXYZ(pol[3*i], pol[3*i + 1], pol[3*i + 2]); + photon.wavelength = wavelengths[i]; + photon.last_hit_triangle = last_hit_triangles[i]; + photon.flag = flags[i]; - Channel ch; - for (unsigned int i=0; i < nchannels; i++) { - if (time[i] < 1e8) { - ev->nhit++; - ch.channel_id = i; - ch.time = time[i]; - ch.charge = charge[i]; - ch.mc_history = history[i]; - ev->channel.push_back(ch); - } } } - #ifdef __MAKECINT__ -#pragma link C++ class vector<Track>; +#pragma link C++ class vector<Vertex>; #pragma link C++ class vector<Photon>; #pragma link C++ class vector<Channel>; #endif diff --git a/fileio/root.py b/fileio/root.py index 4c9d9bb..507c249 100644 --- a/fileio/root.py +++ b/fileio/root.py @@ -14,61 +14,66 @@ def tvector3_to_ndarray(vec): def make_photon_with_arrays(size): '''Returns a new chroma.event.Photons object for `size` number of photons with empty arrays set for all the photon attributes.''' - return event.Photons(positions=np.empty((size,3), dtype=np.float32), - directions=np.empty((size,3), dtype=np.float32), - polarizations=np.empty((size,3), dtype=np.float32), + return event.Photons(pos=np.empty((size,3), dtype=np.float32), + dir=np.empty((size,3), dtype=np.float32), + pol=np.empty((size,3), dtype=np.float32), wavelengths=np.empty(size, dtype=np.float32), - times=np.empty(size, dtype=np.float32), - histories=np.empty(size, dtype=np.uint32), + t=np.empty(size, dtype=np.float32), + flags=np.empty(size, dtype=np.uint32), last_hit_triangles=np.empty(size, dtype=np.int32)) +def root_vertex_to_python_vertex(vertex): + "Returns a chroma.event.Vertex object from a root Vertex object." 
+ return event.Vertex(str(vertex.particle_name), + tvector3_to_ndarray(vertex.pos), + tvector3_to_ndarray(vertex.dir), + tvector3_to_ndarray(vertex.pol), + vertex.ke, + vertex.t0) def root_event_to_python_event(ev): '''Returns a new chroma.event.Event object created from the contents of the ROOT event `ev`.''' - pyev = event.Event(ev.event_id) - - # MC - pyev.particle_name = str(ev.mc.particle) - pyev.gen_position = tvector3_to_ndarray(ev.mc.gen_position) - pyev.gen_direction = tvector3_to_ndarray(ev.mc.gen_direction) - pyev.gen_total_energy = ev.mc.gen_total_energy - - pyev.nphoton = ev.mc.nphoton - - for subtrack in ev.mc.subtrack: - pysubtrack = event.Subtrack(str(subtrack.particle), - tvector3_to_ndarray(subtrack.position), - tvector3_to_ndarray(subtrack.direction), - subtrack.start_time, - subtrack.total_energy) - pyev.subtracks.append(pysubtrack) - - # photon start - if ev.mc.photon_start.size() > 0: - photons = make_photon_with_arrays(ev.mc.photon_start.size()) - ROOT.get_photons(ev.mc.photon_start, photons.positions.ravel(), photons.directions.ravel(), - photons.polarizations.ravel(), photons.wavelengths, photons.times, - photons.histories, photons.last_hit_triangles) - pyev.photon_start = photons - - # photon stop - if ev.mc.photon_stop.size() > 0: - photons = make_photon_with_arrays(ev.mc.photon_stop.size()) - ROOT.get_photons(ev.mc.photon_stop, photons.positions.ravel(), photons.directions.ravel(), - photons.polarizations.ravel(), photons.wavelengths, photons.times, - photons.histories, photons.last_hit_triangles) - pyev.photon_stop = photons - - # hits - max_channel_id = ev.max_channel_id - hit = np.empty(shape=max_channel_id+1, dtype=np.int32) - t = np.empty(shape=max_channel_id+1, dtype=np.float32) - q = np.empty(shape=max_channel_id+1, dtype=np.float32) - histories = np.empty(shape=max_channel_id+1, dtype=np.uint32) - - ev.get_channels(max_channel_id+1, hit, t, q, histories) - pyev.channels = event.Channels(hit.astype(bool), t, q, histories) + pyev = event.Event(ev.id) + pyev.primary_vertex = root_vertex_to_python_vertex(ev.primary_vertex) + + for vertex in ev.vertices: + pyev.vertices.append(root_vertex_to_python_vertex(vertex)) + + # photon begin + if ev.photons_beg.size() > 0: + photons = make_photon_with_arrays(ev.photons_beg.size()) + ROOT.get_photons(ev.photons_beg, + photons.pos.ravel(), + photons.dir.ravel(), + photons.pol.ravel(), + photons.wavelengths, + photons.t, + photons.last_hit_triangles, + photons.flags) + pyev.photons_beg = photons + + # photon end + if ev.photons_end.size() > 0: + photons = make_photon_with_arrays(ev.photons_end.size()) + ROOT.get_photons(ev.photons_end, + photons.pos.ravel(), + photons.dir.ravel(), + photons.pol.ravel(), + photons.wavelengths, + photons.t, + photons.last_hit_triangles, + photons.flags) + pyev.photons_end = photons + + # channels + hit = np.empty(ev.nchannels, dtype=np.int32) + t = np.empty(ev.nchannels, dtype=np.float32) + q = np.empty(ev.nchannels, dtype=np.float32) + flags = np.empty(ev.nchannels, dtype=np.uint32) + + ROOT.get_channels(ev, hit, t, q, flags) + pyev.channels = event.Channels(hit.astype(bool), t, q, flags) return pyev class RootReader(object): @@ -140,48 +145,53 @@ class RootWriter(object): self.T.Branch('ev', self.ev) def write_event(self, pyev): - '''Write an event.Event object to the ROOT tree as a ROOT.Event object.''' - self.ev.event_id = pyev.event_id - - self.ev.mc.particle = pyev.particle_name - self.ev.mc.gen_position.SetXYZ(*pyev.gen_position) - 
self.ev.mc.gen_direction.SetXYZ(*pyev.gen_direction) - self.ev.mc.gen_total_energy = pyev.gen_total_energy - self.ev.mc.nphoton = pyev.nphoton - - if pyev.photon_start is not None: - photons = pyev.photon_start - ROOT.fill_photons(self.ev.mc.photon_start, - len(photons.positions), - np.ravel(photons.positions), - np.ravel(photons.directions), - np.ravel(photons.polarizations), - photons.wavelengths, photons.times, - photons.histories, photons.last_hit_triangles) - if pyev.photon_stop is not None: - photons = pyev.photon_stop - ROOT.fill_photons(self.ev.mc.photon_stop, - len(photons.positions), - np.ravel(photons.positions), - np.ravel(photons.directions), - np.ravel(photons.polarizations), - photons.wavelengths, photons.times, - photons.histories, photons.last_hit_triangles) - - self.ev.mc.subtrack.resize(0) - if pyev.subtracks is not None: - self.ev.mc.subtrack.resize(len(pyev.subtracks)) - for i, subtrack in enumerate(pyev.subtracks): - self.ev.mc.subtrack[i].name = subtrack.particle_name - self.ev.mc.subtrack[i].position.SetXYZ(*subtrack.position) - self.ev.mc.subtrack[i].direction.SetXYZ(*subtrack.direction) - self.ev.mc.subtrack[i].start_time = subtrack.start_time - self.ev.mc.subtrack[i].total_energy = subtrack.total_energy - - ROOT.fill_hits(self.ev, len(pyev.channels.t), pyev.channels.t, pyev.channels.q, pyev.channels.histories) + "Write an event.Event object to the ROOT tree as a ROOT.Event object." + self.ev.id = pyev.id + + self.ev.primary_vertex.particle_name = pyev.primary_vertex.particle_name + self.ev.primary_vertex.pos.SetXYZ(*pyev.primary_vertex.pos) + self.ev.primary_vertex.dir.SetXYZ(*pyev.primary_vertex.dir) + if pyev.primary_vertex.pol is not None: + self.ev.primary_vertex.pol.SetXYZ(*pyev.primary_vertex.pol) + self.ev.primary_vertex.ke = pyev.primary_vertex.ke + + if pyev.photons_beg is not None: + photons = pyev.photons_beg + ROOT.fill_photons(self.ev.photons_beg, + len(photons.pos), + photons.pos.ravel(), + photons.dir.ravel(), + photons.pol.ravel(), + photons.wavelengths, photons.t, + photons.last_hit_triangles, photons.flags) + + if pyev.photons_end is not None: + photons = pyev.photons_end + ROOT.fill_photons(self.ev.photons_end, + len(photons.pos), + photons.pos.ravel(), + photons.dir.ravel(), + photons.pol.ravel(), + photons.wavelengths, photons.t, + photons.last_hit_triangles, photons.flags) + + self.ev.vertices.resize(0) + if pyev.vertices is not None: + self.ev.vertices.resize(len(pyev.vertices)) + for i, vertex in enumerate(pyev.vertices): + self.ev.vertices[i].particle_name = vertex.particle_name + self.ev.vertices[i].pos.SetXYZ(*vertex.pos) + self.ev.vertices[i].dir.SetXYZ(*vertex.dir) + if vertex.pol is not None: + self.ev.vertices[i].pol.SetXYZ(*vertex.pol) + self.ev.vertices[i].ke = vertex.ke + self.ev.vertices[i].t0 = vertex.t0 + + if pyev.channels is not None: + ROOT.fill_channels(self.ev, np.count_nonzero(pyev.channels.hit), np.arange(len(pyev.channels.t))[pyev.channels.hit].astype(np.int32), pyev.channels.t, pyev.channels.q, pyev.channels.flags, len(pyev.channels.hit)) + self.T.Fill() def close(self): self.T.Write() self.file.Close() - diff --git a/generator/g4gen.py b/generator/g4gen.py index 9650b68..8dca086 100644 --- a/generator/g4gen.py +++ b/generator/g4gen.py @@ -4,7 +4,7 @@ import g4py.NISTmaterials import g4py.ParticleGun import pyublas import numpy as np -import chroma.event as event +from chroma.event import Photons, Vertex try: import G4chroma @@ -20,16 +20,18 @@ except: class G4Generator(object): def __init__(self, material, 
seed=None): - '''Create generator to produce photons inside the specified material. + """Create generator to produce photons inside the specified material. material: chroma.geometry.Material object with density, composition dict and refractive_index. composition dictionary should be { element_symbol : fraction_by_weight, ... }. - seed: Random number generator seed for HepRandom. If None, - generator is not seeded. - ''' + + seed: int, *optional* + Random number generator seed for HepRandom. If None, generator + is not seeded. + """ if seed is not None: HepRandom.setTheSeed(seed) @@ -51,11 +53,8 @@ class G4Generator(object): gRunManager.SetUserAction(self.tracking_action) gRunManager.Initialize() - #preinitialize the process by running a simple event - self.generate_photons(event.Event(event_id=0, particle_name='e-', - gen_position=(0,0,0), - gen_direction=(1,0,0), - gen_total_energy=1.0)) + # preinitialize the process by running a simple event + self.generate_photons([Vertex('e-', (0,0,0), (1,0,0), 0, 1.0)]) def create_g4material(self, material): g4material = G4Material('world_material', material.density * g / cm3, @@ -96,57 +95,38 @@ class G4Generator(object): pol[:,2] = self.tracking_action.GetPolZ() wavelengths = self.tracking_action.GetWavelength().astype(np.float32) - times = self.tracking_action.GetT0().astype(np.float32) - return event.Photons(positions=pos, directions=dir, polarizations=pol, times=times, wavelengths=wavelengths) + t0 = self.tracking_action.GetT0().astype(np.float32) + + return Photons(pos, dir, pol, wavelengths, t0) - def generate_photons(self, ev): - '''Use GEANT4 to generate photons produced by the given particle. + def generate_photons(self, vertices): + """Use GEANT4 to generate photons produced by propagating `vertices`. - ev: a generator.event.Event object with the particle - properties set. If it contains subtracks, those - will be used to create the photon vertices rather - than the main particle. - - Returns an instance of event.Photons containing the - generated photon vertices for the primary particle or - all the subtracks, if present. - ''' - photons = [] - if ev.subtracks: - subtracks = ev.subtracks - else: - # Create temporary subtrack for single primary particle - subtracks = [event.Subtrack(particle_name=ev.particle_name, - position=ev.gen_position, - direction=ev.gen_direction, - start_time=0.0, - total_energy=ev.gen_total_energy)] - - for subtrack in subtracks: - self.particle_gun.SetParticleByName(subtrack.particle_name) - self.particle_gun.SetParticleEnergy(subtrack.total_energy * MeV) - self.particle_gun.SetParticlePosition(G4ThreeVector(*subtrack.position)*m) - self.particle_gun.SetParticleMomentumDirection(G4ThreeVector(*subtrack.direction).unit()) + Args: + vertices: list of event.Vertex objects + List of initial particle vertices. + + Returns: + photons: event.Photons + Photon vertices generated by the propagation of `vertices`. 
+ """ + photons = None + + for vertex in vertices: + self.particle_gun.SetParticleByName(vertex.particle_name) + mass = G4ParticleTable.GetParticleTable().FindParticle(vertex.particle_name).GetPDGMass() + total_energy = vertex.ke*MeV + mass + self.particle_gun.SetParticleEnergy(total_energy) + self.particle_gun.SetParticlePosition(G4ThreeVector(*vertex.pos)*m) + self.particle_gun.SetParticleMomentumDirection(G4ThreeVector(*vertex.dir).unit()) self.tracking_action.Clear() gRunManager.BeamOn(1) - photons.append(self._extract_photons_from_tracking_action()) - - # Merge all photon lists into one big list - return event.concatenate_photons(photons) - -if __name__ == '__main__': - import time - import optics - gen = G4Generator(optics.water) - - start = time.time() - n = 0 - for i in xrange(100): - photons = gen.generate_photons(event.Event('mu-', (0,0,0), (1,0,0), 1.0)) - n += len(photons.times) - print photons.positions[0].min(), photons.positions[0].max() - stop = time.time() - print stop - start, 'sec' - print n / (stop-start), 'photons/sec' + + if photons is None: + photons = self._extract_photons_from_tracking_action() + else: + photons += self._extract_photons_from_tracking_action() + + return photons diff --git a/generator/photon.py b/generator/photon.py index e656428..6c656b2 100644 --- a/generator/photon.py +++ b/generator/photon.py @@ -25,24 +25,29 @@ class G4GeneratorProcess(multiprocessing.Process): photon_socket = context.socket(zmq.PUSH) photon_socket.connect(self.photon_socket_address) - # Signal with the photon socket that we are online and ready for messages + # Signal with the photon socket that we are online + # and ready for messages. photon_socket.send('READY') while True: ev = vertex_socket.recv_pyobj() - ev.photon_start = gen.generate_photons(ev) + ev.photons_beg = gen.generate_photons(ev.vertices) + #print 'type(ev.photons_beg) is %s' % type(ev.photons_beg) photon_socket.send_pyobj(ev) def partition(num, partitions): - '''Generator that returns num//partitions, with the last item including the remainder. - - Useful for partitioning a number into mostly equal parts while preserving the sum. - - >>> list(partition(800, 3)) - [266, 266, 268] - >>> sum(list(partition(800, 3))) - 800 - ''' + """Generator that returns num//partitions, with the last item including + the remainder. + + Useful for partitioning a number into mostly equal parts while preserving + the sum. 
+ + Examples: + >>> list(partition(800, 3)) + [266, 266, 268] + >>> sum(list(partition(800, 3))) + 800 + """ step = num // partitions for i in xrange(partitions): if i < partitions - 1: @@ -51,8 +56,8 @@ def partition(num, partitions): yield step + (num % partitions) def vertex_sender(vertex_iterator, vertex_socket): - for ev in vertex_iterator: - vertex_socket.send_pyobj(ev) + for vertex in vertex_iterator: + vertex_socket.send_pyobj(vertex) def socket_iterator(nelements, socket): for i in xrange(nelements): @@ -66,8 +71,7 @@ class G4ParallelGenerator(object): base_address = 'ipc:///tmp/chroma_'+str(uuid.uuid4()) self.vertex_address = base_address + '.vertex' self.photon_address = base_address + '.photon' - self.processes = [ G4GeneratorProcess(i, material, self.vertex_address, self.photon_address, seed=base_seed + i) - for i in xrange(nprocesses) ] + self.processes = [ G4GeneratorProcess(i, material, self.vertex_address, self.photon_address, seed=base_seed + i) for i in xrange(nprocesses) ] for p in self.processes: p.start() @@ -78,18 +82,16 @@ class G4ParallelGenerator(object): self.photon_socket = self.zmq_context.socket(zmq.PULL) self.photon_socket.bind(self.photon_address) - # Verify everyone is running and connected to avoid sending all the events to one client + # Verify everyone is running and connected to avoid + # sending all the events to one client. for i in xrange(nprocesses): msg = self.photon_socket.recv() assert msg == 'READY' - def generate_events(self, nevents, vertex_iterator): - # Doing this to avoid a deadlock caused by putting to one queue while getting from another - limited_iterator = itertools.islice(vertex_iterator, nevents) - sender_thread = threading.Thread(target=vertex_sender, args=(limited_iterator, - self.vertex_socket)) + def generate_events(self, vertex_iterator): + # Doing this to avoid a deadlock caused by putting to one queue + # while getting from another. + vertex_list = list(vertex_iterator) + sender_thread = threading.Thread(target=vertex_sender, args=(vertex_list, self.vertex_socket)) sender_thread.start() - return socket_iterator(nevents, self.photon_socket) - - - + return socket_iterator(len(vertex_list), self.photon_socket) diff --git a/generator/vertex.py b/generator/vertex.py index 139d675..5521d6b 100644 --- a/generator/vertex.py +++ b/generator/vertex.py @@ -1,26 +1,28 @@ -import itertools import numpy as np -from math import sin, cos, sqrt -import copy +from itertools import izip, count -import chroma.pi0 as pi0 -from chroma.event import Event, Subtrack +from chroma.pi0 import pi0_decay +from chroma import event +from chroma.sample import uniform_sphere +from chroma.itertoolset import repeat_func # generator parts for use with gun() +def from_histogram(h): + "Yield values drawn randomly from the histogram `h` interpreted as a pdf." 
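The generator body that follows draws samples by inverting the cumulative distribution. A vectorized numpy equivalent, assuming only that the histogram exposes `hist` (bin contents) and `bincenters` arrays as used here, looks like this:

    import numpy as np

    def sample_from_histogram(hist, bincenters, size=1):
        # normalize the bin contents to a pdf, then invert the cdf with a
        # binary search to draw `size` samples at once
        cdf = np.cumsum(hist/float(hist.sum()))
        u = np.random.random_sample(size)
        return bincenters[np.searchsorted(cdf, u)]
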
+ pdf = h.hist/h.hist.sum() + cdf = np.cumsum(pdf) + + for x in repeat_func(np.random.random_sample): + yield h.bincenters[np.searchsorted(cdf, x)] + def constant(obj): while True: yield obj def isotropic(): while True: - cos_theta = np.random.uniform(-1.0, 1.0) - sin_theta = sqrt(1.0 - cos_theta**2) - phi = np.random.uniform(0.0, 2*np.pi) - direction = np.array([sin_theta * cos(phi), - sin_theta * sin(phi), - cos_theta]) - yield direction + yield uniform_sphere() def line_segment(point1, point2): while True: @@ -38,43 +40,34 @@ def flat(e_lo, e_hi): # vertex generators -def particle_gun(particle_name, position, direction, total_energy, start_id=0): - for i, ev_particle, ev_position, ev_direction, ev_total_energy in \ - itertools.izip(itertools.count(start_id), - particle_name, position, direction, total_energy): - ev = Event(event_id=i, particle_name=ev_particle, - gen_position=ev_position, gen_direction=ev_direction, gen_total_energy=ev_total_energy) - yield ev - -def pi0_gun(pi0_position, pi0_direction, pi0_total_energy, start_id=0): - for i, ev_position, ev_direction, ev_total_energy in \ - itertools.izip(itertools.count(start_id), pi0_position, pi0_direction, pi0_total_energy): +def particle_gun(particle_name_iter, pos_iter, dir_iter, ke_iter, start_id=0): + for i, particle_name, pos, dir, ke in izip(count(start_id), particle_name_iter, pos_iter, dir_iter, ke_iter): + dir /= np.linalg.norm(dir) + vertex = event.Vertex(particle_name, pos, dir, None, ke) + ev_vertex = event.Event(i, vertex, [vertex]) + yield ev_vertex - ev = Event(event_id=i, particle_name='pi0', - gen_position=ev_position, gen_direction=ev_direction, gen_total_energy=ev_total_energy) +def pi0_gun(pos_iter, dir_iter, ke_iter, start_id=0): + for i, pos, dir, ke in izip(count(start_id), pos_iter, dir_iter, ke_iter): + dir /= np.linalg.norm(dir) + primary_vertex = event.Vertex('pi0', pos, dir, None, ke) - ev_direction = ev_direction / np.linalg.norm(ev_direction) - cos_theta_rest = np.random.random_sample() * 2 - 1 theta_rest = np.arccos(cos_theta_rest) phi_rest = np.random.random_sample() * 2 * np.pi - (gamma1_e, gamma1_dir), (gamma2_e, gamma2_dir) = pi0.pi0_decay(energy=ev_total_energy, - direction=ev_direction, - theta=theta_rest, - phi=phi_rest) + (gamma1_e, gamma1_dir), (gamma2_e, gamma2_dir) = \ + pi0_decay(ke+134.9766, dir, theta_rest, phi_rest) + + gamma1_vertex = event.Vertex('gamma', pos, gamma1_dir, None, gamma1_e) + gamma2_vertex = event.Vertex('gamma', pos, gamma2_dir, None, gamma2_e) + + ev_vertex = event.Event(i, primary_vertex, [gamma1_vertex, gamma2_vertex]) - ev.subtracks = [Subtrack(particle_name='gamma', position=ev_position, direction=gamma1_dir, - start_time=0.0, total_energy=gamma1_e), - Subtrack(particle_name='gamma', position=ev_position, direction=gamma2_dir, - start_time=0.0, total_energy=gamma2_e)] - yield ev + yield ev_vertex -def constant_particle_gun(particle_name, position, direction, total_energy, - start_id=0): +def constant_particle_gun(particle_name, pos, dir, ke, start_id=0): '''Convenience wrapper around particle gun that assumes all arguments are constants, rather than generators.''' - return particle_gun(constant(particle_name), constant(position), - constant(direction), constant(total_energy), - start_id=start_id) + return particle_gun(constant(particle_name), constant(pos), constant(dir), constant(ke), start_id=start_id) diff --git a/geometry.py b/geometry.py index b2cb9c0..cccf516 100644 --- a/geometry.py +++ b/geometry.py @@ -194,7 +194,8 @@ def morton_order(mesh, bits): return 
interleave(mean_positions, bits) class Geometry(object): - def __init__(self): + def __init__(self, detector_material=None): + self.detector_material = detector_material self.solids = [] self.solid_rotations = [] self.solid_displacements = [] @@ -1,67 +1,215 @@ import numpy as np import numpy.ma as ma -from pycuda.tools import make_default_context -from pycuda.compiler import SourceModule -from pycuda.characterize import sizeof -import pycuda.driver as cuda -from pycuda import gpuarray from copy import copy from itertools import izip -from chroma.tools import timeit from os.path import dirname +import sys + +import pytools +import pycuda.tools +from pycuda.compiler import SourceModule +import pycuda.characterize +import pycuda.driver as cuda +from pycuda import gpuarray as ga import chroma.src +from chroma.tools import timeit from chroma.geometry import standard_wavelengths from chroma.color import map_to_color -import sys -import event +from chroma import event cuda.init() +#@pycuda.tools.context_dependent_memoize +def get_cu_module(name, options=None, include_source_directory=True): + """Returns a pycuda.compiler.SourceModule object from a CUDA source file + located in the chroma src directory at src/[name].cu.""" + if options is None: + options = [] + + srcdir = dirname(chroma.src.__file__) + + if include_source_directory: + options += ['-I' + srcdir] + + with open('%s/%s.cu' % (srcdir, name)) as f: + source = f.read() + + return pycuda.compiler.SourceModule(source, options=options, + no_extern_c=True) + +class GPUFuncs(object): + """Simple container class for GPU functions as attributes.""" + def __init__(self, module): + self.module = module + self.funcs = {} + + def __getattr__(self, name): + try: + return self.funcs[name] + except KeyError: + f = self.module.get_function(name) + self.funcs[name] = f + return f + +init_rng_src = """ +#include <curand_kernel.h> + +extern "C" +{ + +__global__ void init_rng(int nthreads, curandState *s, unsigned long long seed, unsigned long long offset) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + curand_init(seed, id, offset, &s[id]); +} + +} // extern "C" +""" + +def get_rng_states(size, seed=1): + "Return `size` number of CUDA random number generator states." + rng_states = cuda.mem_alloc(size*pycuda.characterize.sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) + + module = pycuda.compiler.SourceModule(init_rng_src, no_extern_c=True) + init_rng = module.get_function('init_rng') + + init_rng(np.int32(size), rng_states, np.uint64(seed), np.uint64(0), block=(64,1,1), grid=(size//64+1,1)) + + return rng_states + def to_float3(arr): - return arr.astype(np.float32).view(gpuarray.vec.float3) - -def boolean_argsort(condition): - '''Returns two arrays of indicies indicating which elements of the - boolean array `condition` should be exchanged so that all of the - True entries occur before the false entries. - - Returns: tuple (a,b, ntrue) which give the indices for elements - to exchange in a and b, and the number of true entries in the array - in ntrue. This function in general requires fewer swaps than - numpy.argsort. 
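Taken together, the helpers above replace the old monolithic GPU setup. A short sketch of how they are meant to be combined (a CUDA device and the chroma source tree are assumed; 64*1024 states matches the default 64 threads/block times 1024 blocks used elsewhere in this patch, and the GPU context wrapper is defined later in this file):

    from chroma import gpu

    gpu_instance = gpu.GPU()                       # creates the context, compiles kernel.cu
    rng_states = gpu.get_rng_states(64*1024, seed=1)
    gpu_funcs = gpu.GPUFuncs(gpu_instance.module)  # kernels fetched lazily by attribute name
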
- ''' - - true_indices = condition.nonzero()[0][::-1] # reverse - false_indices = (~condition).nonzero()[0] - - length = min(len(true_indices), len(false_indices)) - - cut_index = np.searchsorted((true_indices[:length] - false_indices[:length]) <= 0, True) - return (true_indices[:cut_index], false_indices[:cut_index], len(true_indices)) - - -def chunk_iterator(nelements, nthreads_per_block, max_blocks): - '''Iterator that yields tuples with the values requried to process + "Returns an pycuda.gpuarray.vec.float3 array from an (N,3) array." + if not arr.flags['C_CONTIGUOUS']: + arr = np.asarray(arr, order='c') + return arr.astype(np.float32).view(ga.vec.float3)[:,0] + +def chunk_iterator(nelements, nthreads_per_block=64, max_blocks=1024): + """Iterator that yields tuples with the values requried to process a long array in multiple kernel passes on the GPU. - Each yielded value is (first_index, elements_this_iteration, nblocks_this_iteration) + Each yielded value is of the form, + (first_index, elements_this_iteration, nblocks_this_iteration) - >>> list(chunk_iterator(300, 32, 2)) - [(0, 64, 2), (64, 64, 2), (128, 64, 2), (192, 64, 2), (256, 9, 1)] - ''' + Example: + >>> list(chunk_iterator(300, 32, 2)) + [(0, 64, 2), (64, 64, 2), (128, 64, 2), (192, 64, 2), (256, 9, 1)] + """ first = 0 while first < nelements: elements_left = nelements - first blocks = int(elements_left // nthreads_per_block) if elements_left % nthreads_per_block != 0: - blocks += 1 #Round up only if needed + blocks += 1 # Round up only if needed blocks = min(max_blocks, blocks) elements_this_round = min(elements_left, blocks * nthreads_per_block) yield (first, elements_this_round, blocks) first += elements_this_round - + +class GPUPhotons(object): + def __init__(self, photons): + self.pos = ga.to_gpu(to_float3(photons.pos)) + self.dir = ga.to_gpu(to_float3(photons.dir)) + self.pol = ga.to_gpu(to_float3(photons.pol)) + self.wavelengths = ga.to_gpu(photons.wavelengths.astype(np.float32)) + self.t = ga.to_gpu(photons.t.astype(np.float32)) + self.last_hit_triangles = ga.to_gpu(photons.last_hit_triangles.astype(np.int32)) + self.flags = ga.to_gpu(photons.flags.astype(np.uint32)) + + def get(self): + pos = self.pos.get().view(np.float32).reshape((len(self.pos),3)) + dir = self.dir.get().view(np.float32).reshape((len(self.dir),3)) + pol = self.pol.get().view(np.float32).reshape((len(self.pol),3)) + wavelengths = self.wavelengths.get() + t = self.t.get() + last_hit_triangles = self.last_hit_triangles.get() + flags = self.flags.get() + return event.Photons(pos, dir, pol, wavelengths, t, last_hit_triangles, flags) + +class GPUChannels(object): + def __init__(self, t, q, flags): + self.t = t + self.q = q + self.flags = flags + + def get(self): + t = self.t.get() + q = self.q.get().astype(np.float32) + + # For now, assume all channels with small + # enough hit time were hit. + return event.Channels(t<1e8, t, q, self.flags.get()) + +def propagate(gpu, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024, max_steps=10): + """Propagate photons on GPU to termination or max_steps, whichever + comes first. + + May be called repeatedly without reloading photon information if + single-stepping through photon history. + + ..warning:: + `rng_states` must have at least `nthreads_per_block`*`max_blocks` + number of curandStates. 
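A sketch of the intended call sequence for the free propagate() function above, reusing gpu_instance and rng_states from the previous sketch, and assuming an event.Photons object `photons` whose flags and last_hit_triangles members are already filled (the GPUPhotons constructor expects both) and a geometry already loaded on the device (see GPUGeometry below):

    gpu_photons = gpu.GPUPhotons(photons)        # copy photon arrays to the device
    gpu.propagate(gpu_instance, gpu_photons, rng_states,
                  nthreads_per_block=64, max_blocks=1024, max_steps=10)
    photons_end = gpu_photons.get()              # back to an event.Photons
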
+ """ + nphotons = gpuphotons.pos.size + step = 0 + input_queue = np.zeros(shape=nphotons+1, dtype=np.uint32) + input_queue[1:] = np.arange(nphotons, dtype=np.uint32) + input_queue_gpu = ga.to_gpu(input_queue) + output_queue = np.zeros(shape=nphotons+1, dtype=np.uint32) + output_queue[0] = 1 + output_queue_gpu = ga.to_gpu(output_queue) + + propagate = gpu.module.get_function('propagate') + + while step < max_steps: + # Just finish the rest of the steps if the # of photons is low + if nphotons < nthreads_per_block * 16 * 8: + nsteps = max_steps - step + else: + nsteps = 1 + + for first_photon, photons_this_round, blocks in \ + chunk_iterator(nphotons, nthreads_per_block, max_blocks): + propagate(np.int32(first_photon), np.int32(photons_this_round), input_queue_gpu[1:], output_queue_gpu, rng_states, gpuphotons.pos, gpuphotons.dir, gpuphotons.wavelengths, gpuphotons.pol, gpuphotons.t, gpuphotons.flags, gpuphotons.last_hit_triangles, np.int32(nsteps), block=(nthreads_per_block,1,1), grid=(blocks, 1)) + + step += nsteps + + if step < max_steps: + temp = input_queue_gpu + input_queue_gpu = output_queue_gpu + output_queue_gpu = temp + output_queue_gpu[:1].set(np.uint32(1)) + nphotons = input_queue_gpu[:1].get()[0] - 1 + + if ga.max(gpuphotons.flags).get() & (1 << 31): + print >>sys.stderr, "WARNING: ABORTED PHOTONS" + +class GPURays(object): + def __init__(self, pos, dir, nblocks=64): + self.pos = ga.to_gpu(to_float3(pos)) + self.dir = ga.to_gpu(to_float3(dir)) + + self.nblocks = nblocks + + self.module = get_cu_module('transform') + self.gpu_funcs = GPUFuncs(self.module) + + def rotate(self, phi, n): + self.gpu_funcs.rotate(np.int32(self.pos.size), self.pos, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + self.gpu_funcs.rotate(np.int32(self.dir.size), self.dir, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.dir.size//self.nblocks+1,1)) + + def rotate_around_point(self, phi, n, point): + self.gpu_funcs.rotate_around_point(np.int32(self.pos.size), self.pos, np.float32(phi), ga.vec.make_float3(*n), ga.vec.make_float3(*point), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) + self.gpu_funcs.rotate(np.int32(self.dir.size), self.dir, np.float32(phi), ga.vec.make_float3(*n), block=(self.nblocks,1,1), grid=(self.dir.size//self.nblocks+1,1)) + + def translate(self, v): + self.gpu_funcs.translate(np.int32(self.pos.size), self.pos, ga.vec.make_float3(*v), block=(self.nblocks,1,1), grid=(self.pos.size//self.nblocks+1,1)) def format_size(size): if size < 1e3: @@ -77,317 +225,165 @@ def format_array(name, array): return '%-15s %6s %6s' % \ (name, format_size(len(array)), format_size(array.nbytes)) -class CUDAFuncs(object): - '''simple container class for GPU functions as attributes''' - def __init__(self, cuda_module, func_names): - '''Extract __global__ functions listed in func_names from - the PyCUDA module object. The functions are assigned - to attributes of this object with the same name.''' - for name in func_names: - setattr(self, name, cuda_module.get_function(name)) - -class GPU(object): - def __init__(self, device_id=None, nthreads_per_block=64, max_blocks=1024): - '''Initialize a GPU context on the specified device. 
If device_id==None, - the default device is used.''' - - if device_id is None: - self.context = make_default_context() - else: - device = cuda.Device(device_id) - self.context = device.make_context() - - print 'device %s' % self.context.get_device().name() - - self.nthreads_per_block = nthreads_per_block - self.max_blocks = max_blocks - - self.context.set_cache_config(cuda.func_cache.PREFER_L1) - - cuda_options = ['-I' + dirname(chroma.src.__file__), '--use_fast_math', '--ptxas-options=-v'] - - self.module = SourceModule(chroma.src.kernel, options=cuda_options, no_extern_c=True) - self.kernels = CUDAFuncs(self.module, ['ray_trace', 'ray_trace_alpha', 'rotate', 'rotate_around_point', 'translate', 'update_xyz_lookup', 'update_xyz_image', 'process_image', 'init_rng']) - - self.geo_funcs = CUDAFuncs(self.module, ['set_wavelength_range', 'set_material', 'set_surface', 'set_global_mesh_variables', 'color_solids']) - - self.prop_funcs = CUDAFuncs(self.module, ['init_rng', 'propagate', 'swap']) - self.daq_module = SourceModule(chroma.src.daq, options=cuda_options, no_extern_c=True) - self.daq_funcs = CUDAFuncs(self.daq_module, - ['reset_earliest_time_int', 'run_daq', - 'convert_sortable_int_to_float', 'bin_hits', - 'accumulate_pdf_eval']) +class GPUGeometry(object): + def __init__(self, gpu, geometry, load=True, activate=True, print_usage=True): + self.geometry = geometry - def print_device_usage(self): - print 'device usage:' - print format_array('vertices', self.vertices_gpu) - print format_array('triangles', self.triangles_gpu) - print format_array('lower_bounds', self.lower_bounds_gpu) - print format_array('upper_bounds', self.upper_bounds_gpu) - print format_array('node_map', self.node_map_gpu) - print format_array('node_map_end', self.node_map_end_gpu) - print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_map_end_gpu.nbytes)) + self.module = gpu.module + self.gpu_funcs = GPUFuncs(gpu.module) - def load_geometry(self, geometry, print_usage=True): - if not hasattr(geometry, 'mesh'): - print 'WARNING: building geometry with 8-bits' - geometry.build(bits=8) + if load: + self.load(activate, print_usage) - self.geo_funcs.set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) + def load(self, activate=True, print_usage=True): + self.gpu_funcs.set_wavelength_range(np.float32(standard_wavelengths[0]), np.float32(standard_wavelengths[-1]), np.float32(standard_wavelengths[1]-standard_wavelengths[0]), np.uint32(standard_wavelengths.size), block=(1,1,1), grid=(1,1)) self.materials = [] - for i in range(len(geometry.unique_materials)): - material = copy(geometry.unique_materials[i]) + for i in range(len(self.geometry.unique_materials)): + material = copy(self.geometry.unique_materials[i]) if material is None: raise Exception('one or more triangles is missing a material.') - material.refractive_index_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.refractive_index[:,0], material.refractive_index[:,1]).astype(np.float32)) - material.absorption_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32)) - material.scattering_length_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, 
material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32)) + material.refractive_index_gpu = ga.to_gpu(np.interp(standard_wavelengths, material.refractive_index[:,0], material.refractive_index[:,1]).astype(np.float32)) + material.absorption_length_gpu = ga.to_gpu(np.interp(standard_wavelengths, material.absorption_length[:,0], material.absorption_length[:,1]).astype(np.float32)) + material.scattering_length_gpu = ga.to_gpu(np.interp(standard_wavelengths, material.scattering_length[:,0], material.scattering_length[:,1]).astype(np.float32)) - self.geo_funcs.set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) + self.gpu_funcs.set_material(np.int32(i), material.refractive_index_gpu, material.absorption_length_gpu, material.scattering_length_gpu, block=(1,1,1), grid=(1,1)) self.materials.append(material) self.surfaces = [] - for i in range(len(geometry.unique_surfaces)): - surface = copy(geometry.unique_surfaces[i]) + for i in range(len(self.geometry.unique_surfaces)): + surface = copy(self.geometry.unique_surfaces[i]) if surface is None: continue - surface.detect_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.detect[:,0], surface.detect[:,1]).astype(np.float32)) - surface.absorb_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.absorb[:,0], surface.absorb[:,1]).astype(np.float32)) - surface.reflect_diffuse_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32)) - surface.reflect_specular_gpu = gpuarray.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32)) + surface.detect_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.detect[:,0], surface.detect[:,1]).astype(np.float32)) + surface.absorb_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.absorb[:,0], surface.absorb[:,1]).astype(np.float32)) + surface.reflect_diffuse_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.reflect_diffuse[:,0], surface.reflect_diffuse[:,1]).astype(np.float32)) + surface.reflect_specular_gpu = ga.to_gpu(np.interp(standard_wavelengths, surface.reflect_specular[:,0], surface.reflect_specular[:,1]).astype(np.float32)) - self.geo_funcs.set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) + self.gpu_funcs.set_surface(np.int32(i), surface.detect_gpu, surface.absorb_gpu, surface.reflect_diffuse_gpu, surface.reflect_specular_gpu, block=(1,1,1), grid=(1,1)) self.surfaces.append(surface) - self.vertices_gpu = gpuarray.to_gpu(to_float3(geometry.mesh.vertices)) + self.vertices_gpu = ga.to_gpu(to_float3(self.geometry.mesh.vertices)) triangles = \ - np.empty(len(geometry.mesh.triangles), dtype=gpuarray.vec.uint4) - triangles['x'] = geometry.mesh.triangles[:,0] - triangles['y'] = geometry.mesh.triangles[:,1] - triangles['z'] = geometry.mesh.triangles[:,2] - triangles['w'] = ((geometry.material1_index & 0xff) << 24) | ((geometry.material2_index & 0xff) << 16) | ((geometry.surface_index & 0xff) << 8) - self.triangles_gpu = gpuarray.to_gpu(triangles) + np.empty(len(self.geometry.mesh.triangles), dtype=ga.vec.uint4) + triangles['x'] = self.geometry.mesh.triangles[:,0] + triangles['y'] = self.geometry.mesh.triangles[:,1] + triangles['z'] = self.geometry.mesh.triangles[:,2] + triangles['w'] = ((self.geometry.material1_index & 0xff) << 
24) | ((self.geometry.material2_index & 0xff) << 16) | ((self.geometry.surface_index & 0xff) << 8) + self.triangles_gpu = ga.to_gpu(triangles) - self.lower_bounds_gpu = gpuarray.to_gpu(to_float3(geometry.lower_bounds)) + self.lower_bounds_gpu = ga.to_gpu(to_float3(self.geometry.lower_bounds)) - self.upper_bounds_gpu = gpuarray.to_gpu(to_float3(geometry.upper_bounds)) + self.upper_bounds_gpu = ga.to_gpu(to_float3(self.geometry.upper_bounds)) - self.colors_gpu = gpuarray.to_gpu(geometry.colors.astype(np.uint32)) - self.node_map_gpu = gpuarray.to_gpu(geometry.node_map.astype(np.uint32)) - self.node_map_end_gpu = gpuarray.to_gpu(geometry.node_map_end.astype(np.uint32)) - self.solid_id_map_gpu = gpuarray.to_gpu(geometry.solid_id.astype(np.uint32)) - - self.geo_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(geometry.node_map.size-1), np.uint32(geometry.first_node), self.lower_bounds_gpu, self.upper_bounds_gpu, block=(1,1,1), grid=(1,1)) + self.colors_gpu = ga.to_gpu(self.geometry.colors.astype(np.uint32)) + self.node_map_gpu = ga.to_gpu(self.geometry.node_map.astype(np.uint32)) + self.node_map_end_gpu = ga.to_gpu(self.geometry.node_map_end.astype(np.uint32)) + self.solid_id_map_gpu = ga.to_gpu(self.geometry.solid_id.astype(np.uint32)) self.node_map_tex = self.module.get_texref('node_map') self.node_map_end_tex = self.module.get_texref('node_map_end') + if print_usage: + self.print_device_usage() + + if activate: + self.activate() + + def activate(self): + self.gpu_funcs.set_global_mesh_variables(self.triangles_gpu, self.vertices_gpu, self.colors_gpu, np.uint32(self.geometry.node_map.size-1), np.uint32(self.geometry.first_node), self.lower_bounds_gpu, self.upper_bounds_gpu, block=(1,1,1), grid=(1,1)) + self.node_map_tex.set_address(self.node_map_gpu.gpudata, self.node_map_gpu.nbytes) self.node_map_end_tex.set_address(self.node_map_end_gpu.gpudata, self.node_map_end_gpu.nbytes) self.node_map_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) self.node_map_end_tex.set_format(cuda.array_format.UNSIGNED_INT32, 1) - self.geometry = geometry - - if print_usage: - print 'average of %f child nodes per node' % (np.mean(geometry.node_map_end[geometry.first_node:] - geometry.node_map[geometry.first_node:])) - print '%i nodes with one child' % (np.count_nonzero((geometry.node_map_end[geometry.first_node:] - geometry.node_map[geometry.first_node:]) == 1)) - print '%i leaf nodes with one child' % (np.count_nonzero((geometry.node_map_end[:geometry.first_node] - geometry.node_map[:geometry.first_node]) == 1)) - self.print_device_usage() + def print_device_usage(self): + print 'device usage:' + print '-'*10 + print format_array('vertices', self.vertices_gpu) + print format_array('triangles', self.triangles_gpu) + print format_array('lower_bounds', self.lower_bounds_gpu) + print format_array('upper_bounds', self.upper_bounds_gpu) + print format_array('node_map', self.node_map_gpu) + print format_array('node_map_end', self.node_map_end_gpu) + print '%-15s %6s %6s' % ('total', '', format_size(self.vertices_gpu.nbytes + self.triangles_gpu.nbytes + self.lower_bounds_gpu.nbytes + self.upper_bounds_gpu.nbytes + self.node_map_gpu.nbytes + self.node_map_end_gpu.nbytes)) + print '-'*10 + free, total = cuda.mem_get_info() + print '%-15s %6s %6s' % ('device total', '', format_size(total)) + print '%-15s %6s %6s' % ('device used', '', format_size(total-free)) + print '%-15s %6s %6s' % ('device free', '', format_size(free)) + print def reset_colors(self): 
self.colors_gpu.set_async(self.geometry.colors.astype(np.uint32)) def color_solids(self, solid_hit, colors): - solid_hit_gpu = gpuarray.to_gpu(np.array(solid_hit, dtype=np.bool)) - solid_colors_gpu = gpuarray.to_gpu(np.array(colors, dtype=np.uint32)) + solid_hit_gpu = ga.to_gpu(np.array(solid_hit, dtype=np.bool)) + solid_colors_gpu = ga.to_gpu(np.array(colors, dtype=np.uint32)) - for first_triangle, triangles_this_round, blocks in chunk_iterator(self.triangles_gpu.size, self.nthreads_per_block, self.max_blocks): - self.geo_funcs.color_solids(np.int32(first_triangle), + for first_triangle, triangles_this_round, blocks in \ + chunk_iterator(self.triangles_gpu.size): + self.gpu_funcs.color_solids(np.int32(first_triangle), np.int32(triangles_this_round), self.solid_id_map_gpu, solid_hit_gpu, solid_colors_gpu, - block=(self.nthreads_per_block,1,1), + block=(64,1,1), grid=(blocks,1)) - self.context.synchronize() - - def setup_propagate(self, seed=1): - self.rng_states_gpu = cuda.mem_alloc(self.nthreads_per_block * self.max_blocks - * sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) - self.prop_funcs.init_rng(np.int32(self.max_blocks * self.nthreads_per_block), self.rng_states_gpu, - np.uint64(seed), np.uint64(0), - block=(self.nthreads_per_block,1,1), - grid=(self.max_blocks,1)) - #self.context.synchronize() - - def load_photons(self, photons): - '''Load photons onto the GPU from an event.Photons object. - - If photon.histories or photon.last_hit_triangles are set to none, - they will be initialized to 0 and -1 on the GPU, respectively. - ''' - self.nphotons = len(photons.positions) - assert len(photons.directions) == self.nphotons - assert len(photons.polarizations) == self.nphotons - assert len(photons.wavelengths) == self.nphotons - - self.positions_gpu = gpuarray.to_gpu(to_float3(photons.positions)) - self.directions_gpu = gpuarray.to_gpu(to_float3(photons.directions)) - self.polarizations_gpu = gpuarray.to_gpu(to_float3(photons.polarizations)) - self.wavelengths_gpu = gpuarray.to_gpu(photons.wavelengths.astype(np.float32)) - - if photons.times is not None: - self.times_gpu = gpuarray.to_gpu(photons.times.astype(np.float32)) - else: - self.times_gpu = gpuarray.zeros(self.nphotons, dtype=np.float32) - if photons.histories is not None: - self.histories_gpu = gpuarray.to_gpu(photons.histories.astype(np.uint32)) - else: - self.histories_gpu = gpuarray.zeros(self.nphotons, dtype=np.uint32) - - if photons.last_hit_triangles is not None: - self.last_hit_triangles_gpu = gpuarray.to_gpu(photons.last_hit_triangles.astype(np.int32)) - else: - self.last_hit_triangles_gpu = gpuarray.empty(self.nphotons, dtype=np.int32) - self.last_hit_triangles_gpu.fill(-1) - - - def propagate(self, max_steps=10): - '''Propagate photons on GPU to termination or max_steps, whichever comes first. - - May be called repeatedly without reloading photon information if single-stepping - through photon history. 
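Geometry handling is now its own class; a brief sketch of the new flow, assuming `detector` is a chroma Geometry that has already been built with detector.build() (the second geometry name is purely hypothetical):

    # default: load onto the device, print usage, and activate immediately
    gpu_geometry = gpu.GPUGeometry(gpu_instance, detector)

    # or defer activation, presumably for juggling more than one resident geometry
    other_geometry = gpu.GPUGeometry(gpu_instance, other_detector, activate=False)
    other_geometry.activate()
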
- ''' - nphotons = self.nphotons - step = 0 - input_queue = np.zeros(shape=self.nphotons+1, dtype=np.uint32) - input_queue[1:] = np.arange(self.nphotons, dtype=np.uint32) - input_queue_gpu = gpuarray.to_gpu(input_queue) - output_queue = np.zeros(shape=self.nphotons+1, dtype=np.uint32) - output_queue[0] = 1 - output_queue_gpu = gpuarray.to_gpu(output_queue) - - - while step < max_steps: - ## Just finish the rest of the steps if the # of photons is low - if nphotons < self.nthreads_per_block * 16 * 8: - nsteps = max_steps - step - else: - nsteps = 1 - - for first_photon, photons_this_round, blocks in chunk_iterator(nphotons, self.nthreads_per_block, self.max_blocks): - self.prop_funcs.propagate(np.int32(first_photon), - np.int32(photons_this_round), - input_queue_gpu[1:], - output_queue_gpu, - self.rng_states_gpu, self.positions_gpu, - self.directions_gpu, - self.wavelengths_gpu, self.polarizations_gpu, - self.times_gpu, self.histories_gpu, - self.last_hit_triangles_gpu, - np.int32(nsteps), - block=(self.nthreads_per_block,1,1), - grid=(blocks, 1)) - - step += nsteps - - if step < max_steps: - temp = input_queue_gpu - input_queue_gpu = output_queue_gpu - output_queue_gpu = temp - output_queue_gpu[:1].set(np.uint32(1)) - nphotons = input_queue_gpu[:1].get()[0] - 1 - - if gpuarray.max(self.histories_gpu).get() & (1 << 31): - print >>sys.stderr, "WARNING: ABORTED PHOTONS IN THIS EVENT" - - if 'profile' in __builtins__: - self.context.synchronize() - - def get_photons(self): - '''Returns a dictionary of current photon state information. - - Contents of dictionary have the same names as the parameters to load_photons(). - ''' - return event.Photons(positions=self.positions_gpu.get().view(np.float32).reshape(self.positions_gpu.size, 3), - directions=self.directions_gpu.get().view(np.float32).reshape(self.directions_gpu.size, 3), - polarizations=self.polarizations_gpu.get().view(np.float32).reshape(self.polarizations_gpu.size, 3), - wavelengths=self.wavelengths_gpu.get(), - times=self.times_gpu.get(), - histories=self.histories_gpu.get(), - last_hit_triangles=self.last_hit_triangles_gpu.get()) - - def setup_daq(self, max_pmt_id, pmt_rms=1.2e-9): - self.earliest_time_gpu = gpuarray.GPUArray(shape=(max_pmt_id+1,), dtype=np.float32) - self.earliest_time_int_gpu = gpuarray.GPUArray(shape=self.earliest_time_gpu.shape, - dtype=np.uint32) - self.channel_history_gpu = gpuarray.zeros_like(self.earliest_time_int_gpu) - self.channel_q_gpu = gpuarray.zeros_like(self.earliest_time_int_gpu) +class GPUDaq(object): + def __init__(self, gpu_geometry, max_pmt_id, pmt_rms=1.2e-9): + self.earliest_time_gpu = ga.empty(max_pmt_id+1, dtype=np.float32) + self.earliest_time_int_gpu = ga.empty(max_pmt_id+1, dtype=np.uint32) + self.channel_history_gpu = ga.zeros_like(self.earliest_time_int_gpu) + self.channel_q_gpu = ga.zeros_like(self.earliest_time_int_gpu) self.daq_pmt_rms = pmt_rms + self.solid_id_map_gpu = gpu_geometry.solid_id_map_gpu + + self.module = get_cu_module('daq', include_source_directory=False) + self.gpu_funcs = GPUFuncs(self.module) - def run_daq(self): - self.daq_funcs.reset_earliest_time_int(np.float32(1e9), - np.int32(len(self.earliest_time_int_gpu)), - self.earliest_time_int_gpu, - block=(self.nthreads_per_block,1,1), - grid=(len(self.earliest_time_int_gpu)//self.nthreads_per_block+1,1)) + def acquire(self, gpuphotons, rng_states, nthreads_per_block=64, max_blocks=1024): + self.gpu_funcs.reset_earliest_time_int(np.float32(1e9), np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, 
block=(nthreads_per_block,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1)) self.channel_q_gpu.fill(0) self.channel_history_gpu.fill(0) - #self.context.synchronize() - for first_photon, photons_this_round, blocks in chunk_iterator(self.nphotons, self.nthreads_per_block, self.max_blocks): - self.daq_funcs.run_daq(self.rng_states_gpu, np.uint32(0x1 << 2), - np.float32(self.daq_pmt_rms), - np.int32(first_photon), - np.int32(photons_this_round), - self.times_gpu, self.histories_gpu, - self.last_hit_triangles_gpu, - self.solid_id_map_gpu, - np.int32(len(self.earliest_time_int_gpu)), - self.earliest_time_int_gpu, - self.channel_q_gpu, - self.channel_history_gpu, - block=(self.nthreads_per_block,1,1), grid=(blocks,1)) - #self.context.synchronize() - self.daq_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)), - self.earliest_time_int_gpu, - self.earliest_time_gpu, - block=(self.nthreads_per_block,1,1), - grid=(len(self.earliest_time_int_gpu)//self.nthreads_per_block+1,1)) - if 'profile' in __builtins__: - self.context.synchronize() - + n = len(gpuphotons.pos) - def get_hits(self): - t = self.earliest_time_gpu.get() - # For now, assume all channels with small enough hit time were hit - return event.Channels(hit=t<1e8, t=t, - q=self.channel_q_gpu.get().astype(np.float32), - histories=self.channel_history_gpu.get()) + for first_photon, photons_this_round, blocks in \ + chunk_iterator(n, nthreads_per_block, max_blocks): + self.gpu_funcs.run_daq(rng_states, np.uint32(0x1 << 2), np.float32(self.daq_pmt_rms), np.int32(first_photon), np.int32(photons_this_round), gpuphotons.t, gpuphotons.flags, gpuphotons.last_hit_triangles, self.solid_id_map_gpu, np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, self.channel_q_gpu, self.channel_history_gpu, block=(nthreads_per_block,1,1), grid=(blocks,1)) + + self.gpu_funcs.convert_sortable_int_to_float(np.int32(len(self.earliest_time_int_gpu)), self.earliest_time_int_gpu, self.earliest_time_gpu, block=(nthreads_per_block,1,1), grid=(len(self.earliest_time_int_gpu)//nthreads_per_block+1,1)) + + return GPUChannels(self.earliest_time_gpu, self.channel_q_gpu, self.channel_history_gpu) + +class GPUPDF(object): + def __init__(self): + self.module = get_cu_module('daq') + self.gpu_funcs = GPUFuncs(self.module) def setup_pdf(self, max_pmt_id, tbins, trange, qbins, qrange): - '''Setup GPU arrays to hold PDF information. + """Setup GPU arrays to hold PDF information. 
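A sketch of a single DAQ pass with the split-out class, reusing gpu_geometry, gpu_photons and rng_states from the sketches above; using max(detector.pmtids) as the channel count mirrors sim.py below and assumes a detector-style geometry:

    import numpy as np

    gpu_daq = gpu.GPUDaq(gpu_geometry, max(detector.pmtids))
    gpu_channels = gpu_daq.acquire(gpu_photons, rng_states)
    channels = gpu_channels.get()                # event.Channels(hit, t, q, flags)
    print '%i channels hit' % np.count_nonzero(channels.hit)
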
max_pmt_id: int, largest PMT id # tbins: number of time bins trange: tuple of (min, max) time in PDF qbins: number of charge bins qrange: tuple of (min, max) charge in PDF - ''' + """ self.events_in_histogram = 0 - self.hitcount_gpu = gpuarray.zeros(max_pmt_id+1, dtype=np.uint32) - self.pdf_gpu = gpuarray.zeros(shape=(max_pmt_id+1, tbins, qbins), + self.hitcount_gpu = ga.zeros(max_pmt_id+1, dtype=np.uint32) + self.pdf_gpu = ga.zeros(shape=(max_pmt_id+1, tbins, qbins), dtype=np.uint32) self.tbins = tbins self.trange = trange @@ -395,16 +391,14 @@ class GPU(object): self.qrange = qrange def clear_pdf(self): - '''Rezero the PDF counters.''' + """Rezero the PDF counters.""" self.hitcount_gpu.fill(0) self.pdf_gpu.fill(0) - def add_hits_to_pdf(self): - '''Put the most recent results of run_daq() into the PDF.''' - - self.daq_funcs.bin_hits(np.int32(len(self.hitcount_gpu)), - self.channel_q_gpu, - self.earliest_time_gpu, + def add_hits_to_pdf(self, gpuchannels, nthreads_per_block=64): + self.gpu_funcs.bin_hits(np.int32(len(self.hitcount_gpu)), + gpuchannels.q, + gpuchannels.t, self.hitcount_gpu, np.int32(self.tbins), np.float32(self.trange[0]), @@ -413,20 +407,21 @@ class GPU(object): np.float32(self.qrange[0]), np.float32(self.qrange[1]), self.pdf_gpu, - block=(self.nthreads_per_block,1,1), - grid=(len(self.earliest_time_gpu)//self.nthreads_per_block+1,1)) + block=(nthreads_per_block,1,1), + grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) self.events_in_histogram += 1 def get_pdfs(self): - '''Returns the 1D hitcount array and the 3D [channel, time, charge] histogram''' + """Returns the 1D hitcount array and the 3D [channel, time, charge] + histogram.""" return self.hitcount_gpu.get(), self.pdf_gpu.get() - def setup_pdf_eval(self, event_hit, event_time, event_charge, - min_twidth, trange, min_qwidth, qrange, min_bin_content=10, + def setup_pdf_eval(self, event_hit, event_time, event_charge, min_twidth, + trange, min_qwidth, qrange, min_bin_content=10, time_only=True): - '''Setup GPU arrays to compute PDF values for the given event. + """Setup GPU arrays to compute PDF values for the given event. The pdf_eval calculation allows the PDF to be evaluated at a single point for each channel as the Monte Carlo is run. The effective bin size will be as small as (`min_twidth`, @@ -455,14 +450,14 @@ class GPU(object): The bin will be expanded to include at least this many events time_only: bool If True, only the time observable will be used in the PDF. 
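And the corresponding binned-PDF accumulation; the bin counts and ranges here are arbitrary placeholders:

    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf(max(detector.pmtids), tbins=100, trange=(-0.5e-9, 999.5e-9),
                      qbins=10, qrange=(-0.5, 9.5))
    gpu_pdf.add_hits_to_pdf(gpu_channels)        # call once per simulated event
    hitcount, pdf = gpu_pdf.get_pdfs()           # 1D hit counts, 3D (channel, t, q) histogram
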
- ''' - self.event_hit_gpu = gpuarray.to_gpu(event_hit.astype(np.uint32)) - self.event_time_gpu = gpuarray.to_gpu(event_time.astype(np.float32)) - self.event_charge_gpu = gpuarray.to_gpu(event_charge.astype(np.float32)) - - self.eval_hitcount_gpu = gpuarray.zeros(len(event_hit), dtype=np.uint32) - self.eval_bincount_gpu = gpuarray.zeros(len(event_hit), dtype=np.uint32) - self.nearest_mc_gpu = gpuarray.empty(shape=len(event_hit) * min_bin_content, + """ + self.event_hit_gpu = ga.to_gpu(event_hit.astype(np.uint32)) + self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32)) + self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32)) + + self.eval_hitcount_gpu = ga.zeros(len(event_hit), dtype=np.uint32) + self.eval_bincount_gpu = ga.zeros(len(event_hit), dtype=np.uint32) + self.nearest_mc_gpu = ga.empty(shape=len(event_hit) * min_bin_content, dtype=np.float32) self.nearest_mc_gpu.fill(1e9) @@ -474,21 +469,20 @@ class GPU(object): self.time_only = time_only def clear_pdf_eval(self): - '''Reset PDF evaluation counters to start accumulating new Monte Carlo.''' + "Reset PDF evaluation counters to start accumulating new Monte Carlo." self.eval_hitcount_gpu.fill(0) self.eval_bincount_gpu.fill(0) self.nearest_mc_gpu.fill(1e9) - def accumulate_pdf_eval(self): - '''Add the most recent results of run_daq() to the PDF evaluation.''' - - self.daq_funcs.accumulate_pdf_eval(np.int32(self.time_only), + def accumulate_pdf_eval(self, gpuchannels, nthreads_per_block=64): + "Add the most recent results of run_daq() to the PDF evaluation." + self.gpu_funcs.accumulate_pdf_eval(np.int32(self.time_only), np.int32(len(self.event_hit_gpu)), self.event_hit_gpu, self.event_time_gpu, self.event_charge_gpu, - self.earliest_time_gpu, - self.channel_q_gpu, + gpuchannels.t, + gpuchannels.q, self.eval_hitcount_gpu, self.eval_bincount_gpu, np.float32(self.min_twidth), @@ -499,8 +493,8 @@ class GPU(object): np.float32(self.qrange[1]), self.nearest_mc_gpu, np.int32(self.min_bin_content), - block=(self.nthreads_per_block,1,1), - grid=(len(self.earliest_time_gpu)//self.nthreads_per_block+1,1)) + block=(nthreads_per_block,1,1), + grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) def get_pdf_eval(self): evhit = self.event_hit_gpu.get().astype(bool) @@ -525,8 +519,9 @@ class GPU(object): nearest_mc = self.nearest_mc_gpu.get().reshape((len(hitcount), self.min_bin_content)) - # Deal with the case where we did not even get min_bin_content events in the PDF - # but also clamp the lower range to ensure we don't index by a negative number in 2 lines + # Deal with the case where we did not even get min_bin_content events + # in the PDF but also clamp the lower range to ensure we don't index + # by a negative number in 2 lines last_valid_entry = np.maximum(0, (nearest_mc < 1e9).astype(int).sum(axis=1) - 1) distance = nearest_mc[np.arange(len(last_valid_entry)),last_valid_entry] @@ -544,5 +539,31 @@ class GPU(object): return hitcount, pdf_value, pdf_value * pdf_frac_uncert +class GPU(object): + def __init__(self, device_id=None): + """Initialize a GPU context on the specified device. 
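For the event-evaluation path, the analogous sketch (event_channels is an observed event.Channels object; the widths and ranges are again placeholders):

    gpu_pdf.setup_pdf_eval(event_channels.hit, event_channels.t, event_channels.q,
                           min_twidth=2.0e-9, trange=(-0.5e-9, 999.5e-9),
                           min_qwidth=1, qrange=(-0.5, 9.5), time_only=True)
    gpu_pdf.accumulate_pdf_eval(gpu_channels)    # call once per simulated event
    hitcount, pdf_value, pdf_uncert = gpu_pdf.get_pdf_eval()
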
+ If device_id is None, the default device is used.""" + + if device_id is None: + self.context = pycuda.tools.make_default_context() + else: + device = cuda.Device(device_id) + self.context = device.make_context() + + print 'device %s' % self.context.get_device().name() + + self.print_mem_info() + + self.context.set_cache_config(cuda.func_cache.PREFER_L1) + + cuda_options = ['--use_fast_math']#, '--ptxas-options=-v'] + + self.module = get_cu_module('kernel', options=cuda_options) + + def print_mem_info(self): + free, total = cuda.mem_get_info() + + print '%.1f %% of device memory is free.' % ((free/float(total))*100) + def __del__(self): self.context.pop() diff --git a/gputhread.py b/gputhread.py deleted file mode 100644 index 63124e9..0000000 --- a/gputhread.py +++ /dev/null @@ -1,95 +0,0 @@ -import numpy as np -from copy import copy -import time -import pycuda.driver as cuda -from pycuda.characterize import sizeof -from pycuda.compiler import SourceModule -from pycuda import gpuarray -import threading -import Queue -import src - -class GPUThread(threading.Thread): - def __init__(self, device_id, geometry, jobs, output, nblocks=64, max_nthreads=100000): - threading.Thread.__init__(self) - - self.device_id = device_id - self.geometry = copy(geometry) - self.jobs = jobs - self.output = output - self.nblocks = nblocks - self.max_nthreads = max_nthreads - self._stop = threading.Event() - - def stop(self): - self._stop.set() - - def stopped(self): - return self._stop.is_set() - - def run(self): - device = cuda.Device(self.device_id) - context = device.make_context() - module = SourceModule(src.kernel, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) - - propagate = module.get_function('propagate') - fill_uniform = module.get_function('fill_uniform') - fill_uniform_sphere = module.get_function('fill_uniform_sphere') - - init_rng = module.get_function('init_rng') - rng_states_gpu = cuda.mem_alloc(self.max_nthreads*sizeof('curandStateXORWOW', '#include <curand_kernel.h>')) - init_rng(np.int32(self.max_nthreads), rng_states_gpu, np.int32(self.device_id), np.int32(0), block=(self.nblocks,1,1), grid=(self.max_nthreads//self.nblocks+1,1)) - - self.geometry.load(module) - - daq_module = SourceModule(src.daq, options=['-I' + src.dir], no_extern_c=True, cache_dir=False) - reset_earliest_time_int = daq_module.get_function('reset_earliest_time_int') - run_daq = daq_module.get_function('run_daq') - convert_sortable_int_to_float = daq_module.get_function('convert_sortable_int_to_float') - - earliest_time_gpu = gpuarray.GPUArray(shape=(max(self.geometry.pmtids)+1,), dtype=np.float32) - earliest_time_int_gpu = gpuarray.GPUArray(shape=earliest_time_gpu.shape, dtype=np.uint32) - - solid_map_gpu = gpuarray.to_gpu(self.geometry.solid_id.astype(np.int32)) - - while not self.stopped(): - try: - position, nphotons = self.jobs.get(block=False, timeout=0.1) - except Queue.Empty: - continue - - t0 = time.time() - - gpu_kwargs = {'block': (self.nblocks,1,1), 'grid': (nphotons//self.nblocks+1,1)} - - positions_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3) - positions_gpu.fill(position) - directions_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3) - fill_uniform_sphere(np.int32(nphotons), rng_states_gpu, directions_gpu, **gpu_kwargs) - wavelengths_gpu = gpuarray.empty(nphotons, dtype=np.float32) - fill_uniform(np.int32(nphotons), rng_states_gpu, wavelengths_gpu, np.float32(200), np.float32(800), **gpu_kwargs) - polarizations_gpu = gpuarray.empty(nphotons, dtype=gpuarray.vec.float3) - 
fill_uniform_sphere(np.int32(nphotons), rng_states_gpu, polarizations_gpu, **gpu_kwargs) - times_gpu = gpuarray.zeros(nphotons, dtype=np.float32) - histories_gpu = gpuarray.zeros(nphotons, dtype=np.uint32) - last_hit_triangles_gpu = gpuarray.empty(nphotons, dtype=np.int32) - last_hit_triangles_gpu.fill(-1) - - max_steps = 10 - - propagate(np.int32(nphotons), rng_states_gpu, positions_gpu, directions_gpu, wavelengths_gpu, polarizations_gpu, times_gpu, histories_gpu, last_hit_triangles_gpu, np.int32(max_steps), **gpu_kwargs) - - reset_earliest_time_int(np.float32(1e9), np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, block=(self.nblocks,1,1), grid=(len(earliest_time_int_gpu)//self.nblocks+1,1)) - - run_daq(rng_states_gpu, np.uint32(0x1 << 2), np.float32(1.2e-9), np.int32(nphotons), times_gpu, histories_gpu, last_hit_triangles_gpu, solid_map_gpu, np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, block=(self.nblocks,1,1), grid=(nphotons//self.nblocks+1,1)) - - convert_sortable_int_to_float(np.int32(len(earliest_time_int_gpu)), earliest_time_int_gpu, earliest_time_gpu, block=(self.nblocks,1,1), grid=(len(earliest_time_int_gpu)//self.nblocks+1,1)) - cuda.Context.synchronize() - elapsed = time.time() - t0 - - #print 'device %i; elapsed %f sec' % (self.device_id, elapsed) - - self.output.put(earliest_time_gpu.get()) - self.jobs.task_done() - - context.pop() diff --git a/itertoolset.py b/itertoolset.py index eb96e74..498b940 100644 --- a/itertoolset.py +++ b/itertoolset.py @@ -2,21 +2,54 @@ from itertools import * import collections from copy import deepcopy +def peek(iterable): + """Peek at the first element of `iterable`. + + Returns a tuple of the form (first_element, iterable). + + Once peek() has been called, the original iterable will be modified if it + was an iterator (it will be advanced by 1); use the returned iterator for + an equivalent copy of the original iterable. + """ + it = iter(iterable) + first_element = next(it) + return first_element, chain([first_element], it) + +def repeat_func(func, times=None, args=()): + "equivalent to (func(*args) for i in xrange(times))." + if times is None: + while True: + yield func(*args) + else: + for i in xrange(times): + yield func(*args) + +def repeat_copy(object, times=None): + """Returns deep copies of `object` over and over again. Runs indefinitely + unless the `times` argument is specified.""" + if times is None: + while True: + yield deepcopy(object) + else: + for i in xrange(times): + yield object + def repeating_iterator(i, nreps): - '''Returns an iterator that emits each element of `i` multiple times specified by `nreps`. - The length of this iterator is the lenght of `i` times `nreps`. This iterator - is safe even if the item consumer modifies the items. + """Returns an iterator that emits each element of `i` multiple times + specified by `nreps`. The length of this iterator is the lenght of `i` + times `nreps`. This iterator is safe even if the item consumer modifies + the items. 
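Quick examples of the two most heavily used new helpers, peek() and repeat_func(), both defined above:

    import numpy as np
    from chroma.itertoolset import peek, repeat_func

    first, it = peek(iter('ABC'))        # first == 'A'; `it` still yields A, B, C
    draws = list(repeat_func(np.random.random_sample, times=3))  # three random floats
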
- >>> list(repeating_iterator('ABCD', 3) - ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'D', 'D', 'D'] - >>> list(repeating_iterator('ABCD', 1) - ['A', 'B', 'C', 'D'] - ''' + Examples: + >>> list(repeating_iterator('ABCD', 3) + ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C', 'D', 'D', 'D'] + >>> list(repeating_iterator('ABCD', 1) + ['A', 'B', 'C', 'D'] + """ for item in i: for counter in xrange(nreps): yield deepcopy(item) - def grouper(n, iterable, fillvalue=None): "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx" args = [iter(iterable)] * n diff --git a/likelihood.py b/likelihood.py index c797afe..51d7ef9 100644 --- a/likelihood.py +++ b/likelihood.py @@ -2,7 +2,7 @@ import numpy as np from histogram import HistogramDD from uncertainties import ufloat, umath, unumpy from math import sqrt -from itertools import izip, compress +from itertools import islice, izip, compress, repeat from chroma.tools import profile_if_possible class Likelihood(object): @@ -47,16 +47,19 @@ class Likelihood(object): """ Return the negative log likelihood that the detector event set in the constructor or by set_event() was the result of a particle generated - by `vertex_generator`. If `nreps` set to > 1, each set of photon vertices - will be propagated `nreps` times. + by `vertex_generator`. If `nreps` set to > 1, each set of photon + vertices will be propagated `nreps` times. """ - hitcount, pdf_prob, pdf_prob_uncert = self.sim.eval_pdf(self.event.channels, - nevals, vertex_generator, - 2.0e-9, self.trange, - 1, self.qrange, - nreps=nreps, - time_only=self.time_only) + vertex_generator = islice(vertex_generator, nevals) + + hitcount, pdf_prob, pdf_prob_uncert = \ + self.sim.eval_pdf(self.event.channels, + vertex_generator, + 2.0e-9, self.trange, + 1, self.qrange, + nreps=nreps, + time_only=self.time_only) # Normalize probabilities and put a floor to keep the log finite hit_prob = hitcount.astype(np.float32) / (nreps * nevals) @@ -79,7 +82,8 @@ class Likelihood(object): hit_channel_prob_uncert = ( (nevals * nreps * hit_prob * (1.0 - hit_prob)) / hit_prob**2 ).sum()**0.5 log_likelihood = ufloat((hit_channel_prob, 0.0)) - # Then include the probability densities of the observed charges and times + # Then include the probability densities of the observed + # charges and times. 
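For context, the call pattern the new eval() signature is aimed at (it mirrors the __main__ block below; the positional order of the nevals argument and the nreps keyword is inferred from the calls in this file):

    from chroma.generator import constant_particle_gun

    likelihood = Likelihood(sim, ev)
    vertex_iter = constant_particle_gun('e-', (0,0,0), (1,0,0), 100.0)
    nll = likelihood.eval(vertex_iter, 100, nreps=1)   # ufloat: value +/- uncertainty
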
hit_pdf_ufloat = unumpy.uarray((pdf_prob[self.event.channels.hit], pdf_prob_uncert[self.event.channels.hit])) log_likelihood += unumpy.log(hit_pdf_ufloat).sum() @@ -91,21 +95,35 @@ if __name__ == '__main__': from chroma.sim import Simulation from chroma.optics import water from chroma.generator import constant_particle_gun - from chroma.tools import enable_debug_on_crash + from chroma import tools import time - enable_debug_on_crash() + tools.enable_debug_on_crash() detector = detectors.find('lbne') - sim = Simulation(detector, water) + sim = Simulation(detector, seed=0) - event = sim.simulate(1, constant_particle_gun('e-',(0,0,0),(1,0,0),100.0)).next() + event = sim.simulate(islice(constant_particle_gun('e-',(0,0,0),(1,0,0),100.0), 1)).next() print 'nhit = %i' % np.count_nonzero(event.channels.hit) likelihood = Likelihood(sim, event) - for x in np.linspace(-10.0, 10.0, 2*10*5+1): - start = time.time() - l = likelihood.eval(constant_particle_gun('e-',(0,0,0),(1,0,0),100.0),100, nreps=200) - print 'x = %5.1f, %s (%1.1f sec)' % (x, l, time.time() - start) + x = np.linspace(-10.0, 10.0, 100) + l = [] + + for pos in izip(x, repeat(0), repeat(0)): + t0 = time.time() + ev_vertex_iter = constant_particle_gun('e-',pos,(1,0,0),100.0) + l.append(likelihood.eval(ev_vertex_iter, 1000)) + elapsed = time.time() - t0 + + print '(%.1f, %.1f, %.1f), %s (%1.1f sec)' % \ + (pos[0], pos[1], pos[2], tools.ufloat_to_str(l[-1]), elapsed) + + import matplotlib.pyplot as plt + + plt.errorbar(x, [v.nominal_value for v in l], [v.std_dev() for v in l]) + plt.xlabel('X Position (m)') + plt.ylabel('Negative Log Likelihood') + plt.show() @@ -172,7 +172,7 @@ labppo_scintillator.set('scattering_length', wavelengths=[ 200.0, 215.0, 225.0, ################### WCSim materials ##################### -water_wcsim = Material('water') +water_wcsim = Material('water_wcsim') water_wcsim.density = 1.0 # g/cm^3 water_wcsim.composition = { 'H' : 0.1119, 'O' : 0.8881 } # fraction by mass hc_over_GeV = 1.2398424468024265e-06 # h_Planck * c_light / GeV / nanometer diff --git a/project.py b/project.py new file mode 100644 index 0000000..0e3626f --- /dev/null +++ b/project.py @@ -0,0 +1,28 @@ +import numpy as np +from itertools import repeat +from chroma.transform import rotate + +def from_film(position, axis1=(0,0,1), axis2=(1,0,0), size=(800,600), width=0.035, focal_length=0.018): + height = width*(size[1]/float(size[0])) + + x = np.linspace(width/2, -width/2, size[0]) + z = np.linspace(-height/2, height/2, size[1]) + + zz, xx = np.meshgrid(z,x) + + grid = np.array(zip(xx.flatten(),np.zeros(xx.size),zz.flatten())) + + axis1 = np.asarray(axis1)/np.linalg.norm(axis1) + axis2 = np.asarray(axis2)/np.linalg.norm(axis2) + + assert axis1.dot(axis2) == 0.0 + + if (axis1 != (0,0,1)).any(): + grid = rotate(grid, np.arccos(axis1.dot((0,0,1))), np.cross(axis1,(0,0,1))) + + if (axis2 != (1,0,0)).any(): + grid = rotate(grid, np.arccos(axis2.dot((1,0,0))), np.cross(axis2,(1,0,0))) + + grid -= np.cross(axis1,axis2)*focal_length + + return grid+position, -grid/np.apply_along_axis(np.linalg.norm,1,grid)[:,np.newaxis] @@ -3,148 +3,147 @@ import sys import time import os import numpy as np - -import detectors -import optics -import generator -from generator import constant import itertools import threading -import gpu -from fileio import root -from chroma.itertoolset import repeating_iterator -from tools import profile_if_possible, enable_debug_on_crash + +from chroma import detectors +from chroma import optics +from chroma import generator +from chroma 
import gpu +from chroma.fileio import root +from chroma import tools +from chroma import event +from chroma.itertoolset import peek, repeat_copy, repeating_iterator def pick_seed(): - '''Returns a seed for a random number generator selected using - a mixture of the current time and the current process ID.''' + """Returns a seed for a random number generator selected using + a mixture of the current time and the current process ID.""" return int(time.time()) ^ (os.getpid() << 16) class Simulation(object): - def __init__(self, detector, detector_material, - seed=None, cuda_device=None, geant4_processes=4, bvh_bits=11, - use_cache=True): + def __init__(self, detector, seed=None, cuda_device=None, geant4_processes=4, bvh_bits=11, use_cache=True, nthreads_per_block=64, max_blocks=1024): self.detector = detector + self.nthreads_per_block = nthreads_per_block + self.max_blocks = max_blocks + if seed is None: self.seed = pick_seed() else: self.seed = seed - print >>sys.stderr, 'RNG seed:', self.seed + + print 'RNG seed: %i' % self.seed # We have three generators to seed: numpy.random, GEANT4, and CURAND. - # The latter two are done below + # The latter two are done below. np.random.seed(self.seed) - self.detector_material = detector_material if geant4_processes > 0: - self.photon_generator = generator.photon.G4ParallelGenerator(geant4_processes, - detector_material, - base_seed=self.seed) + self.photon_generator = generator.photon.G4ParallelGenerator(geant4_processes, detector.detector_material, base_seed=self.seed) else: self.photon_generator = None - print >>sys.stderr, 'Creating BVH with %d bits...' % (bvh_bits) - detector.build(bits=bvh_bits, use_cache=use_cache) + if not hasattr(detector, 'mesh'): + # need to build geometry + print 'Creating BVH with %i bits...' % bvh_bits + detector.build(bits=bvh_bits, use_cache=use_cache) - print >>sys.stderr, 'Initializing GPU...' - self.gpu_worker = gpu.GPU(cuda_device) + self.gpu = gpu.GPU(cuda_device) + + # geometry is loaded onto gpu by default + self.gpu_geometry = gpu.GPUGeometry(self.gpu, detector) - print >>sys.stderr, 'Loading detector onto GPU...' - self.gpu_worker.load_geometry(detector) + self.gpu_daq = gpu.GPUDaq(self.gpu_geometry, max(self.detector.pmtids)) + self.gpu_pdf = gpu.GPUPDF() + + print 'Initializing random numbers generators...' + self.rng_states = gpu.get_rng_states(self.nthreads_per_block*self.max_blocks, seed=self.seed) - print >>sys.stderr, 'Initializing random numbers generators...' 
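The new project.from_film() helper added above returns film-plane ray origins and unit view directions; a small sketch (the camera position is arbitrary, and the defaults correspond to an 800x600 grid on a 35 mm-wide film at 18 mm focal length):

    from chroma.project import from_film

    origins, directions = from_film((0.0, -1.0, 0.0), size=(160, 120))
    # origins.shape == (160*120, 3); each row of `directions` is a unit vector
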
- self.gpu_worker.setup_propagate(seed=self.seed) - self.gpu_worker.setup_daq(max(self.detector.pmtids)) self.pdf_config = None - def simulate(self, nevents, vertex_generator, keep_photon_start=False, keep_photon_stop=False, - run_daq=True, nreps=1): - photon_gen = repeating_iterator(self.photon_generator.generate_events(nevents, vertex_generator), - nreps) - return self.simulate_photons(nevents*nreps, - photon_gen, - keep_photon_start=keep_photon_start, keep_photon_stop=keep_photon_stop, - run_daq=run_daq) - - def simulate_photons(self, nevents, photon_generator, keep_photon_start=False, keep_photon_stop=False, - run_daq=True): - for ev in itertools.islice(photon_generator, nevents): - self.gpu_worker.load_photons(ev.photon_start) - self.gpu_worker.propagate() - self.gpu_worker.run_daq() - - ev.nphoton = len(ev.photon_start.positions) - - if not keep_photon_start: - ev.photon_start = None - - if keep_photon_stop: - ev.photon_stop = self.gpu_worker.get_photons() + def simulate(self, iterable, keep_photons_beg=False, keep_photons_end=False, run_daq=True, max_steps=10): + first_element, iterable = peek(iterable) + + if isinstance(first_element, event.Event): + iterable = self.photon_generator.generate_events(iterable) + elif isinstance(first_element, event.Photons): + iterable = (event.Event(photons_beg=x) for x in iterable) + + for ev in iterable: + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + + gpu.propagate(self.gpu, gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks, max_steps=max_steps) + + ev.nphotons = len(ev.photons_beg.pos) + + if not keep_photons_beg: + ev.photons_beg = None + + if keep_photons_end: + ev.photons_end = gpu_photons.get() if run_daq: - ev.channels = self.gpu_worker.get_hits() + gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + ev.channels = gpu_channels.get() yield ev - def propagate_photons(self, photons, max_steps=10): - self.gpu_worker.load_photons(photons) - self.gpu_worker.propagate(max_steps=max_steps) - return self.gpu_worker.get_photons() - - def create_pdf(self, nevents, vertex_generator, tbins, trange, - qbins, qrange, nreps=1): - photon_gen = repeating_iterator(self.photon_generator.generate_events(nevents, vertex_generator), - nreps) - return self.create_pdf_from_photons(nevents*nreps, photon_gen, - tbins, trange, qbins, qrange) - - def create_pdf_from_photons(self, nevents, photon_generator, - tbins, trange, qbins, qrange): - '''Returns tuple: 1D array of channel hit counts, 3D array of (channel, time, charge) pdfs''' + def create_pdf(self, iterable, tbins, trange, qbins, qrange, nreps=1): + """Returns tuple: 1D array of channel hit counts, 3D array of + (channel, time, charge) pdfs.""" + first_element, iterable = peek(iterable) + + if isinstance(first_element, event.Event): + iterable = self.photon_generator.generate_events(iterable) + pdf_config = (tbins, trange, qbins, qrange) if pdf_config != self.pdf_config: self.pdf_config = pdf_config - self.gpu_worker.setup_pdf(max(self.detector.pmtids), tbins, trange, + self.gpu_pdf.setup_pdf(max(self.detector.pmtids), tbins, trange, qbins, qrange) else: - self.gpu_worker.clear_pdf() + self.gpu_pdf.clear_pdf() - for ev in itertools.islice(photon_generator, nevents): - self.gpu_worker.load_photons(ev.photon_start) - self.gpu_worker.propagate() - self.gpu_worker.run_daq() - self.gpu_worker.add_hits_to_pdf() - - return self.gpu_worker.get_pdfs() - - - def eval_pdf(self, 
event_channels, nevents, vertex_generator, - min_twidth, trange, min_qwidth, qrange, - min_bin_content=20, nreps=1, time_only=True): - photon_gen = repeating_iterator(self.photon_generator.generate_events(nevents, vertex_generator), - nreps) - return self.eval_pdf_from_photons(event_channels, nevents*nreps, photon_gen, - min_twidth, trange, min_qwidth, qrange, - min_bin_content=min_bin_content, time_only=time_only) - - def eval_pdf_from_photons(self, event_channels, nevents, photon_generator, - min_twidth, trange, min_qwidth, qrange, - min_bin_content=20, time_only=True): - '''Returns tuple: 1D array of channel hit counts, 1D array of PDF probability densities''' - self.gpu_worker.setup_pdf_eval(event_channels.hit, event_channels.t, event_channels.q, - min_twidth, trange, min_qwidth, qrange, - min_bin_content=min_bin_content, time_only=True) - - for ev in itertools.islice(photon_generator, nevents): - self.gpu_worker.load_photons(ev.photon_start) - self.gpu_worker.propagate() - self.gpu_worker.run_daq() - self.gpu_worker.accumulate_pdf_eval() + if nreps > 1: + iterable = repeating_iterator(iterable) + + for ev in iterable: + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + gpu.propagate(self.gpu, gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_pdf.add_hits_to_pdf(gpu_channels) - return self.gpu_worker.get_pdf_eval() + return self.gpu_pdf.get_pdfs() + + def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=20, nreps=1, time_only=True): + """Returns tuple: 1D array of channel hit counts, 1D array of PDF + probability densities.""" + self.gpu_pdf.setup_pdf_eval(event_channels.hit, + event_channels.t, + event_channels.q, + min_twidth, + trange, + min_qwidth, + qrange, + min_bin_content=min_bin_content, + time_only=True) + + first_element, iterable = peek(iterable) + + if isinstance(first_element, event.Event): + iterable = self.photon_generator.generate_events(iterable) + + if nreps > 1: + iterable = repeating_iterator(iterable) + + for ev in iterable: + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + gpu.propagate(self.gpu, gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + gpu_channels = self.gpu_daq.acquire(gpu_photons, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_pdf.accumulate_pdf_eval(gpu_channels) + return self.gpu_pdf.get_pdf_eval() -@profile_if_possible +@tools.profile_if_possible def main(): import optparse @@ -155,17 +154,22 @@ def main(): help='Set random number generator seed') parser.add_option('-g', type='int', dest='ngenerators', default=4, help='Number of GEANT4 generator processes') - parser.add_option('--detector', type='string', dest='detector', default='microlbne') + parser.add_option('--detector', type='string', dest='detector', + default='microlbne') parser.add_option('--nevents', type='int', dest='nevents', default=100) - parser.add_option('--particle', type='string', dest='particle', default='e-') - parser.add_option('--energy', type='float', dest='energy', default=100.0) - parser.add_option('--pos', type='string', dest='pos', default='(0,0,0)') - parser.add_option('--dir', type='string', dest='dir', default='(1,0,0)') - parser.add_option('--save-photon-start', action='store_true', - dest='save_photon_start', 
default=False, + parser.add_option('--particle', type='string', dest='particle', + help='particle name', default='e-') + parser.add_option('--ke', type='float', dest='ke', + help='particle kinetic energy in MeV', default=100.0) + parser.add_option('--pos', type='string', dest='pos', + help='particle vertex origin.', default='(0,0,0)') + parser.add_option('--dir', type='string', dest='dir', + help='particle vertex direction.', default='(1,0,0)') + parser.add_option('--save-photon-beg', action='store_true', + dest='save_photons_beg', default=False, help='Save initial photon vertices to disk') - parser.add_option('--save-photon-stop', action='store_true', - dest='save_photon_stop', default=False, + parser.add_option('--save-photon-end', action='store_true', + dest='save_photons_end', default=False, help='Save final photon vertices to disk') options, args = parser.parse_args() @@ -178,57 +182,52 @@ def main(): if options.nevents <= 0: sys.exit('--nevents must be greater than 0!') - position = np.array(eval(options.pos), dtype=float) - direction = np.array(eval(options.dir), dtype=float) - print >>sys.stderr, 'Loading detector %s...' % options.detector + pos = np.array(eval(options.pos), dtype=float) + dir = np.array(eval(options.dir), dtype=float) + + print 'Loading detector %s...' % options.detector + sys.stdout.flush() detector = detectors.find(options.detector) - print >>sys.stderr, 'Creating generator...' + print 'Creating particle vertex generator...' + sys.stdout.flush() if options.particle == 'pi0': - vertex_generator = generator.vertex.pi0_gun(pi0_position=constant(position), - pi0_direction=constant(direction), - pi0_total_energy=constant(options.energy)) + ev_vertex_iter = itertools.islice(generator.vertex.pi0_gun(itertools.repeat(pos), itertools.repeat(dir), itertools.repeat(options.ke)), options.nevents) else: - vertex_generator = generator.vertex.particle_gun(particle_name=constant(options.particle), - position=constant(position), - direction=constant(direction), - total_energy=constant(options.energy)) + vertex = event.Vertex(options.particle, pos, dir, None, options.ke) + ev_vertex_iter = (event.Event(i, vertex, [vertex]) for i, vertex in zip(range(options.nevents), repeat_copy(vertex))) # Initializing simulation - print >>sys.stderr, 'WARNING: ASSUMING DETECTOR IS WCSIM WATER!!' - simulation = Simulation(detector=detector, detector_material=optics.water_wcsim, - seed=options.seed, cuda_device=options.device, - geant4_processes=options.ngenerators, bvh_bits=options.nbits) + simulation = Simulation(detector=detector, seed=options.seed, cuda_device=options.device, geant4_processes=options.ngenerators, bvh_bits=options.nbits) # Create output file writer = root.RootWriter(output_filename) # Preheat generator - event_iterator = simulation.simulate(options.nevents, vertex_generator, - keep_photon_start=options.save_photon_start, - keep_photon_stop=options.save_photon_stop) + ev_iter = simulation.simulate(ev_vertex_iter, keep_photons_beg=options.save_photons_beg, keep_photons_end=options.save_photons_end) + print 'Starting simulation...' - print >>sys.stderr, 'Starting simulation...' - start_sim = time.time() nphotons = 0 + t0 = time.time() - for i, ev in enumerate(event_iterator): - assert ev.nphoton > 0, 'GEANT4 generated event with no photons!' - nphotons += ev.nphoton + for i, ev in enumerate(ev_iter): + print "\rEvent: %i" % (i+1), + sys.stdout.flush() - writer.write_event(ev) + assert ev.nphotons > 0, 'Geant4 generated event with no photons!' 
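# Illustration only (not part of this patch): a condensed, hedged sketch of
# the driver logic above, showing how the rewritten Simulation.simulate() is
# fed.  It now consumes an iterable of event.Event (or event.Photons) objects
# and yields simulated events; the 'microlbne' detector name and the 100 MeV
# e- vertex are example values, and a working GPU + GEANT4 setup is assumed.
import numpy as np
from chroma import detectors, event
from chroma.itertoolset import repeat_copy
from chroma.sim import Simulation

detector = detectors.find('microlbne')
sim = Simulation(detector)

vertex = event.Vertex('e-', np.zeros(3), np.array([1.0, 0.0, 0.0]), None, 100.0)
events_in = (event.Event(i, v, [v])
             for i, v in zip(range(10), repeat_copy(vertex)))

for ev in sim.simulate(events_in, keep_photons_end=True):
    # run_daq defaults to True, so ev.channels is filled in as well
    print ev.nphotons, ev.channels.hit.sum()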
+ nphotons += ev.nphotons - if i % 10 == 0: - print >>sys.stderr, "\rEvent:", i, + writer.write_event(ev) + print - end_sim = time.time() - print >>sys.stderr, "\rEvent:", options.nevents - 1 + elapsed = time.time() - t0 writer.close() - print >>sys.stderr, 'Done. %1.1f events/sec, %1.0f photons/sec.' % (options.nevents/(end_sim - start_sim), nphotons/(end_sim - start_sim)) + print '%f elapsed, %1.1f events/sec, %1.0f photons/sec.' % \ + (elapsed, options.nevents/elapsed, nphotons/elapsed) if __name__ == '__main__': - enable_debug_on_crash() + tools.enable_debug_on_crash() main() diff --git a/src/alpha.h b/src/alpha.h index e4a3a40..ac75834 100644 --- a/src/alpha.h +++ b/src/alpha.h @@ -4,57 +4,14 @@ #include "linalg.h" #include "intersect.h" #include "mesh.h" +#include "sorting.h" -template <class T> -__device__ void swap(T &a, T &b) -{ - T temp = a; - a = b; - b = temp; -} +#include "stdio.h" #define ALPHA_DEPTH 10 -struct HitList -{ - unsigned int size; - unsigned int indices[ALPHA_DEPTH]; - float distances[ALPHA_DEPTH]; -}; - -__device__ void add_to_hit_list(const float &distance, const int &index, HitList &h) -{ - unsigned int i; - if (h.size >= ALPHA_DEPTH) - { - if (distance > h.distances[ALPHA_DEPTH-1]) - return; - - i = ALPHA_DEPTH-1; - } - else - { - i = h.size; - h.size += 1; - } - - h.indices[i] = index; - h.distances[i] = distance; - - while (i > 0 && h.distances[i-1] > h.distances[i]) - { - swap(h.distances[i-1], h.distances[i]); - swap(h.indices[i-1], h.indices[i]); - - i -= 1; - } -} - __device__ int get_color_alpha(const float3 &origin, const float3& direction) { - HitList h; - h.size = 0; - float distance; if (!intersect_node(origin, direction, g_start_node, -1.0f)) @@ -69,6 +26,10 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction) unsigned int i; + float distances[ALPHA_DEPTH]; + unsigned int indices[ALPHA_DEPTH]; + unsigned int n=0; + do { unsigned int first_child = tex1Dfetch(node_map, *node); @@ -106,9 +67,26 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction) if (intersect_triangle(origin, direction, v0, v1, v2, distance)) { - add_to_hit_list(distance, i, h); + if (n < 1) + { + distances[0] = distance; + indices[0] = i; + } + else + { + unsigned long j = searchsorted(n, distances, distance); + + if (j <= ALPHA_DEPTH-1) + { + insert(ALPHA_DEPTH, distances, j, distance); + insert(ALPHA_DEPTH, indices, j, i); + } + } + + if (n < ALPHA_DEPTH) + n++; } - + } // triangle loop node--; @@ -118,16 +96,19 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction) } // while loop while (node != head); - if (h.size < 1) + if (n < 1) return 0; float scale = 1.0f; float fr = 0.0f; float fg = 0.0f; float fb = 0.0f; - for (i=0; i < h.size; i++) + unsigned int index; + for (i=0; i < n; i++) { - uint4 triangle_data = g_triangles[h.indices[i]]; + index = indices[i]; + + uint4 triangle_data = g_triangles[index]; float3 v0 = g_vertices[triangle_data.x]; float3 v1 = g_vertices[triangle_data.y]; @@ -138,11 +119,11 @@ __device__ int get_color_alpha(const float3 &origin, const float3& direction) if (cos_theta < 0.0f) cos_theta = dot(-normalize(cross(v1-v0,v2-v1)),-direction); - unsigned int r0 = 0xff & (g_colors[h.indices[i]] >> 16); - unsigned int g0 = 0xff & (g_colors[h.indices[i]] >> 8); - unsigned int b0 = 0xff & g_colors[h.indices[i]]; + unsigned int r0 = 0xff & (g_colors[index] >> 16); + unsigned int g0 = 0xff & (g_colors[index] >> 8); + unsigned int b0 = 0xff & g_colors[index]; - float alpha = (255 - 
(0xff & (g_colors[h.indices[i]] >> 24)))/255.0f; + float alpha = (255 - (0xff & (g_colors[index] >> 24)))/255.0f; fr += r0*scale*cos_theta*alpha; fg += g0*scale*cos_theta*alpha; diff --git a/src/kernel.cu b/src/kernel.cu index 307412e..4418305 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -62,41 +62,6 @@ __device__ void to_diffuse(Photon &p, State &s, curandState &rng, const int &max extern "C" { -/* Translate the points `a` by the vector `v` */ -__global__ void translate(int nthreads, float3 *a, float3 v) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] += v; -} - -/* Rotate the points `a` through an angle `phi` counter-clockwise about the - axis `axis` (when looking towards +infinity). */ -__global__ void rotate(int nthreads, float3 *a, float phi, float3 axis) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] = rotate(a[id], phi, axis); -} - -__global__ void rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, float3 point) -{ - int id = blockIdx.x*blockDim.x + threadIdx.x; - - if (id >= nthreads) - return; - - a[id] -= point; - a[id] = rotate(a[id], phi, axis); - a[id] += point; -} - __global__ void update_xyz_lookup(int nthreads, int total_threads, int offset, float3 position, curandState *rng_states, float wavelength, float3 xyz, float3 *xyz_lookup1, float3 *xyz_lookup2, int max_steps) { int kernel_id = blockIdx.x*blockDim.x + threadIdx.x; @@ -239,6 +204,28 @@ __global__ void process_image(int nthreads, float3 *image, int *pixels, int nima } // process_image +__global__ void distance_to_mesh(int nthreads, float3 *positions, float3 *directions, float *distances) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + float3 position = positions[id]; + float3 direction = directions[id]; + direction /= norm(direction); + + float distance; + + int triangle_index = intersect_mesh(position, direction, distance); + + if (triangle_index == -1) + distances[id] = 1e9; + else + distances[id] = distance; + +} // distance_to_mesh + __global__ void ray_trace(int nthreads, float3 *positions, float3 *directions, int *pixels) { int id = blockIdx.x*blockDim.x + threadIdx.x; @@ -288,6 +275,9 @@ __global__ void ray_trace_alpha(int nthreads, float3 *positions, float3 *directi float3 direction = directions[id]; direction /= norm(direction); + bool hit; + float distance; + pixels[id] = get_color_alpha(position, direction); } // ray_trace diff --git a/src/sorting.h b/src/sorting.h new file mode 100644 index 0000000..ec16bbe --- /dev/null +++ b/src/sorting.h @@ -0,0 +1,103 @@ +template <class T> +__device__ void swap(T &a, T &b) +{ + T tmp = a; + a = b; + b = tmp; +} + +template <class T> +__device__ void reverse(int n, T *a) +{ + for (int i=0; i < n/2; i++) + swap(a[i],a[n-1-i]); +} + +template <class T> +__device__ void piksrt(int n, T *arr) +{ + int i,j; + T a; + + for (j=1; j < n; j++) + { + a = arr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) + { + arr[i+1] = arr[i]; + i--; + } + arr[i+1] = a; + } +} + +template <class T, class U> +__device__ void piksrt2(int n, T *arr, U *brr) +{ + int i,j; + T a; + U b; + + for (j=1; j < n; j++) + { + a = arr[j]; + b = brr[j]; + i = j-1; + while (i >= 0 && arr[i] > a) + { + arr[i+1] = arr[i]; + brr[i+1] = brr[i]; + i--; + } + arr[i+1] = a; + brr[i+1] = b; + } +} + +template <class T> +__device__ unsigned long searchsorted(unsigned long n, T *arr, const T &x) +{ + unsigned long ju,jm,jl; + int ascnd; + + jl = 0; + ju = n; + + 
ascnd = (arr[n-1] > arr[0]); + + while (ju-jl > 1) + { + jm = (ju+jl) >> 1; + + if (x >= arr[jm] == ascnd) + jl = jm; + else + ju = jm; + } + + if (x <= arr[0]) + return 0; + else if (x == arr[n-1]) + return n-1; + else + return ju; +} + +template <class T> +__device__ void insert(unsigned long n, T *arr, unsigned long i, const T &x) +{ + unsigned long j; + for (j=n-1; j > i; j--) + arr[j] = arr[j-1]; + arr[i] = x; +} + +template <class T> +__device__ void add_sorted(unsigned long n, T *arr, const T &x) +{ + unsigned long i = searchsorted(n, arr, x); + + if (i < n) + insert(n, arr, i, x); +} diff --git a/src/transform.cu b/src/transform.cu new file mode 100644 index 0000000..57bd509 --- /dev/null +++ b/src/transform.cu @@ -0,0 +1,47 @@ +//-*-c-*- + +#include "linalg.h" +#include "rotate.h" + +extern "C" +{ + +/* Translate the points `a` by the vector `v` */ +__global__ void translate(int nthreads, float3 *a, float3 v) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] += v; +} + +/* Rotate the points `a` through an angle `phi` counter-clockwise about the + axis `axis` (when looking towards +infinity). */ +__global__ void rotate(int nthreads, float3 *a, float phi, float3 axis) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] = rotate(a[id], phi, axis); +} + +/* Rotate the points `a` through an angle `phi` counter-clockwise + (when looking towards +infinity along `axis`) about the axis defined + by the point `point` and the vector `axis` . */ +__global__ void rotate_around_point(int nthreads, float3 *a, float phi, float3 axis, float3 point) +{ + int id = blockIdx.x*blockDim.x + threadIdx.x; + + if (id >= nthreads) + return; + + a[id] -= point; + a[id] = rotate(a[id], phi, axis); + a[id] += point; +} + +} // extern "c" diff --git a/tests/test_fileio.py b/tests/test_fileio.py index d21c9ff..2911976 100644 --- a/tests/test_fileio.py +++ b/tests/test_fileio.py @@ -5,36 +5,34 @@ import numpy as np class TestFileIO(unittest.TestCase): def test_file_write_and_read(self): - ev = event.Event(1, 'e-', (0,0,1), (1,0,0), 15) + ev = event.Event(1, event.Vertex('e-', (0,0,1), (1,0,0), (0,1,0), 15)) - photon_start = root.make_photon_with_arrays(1) - photon_start.positions[0] = (1,2,3) - photon_start.directions[0] = (4,5,6) - photon_start.polarizations[0] = (7,8,9) - photon_start.wavelengths[0] = 400.0 - photon_start.times[0] = 100.0 - photon_start.histories[0] = 20 - photon_start.last_hit_triangles[0] = 5 - ev.photon_start = photon_start + photons_beg = root.make_photon_with_arrays(1) + photons_beg.pos[0] = (1,2,3) + photons_beg.dir[0] = (4,5,6) + photons_beg.pol[0] = (7,8,9) + photons_beg.wavelengths[0] = 400.0 + photons_beg.t[0] = 100.0 + photons_beg.last_hit_triangles[0] = 5 + photons_beg.flags[0] = 20 + ev.photons_beg = photons_beg - photon_stop = root.make_photon_with_arrays(1) - photon_stop.positions[0] = (1,2,3) - photon_stop.directions[0] = (4,5,6) - photon_stop.polarizations[0] = (7,8,9) - photon_stop.wavelengths[0] = 400.0 - photon_stop.times[0] = 100.0 - photon_stop.histories[0] = 20 - photon_stop.last_hit_triangles[0] = 5 - ev.photon_stop = photon_stop + photons_end = root.make_photon_with_arrays(1) + photons_end.pos[0] = (1,2,3) + photons_end.dir[0] = (4,5,6) + photons_end.pol[0] = (7,8,9) + photons_end.wavelengths[0] = 400.0 + photons_end.t[0] = 100.0 + photons_end.last_hit_triangles[0] = 5 + photons_end.flags[0] = 20 + ev.photons_end = photons_end - ev.nphoton = 1 - - 
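# Illustration only (this aside refers to the new src/transform.cu above, not
# to the tests/test_fileio.py hunk it sits next to): a rough sketch of how the
# stand-alone transform kernels might be driven with PyCUDA.  Compiling the
# file directly with SourceModule and an include path for linalg.h/rotate.h is
# an assumption made for this example; chroma's own gpu module presumably
# handles compilation itself.
import os
import numpy as np
import pycuda.autoinit
from pycuda import gpuarray as ga
from pycuda.compiler import SourceModule

module = SourceModule(open('src/transform.cu').read(),
                      include_dirs=[os.path.abspath('src')],
                      no_extern_c=True)
translate = module.get_function('translate')

n = 1024
pts = np.zeros(n, dtype=ga.vec.float3)
pts['x'], pts['y'], pts['z'] = 1.0, 2.0, 3.0   # start all points at (1,2,3)
points = ga.to_gpu(pts)

# shift every point by (0, 0, 100); matches the kernel signature
# translate(int nthreads, float3 *a, float3 v)
translate(np.int32(n), points, ga.vec.make_float3(0.0, 0.0, 100.0),
          block=(64, 1, 1), grid=(n//64 + 1, 1))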
ev.subtracks.append(event.Subtrack('e-', (40,30,20), (-1, -2, -3), 400, 800)) + ev.vertices = [ev.primary_vertex] channels = event.Channels(hit=np.array([True, False]), t=np.array([20.0, 1e9], dtype=np.float32), q=np.array([2.0, 0.0], dtype=np.float32), - histories=np.array([8, 32], dtype=np.uint32)) + flags=np.array([8, 32], dtype=np.uint32)) ev.channels = channels filename = '/tmp/chroma-filewritertest.root' @@ -58,16 +56,18 @@ class TestFileIO(unittest.TestCase): # Enough screwing around, let's get the one event in the file newev = reader.current() - # Now check if everything is correct in the event - for attribute in ['event_id', 'particle_name','gen_total_energy']: + for attribute in ['id']: self.assertEqual(getattr(ev, attribute), getattr(newev, attribute), 'compare %s' % attribute) - for attribute in ['gen_position', 'gen_direction']: - self.assertTrue(np.allclose(getattr(ev, attribute), getattr(newev, attribute)), 'compare %s' % attribute) - for attribute in ['positions', 'directions', 'wavelengths', 'polarizations', 'times', - 'histories', 'last_hit_triangles']: - self.assertTrue(np.allclose(getattr(ev.photon_start, attribute), - getattr(newev.photon_start, attribute)), 'compare %s' % attribute) - self.assertTrue(np.allclose(getattr(ev.photon_stop, attribute), - getattr(newev.photon_stop, attribute)), 'compare %s' % attribute) + for attribute in ['pos', 'dir', 'pol', 'ke', 't0']: + self.assertTrue(np.allclose(getattr(ev.primary_vertex, attribute), getattr(newev.primary_vertex, attribute)), 'compare %s' % attribute) + + for i in range(len(ev.vertices)): + self.assertTrue(np.allclose(getattr(ev.vertices[i], attribute), getattr(newev.vertices[i], attribute)), 'compare %s' % attribute) + + for attribute in ['pos', 'dir', 'pol', 'wavelengths', 't', 'last_hit_triangles', 'flags']: + self.assertTrue(np.allclose(getattr(ev.photons_beg, attribute), + getattr(newev.photons_beg, attribute)), 'compare %s' % attribute) + self.assertTrue(np.allclose(getattr(ev.photons_end, attribute), + getattr(newev.photons_end, attribute)), 'compare %s' % attribute) diff --git a/tests/test_generator_photon.py b/tests/test_generator_photon.py index 9684126..13501fe 100644 --- a/tests/test_generator_photon.py +++ b/tests/test_generator_photon.py @@ -1,20 +1,21 @@ import unittest +import itertools -import chroma.generator.photon +from chroma import generator from chroma.generator.vertex import constant_particle_gun -from chroma.optics import water_wcsim +from chroma import optics class TestG4ParallelGenerator(unittest.TestCase): def test_center(self): '''Generate Cherenkov light at the center of the world volume''' - gen = chroma.generator.photon.G4ParallelGenerator(1, water_wcsim) - vertex = constant_particle_gun('e-', (0,0,0), (1,0,0), 100) - for event in gen.generate_events(10, vertex): - self.assertGreater(len(event.photon_start.positions), 0) + gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim) + vertex = itertools.islice(constant_particle_gun('e-', (0,0,0), (1,0,0), 100), 10) + for event in gen.generate_events(vertex): + self.assertGreater(len(event.photons_beg.pos), 0) def test_off_center(self): '''Generate Cherenkov light at (1 m, 0 m, 0 m)''' - gen = chroma.generator.photon.G4ParallelGenerator(1, water_wcsim) - vertex = constant_particle_gun('e-', (1,0,0), (1,0,0), 100) - for event in gen.generate_events(10, vertex): - self.assertGreater(len(event.photon_start.positions), 0) + gen = generator.photon.G4ParallelGenerator(1, optics.water_wcsim) + vertex = 
itertools.islice(constant_particle_gun('e-', (1,0,0), (1,0,0), 100), 10) + for event in gen.generate_events(vertex): + self.assertGreater(len(event.photons_beg.pos), 0) diff --git a/tests/test_generator_vertex.py b/tests/test_generator_vertex.py index d773579..cec363d 100644 --- a/tests/test_generator_vertex.py +++ b/tests/test_generator_vertex.py @@ -8,16 +8,16 @@ class TestParticleGun(unittest.TestCase): '''Generate electron vertices at the center of the world volume.''' vertex = chroma.generator.vertex.constant_particle_gun('e-', (0,0,0), (1,0,0), 100) for ev in itertools.islice(vertex, 100): - self.assertEquals(ev.particle_name, 'e-') - self.assertTrue(np.allclose(ev.gen_position, [0,0,0])) - self.assertTrue(np.allclose(ev.gen_direction, [1,0,0])) - self.assertTrue(np.allclose(ev.gen_total_energy, 100)) + self.assertEquals(ev.primary_vertex.particle_name, 'e-') + self.assertTrue(np.allclose(ev.primary_vertex.pos, [0,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.ke, 100)) def test_off_center(self): '''Generate electron vertices at (1,0,0) in the world volume.''' vertex = chroma.generator.vertex.constant_particle_gun('e-', (1,0,0), (1,0,0), 100) for ev in itertools.islice(vertex, 100): - self.assertEquals(ev.particle_name, 'e-') - self.assertTrue(np.allclose(ev.gen_position, [1,0,0])) - self.assertTrue(np.allclose(ev.gen_direction, [1,0,0])) - self.assertTrue(np.allclose(ev.gen_total_energy, 100)) + self.assertEquals(ev.primary_vertex.particle_name, 'e-') + self.assertTrue(np.allclose(ev.primary_vertex.pos, [1,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.dir, [1,0,0])) + self.assertTrue(np.allclose(ev.primary_vertex.ke, 100)) diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 9a08b53..791f119 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -1,11 +1,12 @@ import unittest import numpy as np +import itertools import chroma.detectors from chroma.generator.photon import G4ParallelGenerator from chroma.generator.vertex import constant_particle_gun from chroma.optics import water_wcsim -from chroma.gpu import GPU +from chroma import gpu from chroma.sim import Simulation class TestPDF(unittest.TestCase): @@ -18,21 +19,26 @@ class TestPDF(unittest.TestCase): '''Create a hit count and (q,t) PDF for 10 MeV events in MicroLBNE''' g4generator = G4ParallelGenerator(1, water_wcsim) - gpu = GPU(0) - gpu.load_geometry(self.detector) - gpu.setup_propagate() - gpu.setup_daq(max(self.detector.pmtids)) - gpu.setup_pdf(max(self.detector.pmtids), 100, (-0.5, 999.5), 10, (-0.5, 9.5)) + gpu_instance = gpu.GPU(0) + gpu_geometry = gpu.GPUGeometry(gpu_instance, self.detector) + + nthreads_per_block, max_blocks = 64, 1024 + + rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks) + + gpu_daq = gpu.GPUDaq(gpu_geometry, max(self.detector.pmtids)) + gpu_pdf = gpu.GPUPDF() + gpu_pdf.setup_pdf(max(self.detector.pmtids), 100, (-0.5, 999.5), 10, (-0.5, 9.5)) - gpu.clear_pdf() + gpu_pdf.clear_pdf() - for ev in g4generator.generate_events(10, self.vertex_gen): - gpu.load_photons(ev.photon_start) - gpu.propagate() - gpu.run_daq() - gpu.add_hits_to_pdf() + for ev in g4generator.generate_events(itertools.islice(self.vertex_gen, 10)): + gpu_photons = gpu.GPUPhotons(ev.photons_beg) + gpu.propagate(gpu_instance, gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_channels = gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block, max_blocks) + gpu_pdf.add_hits_to_pdf(gpu_channels) - hitcount, pdf = 
gpu.get_pdfs() + hitcount, pdf = gpu_pdf.get_pdfs() self.assertTrue( (hitcount > 0).any() ) self.assertTrue( (pdf > 0).any() ) @@ -41,9 +47,11 @@ class TestPDF(unittest.TestCase): self.assertEqual(nhits, pdf[i].sum()) def testSimPDF(self): - sim = Simulation(self.detector, water_wcsim) - hitcount, pdf = sim.create_pdf(100, self.vertex_gen, - 100, (-0.5, 999.5), 10, (-0.5, 9.5)) + sim = Simulation(self.detector) + + vertex_iter = itertools.islice(self.vertex_gen, 10) + + hitcount, pdf = sim.create_pdf(vertex_iter, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) self.assertTrue( (hitcount > 0).any() ) self.assertTrue( (pdf > 0).any() ) diff --git a/tests/test_propagation.py b/tests/test_propagation.py index 331242b..bf886bd 100644 --- a/tests/test_propagation.py +++ b/tests/test_propagation.py @@ -11,43 +11,43 @@ class TestPropagation(unittest.TestCase): def testAbort(self): '''Photons that hit a triangle at normal incidence should not abort. - Photons that hit a triangle at exactly normal incidence can sometimes produce a dot product - that is outside the range allowed by acos(). Trigger these with axis aligned photons in a box + Photons that hit a triangle at exactly normal incidence can sometimes + produce a dot product that is outside the range allowed by acos(). + Trigger these with axis aligned photons in a box. ''' # Setup geometry - cube = Geometry() + cube = Geometry(vacuum) cube.add_solid(Solid(box(100,100,100), vacuum, vacuum)) cube.pmtids = [0] - sim = Simulation(cube, vacuum, bvh_bits=4, geant4_processes=0, - use_cache=False) + sim = Simulation(cube, bvh_bits=4, geant4_processes=0, use_cache=False) # Create initial photons nphotons = 10000 - positions = np.tile([0,0,0], (nphotons,1)).astype(np.float32) - directions = np.tile([0,0,1], (nphotons,1)).astype(np.float32) - polarizations = np.zeros_like(positions) + pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32) + dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32) + pol = np.zeros_like(pos) phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32) - polarizations[:,0] = np.cos(phi) - polarizations[:,1] = np.sin(phi) - times = np.zeros(nphotons, dtype=np.float32) + pol[:,0] = np.cos(phi) + pol[:,1] = np.sin(phi) + t = np.zeros(nphotons, dtype=np.float32) wavelengths = np.empty(nphotons, np.float32) wavelengths.fill(400.0) - photons = Photons(positions=positions, directions=directions, polarizations=polarizations, - times=times, wavelengths=wavelengths) + photons = Photons(pos=pos, dir=dir, pol=pol, t=t, wavelengths=wavelengths) # First make one step to check for strangeness - photon_stop = sim.propagate_photons(photons, max_steps=1) - self.assertFalse(np.isnan(photon_stop.positions).any()) - self.assertFalse(np.isnan(photon_stop.directions).any()) - self.assertFalse(np.isnan(photon_stop.polarizations).any()) - self.assertFalse(np.isnan(photon_stop.times).any()) - self.assertFalse(np.isnan(photon_stop.wavelengths).any()) + photons_end = sim.simulate([photons], keep_photons_end=True, max_steps=1).next().photons_end + + self.assertFalse(np.isnan(photons_end.pos).any()) + self.assertFalse(np.isnan(photons_end.dir).any()) + self.assertFalse(np.isnan(photons_end.pol).any()) + self.assertFalse(np.isnan(photons_end.t).any()) + self.assertFalse(np.isnan(photons_end.wavelengths).any()) # Now let it run the usual ten steps - photon_stop = sim.propagate_photons(photons, max_steps=10) - aborted = (photon_stop.histories & (1 << 31)) > 0 + photons_end = sim.simulate([photons], keep_photons_end=True, max_steps=10).next().photons_end + 
aborted = (photons_end.flags & (1 << 31)) > 0 print 'aborted photons: %1.1f' % (float(np.count_nonzero(aborted)) / nphotons) self.assertFalse(aborted.any()) diff --git a/tests/test_rayleigh.py b/tests/test_rayleigh.py index 4394ada..7e5fc92 100644 --- a/tests/test_rayleigh.py +++ b/tests/test_rayleigh.py @@ -13,43 +13,43 @@ ROOT.gROOT.SetBatch(1) class TestRayleigh(unittest.TestCase): def setUp(self): - self.cube = Geometry() + self.cube = Geometry(water_wcsim) self.cube.add_solid(Solid(box(100,100,100), water_wcsim, water_wcsim)) self.cube.pmtids = [0] - self.sim = Simulation(self.cube, water_wcsim, bvh_bits=4, geant4_processes=0, + self.sim = Simulation(self.cube, bvh_bits=4, geant4_processes=0, use_cache=False) nphotons = 100000 - positions = np.tile([0,0,0], (nphotons,1)).astype(np.float32) - directions = np.tile([0,0,1], (nphotons,1)).astype(np.float32) - polarizations = np.zeros_like(positions) + pos = np.tile([0,0,0], (nphotons,1)).astype(np.float32) + dir = np.tile([0,0,1], (nphotons,1)).astype(np.float32) + pol = np.zeros_like(pos) phi = np.random.uniform(0, 2*np.pi, nphotons).astype(np.float32) - polarizations[:,0] = np.cos(phi) - polarizations[:,1] = np.sin(phi) - times = np.zeros(nphotons, dtype=np.float32) + pol[:,0] = np.cos(phi) + pol[:,1] = np.sin(phi) + t = np.zeros(nphotons, dtype=np.float32) wavelengths = np.empty(nphotons, np.float32) wavelengths.fill(400.0) - self.photons = Photons(positions=positions, directions=directions, polarizations=polarizations, - times=times, wavelengths=wavelengths) + self.photons = Photons(pos=pos, dir=dir, pol=pol, t=t, wavelengths=wavelengths) def testAngularDistributionPolarized(self): # Fully polarized photons - self.photons.polarizations[:] = [1.0, 0.0, 0.0] + self.photons.pol[:] = [1.0, 0.0, 0.0] - photon_stop = self.sim.propagate_photons(self.photons, max_steps=1) - aborted = (photon_stop.histories & (1 << 31)) > 0 + photons_end = self.sim.simulate([self.photons], keep_photons_end=True, max_steps=1).next().photons_end + aborted = (photons_end.flags & (1 << 31)) > 0 self.assertFalse(aborted.any()) - # Compute the dot product between initial and final directions - rayleigh_scatters = (photon_stop.histories & (1 << 4)) > 0 - cos_scatter = (self.photons.directions[rayleigh_scatters] * photon_stop.directions[rayleigh_scatters]).sum(axis=1) + # Compute the dot product between initial and final dir + rayleigh_scatters = (photons_end.flags & (1 << 4)) > 0 + cos_scatter = (self.photons.dir[rayleigh_scatters] * photons_end.dir[rayleigh_scatters]).sum(axis=1) theta_scatter = np.arccos(cos_scatter) h = histogram.Histogram(bins=100, range=(0, np.pi)) h.fill(theta_scatter) h = rootify(h) - # The functional form for polarized light should be (1 + \cos^2 \theta)\sin \theta - # according to GEANT4 physics reference manual + # The functional form for polarized light should be + # (1 + \cos^2 \theta)\sin \theta according to GEANT4 physics + # reference manual. 
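# Illustration only (not part of this patch): the expected shape quoted in the
# comment above, p(theta) = (1 + cos^2 theta) sin(theta) on [0, pi], has the
# normalized CDF  F(theta) = (3*(1 - cos theta) + (1 - cos^3 theta)) / 8,
# so an equivalent cross-check could be run without ROOT as a
# Kolmogorov-Smirnov test (scipy is an assumption here, not a chroma
# dependency).  In the test above one would pass theta_scatter in place of
# the rejection-sampled toy data below.
import numpy as np
from scipy import stats

def rayleigh_pol_cdf(theta):
    c = np.cos(theta)
    return (3.0*(1.0 - c) + (1.0 - c**3)) / 8.0

# toy sample drawn from p(theta) by rejection (envelope 2 > max p ~ 1.09)
theta = np.random.uniform(0.0, np.pi, 200000)
keep = np.random.uniform(0.0, 2.0, theta.size) < (1.0 + np.cos(theta)**2)*np.sin(theta)
samples = theta[keep]

D, p = stats.kstest(samples, rayleigh_pol_cdf)
print 'KS distance %f, p-value %f' % (D, p)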
f = ROOT.TF1("pol_func", "[0]*(1+cos(x)**2)*sin(x)", 0, np.pi) h.Fit(f) self.assertGreater(f.GetProb(), 1e-3) diff --git a/threadtest.py b/threadtest.py deleted file mode 100644 index f03f920..0000000 --- a/threadtest.py +++ /dev/null @@ -1,94 +0,0 @@ -from gputhread import * -from Queue import Queue -from sample import uniform_sphere -import numpy as np -from pycuda import gpuarray -import pycuda.driver as cuda -from uncertainties import ufloat, umath -from histogram import Histogram -import math - -jobs = Queue() -output = Queue() - -def generate_event(detector, position=(0,0,0), nphotons=5000): - jobs.put((gpuarray.vec.make_float3(*position), nphotons)) - jobs.join() - - earliest_times = output.get() - - pmt_times = earliest_times[detector.pmtids] - event_times = [ (i, t) for i, t in zip(detector.pmtids, pmt_times) if t < 1e8 ] - print '%i hit pmts' % len(event_times) - return event_times - -def likelihood(detector, event_times, position=(0,0,0), nphotons=5000, neval=100): - for i in range(neval): - jobs.put((gpuarray.vec.make_float3(*position), nphotons)) - jobs.join() - - t = np.zeros(shape=(neval, max(detector.pmtids)+1), dtype=np.float32) - for i in range(neval): - t[i] = output.get() - - log_likelihood = 0.0 - log_likelihood_variance = 0.0 - for i, time in event_times: - h = Histogram(500, (-0.5e-9, 99.5e-9)) - h.fill(t[:,i]) - - if h.nentries > 0: - h.normalize() - - probability = h.ueval(time) - - if probability.nominal_value == 0.0: - if h.nentries > 0: - probability = ufloat((0.5/h.nentries, 0.5/h.nentries)) - else: - probability = \ - ufloat((1.0/len(h.bincenters), 1.0/len(h.bincenters))) - - log_likelihood += math.log(probability.nominal_value) - log_likelihood_variance += (probability.std_dev()/probability.nominal_value)**2 - - return ufloat((-log_likelihood, log_likelihood_variance**0.5)) - -if __name__ == '__main__': - import sys - import optparse - import time - - parser = optparse.OptionParser('%prog') - parser.add_option('-b', type='int', dest='nbits', default=8) - parser.add_option('-j', type='string', dest='devices', default='0') - parser.add_option('-n', type='int', dest='nblocks', default=64) - options, args = parser.parse_args() - - from detectors import build_minilbne - - detector = build_minilbne() - - detector.build(bits=options.nbits) - - cuda.init() - - gputhreads = [] - for i in [int(s) for s in options.devices.split(',')]: - gputhreads.append(GPUThread(i, detector, jobs, output, options.nblocks)) - gputhreads[-1].start() - - try: - event_times = generate_event(detector) - - for z in np.linspace(-1.0, 1.0, 100): - t0 = time.time() - log_likelihood = likelihood(detector, event_times, (z,0,0)) - elapsed = time.time() - t0 - print 'z = %5.2f, likelihood = %s, elapsed %.2f sec' % (z, log_likelihood, elapsed) - finally: - for gputhread in gputhreads: - gputhread.stop() - - for gputhread in gputhreads: - gputhread.join() @@ -2,6 +2,24 @@ import numpy as np import time import datetime import sys +import math + +def ufloat_to_str(x): + msd = -int(math.floor(math.log10(x.std_dev()))) + return '%.*f +/- %.*f' % (msd, x.nominal_value, msd, x.std_dev()) + +def progress(seq): + "Print progress while iterating over `seq`." + n = len(seq) + print '[' + ' '*21 + ']\r[', + sys.stdout.flush() + for i, item in enumerate(seq): + if i % (n//10) == 0: + print '.', + sys.stdout.flush() + yield item + print ']' + sys.stdout.flush() def debugger_hook(type, value, tb): if hasattr(sys, 'ps1') or not sys.stderr.isatty(): |
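# Illustration only (not part of this patch): example usage of the two helpers
# added to tools.py above.  Note that progress() needs a sequence that
# supports len() and has at least 10 items, since its print step is n//10,
# and ufloat_to_str() expects the tuple-style ufloat constructor used
# elsewhere in this changeset.
from uncertainties import ufloat
from chroma import tools

total = 0
for i in tools.progress(range(1000)):   # prints a 10-dot progress bar
    total += i

print tools.ufloat_to_str(ufloat((1.234, 0.056)))   # prints '1.23 +/- 0.06'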