#!/usr/bin/env python import numpy as np from pycuda import gpuarray as ga import time from uncertainties import ufloat import sys from chroma.gpu import GPU, to_float3 from chroma.camera import get_rays from chroma.event import Photons from chroma.sample import uniform_sphere from chroma.generator.photon import G4ParallelGenerator from chroma.optics import water_wcsim from chroma.generator.vertex import constant_particle_gun from chroma.tools import profile_if_possible # Generator processes need to fork BEFORE the GPU context is setup g4generator = G4ParallelGenerator(4, water_wcsim) def progress(seq): "Print progress while iterating over `seq`." n = len(seq) print '[' + ' '*21 + ']\r[', sys.stdout.flush() for i, item in enumerate(seq): if i % (n//10) == 0: print '.', sys.stdout.flush() yield item print ']' sys.stdout.flush() def ray_trace(gpu, number=1000): """ Return the number of mean and standard deviation of the number of ray intersections per second as a ufloat for the geometry loaded onto `gpu`. .. note:: The rays are thrown from a camera sitting *outside* of the geometry. Args: - gpu, chroma.gpu.GPU The GPU object with a geometry already loaded. - number, int The number of kernel calls to average. """ lb, ub = gpu.geometry.mesh.get_bounds() scale = np.linalg.norm(ub-lb) point = [0,scale,(lb[2]+ub[2])/2] size = (800,600) width, height = size origins, directions = get_rays(point, size, 0.035, focal_length=0.018) origins_gpu = ga.to_gpu(to_float3(origins)) directions_gpu = ga.to_gpu(to_float3(directions)) pixels_gpu = ga.zeros(width*height, dtype=np.int32) run_times = [] for i in progress(range(number)): t0 = time.time() gpu.kernels.ray_trace(np.int32(pixels_gpu.size), origins_gpu, directions_gpu, pixels_gpu, block=(gpu.nthreads_per_block,1,1), grid=(pixels_gpu.size//gpu.nthreads_per_block+1,1)) gpu.context.synchronize() elapsed = time.time() - t0 if i > 0: # first kernel call incurs some driver overhead run_times.append(elapsed) return pixels_gpu.size/ufloat((np.mean(run_times),np.std(run_times))) def load_photons(gpu, number=10, nphotons=500000): """ Return the mean and standard deviation of the number of photons loaded onto `gpu` per second. Args: - gpu, chroma.gpu.GPU The GPU object with a geometry already loaded. - number, int The number of loads to average - nphotons, int The number of photons to load per trial """ gpu.setup_propagate() photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons)) run_times = [] for i in progress(range(number)): t0 = time.time() gpu.load_photons(photons) gpu.context.synchronize() elapsed = time.time() - t0 if i > 0: # first kernel call incurs some driver overhead run_times.append(elapsed) return nphotons/ufloat((np.mean(run_times),np.std(run_times))) def propagate(gpu, number=10, nphotons=500000): """ Return the mean and standard deviation of the number of photons propagated per second as a ufloat for the geometry loaded onto `gpu`. Args: - gpu, chroma.gpu.GPU The GPU object with a geometry already loaded. - number, int The number of kernel calls to average. - nphotons, int The number of photons to propagate per kernel call. """ gpu.setup_propagate() run_times = [] for i in progress(range(number)): photons = Photons(np.zeros((nphotons,3)), uniform_sphere(nphotons), np.random.uniform(400,800,size=nphotons)) gpu.load_photons(photons) t0 = time.time() gpu.propagate() gpu.context.synchronize() elapsed = time.time() - t0 if i > 0: # first kernel call incurs some driver overhead run_times.append(elapsed) return nphotons/ufloat((np.mean(run_times),np.std(run_times))) @profile_if_possible def pdf(gpu, max_pmt_id, npdfs=10, nevents=100, nreps=1): """ Return the mean and standard deviation of the number of 100 MeV events per second that can be histogrammed a ufloat for the geometry loaded onto `gpu`. Args: - gpu, chroma.gpu.GPU The GPU object with a geometry already loaded. - max_pmt_id, int The channel number of the highest PMT - npdfs, int The number of pdf generations to average. - nevents, int The number of 100 MeV events to generate for each PDF. - nreps, int The number of times to propagate each event and add to PDF """ gpu.setup_propagate() run_times = [] gpu.setup_propagate() gpu.setup_daq(max_pmt_id) gpu.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5)) vertex_gen = constant_particle_gun('e-', (0,0,0), (1,0,0), 100) for i in progress(range(npdfs)): t0 = time.time() gpu.clear_pdf() for ev in g4generator.generate_events(nevents, vertex_gen): for j in xrange(nreps): gpu.load_photons(ev.photon_start) gpu.propagate() gpu.run_daq() gpu.add_hits_to_pdf() hitcount, pdf = gpu.get_pdfs() elapsed = time.time() - t0 if i > 0: # first kernel call incurs some driver overhead run_times.append(elapsed) return nevents*nreps/ufloat((np.mean(run_times),np.std(run_times))) if __name__ == '__main__': from chroma.detectors import build_lbne_200kton, build_minilbne lbne = build_lbne_200kton() lbne.build(bits=11) gpu = GPU() gpu.load_geometry(lbne, print_usage=False) print '%s track steps/s' % ray_trace(gpu) print '%s loaded photons/s' % load_photons(gpu) print '%s propagated photons/s' % propagate(gpu) print '%s histogrammed 100 MeV events/s' % pdf(gpu, max(lbne.pmtids))