summaryrefslogtreecommitdiff
path: root/test/data/ray_intersection.npy
AgeCommit message (Collapse)Author
2011-10-05Epic port of Chroma from units of meters/seconds/MeV toStan Seibert
millimeters/nanoseconds/MeV in order to match GEANT4, and also avoid huge discrepancies in magnitude caused by values like 10e-9 sec. Along the way, cleaned up a few things: * Switch the PI and SPEED_OF_LIGHT constants from double to single precision. This avoid some unnecessary double precision calculations in the GPU code. * Fixed a silly problem in the definition of the spherical spiral. Now the demo detector looks totally awesome. Also wrapped it in a black surface. Demo detector now has 10055 PMTs. * Updated the test_ray_intersection data file to reflect the new units. * Fix a missing import in chroma.gpu.tools
60'>60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
#!/usr/bin/env python
import numpy as np
import pycuda.driver as cuda
from pycuda import gpuarray as ga
import time
from uncertainties import ufloat
import sys
import itertools

from chroma import gpu
from chroma import camera
from chroma import event
from chroma import sample
from chroma import generator
from chroma import tools
from chroma import optics
from chroma.transform import normalize
# Generator processes need to fork BEFORE the GPU context is setup
g4generator = generator.photon.G4ParallelGenerator(4, optics.water_wcsim)

def intersect(gpu_geometry, number=100, nphotons=500000, nthreads_per_block=64,
              max_blocks=1024):
    "Returns the average number of ray intersections per second."
    distances_gpu = ga.empty(nphotons, dtype=np.float32)

    module = gpu.get_cu_module('mesh.h', options=('--use_fast_math',))
    gpu_funcs = gpu.GPUFuncs(module)

    run_times = []
    for i in tools.progress(range(number)):
        pos = ga.zeros(nphotons, dtype=ga.vec.float3)
        dir = ga.to_gpu(gpu.to_float3(sample.uniform_sphere(nphotons)))

        t0 = time.time()
        gpu_funcs.distance_to_mesh(np.int32(pos.size), pos, dir, gpu_geometry.gpudata, distances_gpu, block=(nthreads_per_block,1,1), grid=(pos.size//nthreads_per_block+1,1))
        cuda.Context.get_current().synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nphotons/ufloat((np.mean(run_times),np.std(run_times)))

def load_photons(number=100, nphotons=500000):
    """Returns the average number of photons moved to the GPU device memory
    per second."""
    pos = np.zeros((nphotons,3))
    dir = sample.uniform_sphere(nphotons)
    pol = normalize(np.cross(sample.uniform_sphere(nphotons), dir))
    wavelengths = np.random.uniform(400,800,size=nphotons)
    photons = event.Photons(pos, dir, pol, wavelengths)

    run_times = []
    for i in tools.progress(range(number)):
        t0 = time.time()
        gpu_photons = gpu.GPUPhotons(photons)
        cuda.Context.get_current().synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nphotons/ufloat((np.mean(run_times),np.std(run_times)))

def propagate(gpu_geometry, number=10, nphotons=500000, nthreads_per_block=64,
              max_blocks=1024):
    "Returns the average number of photons propagated on the GPU per second."
    rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)

    run_times = []
    for i in tools.progress(range(number)):
        pos = np.zeros((nphotons,3))
        dir = sample.uniform_sphere(nphotons)
        pol = normalize(np.cross(sample.uniform_sphere(nphotons), dir))
        wavelengths = np.random.uniform(400,800,size=nphotons)
        photons = event.Photons(pos, dir, pol, wavelengths)
        gpu_photons = gpu.GPUPhotons(photons)

        t0 = time.time()
        gpu_photons.propagate(gpu_geometry, rng_states, nthreads_per_block,
                              max_blocks)
        cuda.Context.get_current().synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nphotons/ufloat((np.mean(run_times),np.std(run_times)))

@tools.profile_if_possible
def pdf(gpu_geometry, max_pmt_id, npdfs=10, nevents=100, nreps=16, ndaq=1,
        nthreads_per_block=64, max_blocks=1024):
    """
    Returns the average number of 100 MeV events per second that can be
    histogrammed per second.

    Args:
        - gpu_instance, chroma.gpu.GPU
            The GPU instance passed to the GPUGeometry constructor.
        - max_pmt_id, int
            The channel number of the highest PMT
        - npdfs, int
            The number of pdf generations to average.
        - nevents, int
            The number of 100 MeV events to generate for each PDF.
        - nreps, int
            The number of times to propagate each event and add to PDF
        - ndaq, int
            The number of times to run the DAQ simulation on the propagated
            event and add it to the PDF.
    """
    rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)

    gpu_daq = gpu.GPUDaq(gpu_geometry, max_pmt_id)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf(max_pmt_id, 100, (-0.5, 999.5), 10, (-0.5, 9.5))

    run_times = []
    for i in tools.progress(range(npdfs)):
        t0 = time.time()
        gpu_pdf.clear_pdf()

        vertex_gen = generator.vertex.constant_particle_gun('e-', (0,0,0),
                                                            (1,0,0), 100)
        vertex_iter = itertools.islice(vertex_gen, nevents)

        for ev in g4generator.generate_events(vertex_iter):
            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)

            gpu_photons.propagate(gpu_geometry, rng_states,
                                  nthreads_per_block, max_blocks)
            for gpu_photon_slice in gpu_photons.iterate_copies():
                for idaq in xrange(ndaq):
                    gpu_channels = gpu_daq.acquire(gpu_photon_slice, rng_states,
                                                   nthreads_per_block, max_blocks)
                    gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block)

        hitcount, pdf = gpu_pdf.get_pdfs()

        elapsed = time.time() - t0

        if i > 0:
            # first kernel call incurs some driver overhead
            run_times.append(elapsed)

    return nevents*nreps*ndaq/ufloat((np.mean(run_times),np.std(run_times)))

if __name__ == '__main__':
    from chroma import detectors
    import gc

    lbne = detectors.lbne()
    lbne.build(bits=11)

    context = gpu.create_cuda_context()
    gpu_geometry = gpu.GPUGeometry(lbne)
    
    print '%s ray intersections/sec.' % \
        tools.ufloat_to_str(intersect(gpu_geometry))
    # run garbage collection since there is a reference loop
    # in the GPUArray class.
    gc.collect()
    print '%s photons loaded/sec.' % tools.ufloat_to_str(load_photons())
    gc.collect()
    print '%s photons propagated/sec.' % \
        tools.ufloat_to_str(propagate(gpu_geometry))
    gc.collect()
    print '%s 100 MeV events histogrammed/s' % \
        tools.ufloat_to_str(pdf(gpu_geometry, max(lbne.pmtids)))

    context.pop()