diff options
| -rw-r--r-- | chroma/cuda/pdf.cu | 139 | ||||
| -rw-r--r-- | chroma/gpu/pdf.py | 155 | ||||
| -rw-r--r-- | chroma/likelihood.py | 78 | ||||
| -rw-r--r-- | chroma/sim.py | 55 | 
4 files changed, 422 insertions, 5 deletions
| diff --git a/chroma/cuda/pdf.cu b/chroma/cuda/pdf.cu index c9c2ea4..9f547d0 100644 --- a/chroma/cuda/pdf.cu +++ b/chroma/cuda/pdf.cu @@ -122,4 +122,143 @@ accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit,      nearest_mc[offset+entry+1] = distance;  } + +__global__ void +accumulate_moments(int time_only, int nchannels, +		   float *mc_time, +		   unsigned int *mc_charge, // quirk of DAQ! +		   float tmin, float tmax, +		   float qmin, float qmax, +		   unsigned int *mom0, +		   float *t_mom1, +		   float *t_mom2, +		   float *q_mom1, +		   float *q_mom2) + +{ +    int id = threadIdx.x + blockDim.x * blockIdx.x; +	 +    if (id >= nchannels) +	return; +	 +    // Was this channel hit in the Monte Carlo? +    float channel_mc_time = mc_time[id]; + +    // Is this channel inside the range of the PDF? +    if (time_only) { +	if (channel_mc_time < tmin || channel_mc_time > tmax) +	    return;  // Nothing else to do +	 +	mom0[id] += 1; +	t_mom1[id] += channel_mc_time; +	t_mom2[id] += channel_mc_time*channel_mc_time; +    } +    else { // time and charge PDF +	float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer +	 +	if (channel_mc_time < tmin || channel_mc_time > tmax || +	    channel_mc_charge < qmin || channel_mc_charge > qmax) +	    return;  // Nothing else to do + +	mom0[id] += 1; +	t_mom1[id] += channel_mc_time; +	t_mom2[id] += channel_mc_time*channel_mc_time; +	q_mom1[id] += channel_mc_charge; +	q_mom2[id] += channel_mc_charge*channel_mc_charge; +    } +} + +static const float invroot2 = 0.70710678118654746f; // 1/sqrt(2) +static const float rootPiBy2 = 1.2533141373155001f; // sqrt(M_PI/2) + +__global__ void +accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit, +		       float *event_time, float *event_charge, float *mc_time, +		       unsigned int *mc_charge, // quirk of DAQ! +		       float tmin, float tmax, +		       float qmin, float qmax, +		       float *inv_time_bandwidths, +		       float *inv_charge_bandwidths, +		       unsigned int *hitcount, +		       float *pdf_values) +{ +    int id = threadIdx.x + blockDim.x * blockIdx.x; +	 +    if (id >= nchannels) +	return; +	 +    // Was this channel hit in the Monte Carlo? +    float channel_mc_time = mc_time[id]; +	 +    // Is this channel inside the range of the PDF? +    if (time_only) { +	if (channel_mc_time < tmin || channel_mc_time > tmax) +	    return;  // Nothing else to do +		 +	// This MC information is contained in the PDF +	hitcount[id] += 1; +	   +	// Was this channel hit in the event-of-interest? +	int channel_event_hit = event_hit[id]; +	if (!channel_event_hit) +	    return; // No need to update PDF value for unhit channel + +	// Kernel argument +	float channel_event_time = event_time[id]; +	float inv_bandwidth = inv_time_bandwidths[id]; +	float arg = (channel_mc_time - channel_event_time) * inv_bandwidth; + +	// evaluate 1D Gaussian normalized within time window +	float term = expf(-0.5f * arg * arg) * inv_bandwidth; + +	float norm = tmax - tmin; +	if (inv_bandwidth > 0.0f) { +	  float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2; +	  float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; +	  norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; +	} +	pdf_values[id] += term / norm; +    } +    else { // time and charge PDF +	float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer +	 +	if (channel_mc_time < tmin || channel_mc_time > tmax || +	    channel_mc_charge < qmin || channel_mc_charge > qmax) +	    return;  // Nothing else to do +		 +	// This MC information is contained in the PDF +	hitcount[id] += 1; +	   +	// Was this channel hit in the event-of-interest? +	int channel_event_hit = event_hit[id]; +	if (!channel_event_hit) +	    return; // No need to update PDF value for unhit channel + + +	// Kernel argument: time dim +	float channel_event_obs = event_time[id]; +	float inv_bandwidth = inv_time_bandwidths[id]; +	float arg = (channel_mc_time - channel_event_obs) * inv_bandwidth; + +	float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2; +	float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; +	float norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; + +	// Kernel argument: charge dim +	channel_event_obs = event_time[id]; +	inv_bandwidth = inv_time_bandwidths[id]; +	arg *= (channel_mc_charge - channel_event_obs) * inv_bandwidth; + +	loarg = (tmin - channel_mc_charge)*inv_bandwidth*invroot2; +	hiarg = (tmax - channel_mc_charge)*inv_bandwidth*invroot2; +	norm *= (erff(hiarg) - erff(loarg)) * rootPiBy2; + +	// evaluate 2D Gaussian normalized within time window +	float term = expf(-0.5f * arg * arg); + +	pdf_values[id] += term / norm; +    } +} + +  } // extern "C" diff --git a/chroma/gpu/pdf.py b/chroma/gpu/pdf.py index 0ad78d2..ba0d2af 100644 --- a/chroma/gpu/pdf.py +++ b/chroma/gpu/pdf.py @@ -3,6 +3,161 @@ from pycuda import gpuarray as ga  from chroma.gpu.tools import get_cu_module, cuda_options, GPUFuncs +class GPUKernelPDF(object): +    def __init__(self): +        self.module = get_cu_module('pdf.cu', options=cuda_options, +                                    include_source_directory=False) +        self.gpu_funcs = GPUFuncs(self.module) + +    def setup_moments(self, nchannels, trange, qrange, time_only=True): +        """Setup GPU arrays to accumulate moments and eventually +        compute a kernel estimate of PDF values for each hit channel. + +            trange: (float, float) +              Range of time dimension in PDF +            qrange: (float, float) +              Range of charge dimension in PDF +            time_only: bool +              If True, only the time observable will be used in the PDF. +        """ +        self.hitcount_gpu = ga.zeros(nchannels, dtype=np.uint32) +        self.tmom1_gpu = ga.zeros(nchannels, dtype=np.float32) +        self.tmom2_gpu = ga.zeros(nchannels, dtype=np.float32) +        self.qmom1_gpu = ga.zeros(nchannels, dtype=np.float32) +        self.qmom2_gpu = ga.zeros(nchannels, dtype=np.float32) + +        self.trange = trange +        self.qrange = qrange +        self.time_only = time_only + +    def clear_moments(self): +        "Reset PDF evaluation counters to start accumulating new Monte Carlo." +        self.hitcount_gpu.fill(0) +        self.tmom1_gpu.fill(0.0) +        self.tmom2_gpu.fill(0.0) +        self.qmom1_gpu.fill(0.0) +        self.qmom2_gpu.fill(0.0) + +    def accumulate_moments(self, gpuchannels, nthreads_per_block=64): +        """Add the most recent results of run_daq() to the accumulate of  +        moments for future bandwidth calculation.""" +        self.gpu_funcs.accumulate_moments(np.int32(self.time_only), +                                          np.int32(len(gpuchannels.t)), +                                          gpuchannels.t, +                                          gpuchannels.q, +                                          np.float32(self.trange[0]), +                                          np.float32(self.trange[1]), +                                          np.float32(self.qrange[0]), +                                          np.float32(self.qrange[1]), +                                          self.hitcount_gpu, +                                          self.tmom1_gpu, +                                          self.tmom2_gpu, +                                          self.qmom1_gpu, +                                          self.qmom2_gpu, +                                          block=(nthreads_per_block,1,1),  +                                          grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) +         +    def compute_bandwidth(self, scale_factor=1.0): +        """Use the MC information accumulated by accumulate_moments() to +        estimate the best bandwidth to use when kernel estimating.""" + +        hitcount = self.hitcount_gpu.get() +        mom0 = np.maximum(hitcount, 1) +        tmom1 = self.tmom1_gpu.get() +        tmom2 = self.tmom2_gpu.get() + +        tmean = tmom1 / mom0 +        tvar = np.maximum(tmom2 / mom0 - tmean**2, 0.0) # roundoff can go neg +        trms = tvar**0.5 + +        if self.time_only: +            d = 1 +        else: +            d = 2 +        dimensionality_factor = ((4.0/(d+2)) / (mom0/scale_factor))**(-1.0/(d+4)) +                                  +        time_bandwidths = dimensionality_factor * trms  +        inv_time_bandwidths = np.zeros_like(time_bandwidths) +        inv_time_bandwidths[time_bandwidths  > 0] = time_bandwidths[time_bandwidths > 0] ** -1 +        #inv_time_bandwidths /= 4.0 + +        # precompute inverse to speed up GPU evaluation +        self.inv_time_bandwidths_gpu = ga.to_gpu( +            inv_time_bandwidths.astype(np.float32) +            ) + +        # Compute charge bandwidths if needed +        if self.time_only: +            self.inv_charge_bandwidths_gpu = ga.empty_like( +                self.inv_time_bandwidths_gpu +                ) +            self.inv_charge_bandwidths_gpu.fill(0.0) +        else: +            qmom1 = self.qmom1_gpu.get() +            qmom2 = self.qmom2_gpu.get() + +            qmean = qmom1 / mom0 +            qrms = (qmom2 / mom0 - qmean**2)**0.5 +            charge_bandwidths = dimensionality_factor * qrms + +            # precompute inverse to speed up GPU evaluation +            self.inv_charge_bandwidths_gpu = ga.to_gpu(  +                (charge_bandwidths**-1).astype(np.float32) +                ) + +    def setup_kernel(self, event_hit, event_time, event_charge): +        """Setup GPU arrays to accumulate moments and eventually +        compute a kernel estimate of PDF values for each hit channel. + +            event_hit: ndarray +              Hit or not-hit status for each channel in the detector. +            event_time: ndarray +              Hit time for each channel in the detector.  If channel  +              not hit, the time will be ignored. +            event_charge: ndarray +              Integrated charge for each channel in the detector. +              If channel not hit, the charge will be ignored. +        """ +        self.event_hit_gpu = ga.to_gpu(event_hit.astype(np.uint32)) +        self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32)) +        self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32)) +        self.hitcount_gpu.fill(0) +        self.pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32) + +    def clear_kernel(self): +        self.hitcount_gpu.fill(0) +        self.pdf_values_gpu.fill(0.0) +             +    def accumulate_kernel(self, gpuchannels, nthreads_per_block=64): +        "Add the most recent results of run_daq() to the kernel PDF evaluation." +        self.gpu_funcs.accumulate_kernel_eval(np.int32(self.time_only), +                                              np.int32(len(self.event_hit_gpu)), +                                              self.event_hit_gpu, +                                              self.event_time_gpu, +                                              self.event_charge_gpu, +                                              gpuchannels.t, +                                              gpuchannels.q, +                                              np.float32(self.trange[0]), +                                              np.float32(self.trange[1]), +                                              np.float32(self.qrange[0]), +                                              np.float32(self.qrange[1]), +                                              self.inv_time_bandwidths_gpu, +                                              self.inv_charge_bandwidths_gpu, +                                              self.hitcount_gpu, +                                              self.pdf_values_gpu, +                                              block=(nthreads_per_block,1,1),  +                                              grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) + + +    def get_kernel_eval(self): +        evhit = self.event_hit_gpu.get().astype(bool) +        hitcount = self.hitcount_gpu.get() + +        pdf_values = self.pdf_values_gpu.get() +        pdf_values /= np.maximum(1, hitcount) # avoid divide by zero + +        return hitcount, pdf_values, np.zeros_like(pdf_values) +  class GPUPDF(object):      def __init__(self):          self.module = get_cu_module('pdf.cu', options=cuda_options, diff --git a/chroma/likelihood.py b/chroma/likelihood.py index 49b04a7..c69dc72 100644 --- a/chroma/likelihood.py +++ b/chroma/likelihood.py @@ -1,4 +1,5 @@  import numpy as np +from math import sqrt  from uncertainties import ufloat, unumpy  from itertools import islice, izip, repeat  from chroma.tools import profile_if_possible @@ -78,20 +79,89 @@ class Likelihood(object):          # NLL calculation: note that negation is at the end          # Start with the probabilties of hitting (or not) the channels -        hit_channel_prob = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit]).sum() +        hit_channel_prob = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit])[1:].sum()          hit_channel_prob_uncert = ( (ntotal * hit_prob * (1.0 - hit_prob)) / hit_prob**2 ).sum()**0.5          log_likelihood = ufloat((hit_channel_prob, 0.0)) +        log_likelihood = ufloat((0.0,0.0)) # FIXME: skipping hit/not hit probabilities for now          # Then include the probability densities of the observed          # charges and times.          hit_pdf_ufloat = unumpy.uarray((pdf_prob[self.event.channels.hit],                                          pdf_prob_uncert[self.event.channels.hit]))          log_likelihood += unumpy.log(hit_pdf_ufloat).sum() +         +        return -log_likelihood, pdf_prob + +    def setup_kernel(self, vertex_generator, nevals, nreps, ndaq, oversample_factor): +        bandwidth_generator = islice(vertex_generator, nevals*oversample_factor) -        return -log_likelihood +        self.sim.setup_kernel(len(self.event.channels.hit), +                              bandwidth_generator, +                              self.trange, +                              self.qrange, +                              nreps=nreps, +                              ndaq=ndaq, +                              time_only=self.time_only, +                              scale_factor=oversample_factor) +         + +    @profile_if_possible +    def eval_kernel(self, vertex_generator, nevals, nreps=16, ndaq=50, navg=10): +        """ +        Return the negative log likelihood that the detector event set in the +        constructor or by set_event() was the result of a particle generated +        by `vertex_generator`.  If `nreps` set to > 1, each set of photon +        vertices will be propagated `nreps` times. +        """ +        ntotal = nevals * nreps * ndaq + +        mom1 = 0.0 +        mom2 = 0.0 +        for i in xrange(navg): +            kernel_generator = islice(vertex_generator, nevals) +            hitcount, pdf_prob, pdf_prob_uncert = \ +                self.sim.eval_kernel(self.event.channels, +                                     kernel_generator, +                                     self.trange,  +                                     self.qrange, +                                     nreps=nreps, +                                     ndaq=ndaq, +                                     time_only=self.time_only) +         +            # Normalize probabilities and put a floor to keep the log finite +            hit_prob = hitcount.astype(np.float32) / ntotal +            hit_prob = np.maximum(hit_prob, 0.5 / ntotal) + +            # Set all zero or nan probabilities to limiting PDF value +            bad_value = (pdf_prob <= 0.0) | np.isnan(pdf_prob) +            if self.time_only: +                pdf_floor = 1.0 / (self.trange[1] - self.trange[0]) +            else: +                pdf_floor = 1.0 / (self.trange[1] - self.trange[0]) / (self.qrange[1] - self.qrange[0]) +            pdf_prob[bad_value] = pdf_floor +            pdf_prob_uncert[bad_value] = pdf_floor + +            print 'channels with no data:', (bad_value & self.event.channels.hit).astype(int).sum() + +            # NLL calculation: note that negation is at the end +            # Start with the probabilties of hitting (or not) the channels +            log_likelihood = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit])[1:].sum() +            log_likelihood = 0.0 # FIXME: Skipping hit/not-hit probabilities for now + +            # Then include the probability densities of the observed +            # charges and times. +            log_likelihood += np.log(pdf_prob[self.event.channels.hit]).sum() +            print 'll', log_likelihood +            mom1 += log_likelihood +            mom2 += log_likelihood**2 +         +        avg_like = mom1 / navg +        rms_like = (mom2 / navg - avg_like**2)**0.5 +        # Don't forget to return a negative log likelihood +        return ufloat((-avg_like, rms_like/sqrt(navg))), pdf_prob  if __name__ == '__main__': -    from chroma import detectors +    from chroma.demo import detector as build_detector      from chroma.sim import Simulation      from chroma.generator import constant_particle_gun      from chroma import tools @@ -99,7 +169,7 @@ if __name__ == '__main__':      tools.enable_debug_on_crash() -    detector = detectors.find('lbne') +    detector = build_detector()      sim = Simulation(detector, seed=0)      event = sim.simulate(islice(constant_particle_gun('e-',(0,0,0),(1,0,0),100.0), 1)).next() diff --git a/chroma/sim.py b/chroma/sim.py index 2e8f2f9..d15c02d 100644 --- a/chroma/sim.py +++ b/chroma/sim.py @@ -44,6 +44,7 @@ class Simulation(object):          self.gpu_geometry = gpu.GPUGeometry(detector)          self.gpu_daq = gpu.GPUDaq(self.gpu_geometry, max(self.detector.pmtids))          self.gpu_pdf = gpu.GPUPDF() +        self.gpu_pdf_kernel = gpu.GPUKernelPDF()          self.rng_states = gpu.get_rng_states(self.nthreads_per_block*self.max_blocks, seed=self.seed) @@ -115,7 +116,7 @@ class Simulation(object):          return self.gpu_pdf.get_pdfs() -    def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=20, nreps=1, ndaq=1, time_only=True): +    def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=200, nreps=1, ndaq=1, time_only=True):          """Returns tuple: 1D array of channel hit counts, 1D array of PDF          probability densities."""          self.gpu_pdf.setup_pdf_eval(event_channels.hit, @@ -145,5 +146,57 @@ class Simulation(object):          return self.gpu_pdf.get_pdf_eval() +    def setup_kernel(self, nchannels, bandwidth_iterable, +                         trange, qrange,  +                         nreps=1, ndaq=1, time_only=True, scale_factor=1.0): +        '''Call this before calling eval_pdf_kernel().  Sets up the +        event information and computes an appropriate kernel bandwidth''' +        self.gpu_pdf_kernel.setup_moments(nchannels, trange, qrange, +                                          time_only=True) +        # Compute bandwidth +        first_element, bandwidth_iterable = itertoolset.peek(bandwidth_iterable) +        if isinstance(first_element, event.Event): +            bandwidth_iterable = \ +                self.photon_generator.generate_events(bandwidth_iterable) +        for ev in bandwidth_iterable: +            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) +            gpu_photons.propagate(self.gpu_geometry, self.rng_states, +                                  nthreads_per_block=self.nthreads_per_block, +                                  max_blocks=self.max_blocks) +            for gpu_photon_slice in gpu_photons.iterate_copies(): +                for idaq in xrange(ndaq): +                    gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) +                    self.gpu_pdf_kernel.accumulate_moments(gpu_channels) +             +        self.gpu_pdf_kernel.compute_bandwidth(scale_factor=scale_factor) + +    def eval_kernel(self, event_channels,  +                    kernel_iterable, +                    trange, qrange,  +                    nreps=1, ndaq=1, naverage=1, time_only=True): +        """Returns tuple: 1D array of channel hit counts, 1D array of PDF +        probability densities.""" + +        self.gpu_pdf_kernel.setup_kernel(event_channels.hit, +                                         event_channels.t, +                                         event_channels.q) +        first_element, kernel_iterable = itertoolset.peek(kernel_iterable) +        if isinstance(first_element, event.Event): +            kernel_iterable = \ +                self.photon_generator.generate_events(kernel_iterable) + +        # Evaluate likelihood using this bandwidth +        for ev in kernel_iterable: +            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) +            gpu_photons.propagate(self.gpu_geometry, self.rng_states, +                                  nthreads_per_block=self.nthreads_per_block, +                                  max_blocks=self.max_blocks) +            for gpu_photon_slice in gpu_photons.iterate_copies(): +                for idaq in xrange(ndaq): +                    gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) +                    self.gpu_pdf_kernel.accumulate_kernel(gpu_channels) +         +        return self.gpu_pdf_kernel.get_kernel_eval() +      def __del__(self):          self.context.pop() | 
