diff options
-rw-r--r-- | chroma/cuda/pdf.cu | 139 | ||||
-rw-r--r-- | chroma/gpu/pdf.py | 155 | ||||
-rw-r--r-- | chroma/likelihood.py | 78 | ||||
-rw-r--r-- | chroma/sim.py | 55 |
4 files changed, 422 insertions, 5 deletions
diff --git a/chroma/cuda/pdf.cu b/chroma/cuda/pdf.cu index c9c2ea4..9f547d0 100644 --- a/chroma/cuda/pdf.cu +++ b/chroma/cuda/pdf.cu @@ -122,4 +122,143 @@ accumulate_pdf_eval(int time_only, int nchannels, unsigned int *event_hit, nearest_mc[offset+entry+1] = distance; } + +__global__ void +accumulate_moments(int time_only, int nchannels, + float *mc_time, + unsigned int *mc_charge, // quirk of DAQ! + float tmin, float tmax, + float qmin, float qmax, + unsigned int *mom0, + float *t_mom1, + float *t_mom2, + float *q_mom1, + float *q_mom2) + +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id >= nchannels) + return; + + // Was this channel hit in the Monte Carlo? + float channel_mc_time = mc_time[id]; + + // Is this channel inside the range of the PDF? + if (time_only) { + if (channel_mc_time < tmin || channel_mc_time > tmax) + return; // Nothing else to do + + mom0[id] += 1; + t_mom1[id] += channel_mc_time; + t_mom2[id] += channel_mc_time*channel_mc_time; + } + else { // time and charge PDF + float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer + + if (channel_mc_time < tmin || channel_mc_time > tmax || + channel_mc_charge < qmin || channel_mc_charge > qmax) + return; // Nothing else to do + + mom0[id] += 1; + t_mom1[id] += channel_mc_time; + t_mom2[id] += channel_mc_time*channel_mc_time; + q_mom1[id] += channel_mc_charge; + q_mom2[id] += channel_mc_charge*channel_mc_charge; + } +} + +static const float invroot2 = 0.70710678118654746f; // 1/sqrt(2) +static const float rootPiBy2 = 1.2533141373155001f; // sqrt(M_PI/2) + +__global__ void +accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit, + float *event_time, float *event_charge, float *mc_time, + unsigned int *mc_charge, // quirk of DAQ! + float tmin, float tmax, + float qmin, float qmax, + float *inv_time_bandwidths, + float *inv_charge_bandwidths, + unsigned int *hitcount, + float *pdf_values) +{ + int id = threadIdx.x + blockDim.x * blockIdx.x; + + if (id >= nchannels) + return; + + // Was this channel hit in the Monte Carlo? + float channel_mc_time = mc_time[id]; + + // Is this channel inside the range of the PDF? + if (time_only) { + if (channel_mc_time < tmin || channel_mc_time > tmax) + return; // Nothing else to do + + // This MC information is contained in the PDF + hitcount[id] += 1; + + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel + + // Kernel argument + float channel_event_time = event_time[id]; + float inv_bandwidth = inv_time_bandwidths[id]; + float arg = (channel_mc_time - channel_event_time) * inv_bandwidth; + + // evaluate 1D Gaussian normalized within time window + float term = expf(-0.5f * arg * arg) * inv_bandwidth; + + float norm = tmax - tmin; + if (inv_bandwidth > 0.0f) { + float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2; + float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; + norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; + } + pdf_values[id] += term / norm; + } + else { // time and charge PDF + float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer + + if (channel_mc_time < tmin || channel_mc_time > tmax || + channel_mc_charge < qmin || channel_mc_charge > qmax) + return; // Nothing else to do + + // This MC information is contained in the PDF + hitcount[id] += 1; + + // Was this channel hit in the event-of-interest? + int channel_event_hit = event_hit[id]; + if (!channel_event_hit) + return; // No need to update PDF value for unhit channel + + + // Kernel argument: time dim + float channel_event_obs = event_time[id]; + float inv_bandwidth = inv_time_bandwidths[id]; + float arg = (channel_mc_time - channel_event_obs) * inv_bandwidth; + + float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2; + float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; + float norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; + + // Kernel argument: charge dim + channel_event_obs = event_time[id]; + inv_bandwidth = inv_time_bandwidths[id]; + arg *= (channel_mc_charge - channel_event_obs) * inv_bandwidth; + + loarg = (tmin - channel_mc_charge)*inv_bandwidth*invroot2; + hiarg = (tmax - channel_mc_charge)*inv_bandwidth*invroot2; + norm *= (erff(hiarg) - erff(loarg)) * rootPiBy2; + + // evaluate 2D Gaussian normalized within time window + float term = expf(-0.5f * arg * arg); + + pdf_values[id] += term / norm; + } +} + + } // extern "C" diff --git a/chroma/gpu/pdf.py b/chroma/gpu/pdf.py index 0ad78d2..ba0d2af 100644 --- a/chroma/gpu/pdf.py +++ b/chroma/gpu/pdf.py @@ -3,6 +3,161 @@ from pycuda import gpuarray as ga from chroma.gpu.tools import get_cu_module, cuda_options, GPUFuncs +class GPUKernelPDF(object): + def __init__(self): + self.module = get_cu_module('pdf.cu', options=cuda_options, + include_source_directory=False) + self.gpu_funcs = GPUFuncs(self.module) + + def setup_moments(self, nchannels, trange, qrange, time_only=True): + """Setup GPU arrays to accumulate moments and eventually + compute a kernel estimate of PDF values for each hit channel. + + trange: (float, float) + Range of time dimension in PDF + qrange: (float, float) + Range of charge dimension in PDF + time_only: bool + If True, only the time observable will be used in the PDF. + """ + self.hitcount_gpu = ga.zeros(nchannels, dtype=np.uint32) + self.tmom1_gpu = ga.zeros(nchannels, dtype=np.float32) + self.tmom2_gpu = ga.zeros(nchannels, dtype=np.float32) + self.qmom1_gpu = ga.zeros(nchannels, dtype=np.float32) + self.qmom2_gpu = ga.zeros(nchannels, dtype=np.float32) + + self.trange = trange + self.qrange = qrange + self.time_only = time_only + + def clear_moments(self): + "Reset PDF evaluation counters to start accumulating new Monte Carlo." + self.hitcount_gpu.fill(0) + self.tmom1_gpu.fill(0.0) + self.tmom2_gpu.fill(0.0) + self.qmom1_gpu.fill(0.0) + self.qmom2_gpu.fill(0.0) + + def accumulate_moments(self, gpuchannels, nthreads_per_block=64): + """Add the most recent results of run_daq() to the accumulate of + moments for future bandwidth calculation.""" + self.gpu_funcs.accumulate_moments(np.int32(self.time_only), + np.int32(len(gpuchannels.t)), + gpuchannels.t, + gpuchannels.q, + np.float32(self.trange[0]), + np.float32(self.trange[1]), + np.float32(self.qrange[0]), + np.float32(self.qrange[1]), + self.hitcount_gpu, + self.tmom1_gpu, + self.tmom2_gpu, + self.qmom1_gpu, + self.qmom2_gpu, + block=(nthreads_per_block,1,1), + grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) + + def compute_bandwidth(self, scale_factor=1.0): + """Use the MC information accumulated by accumulate_moments() to + estimate the best bandwidth to use when kernel estimating.""" + + hitcount = self.hitcount_gpu.get() + mom0 = np.maximum(hitcount, 1) + tmom1 = self.tmom1_gpu.get() + tmom2 = self.tmom2_gpu.get() + + tmean = tmom1 / mom0 + tvar = np.maximum(tmom2 / mom0 - tmean**2, 0.0) # roundoff can go neg + trms = tvar**0.5 + + if self.time_only: + d = 1 + else: + d = 2 + dimensionality_factor = ((4.0/(d+2)) / (mom0/scale_factor))**(-1.0/(d+4)) + + time_bandwidths = dimensionality_factor * trms + inv_time_bandwidths = np.zeros_like(time_bandwidths) + inv_time_bandwidths[time_bandwidths > 0] = time_bandwidths[time_bandwidths > 0] ** -1 + #inv_time_bandwidths /= 4.0 + + # precompute inverse to speed up GPU evaluation + self.inv_time_bandwidths_gpu = ga.to_gpu( + inv_time_bandwidths.astype(np.float32) + ) + + # Compute charge bandwidths if needed + if self.time_only: + self.inv_charge_bandwidths_gpu = ga.empty_like( + self.inv_time_bandwidths_gpu + ) + self.inv_charge_bandwidths_gpu.fill(0.0) + else: + qmom1 = self.qmom1_gpu.get() + qmom2 = self.qmom2_gpu.get() + + qmean = qmom1 / mom0 + qrms = (qmom2 / mom0 - qmean**2)**0.5 + charge_bandwidths = dimensionality_factor * qrms + + # precompute inverse to speed up GPU evaluation + self.inv_charge_bandwidths_gpu = ga.to_gpu( + (charge_bandwidths**-1).astype(np.float32) + ) + + def setup_kernel(self, event_hit, event_time, event_charge): + """Setup GPU arrays to accumulate moments and eventually + compute a kernel estimate of PDF values for each hit channel. + + event_hit: ndarray + Hit or not-hit status for each channel in the detector. + event_time: ndarray + Hit time for each channel in the detector. If channel + not hit, the time will be ignored. + event_charge: ndarray + Integrated charge for each channel in the detector. + If channel not hit, the charge will be ignored. + """ + self.event_hit_gpu = ga.to_gpu(event_hit.astype(np.uint32)) + self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32)) + self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32)) + self.hitcount_gpu.fill(0) + self.pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32) + + def clear_kernel(self): + self.hitcount_gpu.fill(0) + self.pdf_values_gpu.fill(0.0) + + def accumulate_kernel(self, gpuchannels, nthreads_per_block=64): + "Add the most recent results of run_daq() to the kernel PDF evaluation." + self.gpu_funcs.accumulate_kernel_eval(np.int32(self.time_only), + np.int32(len(self.event_hit_gpu)), + self.event_hit_gpu, + self.event_time_gpu, + self.event_charge_gpu, + gpuchannels.t, + gpuchannels.q, + np.float32(self.trange[0]), + np.float32(self.trange[1]), + np.float32(self.qrange[0]), + np.float32(self.qrange[1]), + self.inv_time_bandwidths_gpu, + self.inv_charge_bandwidths_gpu, + self.hitcount_gpu, + self.pdf_values_gpu, + block=(nthreads_per_block,1,1), + grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) + + + def get_kernel_eval(self): + evhit = self.event_hit_gpu.get().astype(bool) + hitcount = self.hitcount_gpu.get() + + pdf_values = self.pdf_values_gpu.get() + pdf_values /= np.maximum(1, hitcount) # avoid divide by zero + + return hitcount, pdf_values, np.zeros_like(pdf_values) + class GPUPDF(object): def __init__(self): self.module = get_cu_module('pdf.cu', options=cuda_options, diff --git a/chroma/likelihood.py b/chroma/likelihood.py index 49b04a7..c69dc72 100644 --- a/chroma/likelihood.py +++ b/chroma/likelihood.py @@ -1,4 +1,5 @@ import numpy as np +from math import sqrt from uncertainties import ufloat, unumpy from itertools import islice, izip, repeat from chroma.tools import profile_if_possible @@ -78,20 +79,89 @@ class Likelihood(object): # NLL calculation: note that negation is at the end # Start with the probabilties of hitting (or not) the channels - hit_channel_prob = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit]).sum() + hit_channel_prob = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit])[1:].sum() hit_channel_prob_uncert = ( (ntotal * hit_prob * (1.0 - hit_prob)) / hit_prob**2 ).sum()**0.5 log_likelihood = ufloat((hit_channel_prob, 0.0)) + log_likelihood = ufloat((0.0,0.0)) # FIXME: skipping hit/not hit probabilities for now # Then include the probability densities of the observed # charges and times. hit_pdf_ufloat = unumpy.uarray((pdf_prob[self.event.channels.hit], pdf_prob_uncert[self.event.channels.hit])) log_likelihood += unumpy.log(hit_pdf_ufloat).sum() + + return -log_likelihood, pdf_prob + + def setup_kernel(self, vertex_generator, nevals, nreps, ndaq, oversample_factor): + bandwidth_generator = islice(vertex_generator, nevals*oversample_factor) - return -log_likelihood + self.sim.setup_kernel(len(self.event.channels.hit), + bandwidth_generator, + self.trange, + self.qrange, + nreps=nreps, + ndaq=ndaq, + time_only=self.time_only, + scale_factor=oversample_factor) + + + @profile_if_possible + def eval_kernel(self, vertex_generator, nevals, nreps=16, ndaq=50, navg=10): + """ + Return the negative log likelihood that the detector event set in the + constructor or by set_event() was the result of a particle generated + by `vertex_generator`. If `nreps` set to > 1, each set of photon + vertices will be propagated `nreps` times. + """ + ntotal = nevals * nreps * ndaq + + mom1 = 0.0 + mom2 = 0.0 + for i in xrange(navg): + kernel_generator = islice(vertex_generator, nevals) + hitcount, pdf_prob, pdf_prob_uncert = \ + self.sim.eval_kernel(self.event.channels, + kernel_generator, + self.trange, + self.qrange, + nreps=nreps, + ndaq=ndaq, + time_only=self.time_only) + + # Normalize probabilities and put a floor to keep the log finite + hit_prob = hitcount.astype(np.float32) / ntotal + hit_prob = np.maximum(hit_prob, 0.5 / ntotal) + + # Set all zero or nan probabilities to limiting PDF value + bad_value = (pdf_prob <= 0.0) | np.isnan(pdf_prob) + if self.time_only: + pdf_floor = 1.0 / (self.trange[1] - self.trange[0]) + else: + pdf_floor = 1.0 / (self.trange[1] - self.trange[0]) / (self.qrange[1] - self.qrange[0]) + pdf_prob[bad_value] = pdf_floor + pdf_prob_uncert[bad_value] = pdf_floor + + print 'channels with no data:', (bad_value & self.event.channels.hit).astype(int).sum() + + # NLL calculation: note that negation is at the end + # Start with the probabilties of hitting (or not) the channels + log_likelihood = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit])[1:].sum() + log_likelihood = 0.0 # FIXME: Skipping hit/not-hit probabilities for now + + # Then include the probability densities of the observed + # charges and times. + log_likelihood += np.log(pdf_prob[self.event.channels.hit]).sum() + print 'll', log_likelihood + mom1 += log_likelihood + mom2 += log_likelihood**2 + + avg_like = mom1 / navg + rms_like = (mom2 / navg - avg_like**2)**0.5 + # Don't forget to return a negative log likelihood + return ufloat((-avg_like, rms_like/sqrt(navg))), pdf_prob if __name__ == '__main__': - from chroma import detectors + from chroma.demo import detector as build_detector from chroma.sim import Simulation from chroma.generator import constant_particle_gun from chroma import tools @@ -99,7 +169,7 @@ if __name__ == '__main__': tools.enable_debug_on_crash() - detector = detectors.find('lbne') + detector = build_detector() sim = Simulation(detector, seed=0) event = sim.simulate(islice(constant_particle_gun('e-',(0,0,0),(1,0,0),100.0), 1)).next() diff --git a/chroma/sim.py b/chroma/sim.py index 2e8f2f9..d15c02d 100644 --- a/chroma/sim.py +++ b/chroma/sim.py @@ -44,6 +44,7 @@ class Simulation(object): self.gpu_geometry = gpu.GPUGeometry(detector) self.gpu_daq = gpu.GPUDaq(self.gpu_geometry, max(self.detector.pmtids)) self.gpu_pdf = gpu.GPUPDF() + self.gpu_pdf_kernel = gpu.GPUKernelPDF() self.rng_states = gpu.get_rng_states(self.nthreads_per_block*self.max_blocks, seed=self.seed) @@ -115,7 +116,7 @@ class Simulation(object): return self.gpu_pdf.get_pdfs() - def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=20, nreps=1, ndaq=1, time_only=True): + def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=200, nreps=1, ndaq=1, time_only=True): """Returns tuple: 1D array of channel hit counts, 1D array of PDF probability densities.""" self.gpu_pdf.setup_pdf_eval(event_channels.hit, @@ -145,5 +146,57 @@ class Simulation(object): return self.gpu_pdf.get_pdf_eval() + def setup_kernel(self, nchannels, bandwidth_iterable, + trange, qrange, + nreps=1, ndaq=1, time_only=True, scale_factor=1.0): + '''Call this before calling eval_pdf_kernel(). Sets up the + event information and computes an appropriate kernel bandwidth''' + self.gpu_pdf_kernel.setup_moments(nchannels, trange, qrange, + time_only=True) + # Compute bandwidth + first_element, bandwidth_iterable = itertoolset.peek(bandwidth_iterable) + if isinstance(first_element, event.Event): + bandwidth_iterable = \ + self.photon_generator.generate_events(bandwidth_iterable) + for ev in bandwidth_iterable: + gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) + gpu_photons.propagate(self.gpu_geometry, self.rng_states, + nthreads_per_block=self.nthreads_per_block, + max_blocks=self.max_blocks) + for gpu_photon_slice in gpu_photons.iterate_copies(): + for idaq in xrange(ndaq): + gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_pdf_kernel.accumulate_moments(gpu_channels) + + self.gpu_pdf_kernel.compute_bandwidth(scale_factor=scale_factor) + + def eval_kernel(self, event_channels, + kernel_iterable, + trange, qrange, + nreps=1, ndaq=1, naverage=1, time_only=True): + """Returns tuple: 1D array of channel hit counts, 1D array of PDF + probability densities.""" + + self.gpu_pdf_kernel.setup_kernel(event_channels.hit, + event_channels.t, + event_channels.q) + first_element, kernel_iterable = itertoolset.peek(kernel_iterable) + if isinstance(first_element, event.Event): + kernel_iterable = \ + self.photon_generator.generate_events(kernel_iterable) + + # Evaluate likelihood using this bandwidth + for ev in kernel_iterable: + gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps) + gpu_photons.propagate(self.gpu_geometry, self.rng_states, + nthreads_per_block=self.nthreads_per_block, + max_blocks=self.max_blocks) + for gpu_photon_slice in gpu_photons.iterate_copies(): + for idaq in xrange(ndaq): + gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) + self.gpu_pdf_kernel.accumulate_kernel(gpu_channels) + + return self.gpu_pdf_kernel.get_kernel_eval() + def __del__(self): self.context.pop() |