diff options
-rw-r--r-- | chroma/cuda/pdf.cu | 40 | ||||
-rw-r--r-- | chroma/gpu/pdf.py | 40 | ||||
-rw-r--r-- | chroma/likelihood.py | 25 | ||||
-rw-r--r-- | chroma/sim.py | 12 |
4 files changed, 75 insertions, 42 deletions
diff --git a/chroma/cuda/pdf.cu b/chroma/cuda/pdf.cu index 41c4b4a..bf42742 100644 --- a/chroma/cuda/pdf.cu +++ b/chroma/cuda/pdf.cu @@ -180,7 +180,9 @@ accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit, float *inv_time_bandwidths, float *inv_charge_bandwidths, unsigned int *hitcount, - float *pdf_values) + float *time_pdf_values, + float *charge_pdf_values) + { int id = threadIdx.x + blockDim.x * blockIdx.x; @@ -217,7 +219,7 @@ accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit, float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; } - pdf_values[id] += term / norm; + time_pdf_values[id] += term / norm; } else { // time and charge PDF float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer @@ -240,23 +242,31 @@ accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit, float inv_bandwidth = inv_time_bandwidths[id]; float arg = (channel_mc_time - channel_event_obs) * inv_bandwidth; - float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2; - float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; - float norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; + float norm = tmax - tmin; + if (inv_bandwidth > 0.0f) { + float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2; + float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2; + norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; + } + float term = expf(-0.5f * arg * arg); - // Kernel argument: charge dim - channel_event_obs = event_time[id]; - inv_bandwidth = inv_time_bandwidths[id]; - arg *= (channel_mc_charge - channel_event_obs) * inv_bandwidth; + time_pdf_values[id] += term / norm; - loarg = (tmin - channel_mc_charge)*inv_bandwidth*invroot2; - hiarg = (tmax - channel_mc_charge)*inv_bandwidth*invroot2; - norm *= (erff(hiarg) - erff(loarg)) * rootPiBy2; + // Kernel argument: charge dim + channel_event_obs = event_charge[id]; + inv_bandwidth = inv_charge_bandwidths[id]; + arg = (channel_mc_charge - channel_event_obs) * inv_bandwidth; + + norm = qmax - qmin; + if (inv_bandwidth > 0.0f) { + float loarg = (qmin - channel_mc_charge)*inv_bandwidth*invroot2; + float hiarg = (qmax - channel_mc_charge)*inv_bandwidth*invroot2; + norm = (erff(hiarg) - erff(loarg)) * rootPiBy2; + } - // evaluate 2D Gaussian normalized within time window - float term = expf(-0.5f * arg * arg); + term = expf(-0.5f * arg * arg); - pdf_values[id] += term / norm; + charge_pdf_values[id] += term / norm; } } diff --git a/chroma/gpu/pdf.py b/chroma/gpu/pdf.py index 5fb4b1a..bd3c448 100644 --- a/chroma/gpu/pdf.py +++ b/chroma/gpu/pdf.py @@ -1,6 +1,6 @@ import numpy as np from pycuda import gpuarray as ga - +import pycuda.driver as cuda from chroma.gpu.tools import get_cu_module, cuda_options, GPUFuncs class GPUKernelPDF(object): @@ -56,11 +56,15 @@ class GPUKernelPDF(object): self.qmom2_gpu, block=(nthreads_per_block,1,1), grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) + cuda - def compute_bandwidth(self, scale_factor=1.0): + def compute_bandwidth(self, event_hit, event_time, event_charge, + scale_factor=1.0): """Use the MC information accumulated by accumulate_moments() to estimate the best bandwidth to use when kernel estimating.""" + rho = 1.0 + hitcount = self.hitcount_gpu.get() mom0 = np.maximum(hitcount, 1) tmom1 = self.tmom1_gpu.get() @@ -75,11 +79,10 @@ class GPUKernelPDF(object): else: d = 2 dimensionality_factor = ((4.0/(d+2)) / (mom0/scale_factor))**(-1.0/(d+4)) - - time_bandwidths = dimensionality_factor * trms + gaussian_density = np.minimum(1.0/trms, (1.0/np.sqrt(2.0*np.pi)) * np.exp(-0.5*((event_time - tmean)/trms)) / trms) + time_bandwidths = dimensionality_factor / gaussian_density * rho inv_time_bandwidths = np.zeros_like(time_bandwidths) inv_time_bandwidths[time_bandwidths > 0] = time_bandwidths[time_bandwidths > 0] ** -1 - #inv_time_bandwidths /= 4.0 # precompute inverse to speed up GPU evaluation self.inv_time_bandwidths_gpu = ga.to_gpu( @@ -98,7 +101,10 @@ class GPUKernelPDF(object): qmean = qmom1 / mom0 qrms = (qmom2 / mom0 - qmean**2)**0.5 - charge_bandwidths = dimensionality_factor * qrms + + gaussian_density = np.minimum(1.0/qrms, (1.0/np.sqrt(2.0*np.pi)) * np.exp(-0.5*((event_charge - qmean)/qrms)) / qrms) + + charge_bandwidths = dimensionality_factor / gaussian_density * rho # precompute inverse to speed up GPU evaluation self.inv_charge_bandwidths_gpu = ga.to_gpu( @@ -122,11 +128,13 @@ class GPUKernelPDF(object): self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32)) self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32)) self.hitcount_gpu.fill(0) - self.pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32) + self.time_pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32) + self.charge_pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32) def clear_kernel(self): self.hitcount_gpu.fill(0) - self.pdf_values_gpu.fill(0.0) + self.time_pdf_values_gpu.fill(0.0) + self.charge_pdf_values_gpu.fill(0.0) def accumulate_kernel(self, gpuchannels, nthreads_per_block=64): "Add the most recent results of run_daq() to the kernel PDF evaluation." @@ -144,18 +152,26 @@ class GPUKernelPDF(object): self.inv_time_bandwidths_gpu, self.inv_charge_bandwidths_gpu, self.hitcount_gpu, - self.pdf_values_gpu, + self.time_pdf_values_gpu, + self.charge_pdf_values_gpu, block=(nthreads_per_block,1,1), grid=(len(gpuchannels.t)//nthreads_per_block+1,1)) def get_kernel_eval(self): - evhit = self.event_hit_gpu.get().astype(bool) hitcount = self.hitcount_gpu.get() + hit = self.event_hit_gpu.get().astype(bool) + time_pdf_values = self.time_pdf_values_gpu.get() + time_pdf_values /= np.maximum(1, hitcount) # avoid divide by zero - pdf_values = self.pdf_values_gpu.get() - pdf_values /= np.maximum(1, hitcount) # avoid divide by zero + charge_pdf_values = self.charge_pdf_values_gpu.get() + charge_pdf_values /= np.maximum(1, hitcount) # avoid divide by zero + if self.time_only: + pdf_values = time_pdf_values + else: + pdf_values = time_pdf_values * charge_pdf_values + return hitcount, pdf_values, np.zeros_like(pdf_values) class GPUPDF(object): diff --git a/chroma/likelihood.py b/chroma/likelihood.py index 9c4010d..5e8e678 100644 --- a/chroma/likelihood.py +++ b/chroma/likelihood.py @@ -7,7 +7,7 @@ from chroma.tools import profile_if_possible class Likelihood(object): "Class to evaluate likelihoods for detector events." def __init__(self, sim, event=None, tbins=100, trange=(-0.5, 999.5), - qbins=10, qrange=(-0.5, 9.5), time_only=True): + qbins=10, qrange=(-0.5, 49.5), time_only=True): """ Args: - sim: chroma.sim.Simulation @@ -46,7 +46,8 @@ class Likelihood(object): time+charge) probability density for each channel using the variable bin window method. - Returns: (array of hit probabilities, array of PDF values) + Returns: (array of hit probabilities, array of PDF values, + array of PDF uncertainties) ''' ntotal = nevals * nreps * ndaq @@ -108,7 +109,7 @@ class Likelihood(object): def setup_kernel(self, vertex_generator, nevals, nreps, ndaq, oversample_factor): bandwidth_generator = islice(vertex_generator, nevals*oversample_factor) - self.sim.setup_kernel(len(self.event.channels.hit), + self.sim.setup_kernel(self.event.channels, bandwidth_generator, self.trange, self.qrange, @@ -116,7 +117,6 @@ class Likelihood(object): ndaq=ndaq, time_only=self.time_only, scale_factor=oversample_factor) - @profile_if_possible def eval_kernel(self, vertex_generator, nevals, nreps=16, ndaq=50, navg=10): @@ -128,6 +128,7 @@ class Likelihood(object): """ ntotal = nevals * nreps * ndaq + mom0 = 0 mom1 = 0.0 mom2 = 0.0 for i in xrange(navg): @@ -143,7 +144,7 @@ class Likelihood(object): # Normalize probabilities and put a floor to keep the log finite hit_prob = hitcount.astype(np.float32) / ntotal - hit_prob = np.maximum(hit_prob, 0.5 / ntotal) + hit_prob[self.event.channels.hit] = np.maximum(hit_prob[self.event.channels.hit], 0.5 / ntotal) # Set all zero or nan probabilities to limiting PDF value bad_value = (pdf_prob <= 0.0) | np.isnan(pdf_prob) @@ -158,20 +159,22 @@ class Likelihood(object): # NLL calculation: note that negation is at the end # Start with the probabilties of hitting (or not) the channels - log_likelihood = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit])[1:].sum() + log_likelihood = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit]).sum() log_likelihood = 0.0 # FIXME: Skipping hit/not-hit probabilities for now # Then include the probability densities of the observed # charges and times. log_likelihood += np.log(pdf_prob[self.event.channels.hit]).sum() print 'll', log_likelihood - mom1 += log_likelihood - mom2 += log_likelihood**2 + if np.isfinite(log_likelihood): + mom0 += 1 + mom1 += log_likelihood + mom2 += log_likelihood**2 - avg_like = mom1 / navg - rms_like = (mom2 / navg - avg_like**2)**0.5 + avg_like = mom1 / mom0 + rms_like = (mom2 / mom0 - avg_like**2)**0.5 # Don't forget to return a negative log likelihood - return ufloat((-avg_like, rms_like/sqrt(navg))), pdf_prob + return ufloat((-avg_like, rms_like/sqrt(mom0))) if __name__ == '__main__': from chroma.demo import detector as build_detector diff --git a/chroma/sim.py b/chroma/sim.py index 49bf142..93bcdfb 100644 --- a/chroma/sim.py +++ b/chroma/sim.py @@ -152,13 +152,14 @@ class Simulation(object): return self.gpu_pdf.get_pdf_eval() - def setup_kernel(self, nchannels, bandwidth_iterable, + def setup_kernel(self, event_channels, bandwidth_iterable, trange, qrange, nreps=1, ndaq=1, time_only=True, scale_factor=1.0): '''Call this before calling eval_pdf_kernel(). Sets up the event information and computes an appropriate kernel bandwidth''' + nchannels = len(event_channels.hit) self.gpu_pdf_kernel.setup_moments(nchannels, trange, qrange, - time_only=True) + time_only=time_only) # Compute bandwidth first_element, bandwidth_iterable = itertoolset.peek(bandwidth_iterable) if isinstance(first_element, event.Event): @@ -174,9 +175,12 @@ class Simulation(object): gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks) self.gpu_pdf_kernel.accumulate_moments(gpu_channels) - self.gpu_pdf_kernel.compute_bandwidth(scale_factor=scale_factor) + self.gpu_pdf_kernel.compute_bandwidth(event_channels.hit, + event_channels.t, + event_channels.q, + scale_factor=scale_factor) - def eval_kernel(self, event_channels, + def eval_kernel(self, event_channels, kernel_iterable, trange, qrange, nreps=1, ndaq=1, naverage=1, time_only=True): |