4 files changed, 75 insertions, 42 deletions
diff --git a/chroma/cuda/pdf.cu b/chroma/cuda/pdf.cu
index 41c4b4a..bf42742 100644
--- a/chroma/cuda/pdf.cu
+++ b/chroma/cuda/pdf.cu
@@ -180,7 +180,9 @@ accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit,
 		       float *inv_time_bandwidths,
 		       float *inv_charge_bandwidths,
 		       unsigned int *hitcount,
-		       float *pdf_values)
+		       float *time_pdf_values,
+		       float *charge_pdf_values)
+		       
 {
     int id = threadIdx.x + blockDim.x * blockIdx.x;
 	
@@ -217,7 +219,7 @@ accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit,
 	  float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2;
 	  norm = (erff(hiarg) - erff(loarg)) * rootPiBy2;
 	}
-	pdf_values[id] += term / norm;
+	time_pdf_values[id] += term / norm;
     }
     else { // time and charge PDF
 	float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer
@@ -240,23 +242,31 @@ accumulate_kernel_eval(int time_only, int nchannels, unsigned int *event_hit,
 	float inv_bandwidth = inv_time_bandwidths[id];
 	float arg = (channel_mc_time - channel_event_obs) * inv_bandwidth;
 
-	float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2;
-	float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2;
-	float norm = (erff(hiarg) - erff(loarg)) * rootPiBy2;
+	float norm = tmax - tmin;
+	if (inv_bandwidth > 0.0f) {
+	  float loarg = (tmin - channel_mc_time)*inv_bandwidth*invroot2;
+	  float hiarg = (tmax - channel_mc_time)*inv_bandwidth*invroot2;
+	  norm = (erff(hiarg) - erff(loarg)) * rootPiBy2;
+	}
+	float term = expf(-0.5f * arg * arg);
 
-	// Kernel argument: charge dim
-	channel_event_obs = event_time[id];
-	inv_bandwidth = inv_time_bandwidths[id];
-	arg *= (channel_mc_charge - channel_event_obs) * inv_bandwidth;
+	time_pdf_values[id] += term / norm;
 
-	loarg = (tmin - channel_mc_charge)*inv_bandwidth*invroot2;
-	hiarg = (tmax - channel_mc_charge)*inv_bandwidth*invroot2;
-	norm *= (erff(hiarg) - erff(loarg)) * rootPiBy2;
+	// Kernel argument: charge dim
+	channel_event_obs = event_charge[id];
+	inv_bandwidth = inv_charge_bandwidths[id];
+	arg = (channel_mc_charge - channel_event_obs) * inv_bandwidth;
+	
+	norm = qmax - qmin;
+	if (inv_bandwidth > 0.0f) {
+	  float loarg = (qmin - channel_mc_charge)*inv_bandwidth*invroot2;
+	  float hiarg = (qmax - channel_mc_charge)*inv_bandwidth*invroot2;
+	  norm = (erff(hiarg) - erff(loarg)) * rootPiBy2;
+	}
 
-	// evaluate 2D Gaussian normalized within time window
-	float term = expf(-0.5f * arg * arg);
+	term = expf(-0.5f * arg * arg);
 
-	pdf_values[id] += term / norm;
+	charge_pdf_values[id] += term / norm;
     }
 }
 
diff --git a/chroma/gpu/pdf.py b/chroma/gpu/pdf.py
index 5fb4b1a..bd3c448 100644
--- a/chroma/gpu/pdf.py
+++ b/chroma/gpu/pdf.py
@@ -1,6 +1,6 @@
 import numpy as np
 from pycuda import gpuarray as ga
-
+import pycuda.driver as cuda
 from chroma.gpu.tools import get_cu_module, cuda_options, GPUFuncs
 
 class GPUKernelPDF(object):
@@ -56,11 +56,15 @@ class GPUKernelPDF(object):
                                           self.qmom2_gpu,
                                           block=(nthreads_per_block,1,1), 
                                           grid=(len(gpuchannels.t)//nthreads_per_block+1,1))
+        cuda
         
-    def compute_bandwidth(self, scale_factor=1.0):
+    def compute_bandwidth(self, event_hit, event_time, event_charge, 
+                          scale_factor=1.0):
         """Use the MC information accumulated by accumulate_moments() to
         estimate the best bandwidth to use when kernel estimating."""
 
+        rho = 1.0
+
         hitcount = self.hitcount_gpu.get()
         mom0 = np.maximum(hitcount, 1)
         tmom1 = self.tmom1_gpu.get()
@@ -75,11 +79,10 @@ class GPUKernelPDF(object):
         else:
             d = 2
         dimensionality_factor = ((4.0/(d+2)) / (mom0/scale_factor))**(-1.0/(d+4))
-                                 
-        time_bandwidths = dimensionality_factor * trms
+        gaussian_density = np.minimum(1.0/trms, (1.0/np.sqrt(2.0*np.pi)) * np.exp(-0.5*((event_time - tmean)/trms))  / trms)
+        time_bandwidths = dimensionality_factor / gaussian_density * rho
         inv_time_bandwidths = np.zeros_like(time_bandwidths)
         inv_time_bandwidths[time_bandwidths  > 0] = time_bandwidths[time_bandwidths > 0] ** -1
-        #inv_time_bandwidths /= 4.0
 
         # precompute inverse to speed up GPU evaluation
         self.inv_time_bandwidths_gpu = ga.to_gpu(
@@ -98,7 +101,10 @@ class GPUKernelPDF(object):
 
             qmean = qmom1 / mom0
             qrms = (qmom2 / mom0 - qmean**2)**0.5
-            charge_bandwidths = dimensionality_factor * qrms
+
+            gaussian_density = np.minimum(1.0/qrms, (1.0/np.sqrt(2.0*np.pi)) * np.exp(-0.5*((event_charge - qmean)/qrms))  / qrms)
+
+            charge_bandwidths = dimensionality_factor / gaussian_density * rho
 
             # precompute inverse to speed up GPU evaluation
             self.inv_charge_bandwidths_gpu = ga.to_gpu( 
@@ -122,11 +128,13 @@ class GPUKernelPDF(object):
         self.event_time_gpu = ga.to_gpu(event_time.astype(np.float32))
         self.event_charge_gpu = ga.to_gpu(event_charge.astype(np.float32))
         self.hitcount_gpu.fill(0)
-        self.pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32)
+        self.time_pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32)
+        self.charge_pdf_values_gpu = ga.zeros(len(event_hit), dtype=np.float32)
 
     def clear_kernel(self):
         self.hitcount_gpu.fill(0)
-        self.pdf_values_gpu.fill(0.0)
+        self.time_pdf_values_gpu.fill(0.0)
+        self.charge_pdf_values_gpu.fill(0.0)
             
     def accumulate_kernel(self, gpuchannels, nthreads_per_block=64):
         "Add the most recent results of run_daq() to the kernel PDF evaluation."
@@ -144,18 +152,26 @@ class GPUKernelPDF(object):
                                               self.inv_time_bandwidths_gpu,
                                               self.inv_charge_bandwidths_gpu,
                                               self.hitcount_gpu,
-                                              self.pdf_values_gpu,
+                                              self.time_pdf_values_gpu,
+                                              self.charge_pdf_values_gpu,
                                               block=(nthreads_per_block,1,1), 
                                               grid=(len(gpuchannels.t)//nthreads_per_block+1,1))
 
 
     def get_kernel_eval(self):
-        evhit = self.event_hit_gpu.get().astype(bool)
         hitcount = self.hitcount_gpu.get()
+        hit = self.event_hit_gpu.get().astype(bool)
+        time_pdf_values = self.time_pdf_values_gpu.get()
+        time_pdf_values /= np.maximum(1, hitcount) # avoid divide by zero
 
-        pdf_values = self.pdf_values_gpu.get()
-        pdf_values /= np.maximum(1, hitcount) # avoid divide by zero
+        charge_pdf_values = self.charge_pdf_values_gpu.get()
+        charge_pdf_values /= np.maximum(1, hitcount) # avoid divide by zero
 
+        if self.time_only:
+            pdf_values = time_pdf_values
+        else:
+            pdf_values = time_pdf_values * charge_pdf_values
+        
         return hitcount, pdf_values, np.zeros_like(pdf_values)
 
 class GPUPDF(object):
diff --git a/chroma/likelihood.py b/chroma/likelihood.py
index 9c4010d..5e8e678 100644
--- a/chroma/likelihood.py
+++ b/chroma/likelihood.py
@@ -7,7 +7,7 @@ from chroma.tools import profile_if_possible
 class Likelihood(object):
     "Class to evaluate likelihoods for detector events."
     def __init__(self, sim, event=None, tbins=100, trange=(-0.5, 999.5), 
-                 qbins=10, qrange=(-0.5, 9.5), time_only=True):
+                 qbins=10, qrange=(-0.5, 49.5), time_only=True):
         """
         Args:
             - sim: chroma.sim.Simulation
@@ -46,7 +46,8 @@ class Likelihood(object):
         time+charge) probability density for each channel using the
         variable bin window method.
 
-        Returns: (array of hit probabilities, array of PDF values)
+        Returns: (array of hit probabilities, array of PDF values, 
+                  array of PDF uncertainties)
         '''
 
         ntotal = nevals * nreps * ndaq
@@ -108,7 +109,7 @@ class Likelihood(object):
     def setup_kernel(self, vertex_generator, nevals, nreps, ndaq, oversample_factor):
         bandwidth_generator = islice(vertex_generator, nevals*oversample_factor)
 
-        self.sim.setup_kernel(len(self.event.channels.hit),
+        self.sim.setup_kernel(self.event.channels,
                               bandwidth_generator,
                               self.trange,
                               self.qrange,
@@ -116,7 +117,6 @@ class Likelihood(object):
                               ndaq=ndaq,
                               time_only=self.time_only,
                               scale_factor=oversample_factor)
-        
 
     @profile_if_possible
     def eval_kernel(self, vertex_generator, nevals, nreps=16, ndaq=50, navg=10):
@@ -128,6 +128,7 @@ class Likelihood(object):
         """
         ntotal = nevals * nreps * ndaq
 
+        mom0 = 0
         mom1 = 0.0
         mom2 = 0.0
         for i in xrange(navg):
@@ -143,7 +144,7 @@ class Likelihood(object):
         
             # Normalize probabilities and put a floor to keep the log finite
             hit_prob = hitcount.astype(np.float32) / ntotal
-            hit_prob = np.maximum(hit_prob, 0.5 / ntotal)
+            hit_prob[self.event.channels.hit] = np.maximum(hit_prob[self.event.channels.hit], 0.5 / ntotal)
 
             # Set all zero or nan probabilities to limiting PDF value
             bad_value = (pdf_prob <= 0.0) | np.isnan(pdf_prob)
@@ -158,20 +159,22 @@ class Likelihood(object):
 
             # NLL calculation: note that negation is at the end
             # Start with the probabilties of hitting (or not) the channels
-            log_likelihood = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit])[1:].sum()
+            log_likelihood = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit]).sum()
             log_likelihood = 0.0 # FIXME: Skipping hit/not-hit probabilities for now
 
             # Then include the probability densities of the observed
             # charges and times.
             log_likelihood += np.log(pdf_prob[self.event.channels.hit]).sum()
             print 'll', log_likelihood
-            mom1 += log_likelihood
-            mom2 += log_likelihood**2
+            if np.isfinite(log_likelihood):
+                mom0 += 1
+                mom1 += log_likelihood
+                mom2 += log_likelihood**2
         
-        avg_like = mom1 / navg
-        rms_like = (mom2 / navg - avg_like**2)**0.5
+        avg_like = mom1 / mom0
+        rms_like = (mom2 / mom0 - avg_like**2)**0.5
         # Don't forget to return a negative log likelihood
-        return ufloat((-avg_like, rms_like/sqrt(navg))), pdf_prob
+        return ufloat((-avg_like, rms_like/sqrt(mom0)))
 
 if __name__ == '__main__':
     from chroma.demo import detector as build_detector
diff --git a/chroma/sim.py b/chroma/sim.py
index 49bf142..93bcdfb 100644
--- a/chroma/sim.py
+++ b/chroma/sim.py
@@ -152,13 +152,14 @@ class Simulation(object):
         
         return self.gpu_pdf.get_pdf_eval()
 
-    def setup_kernel(self, nchannels, bandwidth_iterable,
+    def setup_kernel(self, event_channels, bandwidth_iterable,
                          trange, qrange, 
                          nreps=1, ndaq=1, time_only=True, scale_factor=1.0):
         '''Call this before calling eval_pdf_kernel().  Sets up the
         event information and computes an appropriate kernel bandwidth'''
+        nchannels = len(event_channels.hit)
         self.gpu_pdf_kernel.setup_moments(nchannels, trange, qrange,
-                                          time_only=True)
+                                          time_only=time_only)
         # Compute bandwidth
         first_element, bandwidth_iterable = itertoolset.peek(bandwidth_iterable)
         if isinstance(first_element, event.Event):
@@ -174,9 +175,12 @@ class Simulation(object):
                     gpu_channels = self.gpu_daq.acquire(gpu_photon_slice, self.rng_states, nthreads_per_block=self.nthreads_per_block, max_blocks=self.max_blocks)
                     self.gpu_pdf_kernel.accumulate_moments(gpu_channels)
             
-        self.gpu_pdf_kernel.compute_bandwidth(scale_factor=scale_factor)
+        self.gpu_pdf_kernel.compute_bandwidth(event_channels.hit,
+                                              event_channels.t,
+                                              event_channels.q,
+                                              scale_factor=scale_factor)
 
-    def eval_kernel(self, event_channels, 
+    def eval_kernel(self, event_channels,
                     kernel_iterable,
                     trange, qrange, 
                     nreps=1, ndaq=1, naverage=1, time_only=True):