4 files changed, 280 insertions, 44 deletions
diff --git a/gpu.py b/gpu.py
index 4d2197b..ab71358 100644
--- a/gpu.py
+++ b/gpu.py
@@ -115,7 +115,8 @@ class GPU(object):
         self.daq_module = SourceModule(chroma.src.daq, options=cuda_options, no_extern_c=True)
         self.daq_funcs = CUDAFuncs(self.daq_module, 
                               ['reset_earliest_time_int', 'run_daq',
-                               'convert_sortable_int_to_float', 'bin_hits'])
+                               'convert_sortable_int_to_float', 'bin_hits',
+                               'accumulate_pdf_eval'])
 
     def print_device_usage(self):
         print 'device usage:'
@@ -422,5 +423,126 @@ class GPU(object):
         '''Returns the 1D hitcount array and the 3D [channel, time, charge] histogram'''
         return self.hitcount_gpu.get(), self.pdf_gpu.get()
 
+    def setup_pdf_eval(self, event_hit, event_time, event_charge,
+                       min_twidth, trange, min_qwidth, qrange, min_bin_content=10,
+                       time_only=True):
+        '''Setup GPU arrays to compute PDF values for the given event.
+        The pdf_eval calculation allows the PDF to be evaluated at a
+        single point for each channel as the Monte Carlo is run.  The
+        effective bin size will be as small as (`min_twidth`,
+        `min_qwidth`) around the point of interest, but will be large
+        enough to ensure that `min_bin_content` Monte Carlo events
+        fall into the bin.
+
+            event_hit: ndarray
+              Hit or not-hit status for each channel in the detector.
+            event_time: ndarray
+              Hit time for each channel in the detector.  If channel 
+              not hit, the time will be ignored.
+            event_charge: ndarray
+              Integrated charge for each channel in the detector.
+              If channel not hit, the charge will be ignored.
+
+            min_twidth: float
+              Minimum bin size in the time dimension
+            trange: (float, float)
+              Range of time dimension in PDF
+            min_qwidth: float
+              Minimum bin size in charge dimension
+            qrange: (float, float)
+              Range of charge dimension in PDF
+            min_bin_content: int
+              The bin will be expanded to include at least this many events
+            time_only: bool
+              If True, only the time observable will be used in the PDF.
+        '''
+        self.event_hit_gpu = gpuarray.to_gpu(event_hit.astype(np.uint32))
+        self.event_time_gpu = gpuarray.to_gpu(event_time.astype(np.float32))
+        self.event_charge_gpu = gpuarray.to_gpu(event_charge.astype(np.float32))
+
+        self.eval_hitcount_gpu = gpuarray.zeros(len(event_hit), dtype=np.uint32)
+        self.eval_bincount_gpu = gpuarray.zeros(len(event_hit), dtype=np.uint32)
+        self.nearest_mc_gpu = gpuarray.empty(shape=len(event_hit) * min_bin_content, 
+                                             dtype=np.float32)
+        self.nearest_mc_gpu.fill(1e9)
+
+        self.min_twidth = min_twidth
+        self.trange = trange
+        self.min_qwidth = min_qwidth
+        self.qrange = qrange
+        self.min_bin_content = min_bin_content
+        self.time_only = time_only
+
+    def clear_pdf_eval(self):
+        '''Reset PDF evaluation counters to start accumulating new Monte Carlo.'''
+        self.eval_hitcount_gpu.fill(0)
+        self.eval_bincount_gpu.fill(0)
+        self.nearest_mc_gpu.fill(1e9)
+
+    def accumulate_pdf_eval(self):
+        '''Add the most recent results of run_daq() to the PDF evaluation.'''
+        
+        self.daq_funcs.accumulate_pdf_eval(np.int32(self.time_only),
+                                           np.int32(len(self.event_hit_gpu)),
+                                           self.event_hit_gpu,
+                                           self.event_time_gpu,
+                                           self.event_charge_gpu,
+                                           self.earliest_time_gpu,
+                                           self.channel_q_gpu,
+                                           self.eval_hitcount_gpu,
+                                           self.eval_bincount_gpu,
+                                           np.float32(self.min_twidth),
+                                           np.float32(self.trange[0]),
+                                           np.float32(self.trange[1]),
+                                           np.float32(self.min_qwidth),
+                                           np.float32(self.qrange[0]),
+                                           np.float32(self.qrange[1]),
+                                           self.nearest_mc_gpu,
+                                           np.int32(self.min_bin_content),
+                                           block=(self.nthreads_per_block,1,1), 
+                                           grid=(len(self.earliest_time_gpu)//self.nthreads_per_block+1,1))
+
+    def get_pdf_eval(self):
+        evhit = self.event_hit_gpu.get().astype(bool)
+        hitcount = self.eval_hitcount_gpu.get()
+        bincount = self.eval_bincount_gpu.get()
+
+        pdf_value = np.zeros(len(hitcount), dtype=float)
+        pdf_frac_uncert = np.zeros_like(pdf_value)
+
+        # PDF value for high stats bins
+        high_stats = bincount >= self.min_bin_content
+        if high_stats.any():
+            if self.time_only:
+                pdf_value[high_stats] = bincount[high_stats].astype(float) / hitcount[high_stats] / self.min_twidth
+            else:
+                assert Exception('Unimplemented 2D (time,charge) mode!')
+
+            pdf_frac_uncert[high_stats] = 1.0/np.sqrt(bincount[high_stats])
+
+        # PDF value for low stats bins
+        low_stats = ~high_stats & (hitcount > 0) & evhit
+
+        nearest_mc = self.nearest_mc_gpu.get().reshape((len(hitcount), self.min_bin_content))
+
+        # Deal with the case where we did not even get min_bin_content events in the PDF
+        # but also clamp the lower range to ensure we don't index by a negative number in 2 lines
+        last_valid_entry = np.maximum(0, (nearest_mc < 1e9).astype(int).sum(axis=1) - 1)
+        distance = nearest_mc[np.arange(len(last_valid_entry)),last_valid_entry]
+
+        if low_stats.any():
+            if self.time_only:
+                pdf_value[low_stats] = (last_valid_entry[low_stats] + 1).astype(float) / hitcount[low_stats] / distance[low_stats] / 2.0
+            else:
+                assert Exception('Unimplemented 2D (time,charge) mode!')
+
+            pdf_frac_uncert[low_stats] = 1.0/np.sqrt(last_valid_entry[low_stats] + 1)
+
+        # PDFs with no stats got zero by default during array creation
+        
+        print 'high_stats:', high_stats.sum(), 'low_stats', low_stats.sum()
+
+        return hitcount, pdf_value, pdf_value * pdf_frac_uncert
+
     def __del__(self):
         self.context.pop()
diff --git a/likelihood.py b/likelihood.py
index 2487442..c797afe 100644
--- a/likelihood.py
+++ b/likelihood.py
@@ -1,6 +1,6 @@
 import numpy as np
 from histogram import HistogramDD
-from uncertainties import ufloat, umath
+from uncertainties import ufloat, umath, unumpy
 from math import sqrt
 from itertools import izip, compress
 from chroma.tools import profile_if_possible
@@ -8,7 +8,7 @@ from chroma.tools import profile_if_possible
 class Likelihood(object):
     "Class to evaluate likelihoods for detector events."
     def __init__(self, sim, event=None, tbins=100, trange=(-0.5e-9, 999.5e-9), 
-                 qbins=10, qrange=(-0.5, 9.5)):
+                 qbins=10, qrange=(-0.5, 9.5), time_only=True):
         """
         Args:
             - sim: chroma.sim.Simulation
@@ -22,14 +22,18 @@ class Likelihood(object):
                 Min and max time to include in PDF
             - qbins: int, *optional*
                 Number of charge bins in PDF
-            - qrange: tuple of 2 floats, *optional
+            - qrange: tuple of 2 floats, *optional*
                 Min and max charge to include in PDF
+            - time_only: bool, *optional*
+                Only use time observable (instead of time and charge) in computing
+                likelihood.  Defaults to True.
         """
         self.sim = sim
         self.tbins = tbins
         self.trange = trange
         self.qbins = qbins
         self.qrange = qrange
+        self.time_only = time_only
 
         if event is not None:
             self.set_event(event)
@@ -47,52 +51,38 @@ class Likelihood(object):
         will be propagated `nreps` times.
         """
 
-        hitcount, pdfcount = self.sim.create_pdf(nevals, vertex_generator,
-                                                 self.tbins, self.trange, 
-                                                 self.qbins, self.qrange,
-                                                 nreps=nreps)
-
+        hitcount, pdf_prob, pdf_prob_uncert = self.sim.eval_pdf(self.event.channels,
+                                                                nevals, vertex_generator,
+                                                                2.0e-9, self.trange, 
+                                                                1, self.qrange,
+                                                                nreps=nreps,
+                                                                time_only=self.time_only)
+        
         # Normalize probabilities and put a floor to keep the log finite
         hit_prob = hitcount.astype(np.float32) / (nreps * nevals)
         hit_prob = np.maximum(hit_prob, 0.5 / (nreps*nevals))
 
-        # Normalize t,q PDFs
-        pdf = pdfcount.astype(np.float32)
-        norm = pdf.sum(axis=2).sum(axis=1) / (self.qrange[1] - self.qrange[0]) / (self.trange[1] - self.trange[0])
+        # Set all zero or nan probabilities to limiting PDF value
+        bad_value = (pdf_prob <= 0.0) | np.isnan(pdf_prob)
+        if self.time_only:
+            pdf_floor = 1.0 / (self.trange[1] - self.trange[0])
+        else:
+            pdf_floor = 1.0 / (self.trange[1] - self.trange[0]) / (self.qrange[1] - self.qrange[0])
+        pdf_prob[bad_value] = pdf_floor
+        pdf_prob_uncert[bad_value] = pdf_floor
 
-        pdf /= np.maximum(norm[:,np.newaxis,np.newaxis], 1)
-        
-        # NLL calculation: note that negation is at the end
+        print 'channels with no data:', (bad_value & self.event.channels.hit).astype(int).sum()
 
+        # NLL calculation: note that negation is at the end
         # Start with the probabilties of hitting (or not) the channels
-        log_likelihood = ufloat((
-                  np.log(hit_prob[self.event.channels.hit]).sum()
-                  +np.log(1.0-hit_prob[~self.event.channels.hit]).sum(),
-                  0.0))
+        hit_channel_prob = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit]).sum()
+        hit_channel_prob_uncert = ( (nevals * nreps * hit_prob * (1.0 - hit_prob)) / hit_prob**2 ).sum()**0.5
+        log_likelihood = ufloat((hit_channel_prob, 0.0))
 
         # Then include the probability densities of the observed charges and times
-        for i, t, q in compress(izip(range(self.event.channels.hit.size),self.event.channels.t,self.event.channels.q),self.event.channels.hit):
-            tbin = (t - self.trange[0]) / (self.trange[1] - self.trange[0]) * self.tbins
-            qbin = (q - self.trange[0]) / (self.qrange[1] - self.qrange[0]) * self.qbins
-            if tbin < 0 or tbin >= self.tbins or qbin < 0 or qbin >= self.qbins:
-                probability = 0.0
-                nentries = 0
-            else:
-                probability = pdf[i, tbin, qbin]
-                # If probability is zero, avoid warning here, but catch the problem
-                # in the next if block
-                probability_err = probability / max(1, sqrt(pdfcount[i, tbin, qbin]))
-                nentries = norm[i]
-
-            if probability == 0.0 or np.isnan(probability):
-                if nentries > 0:
-                    probability = 0.5/nentries
-                    probability_err = probability
-                else:
-                    probability = 0.5/(self.qrange[1] - self.qrange[0])/(self.trange[1] - self.trange[0])
-                    probability_err = probability
-
-            log_likelihood += umath.log(ufloat((probability, probability_err)))
+        hit_pdf_ufloat = unumpy.uarray((pdf_prob[self.event.channels.hit],
+                                        pdf_prob_uncert[self.event.channels.hit]))
+        log_likelihood += unumpy.log(hit_pdf_ufloat).sum()
 
         return -log_likelihood
 
@@ -111,11 +101,11 @@ if __name__ == '__main__':
 
     event = sim.simulate(1, constant_particle_gun('e-',(0,0,0),(1,0,0),100.0)).next()
 
-    print 'nhit = %i' % np.count_nonzero(event.hits.hit)
+    print 'nhit = %i' % np.count_nonzero(event.channels.hit)
 
     likelihood = Likelihood(sim, event)
 
-    for x in np.linspace(-10.0, 10.0, 10*5+1):
+    for x in np.linspace(-10.0, 10.0, 2*10*5+1):
         start = time.time()
-        l = likelihood.eval(constant_particle_gun('e-',(x,0,0),(1,0,0),100.0),100, nreps=10)
+        l = likelihood.eval(constant_particle_gun('e-',(0,0,0),(1,0,0),100.0),100, nreps=200)
         print 'x = %5.1f, %s (%1.1f sec)' % (x, l, time.time() - start)
diff --git a/sim.py b/sim.py
index 8d991b2..3044f3e 100755
--- a/sim.py
+++ b/sim.py
@@ -116,6 +116,32 @@ class Simulation(object):
             self.gpu_worker.add_hits_to_pdf()
         
         return self.gpu_worker.get_pdfs()
+
+
+    def eval_pdf(self, event_channels, nevents, vertex_generator, 
+                 min_twidth, trange, min_qwidth, qrange,
+                 min_bin_content=20, nreps=1, time_only=True):
+        photon_gen = repeating_iterator(self.photon_generator.generate_events(nevents, vertex_generator),
+                                        nreps)
+        return self.eval_pdf_from_photons(event_channels, nevents*nreps, photon_gen,
+                                          min_twidth, trange, min_qwidth, qrange, 
+                                          min_bin_content=min_bin_content, time_only=time_only)
+
+    def eval_pdf_from_photons(self, event_channels, nevents, photon_generator,
+                              min_twidth, trange, min_qwidth, qrange,
+                              min_bin_content=20, time_only=True):
+        '''Returns tuple: 1D array of channel hit counts, 1D array of PDF probability densities'''
+        self.gpu_worker.setup_pdf_eval(event_channels.hit, event_channels.t, event_channels.q,
+                                       min_twidth, trange, min_qwidth, qrange,
+                                       min_bin_content=min_bin_content, time_only=True)
+
+        for ev in itertools.islice(photon_generator, nevents):
+            self.gpu_worker.load_photons(ev.photon_start)
+            self.gpu_worker.propagate()
+            self.gpu_worker.run_daq()
+            self.gpu_worker.accumulate_pdf_eval()
+        
+        return self.gpu_worker.get_pdf_eval()
         
 
 @profile_if_possible
diff --git a/src/daq.cu b/src/daq.cu
index a34fbbe..cb53d56 100644
--- a/src/daq.cu
+++ b/src/daq.cu
@@ -106,5 +106,103 @@ __global__ void bin_hits(int nchannels,
 		pdf[bin] += 1;
 	}
 }
+	
+__global__ void accumulate_pdf_eval(int time_only,
+				    int nchannels,
+				    unsigned int *event_hit,
+				    float *event_time,
+				    float *event_charge,
+				    float *mc_time,
+				    unsigned int *mc_charge, // quirk of DAQ!
+				    unsigned int *hitcount, 
+				    unsigned int *bincount,
+				    float min_twidth, float tmin, float tmax,
+				    float min_qwidth, float qmin, float qmax,
+				    float *nearest_mc,
+				    int min_bin_content)
+{
+	int id = threadIdx.x + blockDim.x * blockIdx.x;
+	
+	if (id >= nchannels)
+		return;
+	
+	// Was this channel hit in the Monte Carlo?
+	float channel_mc_time = mc_time[id];
+	if (channel_mc_time >= 1e8)
+		return; // Nothing else to do
+	
+	// Is this channel inside the range of the PDF?
+	float distance;
+	int channel_bincount = bincount[id];
+	if (time_only) {
+		if (channel_mc_time < tmin || channel_mc_time > tmax)
+			return;  // Nothing else to do
+		
+		// This MC information is contained in the PDF
+		hitcount[id] += 1;
+	  
+		// Was this channel hit in the event-of-interest?
+		int channel_event_hit = event_hit[id];
+		if (!channel_event_hit)
+			return; // No need to update PDF value for unhit channel
+		
+		// Are we inside the minimum size bin?
+		float channel_event_time = event_time[id];
+		distance = fabsf(channel_mc_time - channel_event_time);
+		if (distance < min_twidth/2.0)
+		  bincount[id] = channel_bincount + 1;
+
+	} else { // time and charge PDF
+		float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer
+	
+		if (channel_mc_time < tmin || channel_mc_time > tmax ||
+		    channel_mc_charge < qmin || channel_mc_charge > qmax)
+			return;  // Nothing else to do
+		
+		// This MC information is contained in the PDF
+		hitcount[id] += 1;
+	  
+		// Was this channel hit in the event-of-interest?
+		int channel_event_hit = event_hit[id];
+		if (!channel_event_hit)
+			return; // No need to update PDF value for unhit channel
+		
+		// Are we inside the minimum size bin?
+		float channel_event_time = event_time[id];
+		float channel_event_charge = event_charge[id];
+		float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0;
+		float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0;
+		distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance);
+
+		if (distance < 1.0f)
+		  bincount[id] = channel_bincount + 1;
+	}
+
+	// Do we need to keep updating the nearest_mc list?
+	if (channel_bincount + 1 >= min_bin_content)
+		return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value
+
+	// insertion sort the distance into the array of nearest MC points
+	int offset = min_bin_content * id;
+	int entry = min_bin_content - 1;
+	
+	// If last entry less than new entry, nothing to update
+	if (distance > nearest_mc[offset + entry])
+		return;
+
+	// Find where to insert the new entry while sliding the rest
+	// to the right
+	entry--;
+	while (entry >= 0) {
+		if (nearest_mc[offset+entry] >= distance)
+			nearest_mc[offset+entry+1] = nearest_mc[offset+entry];
+		else
+			break;
+		entry--;
+	}
+
+	nearest_mc[offset+entry+1] = distance;
+}
+
 
 } // extern "C"