summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gpu.py124
-rw-r--r--likelihood.py76
-rwxr-xr-xsim.py26
-rw-r--r--src/daq.cu98
4 files changed, 280 insertions, 44 deletions
diff --git a/gpu.py b/gpu.py
index 4d2197b..ab71358 100644
--- a/gpu.py
+++ b/gpu.py
@@ -115,7 +115,8 @@ class GPU(object):
self.daq_module = SourceModule(chroma.src.daq, options=cuda_options, no_extern_c=True)
self.daq_funcs = CUDAFuncs(self.daq_module,
['reset_earliest_time_int', 'run_daq',
- 'convert_sortable_int_to_float', 'bin_hits'])
+ 'convert_sortable_int_to_float', 'bin_hits',
+ 'accumulate_pdf_eval'])
def print_device_usage(self):
print 'device usage:'
@@ -422,5 +423,126 @@ class GPU(object):
'''Returns the 1D hitcount array and the 3D [channel, time, charge] histogram'''
return self.hitcount_gpu.get(), self.pdf_gpu.get()
+ def setup_pdf_eval(self, event_hit, event_time, event_charge,
+ min_twidth, trange, min_qwidth, qrange, min_bin_content=10,
+ time_only=True):
+ '''Setup GPU arrays to compute PDF values for the given event.
+ The pdf_eval calculation allows the PDF to be evaluated at a
+ single point for each channel as the Monte Carlo is run. The
+ effective bin size will be as small as (`min_twidth`,
+ `min_qwidth`) around the point of interest, but will be large
+ enough to ensure that `min_bin_content` Monte Carlo events
+ fall into the bin.
+
+ event_hit: ndarray
+ Hit or not-hit status for each channel in the detector.
+ event_time: ndarray
+ Hit time for each channel in the detector. If channel
+ not hit, the time will be ignored.
+ event_charge: ndarray
+ Integrated charge for each channel in the detector.
+ If channel not hit, the charge will be ignored.
+
+ min_twidth: float
+ Minimum bin size in the time dimension
+ trange: (float, float)
+ Range of time dimension in PDF
+ min_qwidth: float
+ Minimum bin size in charge dimension
+ qrange: (float, float)
+ Range of charge dimension in PDF
+ min_bin_content: int
+ The bin will be expanded to include at least this many events
+ time_only: bool
+ If True, only the time observable will be used in the PDF.
+ '''
+ self.event_hit_gpu = gpuarray.to_gpu(event_hit.astype(np.uint32))
+ self.event_time_gpu = gpuarray.to_gpu(event_time.astype(np.float32))
+ self.event_charge_gpu = gpuarray.to_gpu(event_charge.astype(np.float32))
+
+ self.eval_hitcount_gpu = gpuarray.zeros(len(event_hit), dtype=np.uint32)
+ self.eval_bincount_gpu = gpuarray.zeros(len(event_hit), dtype=np.uint32)
+ self.nearest_mc_gpu = gpuarray.empty(shape=len(event_hit) * min_bin_content,
+ dtype=np.float32)
+ self.nearest_mc_gpu.fill(1e9)
+
+ self.min_twidth = min_twidth
+ self.trange = trange
+ self.min_qwidth = min_qwidth
+ self.qrange = qrange
+ self.min_bin_content = min_bin_content
+ self.time_only = time_only
+
+ def clear_pdf_eval(self):
+ '''Reset PDF evaluation counters to start accumulating new Monte Carlo.'''
+ self.eval_hitcount_gpu.fill(0)
+ self.eval_bincount_gpu.fill(0)
+ self.nearest_mc_gpu.fill(1e9)
+
+ def accumulate_pdf_eval(self):
+ '''Add the most recent results of run_daq() to the PDF evaluation.'''
+
+ self.daq_funcs.accumulate_pdf_eval(np.int32(self.time_only),
+ np.int32(len(self.event_hit_gpu)),
+ self.event_hit_gpu,
+ self.event_time_gpu,
+ self.event_charge_gpu,
+ self.earliest_time_gpu,
+ self.channel_q_gpu,
+ self.eval_hitcount_gpu,
+ self.eval_bincount_gpu,
+ np.float32(self.min_twidth),
+ np.float32(self.trange[0]),
+ np.float32(self.trange[1]),
+ np.float32(self.min_qwidth),
+ np.float32(self.qrange[0]),
+ np.float32(self.qrange[1]),
+ self.nearest_mc_gpu,
+ np.int32(self.min_bin_content),
+ block=(self.nthreads_per_block,1,1),
+ grid=(len(self.earliest_time_gpu)//self.nthreads_per_block+1,1))
+
+ def get_pdf_eval(self):
+ evhit = self.event_hit_gpu.get().astype(bool)
+ hitcount = self.eval_hitcount_gpu.get()
+ bincount = self.eval_bincount_gpu.get()
+
+ pdf_value = np.zeros(len(hitcount), dtype=float)
+ pdf_frac_uncert = np.zeros_like(pdf_value)
+
+ # PDF value for high stats bins
+ high_stats = bincount >= self.min_bin_content
+ if high_stats.any():
+ if self.time_only:
+ pdf_value[high_stats] = bincount[high_stats].astype(float) / hitcount[high_stats] / self.min_twidth
+ else:
+ assert Exception('Unimplemented 2D (time,charge) mode!')
+
+ pdf_frac_uncert[high_stats] = 1.0/np.sqrt(bincount[high_stats])
+
+ # PDF value for low stats bins
+ low_stats = ~high_stats & (hitcount > 0) & evhit
+
+ nearest_mc = self.nearest_mc_gpu.get().reshape((len(hitcount), self.min_bin_content))
+
+ # Deal with the case where we did not even get min_bin_content events in the PDF
+ # but also clamp the lower range to ensure we don't index by a negative number in 2 lines
+ last_valid_entry = np.maximum(0, (nearest_mc < 1e9).astype(int).sum(axis=1) - 1)
+ distance = nearest_mc[np.arange(len(last_valid_entry)),last_valid_entry]
+
+ if low_stats.any():
+ if self.time_only:
+ pdf_value[low_stats] = (last_valid_entry[low_stats] + 1).astype(float) / hitcount[low_stats] / distance[low_stats] / 2.0
+ else:
+ assert Exception('Unimplemented 2D (time,charge) mode!')
+
+ pdf_frac_uncert[low_stats] = 1.0/np.sqrt(last_valid_entry[low_stats] + 1)
+
+ # PDFs with no stats got zero by default during array creation
+
+ print 'high_stats:', high_stats.sum(), 'low_stats', low_stats.sum()
+
+ return hitcount, pdf_value, pdf_value * pdf_frac_uncert
+
def __del__(self):
self.context.pop()
diff --git a/likelihood.py b/likelihood.py
index 2487442..c797afe 100644
--- a/likelihood.py
+++ b/likelihood.py
@@ -1,6 +1,6 @@
import numpy as np
from histogram import HistogramDD
-from uncertainties import ufloat, umath
+from uncertainties import ufloat, umath, unumpy
from math import sqrt
from itertools import izip, compress
from chroma.tools import profile_if_possible
@@ -8,7 +8,7 @@ from chroma.tools import profile_if_possible
class Likelihood(object):
"Class to evaluate likelihoods for detector events."
def __init__(self, sim, event=None, tbins=100, trange=(-0.5e-9, 999.5e-9),
- qbins=10, qrange=(-0.5, 9.5)):
+ qbins=10, qrange=(-0.5, 9.5), time_only=True):
"""
Args:
- sim: chroma.sim.Simulation
@@ -22,14 +22,18 @@ class Likelihood(object):
Min and max time to include in PDF
- qbins: int, *optional*
Number of charge bins in PDF
- - qrange: tuple of 2 floats, *optional
+ - qrange: tuple of 2 floats, *optional*
Min and max charge to include in PDF
+ - time_only: bool, *optional*
+ Only use time observable (instead of time and charge) in computing
+ likelihood. Defaults to True.
"""
self.sim = sim
self.tbins = tbins
self.trange = trange
self.qbins = qbins
self.qrange = qrange
+ self.time_only = time_only
if event is not None:
self.set_event(event)
@@ -47,52 +51,38 @@ class Likelihood(object):
will be propagated `nreps` times.
"""
- hitcount, pdfcount = self.sim.create_pdf(nevals, vertex_generator,
- self.tbins, self.trange,
- self.qbins, self.qrange,
- nreps=nreps)
-
+ hitcount, pdf_prob, pdf_prob_uncert = self.sim.eval_pdf(self.event.channels,
+ nevals, vertex_generator,
+ 2.0e-9, self.trange,
+ 1, self.qrange,
+ nreps=nreps,
+ time_only=self.time_only)
+
# Normalize probabilities and put a floor to keep the log finite
hit_prob = hitcount.astype(np.float32) / (nreps * nevals)
hit_prob = np.maximum(hit_prob, 0.5 / (nreps*nevals))
- # Normalize t,q PDFs
- pdf = pdfcount.astype(np.float32)
- norm = pdf.sum(axis=2).sum(axis=1) / (self.qrange[1] - self.qrange[0]) / (self.trange[1] - self.trange[0])
+ # Set all zero or nan probabilities to limiting PDF value
+ bad_value = (pdf_prob <= 0.0) | np.isnan(pdf_prob)
+ if self.time_only:
+ pdf_floor = 1.0 / (self.trange[1] - self.trange[0])
+ else:
+ pdf_floor = 1.0 / (self.trange[1] - self.trange[0]) / (self.qrange[1] - self.qrange[0])
+ pdf_prob[bad_value] = pdf_floor
+ pdf_prob_uncert[bad_value] = pdf_floor
- pdf /= np.maximum(norm[:,np.newaxis,np.newaxis], 1)
-
- # NLL calculation: note that negation is at the end
+ print 'channels with no data:', (bad_value & self.event.channels.hit).astype(int).sum()
+ # NLL calculation: note that negation is at the end
# Start with the probabilties of hitting (or not) the channels
- log_likelihood = ufloat((
- np.log(hit_prob[self.event.channels.hit]).sum()
- +np.log(1.0-hit_prob[~self.event.channels.hit]).sum(),
- 0.0))
+ hit_channel_prob = np.log(hit_prob[self.event.channels.hit]).sum() + np.log(1.0-hit_prob[~self.event.channels.hit]).sum()
+ hit_channel_prob_uncert = ( (nevals * nreps * hit_prob * (1.0 - hit_prob)) / hit_prob**2 ).sum()**0.5
+ log_likelihood = ufloat((hit_channel_prob, 0.0))
# Then include the probability densities of the observed charges and times
- for i, t, q in compress(izip(range(self.event.channels.hit.size),self.event.channels.t,self.event.channels.q),self.event.channels.hit):
- tbin = (t - self.trange[0]) / (self.trange[1] - self.trange[0]) * self.tbins
- qbin = (q - self.trange[0]) / (self.qrange[1] - self.qrange[0]) * self.qbins
- if tbin < 0 or tbin >= self.tbins or qbin < 0 or qbin >= self.qbins:
- probability = 0.0
- nentries = 0
- else:
- probability = pdf[i, tbin, qbin]
- # If probability is zero, avoid warning here, but catch the problem
- # in the next if block
- probability_err = probability / max(1, sqrt(pdfcount[i, tbin, qbin]))
- nentries = norm[i]
-
- if probability == 0.0 or np.isnan(probability):
- if nentries > 0:
- probability = 0.5/nentries
- probability_err = probability
- else:
- probability = 0.5/(self.qrange[1] - self.qrange[0])/(self.trange[1] - self.trange[0])
- probability_err = probability
-
- log_likelihood += umath.log(ufloat((probability, probability_err)))
+ hit_pdf_ufloat = unumpy.uarray((pdf_prob[self.event.channels.hit],
+ pdf_prob_uncert[self.event.channels.hit]))
+ log_likelihood += unumpy.log(hit_pdf_ufloat).sum()
return -log_likelihood
@@ -111,11 +101,11 @@ if __name__ == '__main__':
event = sim.simulate(1, constant_particle_gun('e-',(0,0,0),(1,0,0),100.0)).next()
- print 'nhit = %i' % np.count_nonzero(event.hits.hit)
+ print 'nhit = %i' % np.count_nonzero(event.channels.hit)
likelihood = Likelihood(sim, event)
- for x in np.linspace(-10.0, 10.0, 10*5+1):
+ for x in np.linspace(-10.0, 10.0, 2*10*5+1):
start = time.time()
- l = likelihood.eval(constant_particle_gun('e-',(x,0,0),(1,0,0),100.0),100, nreps=10)
+ l = likelihood.eval(constant_particle_gun('e-',(0,0,0),(1,0,0),100.0),100, nreps=200)
print 'x = %5.1f, %s (%1.1f sec)' % (x, l, time.time() - start)
diff --git a/sim.py b/sim.py
index 8d991b2..3044f3e 100755
--- a/sim.py
+++ b/sim.py
@@ -116,6 +116,32 @@ class Simulation(object):
self.gpu_worker.add_hits_to_pdf()
return self.gpu_worker.get_pdfs()
+
+
+ def eval_pdf(self, event_channels, nevents, vertex_generator,
+ min_twidth, trange, min_qwidth, qrange,
+ min_bin_content=20, nreps=1, time_only=True):
+ photon_gen = repeating_iterator(self.photon_generator.generate_events(nevents, vertex_generator),
+ nreps)
+ return self.eval_pdf_from_photons(event_channels, nevents*nreps, photon_gen,
+ min_twidth, trange, min_qwidth, qrange,
+ min_bin_content=min_bin_content, time_only=time_only)
+
+ def eval_pdf_from_photons(self, event_channels, nevents, photon_generator,
+ min_twidth, trange, min_qwidth, qrange,
+ min_bin_content=20, time_only=True):
+ '''Returns tuple: 1D array of channel hit counts, 1D array of PDF probability densities'''
+ self.gpu_worker.setup_pdf_eval(event_channels.hit, event_channels.t, event_channels.q,
+ min_twidth, trange, min_qwidth, qrange,
+ min_bin_content=min_bin_content, time_only=True)
+
+ for ev in itertools.islice(photon_generator, nevents):
+ self.gpu_worker.load_photons(ev.photon_start)
+ self.gpu_worker.propagate()
+ self.gpu_worker.run_daq()
+ self.gpu_worker.accumulate_pdf_eval()
+
+ return self.gpu_worker.get_pdf_eval()
@profile_if_possible
diff --git a/src/daq.cu b/src/daq.cu
index a34fbbe..cb53d56 100644
--- a/src/daq.cu
+++ b/src/daq.cu
@@ -106,5 +106,103 @@ __global__ void bin_hits(int nchannels,
pdf[bin] += 1;
}
}
+
+__global__ void accumulate_pdf_eval(int time_only,
+ int nchannels,
+ unsigned int *event_hit,
+ float *event_time,
+ float *event_charge,
+ float *mc_time,
+ unsigned int *mc_charge, // quirk of DAQ!
+ unsigned int *hitcount,
+ unsigned int *bincount,
+ float min_twidth, float tmin, float tmax,
+ float min_qwidth, float qmin, float qmax,
+ float *nearest_mc,
+ int min_bin_content)
+{
+ int id = threadIdx.x + blockDim.x * blockIdx.x;
+
+ if (id >= nchannels)
+ return;
+
+ // Was this channel hit in the Monte Carlo?
+ float channel_mc_time = mc_time[id];
+ if (channel_mc_time >= 1e8)
+ return; // Nothing else to do
+
+ // Is this channel inside the range of the PDF?
+ float distance;
+ int channel_bincount = bincount[id];
+ if (time_only) {
+ if (channel_mc_time < tmin || channel_mc_time > tmax)
+ return; // Nothing else to do
+
+ // This MC information is contained in the PDF
+ hitcount[id] += 1;
+
+ // Was this channel hit in the event-of-interest?
+ int channel_event_hit = event_hit[id];
+ if (!channel_event_hit)
+ return; // No need to update PDF value for unhit channel
+
+ // Are we inside the minimum size bin?
+ float channel_event_time = event_time[id];
+ distance = fabsf(channel_mc_time - channel_event_time);
+ if (distance < min_twidth/2.0)
+ bincount[id] = channel_bincount + 1;
+
+ } else { // time and charge PDF
+ float channel_mc_charge = mc_charge[id]; // int->float conversion because DAQ just returns an integer
+
+ if (channel_mc_time < tmin || channel_mc_time > tmax ||
+ channel_mc_charge < qmin || channel_mc_charge > qmax)
+ return; // Nothing else to do
+
+ // This MC information is contained in the PDF
+ hitcount[id] += 1;
+
+ // Was this channel hit in the event-of-interest?
+ int channel_event_hit = event_hit[id];
+ if (!channel_event_hit)
+ return; // No need to update PDF value for unhit channel
+
+ // Are we inside the minimum size bin?
+ float channel_event_time = event_time[id];
+ float channel_event_charge = event_charge[id];
+ float normalized_time_distance = fabsf(channel_event_time - channel_mc_time)/min_twidth/2.0;
+ float normalized_charge_distance = fabsf(channel_event_charge - channel_mc_charge)/min_qwidth/2.0;
+ distance = sqrt(normalized_time_distance*normalized_time_distance + normalized_charge_distance*normalized_charge_distance);
+
+ if (distance < 1.0f)
+ bincount[id] = channel_bincount + 1;
+ }
+
+ // Do we need to keep updating the nearest_mc list?
+ if (channel_bincount + 1 >= min_bin_content)
+ return; // No need to perform insertion into nearest_mc because bincount is a better estimate of the PDF value
+
+ // insertion sort the distance into the array of nearest MC points
+ int offset = min_bin_content * id;
+ int entry = min_bin_content - 1;
+
+ // If last entry less than new entry, nothing to update
+ if (distance > nearest_mc[offset + entry])
+ return;
+
+ // Find where to insert the new entry while sliding the rest
+ // to the right
+ entry--;
+ while (entry >= 0) {
+ if (nearest_mc[offset+entry] >= distance)
+ nearest_mc[offset+entry+1] = nearest_mc[offset+entry];
+ else
+ break;
+ entry--;
+ }
+
+ nearest_mc[offset+entry+1] = distance;
+}
+
} // extern "C"