aboutsummaryrefslogtreecommitdiff
path: root/likelihood.c
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2018-08-14 09:53:09 -0500
committertlatorre <tlatorre@uchicago.edu>2018-08-14 09:53:09 -0500
commit0b7f199c0d93074484ea580504485a32dc29f5e2 (patch)
treee167b6d102b87b7a5eca4558e7f39265d5edc502 /likelihood.c
parent636595905c9f63e6bfcb6d331312090ac2075377 (diff)
downloadsddm-0b7f199c0d93074484ea580504485a32dc29f5e2.tar.gz
sddm-0b7f199c0d93074484ea580504485a32dc29f5e2.tar.bz2
sddm-0b7f199c0d93074484ea580504485a32dc29f5e2.zip
initial commit of likelihood fit for muons
This commit contains code to fit for the energy, position, and direction of muons in the SNO detector. Currently, we read events from SNOMAN zebra files and fill an event struct containing the PMT hits and fit it with the Nelder Mead simplex algorithm from GSL. I've also added code to read in ZEBRA title bank files to read in the DQXX files for a specific run. Any problems with channels in the DQCH and DQCR banks are flagged in the event struct by masking in a bit in the flags variable and these PMT hits are not included in the likelihood calculation. The likelihood for an event is calculated by integrating along the particle track for each PMT and computing the expected number of PE. The charge likelihood is then calculated by looping over all possible number of PE and computing: P(q|n)*P(n|mu) where q is the calibrated QHS charge, n is the number of PE, and mu is the expected number of photoelectrons. The latter is calculated assuming the distribution of PE at a given PMT follows a Poisson distribution (which I think should be correct given the track, but is probably not perfect for tracks which scatter a lot). The time part of the likelihood is calculated by integrating over the track for each PMT and calculating the average time at which the PMT is hit. We then assume the PDF for the photons to arrive is approximately a delta function and compute the first order statistic for a given time to compute the probability that the first photon arrived at a given time. So far I've only tested this with single tracks but the method was designed to be easy to use when you are fitting for multiple particles.
Diffstat (limited to 'likelihood.c')
-rw-r--r--likelihood.c274
1 files changed, 274 insertions, 0 deletions
diff --git a/likelihood.c b/likelihood.c
new file mode 100644
index 0000000..98a4ad7
--- /dev/null
+++ b/likelihood.c
@@ -0,0 +1,274 @@
+#include "likelihood.h"
+#include <stdlib.h> /* for size_t */
+#include "pmt.h"
+#include <gsl/gsl_integration.h>
+#include "muon.h"
+#include "misc.h"
+#include <gsl/gsl_sf_gamma.h>
+#include "sno.h"
+#include "vector.h"
+#include "event.h"
+#include "optics.h"
+#include "sno_charge.h"
+#include "pdg.h"
+
+double F(double t, double mu_noise, double mu_indirect, double *mu_direct, size_t n, double *ts, double tmean, double sigma)
+{
+ /* Returns the CDF for the time distribution of photons at time `t`. */
+ size_t i;
+ double p, mu_total;
+
+ p = mu_noise*t/GTVALID + mu_indirect*(pow(sigma,2)*norm(tmean,t,sigma) + (t-tmean)*norm_cdf(t,tmean,sigma))/(GTVALID-tmean);
+
+ mu_total = mu_noise + mu_indirect;
+ for (i = 0; i < n; i++) {
+ p += mu_direct[i]*norm_cdf(t,ts[i],sigma);
+ mu_total += mu_direct[i];
+ }
+
+ return p/mu_total;
+}
+
+double f(double t, double mu_noise, double mu_indirect, double *mu_direct, size_t n, double *ts, double tmean, double sigma)
+{
+ /* Returns the probability that a photon is detected at time `t`.
+ *
+ * The probability distribution is the sum of three different components:
+ * dark noise, indirect light, and direct light. The dark noise is assumed
+ * to be constant in time. The direct light is assumed to be a delta
+ * function around the times `ts`, where each element of `ts` comes from a
+ * different particle. This assumption is probably valid for particles
+ * like muons which don't scatter much, and the hope is that it is *good
+ * enough* for electrons too. The probability distribution for indirect
+ * light is assumed to be a step function past some time `tmean`.
+ *
+ * The probability returned is calculated by taking the sum of these three
+ * components and convolving it with a gaussian with standard deviation
+ * `sigma` which should typically be the PMT transit time spread. */
+ size_t i;
+ double p, mu_total;
+
+ p = mu_noise/GTVALID + mu_indirect*norm_cdf(t,tmean,sigma)/(GTVALID-tmean);
+
+ mu_total = mu_noise + mu_indirect;
+ for (i = 0; i < n; i++) {
+ p += mu_direct[i]*norm(t,ts[i],sigma);
+ mu_total += mu_direct[i];
+ }
+
+ return p/mu_total;
+}
+
+double log_pt(double t, size_t n, double mu_noise, double mu_indirect, double *mu_direct, size_t n2, double *ts, double tmean, double sigma)
+{
+ /* Returns the first order statistic for observing a PMT hit at time `t`
+ * given `n` hits.
+ *
+ * The first order statistic is computed from the probability distribution
+ * above. It's not obvious whether one should take the first order
+ * statistic before or after convolving with the PMT transit time spread.
+ * Since at least some of the transit time spread in SNO comes from the
+ * different transit times across the face of the PMT, it seems better to
+ * convolve first which is what we do here. In addition, the problem is not
+ * analytically tractable if you do things the other way around. */
+ return log(n) + (n-1)*log1p(-F(t,mu_noise,mu_indirect,mu_direct,n2,ts,tmean,sigma)) + log(f(t,mu_noise,mu_indirect,mu_direct,n2,ts,tmean,sigma));
+}
+
+static double gsl_muon_time(double x, void *params)
+{
+ double *params2 = (double *) params;
+ double T0 = params2[0];
+ double pos0[3], dir[3], pos[3], pmt_dir[3];
+ int i;
+ double t;
+ i = (int) params2[1];
+ pos0[0] = params2[2];
+ pos0[1] = params2[3];
+ pos0[2] = params2[4];
+ dir[0] = params2[5];
+ dir[1] = params2[6];
+ dir[2] = params2[7];
+
+ pos[0] = pos0[0] + dir[0]*x;
+ pos[1] = pos0[1] + dir[1]*x;
+ pos[2] = pos0[2] + dir[2]*x;
+
+ SUB(pmt_dir,pmts[i].pos,pos);
+
+ double distance = NORM(pmt_dir);
+
+ /* FIXME: I just calculate delta assuming 400 nm light. */
+ double wavelength0 = 400.0;
+ double n = get_index(HEAVY_WATER_DENSITY, wavelength0, 10.0);
+
+ t = x/SPEED_OF_LIGHT + distance*n/SPEED_OF_LIGHT;
+
+ return t*get_expected_charge(x, get_T(T0, x, HEAVY_WATER_DENSITY), pos, dir, pmts[i].pos, pmts[i].normal, PMT_RADIUS);
+}
+
+static double gsl_muon_charge(double x, void *params)
+{
+ double *params2 = (double *) params;
+ double T0 = params2[0];
+ double pos0[3], dir[3], pos[3];
+ int i;
+ i = (int) params2[1];
+ pos0[0] = params2[2];
+ pos0[1] = params2[3];
+ pos0[2] = params2[4];
+ dir[0] = params2[5];
+ dir[1] = params2[6];
+ dir[2] = params2[7];
+
+ pos[0] = pos0[0] + dir[0]*x;
+ pos[1] = pos0[1] + dir[1]*x;
+ pos[2] = pos0[2] + dir[2]*x;
+
+ return get_expected_charge(x, get_T(T0, x, HEAVY_WATER_DENSITY), pos, dir, pmts[i].pos, pmts[i].normal, PMT_RADIUS);
+}
+
+double nll_muon(event *ev, double T, double *pos, double *dir, double t0)
+{
+ size_t i, j;
+ double params[8];
+ double total_charge;
+ double logp[MAX_PE], nll, range, pmt_dir[3], R, x, cos_theta, theta, theta_cerenkov;
+ double tmean = 0.0;
+ int npmt = 0;
+
+ double mu_direct[MAX_PMTS];
+ double ts[MAX_PMTS];
+ double mu[MAX_PMTS];
+ double mu_noise, mu_indirect;
+
+ gsl_integration_cquad_workspace *w = gsl_integration_cquad_workspace_alloc(100);
+ double result, error;
+
+ size_t nevals;
+
+ gsl_function F;
+ F.params = &params;
+
+ range = get_range(T, HEAVY_WATER_DENSITY);
+
+ total_charge = 0.0;
+ npmt = 0;
+ for (i = 0; i < MAX_PMTS; i++) {
+ if (ev->pmt_hits[i].flags || (pmts[i].pmt_type != PMT_NORMAL && pmts[i].pmt_type != PMT_OWL)) continue;
+
+ params[0] = T;
+ params[1] = i;
+ params[2] = pos[0];
+ params[3] = pos[1];
+ params[4] = pos[2];
+ params[5] = dir[0];
+ params[6] = dir[1];
+ params[7] = dir[2];
+
+ /* First, we try to compute the distance along the track where the
+ * PMT is at the Cerenkov angle. The reason for this is because for
+ * heavy particles like muons which don't scatter much, the probability
+ * distribution for getting a photon hit along the track looks kind of
+ * like a delta function, i.e. the PMT is only hit over a very narrow
+ * window when the angle between the track direction and the PMT is
+ * *very* close to the Cerenkov angle (it's not a perfect delta
+ * function since there is some width due to dispersion). In this case,
+ * it's possible that the numerical integration completely skips over
+ * the delta function and so predicts an expected charge of 0. To fix
+ * this, we compute the integral in two steps, one up to the point
+ * along the track where the PMT is at the Cerenkov angle and another
+ * from that point to the end of the track. Since the integration
+ * routine always samples points near the beginning and end of the
+ * integral, this allows the routine to correctly compute that the
+ * integral is non zero. */
+
+ SUB(pmt_dir,pmts[i].pos,pos);
+ /* Compute the distance to the PMT. */
+ R = NORM(pmt_dir);
+ normalize(pmt_dir);
+
+ /* Calculate the cosine of the angle between the track direction and the
+ * vector to the PMT. */
+ cos_theta = DOT(dir,pmt_dir);
+ /* Compute the angle between the track direction and the PMT. */
+ theta = acos(cos_theta);
+ /* Compute the Cerenkov angle. Note that this isn't entirely correct
+ * since we aren't including the factor of beta, but since the point is
+ * just to split up the integral, we only need to find a point along
+ * the track close enough such that the integral isn't completely zero.
+ */
+ theta_cerenkov = acos(1/get_index(WATER_DENSITY,400.0,10.0));
+
+ /* Now, we compute the distance along the track where the PMT is at the
+ * Cerenkov angle. */
+ x = R*sin(theta_cerenkov-theta)/sin(theta_cerenkov);
+
+ if (x > 0 && x < range) {
+ /* Split up the integral at the point where the PMT is at the
+ * Cerenkov angle. */
+ F.function = &gsl_muon_charge;
+ gsl_integration_cquad(&F, 0, x, 0, 1e-2, w, &result, &error, &nevals);
+ mu_direct[i] = result;
+ gsl_integration_cquad(&F, x, range, 0, 1e-2, w, &result, &error, &nevals);
+ mu_direct[i] += result;
+
+ F.function = &gsl_muon_time;
+ gsl_integration_cquad(&F, 0, x, 0, 1e-2, w, &result, &error, &nevals);
+ ts[i] = result;
+ gsl_integration_cquad(&F, x, range, 0, 1e-2, w, &result, &error, &nevals);
+ ts[i] += result;
+ } else {
+ F.function = &gsl_muon_charge;
+ gsl_integration_cquad(&F, 0, range, 0, 1e-2, w, &result, &error, &nevals);
+ mu_direct[i] = result;
+
+ F.function = &gsl_muon_time;
+ gsl_integration_cquad(&F, 0, range, 0, 1e-2, w, &result, &error, &nevals);
+ ts[i] = result;
+ }
+
+ total_charge += mu_direct[i];
+
+ if (mu_direct[i] > 0.001) {
+ ts[i] /= mu_direct[i];
+ ts[i] += t0;
+ tmean += ts[i];
+ npmt += 1;
+ } else {
+ ts[i] = 0.0;
+ }
+ }
+
+ tmean /= npmt;
+
+ gsl_integration_cquad_workspace_free(w);
+
+ mu_noise = DARK_RATE*GTVALID*1e-9;
+ mu_indirect = total_charge/CHARGE_FRACTION;
+
+ for (i = 0; i < MAX_PMTS; i++) {
+ if (ev->pmt_hits[i].flags || (pmts[i].pmt_type != PMT_NORMAL && pmts[i].pmt_type != PMT_OWL)) continue;
+ mu[i] = mu_direct[i] + mu_indirect + mu_noise;
+ }
+
+ nll = 0;
+ for (i = 0; i < MAX_PMTS; i++) {
+ if (ev->pmt_hits[i].flags || (pmts[i].pmt_type != PMT_NORMAL && pmts[i].pmt_type != PMT_OWL)) continue;
+
+ if (ev->pmt_hits[i].hit) {
+ logp[0] = -INFINITY;
+ for (j = 1; j < MAX_PE; j++) {
+ logp[j] = log(pq(ev->pmt_hits[i].qhs,j)) - mu[i] + j*log(mu[i]) - gsl_sf_lnfact(j) + log_pt(ev->pmt_hits[i].t, j, mu_noise, mu_indirect, &mu_direct[i], 1, &ts[i], tmean, 1.5);
+ }
+ nll -= logsumexp(logp, sizeof(logp)/sizeof(double));
+ } else {
+ logp[0] = -mu[i];
+ for (j = 1; j < MAX_PE; j++) {
+ logp[j] = log(get_pmiss(j)) - mu[i] + j*log(mu[i]) - gsl_sf_lnfact(j);
+ }
+ nll -= logsumexp(logp, sizeof(logp)/sizeof(double));
+ }
+ }
+
+ return nll;
+}