1 files changed, 234 insertions, 29 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 6044adb..8539040 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -18,6 +18,7 @@ from __future__ import print_function, division
 import numpy as np
 from scipy.stats import iqr, poisson
 from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
 
 PSUP_RADIUS = 840.0
 
@@ -193,27 +194,29 @@ def michel_cut(ev):
 
     michel = michel[michel.nhit >= 100]
 
-    # Accept events which had a muon more than 800 nanoseconds but less
-    # than 20 microseconds before them. The 800 nanoseconds cut comes from
-    # Richie's thesis. He also mentions that the In Time Channel Spread Cut
-    # is very effective at cutting electrons events caused by muons, so I
-    # should implement that.
+    # Accept events which had a muon more than 800 nanoseconds but less than 20
+    # microseconds before them. The 800 nanoseconds cut comes from Richie's
+    # thesis. He also mentions that the In Time Channel Spread Cut is very
+    # effective at cutting electron events caused by muons, so I should
+    # implement that.
     #
     # Note: We currently don't look across run boundaries. This should be a
     # *very* small effect, and the logic to do so would be very complicated
     # since I would have to deal with 50 MHz clock rollovers, etc.
-    #
-    # Do a simple python for loop here over the runs since we create
-    # temporary arrays with shape (michel.size,prompt_plus_muons.size)
-    # which could be too big for the full dataset.
-    #
-    # There might be some clever way to do this without the loop in Pandas,
-    # but I don't know how.
     if prompt_plus_muons.size and michel.size:
-        return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
-                             (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+        mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
+               (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
+        michel = michel.iloc[np.any(mask,axis=0)]
+        michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
+                                        index=michel.index.values,
+                                        dtype=np.int32)
+        return michel
     else:
-        return pd.DataFrame(columns=ev.columns)
+        # Return an empty slice since we need it to have the same datatype as
+        # the other dataframes
+        michel = ev[:0]
+        michel['muon_gtid'] = -1
+        return michel
 
 def atmospheric_events(ev):
     """
@@ -330,6 +333,174 @@ def plot_corner_plot(ev, title, save=None):
 
     plt.suptitle(title)
 
+def intersect_sphere(pos, dir, R):
+    """
+    Compute the first intersection of a ray starting at `pos` with direction
+    `dir` and a sphere centered at the origin with radius `R`. The distance to
+    the intersection is returned.
+    
+    Example:
+    
+        pos = np.array([0,0,0])
+        dir = np.array([1,0,0])
+    
+        l = intersect_sphere(pos,dir,PSUP_RADIUS):
+        if l is not None:
+            hit = pos + l*dir
+            print("ray intersects sphere at %.2f %.2f %.2f", hit[0], hit[1], hit[2])
+        else:
+            print("ray didn't intersect sphere")
+    """
+
+    b = 2*np.dot(dir,pos)
+    c = np.dot(pos,pos) - R*R
+
+    if b*b - 4*c <= 0:
+        # Ray doesn't intersect the sphere.
+        return None
+
+    # First, check the shorter solution.
+    l = (-b - np.sqrt(b*b - 4*c))/2
+
+    # If the shorter solution is less than 0, check the second solution.
+    if l < 0:
+        l = (-b + np.sqrt(b*b - 4*c))/2
+
+    # If the distance is still negative, we didn't intersect the sphere.
+    if l < 0:
+        return None
+
+    return l
+
+def get_dx(row):
+    pos = np.array([row.x,row.y,row.z])
+    dir = np.array([np.sin(row.theta1)*np.cos(row.phi1),
+                    np.sin(row.theta1)*np.sin(row.phi1),
+                    np.cos(row.theta1)])
+    l = intersect_sphere(pos,-dir,PSUP_RADIUS)
+    if l is not None:
+        pos -= dir*l
+        michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel])
+        return np.linalg.norm(michel_pos-pos)
+    else:
+        return 0
+
+def dx_to_energy(dx):
+    lines = []
+    with open("../src/muE_water_liquid.txt") as f:
+        for i, line in enumerate(f):
+            if i < 10:
+                continue
+            if 'Minimum ionization' in line:
+                continue
+            if 'Muon critical energy' in line:
+                continue
+            lines.append(line)
+    data = np.genfromtxt(lines)
+    return np.interp(dx,data[:,8],data[:,0])
+
+def iqr_std_err(x):
+    """
+    Returns the approximate standard deviation assuming the central part of the
+    distribution is gaussian.
+    """
+    x = x.dropna()
+    n = len(x)
+    if n == 0:
+        return np.nan
+    # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
+    std = iqr(x.values)/1.3489795
+    return 1.573*std/np.sqrt(n)
+
+def iqr_std(x):
+    """
+    Returns the approximate standard deviation assuming the central part of the
+    distribution is gaussian.
+    """
+    x = x.dropna()
+    n = len(x)
+    if n == 0:
+        return np.nan
+    return iqr(x.values)/1.3489795
+
+def quantile_error(x,q):
+    """
+    Returns the standard error for the qth quantile of `x`. The error is
+    computed using the Maritz-Jarrett method described here:
+    https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm.
+    """
+    x = np.sort(x)
+    n = len(x)
+    m = int(q*n+0.5)
+    A = m - 1
+    B = n - m
+    i = np.arange(1,len(x)+1)
+    w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B)
+    return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2)
+
+def q90_err(x):
+    """
+    Returns the error on the 90th percentile for all the non NaN values in a
+    Series `x`.
+    """
+    x = x.dropna()
+    n = len(x)
+    if n == 0:
+        return np.nan
+    return quantile_error(x.values,0.9)
+
+def q90(x):
+    """
+    Returns the 90th percentile for all the non NaN values in a Series `x`.
+    """
+    x = x.dropna()
+    n = len(x)
+    if n == 0:
+        return np.nan
+    return np.percentile(x.values,90.0)
+
+def median(x):
+    """
+    Returns the median for all the non NaN values in a Series `x`.
+    """
+    x = x.dropna()
+    n = len(x)
+    if n == 0:
+        return np.nan
+    return np.median(x.values)
+
+def median_err(x):
+    """
+    Returns the approximate error on the median for all the non NaN values in a
+    Series `x`. The error on the median is approximated assuming the central
+    part of the distribution is gaussian.
+    """
+    x = x.dropna()
+    n = len(x)
+    if n == 0:
+        return np.nan
+    # First we estimate the standard deviation using the interquartile range.
+    # Here we are essentially assuming the central part of the distribution is
+    # gaussian.
+    std = iqr(x.values)/1.3489795
+    median = np.median(x.values)
+    # Now we estimate the error on the median for a gaussian
+    # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
+    return 1/(2*np.sqrt(n)*norm.pdf(median,median,std))
+
+def std_err(x):
+    x = x.dropna()
+    mean = np.mean(x)
+    std = np.std(x)
+    n = len(x)
+    if n == 0:
+        return np.nan
+    elif n == 1:
+        return 0.0
+    u4 = np.mean((x-mean)**4)
+    error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
+    return error
+
 if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
@@ -384,7 +555,7 @@ if __name__ == '__main__':
                 (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
 
     # unwrap the 50 MHz clock within each run
-    ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
+    ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
 
     for run, ev_run in ev.groupby('run'):
         # Warn about GTID jumps since we could be missing a potential flasher
@@ -395,7 +566,7 @@ if __name__ == '__main__':
 
     # calculate the time difference between each event and the previous event
     # so we can tag retrigger events
-    ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
+    ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
 
     # This is a bit of a hack. It appears that many times the fit will
     # actually do much better by including a very low energy electron or
@@ -462,22 +633,22 @@ if __name__ == '__main__':
     # the *second* event after the breakdown didn't get tagged correctly. If we
     # apply the data cleaning cuts first and then tag prompt events then this
     # event will get tagged as a prompt event.
-    ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True)
+    ev = ev.groupby('run',group_keys=False).apply(prompt_event)
 
     print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
 
     # flasher follower cut
-    ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True)
+    ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
 
     # breakdown follower cut
-    ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True)
+    ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
 
     # retrigger cut
-    ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True)
-
-    ev = ev[ev.prompt]
+    ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
 
     if args.dc:
+        ev = ev[ev.prompt]
+
         ev.set_index(['run','gtid'])
 
         ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
@@ -549,18 +720,20 @@ if __name__ == '__main__':
     # 2. Nhit >= 100
     # 3. It must be > 800 ns and less than 20 microseconds from a prompt event
     #    or a muon
-    michel = ev.groupby('run',as_index=False).apply(michel_cut).reset_index(level=0,drop=True)
+    michel = ev.groupby('run',group_keys=False).apply(michel_cut)
+
+    print("number of michel events = %i" % len(michel))
 
     # Tag atmospheric events.
     #
     # Note: We don't cut atmospheric events or muons yet because we still need
     # all the events in order to apply the muon follower cut.
-    ev = ev.groupby('run',as_index=False).apply(atmospheric_events).reset_index(level=0,drop=True)
+    ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
 
     print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))
 
     # remove events 200 microseconds after a muon
-    ev = ev.groupby('run',as_index=False).apply(muon_follower_cut).reset_index(level=0,drop=True)
+    ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
 
     # Get rid of muon events in our main event sample
     ev = ev[(ev.dc & DC_MUON) == 0]
@@ -636,8 +809,8 @@ if __name__ == '__main__':
     muons = muons[muons.id == 22]
 
     # require r < 6 meters
-    prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS]
-    atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS]
+    prompt = prompt[prompt.r_psup < 0.9]
+    atm = atm[atm.r_psup < 0.9]
 
     print("number of events after radius cut = %i" % len(prompt))
 
@@ -667,8 +840,40 @@ if __name__ == '__main__':
     # fit
     michel = michel[michel.id == 20]
 
+    stopping_muons = pd.merge(muons,michel,left_on=['run','gtid'],right_on=['run','muon_gtid'],suffixes=('','_michel'))
+
+    # project muon to PSUP
+    stopping_muons['dx'] = stopping_muons.apply(get_dx,axis=1)
+    # energy based on distance travelled
+    stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx)
+    stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx']
+
+    plt.figure()
+    plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step')
+    plt.xlabel("Fractional energy difference (\%)")
+    plt.title("Fractional energy difference for Stopping Muons")
+
+    # 100 bins between 50 MeV and 10 GeV
+    bins = np.arange(50,10000,1000)
+
+    pd_bins = pd.cut(stopping_muons['energy1'],bins)
+
+    T = (bins[1:] + bins[:-1])/2
+
+    dT = stopping_muons.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+
+    plt.figure()
+    plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T)
+    plt.xlabel("Kinetic Energy (MeV)")
+    plt.ylabel(r"Energy bias (\%)")
+
+    plt.figure()
+    plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T)
+    plt.xlabel("Kinetic Energy (MeV)")
+    plt.ylabel(r"Energy resolution (\%)")
+
     plt.figure()
-    plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter")
+    plt.hist(michel.ke.values, bins=np.linspace(0,100,100), histtype='step', label="My fitter")
     if michel.size:
         plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP")
     plt.xlabel("Energy (MeV)")