aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-01-20 11:35:52 -0600
committertlatorre <tlatorre@uchicago.edu>2020-01-20 11:35:52 -0600
commitdba25e918c13fcadd8859cb16e236911bb935780 (patch)
tree1b6d31ce59ab9b322fbe5488786c02d6763ca057
parente2c312637edc755e9e782a8dda220a0f674a6722 (diff)
downloadsddm-dba25e918c13fcadd8859cb16e236911bb935780.tar.gz
sddm-dba25e918c13fcadd8859cb16e236911bb935780.tar.bz2
sddm-dba25e918c13fcadd8859cb16e236911bb935780.zip
update plot-energy to plot energy bias and resolution for stopping muons
To select stopping muons we simply look for the muons before a Michel event. The muon distance is calculated by first projecting the muon fit back to the PSUP along the fitted direction and then taking the distance between this point and the fitted position of the Michel event. I then calculate the expected kinetic energy of the muon by using the muon lookup tables of the CSDA range to convert the distance to an energy. I also changed a few other things like changing as_index=False -> group_keys=False when grouping events. The reason for this is just that if we do this we don't have to reset the index and drop the new index after calling apply(). I also fixed a small bug I had introduced recently where I selected only prompt events before finding the michel events and atmospheric events. This commit updates the plot-energy script to plot the energy bias and resolution for stopping muons by computing the expected kinetic energy from the muon's CSDA range.
-rwxr-xr-xutils/plot-energy263
1 files changed, 234 insertions, 29 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 6044adb..8539040 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -18,6 +18,7 @@ from __future__ import print_function, division
import numpy as np
from scipy.stats import iqr, poisson
from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
PSUP_RADIUS = 840.0
@@ -193,27 +194,29 @@ def michel_cut(ev):
michel = michel[michel.nhit >= 100]
- # Accept events which had a muon more than 800 nanoseconds but less
- # than 20 microseconds before them. The 800 nanoseconds cut comes from
- # Richie's thesis. He also mentions that the In Time Channel Spread Cut
- # is very effective at cutting electrons events caused by muons, so I
- # should implement that.
+ # Accept events which had a muon more than 800 nanoseconds but less than 20
+ # microseconds before them. The 800 nanoseconds cut comes from Richie's
+ # thesis. He also mentions that the In Time Channel Spread Cut is very
+ # effective at cutting electron events caused by muons, so I should
+ # implement that.
#
# Note: We currently don't look across run boundaries. This should be a
# *very* small effect, and the logic to do so would be very complicated
# since I would have to deal with 50 MHz clock rollovers, etc.
- #
- # Do a simple python for loop here over the runs since we create
- # temporary arrays with shape (michel.size,prompt_plus_muons.size)
- # which could be too big for the full dataset.
- #
- # There might be some clever way to do this without the loop in Pandas,
- # but I don't know how.
if prompt_plus_muons.size and michel.size:
- return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
- (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+ mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
+ (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
+ michel = michel.iloc[np.any(mask,axis=0)]
+ michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
+ index=michel.index.values,
+ dtype=np.int32)
+ return michel
else:
- return pd.DataFrame(columns=ev.columns)
+ # Return an empty slice since we need it to have the same datatype as
+ # the other dataframes
+ michel = ev[:0]
+ michel['muon_gtid'] = -1
+ return michel
def atmospheric_events(ev):
"""
@@ -330,6 +333,174 @@ def plot_corner_plot(ev, title, save=None):
plt.suptitle(title)
def intersect_sphere(pos, dir, R):
    """
    Compute the first intersection of a ray starting at `pos` with direction
    `dir` and a sphere centered at the origin with radius `R`. The distance to
    the intersection is returned, or None if the ray doesn't hit the sphere.

    Note: `dir` is assumed to be a unit vector, so the returned distance is in
    the same units as `pos` and `R`.

    Example:

        pos = np.array([0,0,0])
        dir = np.array([1,0,0])

        l = intersect_sphere(pos,dir,PSUP_RADIUS)
        if l is not None:
            hit = pos + l*dir
            print("ray intersects sphere at %.2f %.2f %.2f" % (hit[0], hit[1], hit[2]))
        else:
            print("ray didn't intersect sphere")
    """

    # Coefficients of the quadratic l**2 + b*l + c = 0 for the intersection
    # distance l (the l**2 coefficient is 1 since `dir` is a unit vector).
    b = 2*np.dot(dir,pos)
    c = np.dot(pos,pos) - R*R

    disc = b*b - 4*c

    if disc <= 0:
        # Ray doesn't intersect the sphere. Tangent rays (disc == 0) are
        # treated as misses.
        return None

    # First, check the shorter solution.
    l = (-b - np.sqrt(disc))/2

    # If the shorter solution is less than 0, check the second solution.
    if l < 0:
        l = (-b + np.sqrt(disc))/2

    # If the distance is still negative, we didn't intersect the sphere.
    if l < 0:
        return None

    return l
+
def get_dx(row):
    """
    Return the muon path length for the event in `row`: the fitted vertex is
    projected backwards along the fitted direction onto the PSUP sphere, and
    the distance from that entry point to the fitted Michel position is
    returned. Returns 0 if the backwards projection never reaches the PSUP.
    """
    vertex = np.array([row.x, row.y, row.z])
    sin_theta = np.sin(row.theta1)
    direction = np.array([sin_theta*np.cos(row.phi1),
                          sin_theta*np.sin(row.phi1),
                          np.cos(row.theta1)])
    distance = intersect_sphere(vertex, -direction, PSUP_RADIUS)
    if distance is None:
        return 0
    entry_point = vertex - direction*distance
    michel_pos = np.array([row.x_michel, row.y_michel, row.z_michel])
    return np.linalg.norm(michel_pos - entry_point)
+
def dx_to_energy(dx):
    """
    Convert a muon path length `dx` to a kinetic energy by interpolating the
    muon CSDA range table for water.

    NOTE(review): assumes column 8 of the table is the CSDA range in the same
    units as `dx` and column 0 is the kinetic energy -- confirm against the
    table header in ../src/muE_water_liquid.txt.
    """
    # Skip the 10 header lines plus the two annotation rows, which would
    # otherwise break the numeric parse.
    with open("../src/muE_water_liquid.txt") as f:
        rows = [line for i, line in enumerate(f)
                if i >= 10
                and 'Minimum ionization' not in line
                and 'Muon critical energy' not in line]
    table = np.genfromtxt(rows)
    return np.interp(dx, table[:,8], table[:,0])
+
def iqr_std_err(x):
    """
    Returns the approximate standard error on the standard deviation of the
    non NaN values in the Series `x`, where the standard deviation is
    estimated from the interquartile range by assuming the central part of
    the distribution is gaussian. Returns NaN if `x` is empty.
    """
    values = x.dropna()
    if len(values) == 0:
        return np.nan
    # IQR = 1.3489795*sigma for a gaussian.
    sigma = iqr(values.values)/1.3489795
    # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
    return 1.573*sigma/np.sqrt(len(values))
+
def iqr_std(x):
    """
    Returns the approximate standard deviation of the non NaN values in the
    Series `x`, estimated from the interquartile range by assuming the
    central part of the distribution is gaussian (IQR = 1.3489795*sigma).
    Returns NaN if `x` is empty.
    """
    values = x.dropna()
    if len(values) == 0:
        return np.nan
    return iqr(values.values)/1.3489795
+
def quantile_error(x,q):
    """
    Returns the standard error for the qth quantile of `x`. The error is
    computed using the Maritz-Jarrett method described here:
    https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm.
    """
    sorted_x = np.sort(x)
    n = len(sorted_x)
    m = int(q*n + 0.5)
    # Beta distribution parameters prescribed by the Maritz-Jarrett method.
    a = m - 1
    b = n - m
    # Weight for each order statistic is the beta probability mass between
    # consecutive grid points (i-1)/n and i/n.
    edges = np.arange(n + 1)/n
    w = beta.cdf(edges[1:], a, b) - beta.cdf(edges[:-1], a, b)
    c1 = np.sum(w*sorted_x)
    c2 = np.sum(w*sorted_x**2)
    return np.sqrt(c2 - c1**2)
+
def q90_err(x):
    """
    Returns the Maritz-Jarrett standard error on the 90th percentile of all
    the non NaN values in the Series `x`, or NaN if `x` is empty.
    """
    values = x.dropna()
    if len(values) == 0:
        return np.nan
    return quantile_error(values.values, 0.9)
+
def q90(x):
    """
    Returns the 90th percentile of all the non NaN values in the Series `x`,
    or NaN if `x` is empty after dropping NaNs.
    """
    values = x.dropna()
    if len(values) == 0:
        return np.nan
    return np.percentile(values.values, 90.0)
+
def median(x):
    """
    Returns the median of all the non NaN values in the Series `x`, or NaN if
    `x` is empty after dropping NaNs.
    """
    values = x.dropna()
    if len(values) == 0:
        return np.nan
    return np.median(values.values)
+
def median_err(x):
    """
    Returns the approximate error on the median of all the non NaN values in
    the Series `x`, or NaN if `x` is empty.

    The standard deviation is first estimated from the interquartile range
    (assuming the central part of the distribution is gaussian), and the
    error on the median for a gaussian is then 1/(2*sqrt(n)*f(median)) where
    f is the gaussian density. See
    https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
    """
    values = x.dropna()
    n = len(values)
    if n == 0:
        return np.nan
    # IQR = 1.3489795*sigma for a gaussian.
    sigma = iqr(values.values)/1.3489795
    med = np.median(values.values)
    return 1/(2*np.sqrt(n)*norm.pdf(med, med, sigma))
+
def std_err(x):
    """
    Returns the approximate error on the standard deviation of all the non
    NaN values in the Series `x`. Returns NaN if `x` is empty and 0.0 if it
    has a single element.

    The error is computed from the fourth central moment u4 as:

        sqrt((u4 - (n-3)*std**4/(n-1))/n)/(2*std)
    """
    x = x.dropna()
    n = len(x)
    # Check the length *before* computing any moments so we don't trigger
    # numpy warnings by taking the mean of an empty array.
    if n == 0:
        return np.nan
    elif n == 1:
        return 0.0
    mean = np.mean(x)
    std = np.std(x)
    if std == 0:
        # All values identical: the error expression below would divide by
        # zero (emitting a runtime warning) and produce NaN anyway, so return
        # NaN explicitly.
        return np.nan
    u4 = np.mean((x-mean)**4)
    return np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
+
if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
@@ -384,7 +555,7 @@ if __name__ == '__main__':
(np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
# unwrap the 50 MHz clock within each run
- ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
+ ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
for run, ev_run in ev.groupby('run'):
# Warn about GTID jumps since we could be missing a potential flasher
@@ -395,7 +566,7 @@ if __name__ == '__main__':
# calculate the time difference between each event and the previous event
# so we can tag retrigger events
- ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
+ ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
# This is a bit of a hack. It appears that many times the fit will
# actually do much better by including a very low energy electron or
@@ -462,22 +633,22 @@ if __name__ == '__main__':
# the *second* event after the breakdown didn't get tagged correctly. If we
# apply the data cleaning cuts first and then tag prompt events then this
# event will get tagged as a prompt event.
- ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(prompt_event)
print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
# flasher follower cut
- ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
# breakdown follower cut
- ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
# retrigger cut
- ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True)
-
- ev = ev[ev.prompt]
+ ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
if args.dc:
+ ev = ev[ev.prompt]
+
ev.set_index(['run','gtid'])
ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
@@ -549,18 +720,20 @@ if __name__ == '__main__':
# 2. Nhit >= 100
# 3. It must be > 800 ns and less than 20 microseconds from a prompt event
# or a muon
- michel = ev.groupby('run',as_index=False).apply(michel_cut).reset_index(level=0,drop=True)
+ michel = ev.groupby('run',group_keys=False).apply(michel_cut)
+
+ print("number of michel events = %i" % len(michel))
# Tag atmospheric events.
#
# Note: We don't cut atmospheric events or muons yet because we still need
# all the events in order to apply the muon follower cut.
- ev = ev.groupby('run',as_index=False).apply(atmospheric_events).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))
# remove events 200 microseconds after a muon
- ev = ev.groupby('run',as_index=False).apply(muon_follower_cut).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
# Get rid of muon events in our main event sample
ev = ev[(ev.dc & DC_MUON) == 0]
@@ -636,8 +809,8 @@ if __name__ == '__main__':
muons = muons[muons.id == 22]
# require r < 6 meters
- prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS]
- atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS]
+ prompt = prompt[prompt.r_psup < 0.9]
+ atm = atm[atm.r_psup < 0.9]
print("number of events after radius cut = %i" % len(prompt))
@@ -667,8 +840,40 @@ if __name__ == '__main__':
# fit
michel = michel[michel.id == 20]
+ stopping_muons = pd.merge(muons,michel,left_on=['run','gtid'],right_on=['run','muon_gtid'],suffixes=('','_michel'))
+
+ # project muon to PSUP
+ stopping_muons['dx'] = stopping_muons.apply(get_dx,axis=1)
+ # energy based on distance travelled
+ stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx)
+ stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx']
+
+ plt.figure()
+ plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step')
+ plt.xlabel("Fractional energy difference (\%)")
+ plt.title("Fractional energy difference for Stopping Muons")
+
+    # Bin edges from 50 MeV up to 10 GeV in 1 GeV steps
+ bins = np.arange(50,10000,1000)
+
+ pd_bins = pd.cut(stopping_muons['energy1'],bins)
+
+ T = (bins[1:] + bins[:-1])/2
+
+ dT = stopping_muons.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+
+ plt.figure()
+ plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel(r"Energy bias (\%)")
+
+ plt.figure()
+ plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel(r"Energy resolution (\%)")
+
plt.figure()
- plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter")
+ plt.hist(michel.ke.values, bins=np.linspace(0,100,100), histtype='step', label="My fitter")
if michel.size:
plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP")
plt.xlabel("Energy (MeV)")