diff options
| -rwxr-xr-x | utils/plot-energy | 263 | 
1 files changed, 234 insertions, 29 deletions
| diff --git a/utils/plot-energy b/utils/plot-energy index 6044adb..8539040 100755 --- a/utils/plot-energy +++ b/utils/plot-energy @@ -18,6 +18,7 @@ from __future__ import print_function, division  import numpy as np  from scipy.stats import iqr, poisson  from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta  PSUP_RADIUS = 840.0 @@ -193,27 +194,29 @@ def michel_cut(ev):      michel = michel[michel.nhit >= 100] -    # Accept events which had a muon more than 800 nanoseconds but less -    # than 20 microseconds before them. The 800 nanoseconds cut comes from -    # Richie's thesis. He also mentions that the In Time Channel Spread Cut -    # is very effective at cutting electrons events caused by muons, so I -    # should implement that. +    # Accept events which had a muon more than 800 nanoseconds but less than 20 +    # microseconds before them. The 800 nanoseconds cut comes from Richie's +    # thesis. He also mentions that the In Time Channel Spread Cut is very +    # effective at cutting electron events caused by muons, so I should +    # implement that.      #      # Note: We currently don't look across run boundaries. This should be a      # *very* small effect, and the logic to do so would be very complicated      # since I would have to deal with 50 MHz clock rollovers, etc. -    # -    # Do a simple python for loop here over the runs since we create -    # temporary arrays with shape (michel.size,prompt_plus_muons.size) -    # which could be too big for the full dataset. -    # -    # There might be some clever way to do this without the loop in Pandas, -    # but I don't know how.      if prompt_plus_muons.size and michel.size: -        return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ -                             (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] +        mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ +               (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3) +        michel = michel.iloc[np.any(mask,axis=0)] +        michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values, +                                        index=michel.index.values, +                                        dtype=np.int32) +        return michel      else: -        return pd.DataFrame(columns=ev.columns) +        # Return an empty slice since we need it to have the same datatype as +        # the other dataframes +        michel = ev[:0] +        michel['muon_gtid'] = -1 +        return michel  def atmospheric_events(ev):      """ @@ -330,6 +333,174 @@ def plot_corner_plot(ev, title, save=None):      plt.suptitle(title) +def intersect_sphere(pos, dir, R): +    """ +    Compute the first intersection of a ray starting at `pos` with direction +    `dir` and a sphere centered at the origin with radius `R`. The distance to +    the intersection is returned. +     +    Example: +     +        pos = np.array([0,0,0]) +        dir = np.array([1,0,0]) +     +        l = intersect_sphere(pos,dir,PSUP_RADIUS): +        if l is not None: +            hit = pos + l*dir +            print("ray intersects sphere at %.2f %.2f %.2f", hit[0], hit[1], hit[2]) +        else: +            print("ray didn't intersect sphere") +    """ + +    b = 2*np.dot(dir,pos) +    c = np.dot(pos,pos) - R*R + +    if b*b - 4*c <= 0: +        # Ray doesn't intersect the sphere. +        return None + +    # First, check the shorter solution. +    l = (-b - np.sqrt(b*b - 4*c))/2 + +    # If the shorter solution is less than 0, check the second solution. +    if l < 0: +        l = (-b + np.sqrt(b*b - 4*c))/2 + +    # If the distance is still negative, we didn't intersect the sphere. +    if l < 0: +        return None + +    return l + +def get_dx(row): +    pos = np.array([row.x,row.y,row.z]) +    dir = np.array([np.sin(row.theta1)*np.cos(row.phi1), +                    np.sin(row.theta1)*np.sin(row.phi1), +                    np.cos(row.theta1)]) +    l = intersect_sphere(pos,-dir,PSUP_RADIUS) +    if l is not None: +        pos -= dir*l +        michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel]) +        return np.linalg.norm(michel_pos-pos) +    else: +        return 0 + +def dx_to_energy(dx): +    lines = [] +    with open("../src/muE_water_liquid.txt") as f: +        for i, line in enumerate(f): +            if i < 10: +                continue +            if 'Minimum ionization' in line: +                continue +            if 'Muon critical energy' in line: +                continue +            lines.append(line) +    data = np.genfromtxt(lines) +    return np.interp(dx,data[:,8],data[:,0]) + +def iqr_std_err(x): +    """ +    Returns the approximate standard deviation assuming the central part of the +    distribution is gaussian. +    """ +    x = x.dropna() +    n = len(x) +    if n == 0: +        return np.nan +    # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range +    std = iqr(x.values)/1.3489795 +    return 1.573*std/np.sqrt(n) + +def iqr_std(x): +    """ +    Returns the approximate standard deviation assuming the central part of the +    distribution is gaussian. +    """ +    x = x.dropna() +    n = len(x) +    if n == 0: +        return np.nan +    return iqr(x.values)/1.3489795 + +def quantile_error(x,q): +    """ +    Returns the standard error for the qth quantile of `x`. The error is +    computed using the Maritz-Jarrett method described here: +    https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. +    """ +    x = np.sort(x) +    n = len(x) +    m = int(q*n+0.5) +    A = m - 1 +    B = n - m +    i = np.arange(1,len(x)+1) +    w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) +    return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) + +def q90_err(x): +    """ +    Returns the error on the 90th percentile for all the non NaN values in a +    Series `x`. +    """ +    x = x.dropna() +    n = len(x) +    if n == 0: +        return np.nan +    return quantile_error(x.values,0.9) + +def q90(x): +    """ +    Returns the 90th percentile for all the non NaN values in a Series `x`. +    """ +    x = x.dropna() +    n = len(x) +    if n == 0: +        return np.nan +    return np.percentile(x.values,90.0) + +def median(x): +    """ +    Returns the median for all the non NaN values in a Series `x`. +    """ +    x = x.dropna() +    n = len(x) +    if n == 0: +        return np.nan +    return np.median(x.values) + +def median_err(x): +    """ +    Returns the approximate error on the median for all the non NaN values in a +    Series `x`. The error on the median is approximated assuming the central +    part of the distribution is gaussian. +    """ +    x = x.dropna() +    n = len(x) +    if n == 0: +        return np.nan +    # First we estimate the standard deviation using the interquartile range. +    # Here we are essentially assuming the central part of the distribution is +    # gaussian. +    std = iqr(x.values)/1.3489795 +    median = np.median(x.values) +    # Now we estimate the error on the median for a gaussian +    # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. +    return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) + +def std_err(x): +    x = x.dropna() +    mean = np.mean(x) +    std = np.std(x) +    n = len(x) +    if n == 0: +        return np.nan +    elif n == 1: +        return 0.0 +    u4 = np.mean((x-mean)**4) +    error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) +    return error +  if __name__ == '__main__':      import argparse      import matplotlib.pyplot as plt @@ -384,7 +555,7 @@ if __name__ == '__main__':                  (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))      # unwrap the 50 MHz clock within each run -    ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock) +    ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)      for run, ev_run in ev.groupby('run'):          # Warn about GTID jumps since we could be missing a potential flasher @@ -395,7 +566,7 @@ if __name__ == '__main__':      # calculate the time difference between each event and the previous event      # so we can tag retrigger events -    ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values)))) +    ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))      # This is a bit of a hack. It appears that many times the fit will      # actually do much better by including a very low energy electron or @@ -462,22 +633,22 @@ if __name__ == '__main__':      # the *second* event after the breakdown didn't get tagged correctly. If we      # apply the data cleaning cuts first and then tag prompt events then this      # event will get tagged as a prompt event. -    ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True) +    ev = ev.groupby('run',group_keys=False).apply(prompt_event)      print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))      # flasher follower cut -    ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True) +    ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)      # breakdown follower cut -    ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True) +    ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)      # retrigger cut -    ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True) - -    ev = ev[ev.prompt] +    ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)      if args.dc: +        ev = ev[ev.prompt] +          ev.set_index(['run','gtid'])          ev = pd.merge(fits,ev,how='inner',on=['run','gtid']) @@ -549,18 +720,20 @@ if __name__ == '__main__':      # 2. Nhit >= 100      # 3. It must be > 800 ns and less than 20 microseconds from a prompt event      #    or a muon -    michel = ev.groupby('run',as_index=False).apply(michel_cut).reset_index(level=0,drop=True) +    michel = ev.groupby('run',group_keys=False).apply(michel_cut) + +    print("number of michel events = %i" % len(michel))      # Tag atmospheric events.      #      # Note: We don't cut atmospheric events or muons yet because we still need      # all the events in order to apply the muon follower cut. -    ev = ev.groupby('run',as_index=False).apply(atmospheric_events).reset_index(level=0,drop=True) +    ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)      print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))      # remove events 200 microseconds after a muon -    ev = ev.groupby('run',as_index=False).apply(muon_follower_cut).reset_index(level=0,drop=True) +    ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)      # Get rid of muon events in our main event sample      ev = ev[(ev.dc & DC_MUON) == 0] @@ -636,8 +809,8 @@ if __name__ == '__main__':      muons = muons[muons.id == 22]      # require r < 6 meters -    prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS] -    atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS] +    prompt = prompt[prompt.r_psup < 0.9] +    atm = atm[atm.r_psup < 0.9]      print("number of events after radius cut = %i" % len(prompt)) @@ -667,8 +840,40 @@ if __name__ == '__main__':      # fit      michel = michel[michel.id == 20] +    stopping_muons = pd.merge(muons,michel,left_on=['run','gtid'],right_on=['run','muon_gtid'],suffixes=('','_michel')) + +    # project muon to PSUP +    stopping_muons['dx'] = stopping_muons.apply(get_dx,axis=1) +    # energy based on distance travelled +    stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx) +    stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx'] + +    plt.figure() +    plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step') +    plt.xlabel("Fractional energy difference (\%)") +    plt.title("Fractional energy difference for Stopping Muons") + +    # 100 bins between 50 MeV and 10 GeV +    bins = np.arange(50,10000,1000) + +    pd_bins = pd.cut(stopping_muons['energy1'],bins) + +    T = (bins[1:] + bins[:-1])/2 + +    dT = stopping_muons.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err]) + +    plt.figure() +    plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T) +    plt.xlabel("Kinetic Energy (MeV)") +    plt.ylabel(r"Energy bias (\%)") + +    plt.figure() +    plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T) +    plt.xlabel("Kinetic Energy (MeV)") +    plt.ylabel(r"Energy resolution (\%)") +      plt.figure() -    plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter") +    plt.hist(michel.ke.values, bins=np.linspace(0,100,100), histtype='step', label="My fitter")      if michel.size:          plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP")      plt.xlabel("Energy (MeV)") | 
