aboutsummaryrefslogtreecommitdiff
path: root/utils/plot-energy
diff options
context:
space:
mode:
Diffstat (limited to 'utils/plot-energy')
-rwxr-xr-xutils/plot-energy263
1 files changed, 234 insertions, 29 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 6044adb..8539040 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -18,6 +18,7 @@ from __future__ import print_function, division
import numpy as np
from scipy.stats import iqr, poisson
from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
PSUP_RADIUS = 840.0
@@ -193,27 +194,29 @@ def michel_cut(ev):
michel = michel[michel.nhit >= 100]
- # Accept events which had a muon more than 800 nanoseconds but less
- # than 20 microseconds before them. The 800 nanoseconds cut comes from
- # Richie's thesis. He also mentions that the In Time Channel Spread Cut
- # is very effective at cutting electrons events caused by muons, so I
- # should implement that.
+ # Accept events which had a muon more than 800 nanoseconds but less than 20
+ # microseconds before them. The 800 nanoseconds cut comes from Richie's
+ # thesis. He also mentions that the In Time Channel Spread Cut is very
+ # effective at cutting electron events caused by muons, so I should
+ # implement that.
#
# Note: We currently don't look across run boundaries. This should be a
# *very* small effect, and the logic to do so would be very complicated
# since I would have to deal with 50 MHz clock rollovers, etc.
- #
- # Do a simple python for loop here over the runs since we create
- # temporary arrays with shape (michel.size,prompt_plus_muons.size)
- # which could be too big for the full dataset.
- #
- # There might be some clever way to do this without the loop in Pandas,
- # but I don't know how.
if prompt_plus_muons.size and michel.size:
- return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
- (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+ mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
+ (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
+ michel = michel.iloc[np.any(mask,axis=0)]
+ michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
+ index=michel.index.values,
+ dtype=np.int32)
+ return michel
else:
- return pd.DataFrame(columns=ev.columns)
+ # Return an empty slice since we need it to have the same datatype as
+ # the other dataframes
+ michel = ev[:0]
+ michel['muon_gtid'] = -1
+ return michel
def atmospheric_events(ev):
"""
@@ -330,6 +333,174 @@ def plot_corner_plot(ev, title, save=None):
plt.suptitle(title)
+def intersect_sphere(pos, dir, R):
+ """
+ Compute the first intersection of a ray starting at `pos` with direction
+ `dir` and a sphere centered at the origin with radius `R`. The distance to
+ the intersection is returned.
+
+ Example:
+
+ pos = np.array([0,0,0])
+ dir = np.array([1,0,0])
+
+ l = intersect_sphere(pos,dir,PSUP_RADIUS):
+ if l is not None:
+ hit = pos + l*dir
+ print("ray intersects sphere at %.2f %.2f %.2f", hit[0], hit[1], hit[2])
+ else:
+ print("ray didn't intersect sphere")
+ """
+
+ b = 2*np.dot(dir,pos)
+ c = np.dot(pos,pos) - R*R
+
+ if b*b - 4*c <= 0:
+ # Ray doesn't intersect the sphere.
+ return None
+
+ # First, check the shorter solution.
+ l = (-b - np.sqrt(b*b - 4*c))/2
+
+ # If the shorter solution is less than 0, check the second solution.
+ if l < 0:
+ l = (-b + np.sqrt(b*b - 4*c))/2
+
+ # If the distance is still negative, we didn't intersect the sphere.
+ if l < 0:
+ return None
+
+ return l
+
+def get_dx(row):
+ pos = np.array([row.x,row.y,row.z])
+ dir = np.array([np.sin(row.theta1)*np.cos(row.phi1),
+ np.sin(row.theta1)*np.sin(row.phi1),
+ np.cos(row.theta1)])
+ l = intersect_sphere(pos,-dir,PSUP_RADIUS)
+ if l is not None:
+ pos -= dir*l
+ michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel])
+ return np.linalg.norm(michel_pos-pos)
+ else:
+ return 0
+
+def dx_to_energy(dx):
+ lines = []
+ with open("../src/muE_water_liquid.txt") as f:
+ for i, line in enumerate(f):
+ if i < 10:
+ continue
+ if 'Minimum ionization' in line:
+ continue
+ if 'Muon critical energy' in line:
+ continue
+ lines.append(line)
+ data = np.genfromtxt(lines)
+ return np.interp(dx,data[:,8],data[:,0])
+
+def iqr_std_err(x):
+ """
+ Returns the approximate standard deviation assuming the central part of the
+ distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
+ std = iqr(x.values)/1.3489795
+ return 1.573*std/np.sqrt(n)
+
+def iqr_std(x):
+ """
+ Returns the approximate standard deviation assuming the central part of the
+ distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return iqr(x.values)/1.3489795
+
+def quantile_error(x,q):
+ """
+ Returns the standard error for the qth quantile of `x`. The error is
+ computed using the Maritz-Jarrett method described here:
+ https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm.
+ """
+ x = np.sort(x)
+ n = len(x)
+ m = int(q*n+0.5)
+ A = m - 1
+ B = n - m
+ i = np.arange(1,len(x)+1)
+ w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B)
+ return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2)
+
+def q90_err(x):
+ """
+ Returns the error on the 90th percentile for all the non NaN values in a
+ Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return quantile_error(x.values,0.9)
+
+def q90(x):
+ """
+ Returns the 90th percentile for all the non NaN values in a Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return np.percentile(x.values,90.0)
+
+def median(x):
+ """
+ Returns the median for all the non NaN values in a Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return np.median(x.values)
+
+def median_err(x):
+ """
+ Returns the approximate error on the median for all the non NaN values in a
+ Series `x`. The error on the median is approximated assuming the central
+ part of the distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ # First we estimate the standard deviation using the interquartile range.
+ # Here we are essentially assuming the central part of the distribution is
+ # gaussian.
+ std = iqr(x.values)/1.3489795
+ median = np.median(x.values)
+ # Now we estimate the error on the median for a gaussian
+ # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
+ return 1/(2*np.sqrt(n)*norm.pdf(median,median,std))
+
+def std_err(x):
+ x = x.dropna()
+ mean = np.mean(x)
+ std = np.std(x)
+ n = len(x)
+ if n == 0:
+ return np.nan
+ elif n == 1:
+ return 0.0
+ u4 = np.mean((x-mean)**4)
+ error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
+ return error
+
if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
@@ -384,7 +555,7 @@ if __name__ == '__main__':
(np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
# unwrap the 50 MHz clock within each run
- ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
+ ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
for run, ev_run in ev.groupby('run'):
# Warn about GTID jumps since we could be missing a potential flasher
@@ -395,7 +566,7 @@ if __name__ == '__main__':
# calculate the time difference between each event and the previous event
# so we can tag retrigger events
- ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
+ ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
# This is a bit of a hack. It appears that many times the fit will
# actually do much better by including a very low energy electron or
@@ -462,22 +633,22 @@ if __name__ == '__main__':
# the *second* event after the breakdown didn't get tagged correctly. If we
# apply the data cleaning cuts first and then tag prompt events then this
# event will get tagged as a prompt event.
- ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(prompt_event)
print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
# flasher follower cut
- ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
# breakdown follower cut
- ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
# retrigger cut
- ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True)
-
- ev = ev[ev.prompt]
+ ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
if args.dc:
+ ev = ev[ev.prompt]
+
ev.set_index(['run','gtid'])
ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
@@ -549,18 +720,20 @@ if __name__ == '__main__':
# 2. Nhit >= 100
# 3. It must be > 800 ns and less than 20 microseconds from a prompt event
# or a muon
- michel = ev.groupby('run',as_index=False).apply(michel_cut).reset_index(level=0,drop=True)
+ michel = ev.groupby('run',group_keys=False).apply(michel_cut)
+
+ print("number of michel events = %i" % len(michel))
# Tag atmospheric events.
#
# Note: We don't cut atmospheric events or muons yet because we still need
# all the events in order to apply the muon follower cut.
- ev = ev.groupby('run',as_index=False).apply(atmospheric_events).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))
# remove events 200 microseconds after a muon
- ev = ev.groupby('run',as_index=False).apply(muon_follower_cut).reset_index(level=0,drop=True)
+ ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
# Get rid of muon events in our main event sample
ev = ev[(ev.dc & DC_MUON) == 0]
@@ -636,8 +809,8 @@ if __name__ == '__main__':
muons = muons[muons.id == 22]
# require r < 6 meters
- prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS]
- atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS]
+ prompt = prompt[prompt.r_psup < 0.9]
+ atm = atm[atm.r_psup < 0.9]
print("number of events after radius cut = %i" % len(prompt))
@@ -667,8 +840,40 @@ if __name__ == '__main__':
# fit
michel = michel[michel.id == 20]
+ stopping_muons = pd.merge(muons,michel,left_on=['run','gtid'],right_on=['run','muon_gtid'],suffixes=('','_michel'))
+
+ # project muon to PSUP
+ stopping_muons['dx'] = stopping_muons.apply(get_dx,axis=1)
+ # energy based on distance travelled
+ stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx)
+ stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx']
+
+ plt.figure()
+ plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step')
+ plt.xlabel("Fractional energy difference (\%)")
+ plt.title("Fractional energy difference for Stopping Muons")
+
+ # 100 bins between 50 MeV and 10 GeV
+ bins = np.arange(50,10000,1000)
+
+ pd_bins = pd.cut(stopping_muons['energy1'],bins)
+
+ T = (bins[1:] + bins[:-1])/2
+
+ dT = stopping_muons.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+
+ plt.figure()
+ plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel(r"Energy bias (\%)")
+
+ plt.figure()
+ plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel(r"Energy resolution (\%)")
+
plt.figure()
- plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter")
+ plt.hist(michel.ke.values, bins=np.linspace(0,100,100), histtype='step', label="My fitter")
if michel.size:
plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP")
plt.xlabel("Energy (MeV)")