diff options
-rwxr-xr-x | utils/plot-energy | 263 |
1 files changed, 234 insertions, 29 deletions
diff --git a/utils/plot-energy b/utils/plot-energy index 6044adb..8539040 100755 --- a/utils/plot-energy +++ b/utils/plot-energy @@ -18,6 +18,7 @@ from __future__ import print_function, division import numpy as np from scipy.stats import iqr, poisson from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta PSUP_RADIUS = 840.0 @@ -193,27 +194,29 @@ def michel_cut(ev): michel = michel[michel.nhit >= 100] - # Accept events which had a muon more than 800 nanoseconds but less - # than 20 microseconds before them. The 800 nanoseconds cut comes from - # Richie's thesis. He also mentions that the In Time Channel Spread Cut - # is very effective at cutting electrons events caused by muons, so I - # should implement that. + # Accept events which had a muon more than 800 nanoseconds but less than 20 + # microseconds before them. The 800 nanoseconds cut comes from Richie's + # thesis. He also mentions that the In Time Channel Spread Cut is very + # effective at cutting electron events caused by muons, so I should + # implement that. # # Note: We currently don't look across run boundaries. This should be a # *very* small effect, and the logic to do so would be very complicated # since I would have to deal with 50 MHz clock rollovers, etc. - # - # Do a simple python for loop here over the runs since we create - # temporary arrays with shape (michel.size,prompt_plus_muons.size) - # which could be too big for the full dataset. - # - # There might be some clever way to do this without the loop in Pandas, - # but I don't know how. if prompt_plus_muons.size and michel.size: - return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ - (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] + mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ + (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3) + michel = michel.iloc[np.any(mask,axis=0)] + michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values, + index=michel.index.values, + dtype=np.int32) + return michel else: - return pd.DataFrame(columns=ev.columns) + # Return an empty slice since we need it to have the same datatype as + # the other dataframes + michel = ev[:0] + michel['muon_gtid'] = -1 + return michel def atmospheric_events(ev): """ @@ -330,6 +333,174 @@ def plot_corner_plot(ev, title, save=None): plt.suptitle(title) +def intersect_sphere(pos, dir, R): + """ + Compute the first intersection of a ray starting at `pos` with direction + `dir` and a sphere centered at the origin with radius `R`. The distance to + the intersection is returned. + + Example: + + pos = np.array([0,0,0]) + dir = np.array([1,0,0]) + + l = intersect_sphere(pos,dir,PSUP_RADIUS): + if l is not None: + hit = pos + l*dir + print("ray intersects sphere at %.2f %.2f %.2f", hit[0], hit[1], hit[2]) + else: + print("ray didn't intersect sphere") + """ + + b = 2*np.dot(dir,pos) + c = np.dot(pos,pos) - R*R + + if b*b - 4*c <= 0: + # Ray doesn't intersect the sphere. + return None + + # First, check the shorter solution. + l = (-b - np.sqrt(b*b - 4*c))/2 + + # If the shorter solution is less than 0, check the second solution. + if l < 0: + l = (-b + np.sqrt(b*b - 4*c))/2 + + # If the distance is still negative, we didn't intersect the sphere. + if l < 0: + return None + + return l + +def get_dx(row): + pos = np.array([row.x,row.y,row.z]) + dir = np.array([np.sin(row.theta1)*np.cos(row.phi1), + np.sin(row.theta1)*np.sin(row.phi1), + np.cos(row.theta1)]) + l = intersect_sphere(pos,-dir,PSUP_RADIUS) + if l is not None: + pos -= dir*l + michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel]) + return np.linalg.norm(michel_pos-pos) + else: + return 0 + +def dx_to_energy(dx): + lines = [] + with open("../src/muE_water_liquid.txt") as f: + for i, line in enumerate(f): + if i < 10: + continue + if 'Minimum ionization' in line: + continue + if 'Muon critical energy' in line: + continue + lines.append(line) + data = np.genfromtxt(lines) + return np.interp(dx,data[:,8],data[:,0]) + +def iqr_std_err(x): + """ + Returns the approximate standard deviation assuming the central part of the + distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range + std = iqr(x.values)/1.3489795 + return 1.573*std/np.sqrt(n) + +def iqr_std(x): + """ + Returns the approximate standard deviation assuming the central part of the + distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return iqr(x.values)/1.3489795 + +def quantile_error(x,q): + """ + Returns the standard error for the qth quantile of `x`. The error is + computed using the Maritz-Jarrett method described here: + https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. + """ + x = np.sort(x) + n = len(x) + m = int(q*n+0.5) + A = m - 1 + B = n - m + i = np.arange(1,len(x)+1) + w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) + return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) + +def q90_err(x): + """ + Returns the error on the 90th percentile for all the non NaN values in a + Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return quantile_error(x.values,0.9) + +def q90(x): + """ + Returns the 90th percentile for all the non NaN values in a Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return np.percentile(x.values,90.0) + +def median(x): + """ + Returns the median for all the non NaN values in a Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return np.median(x.values) + +def median_err(x): + """ + Returns the approximate error on the median for all the non NaN values in a + Series `x`. The error on the median is approximated assuming the central + part of the distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + # First we estimate the standard deviation using the interquartile range. + # Here we are essentially assuming the central part of the distribution is + # gaussian. + std = iqr(x.values)/1.3489795 + median = np.median(x.values) + # Now we estimate the error on the median for a gaussian + # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. + return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) + +def std_err(x): + x = x.dropna() + mean = np.mean(x) + std = np.std(x) + n = len(x) + if n == 0: + return np.nan + elif n == 1: + return 0.0 + u4 = np.mean((x-mean)**4) + error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) + return error + if __name__ == '__main__': import argparse import matplotlib.pyplot as plt @@ -384,7 +555,7 @@ if __name__ == '__main__': (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run)) # unwrap the 50 MHz clock within each run - ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock) + ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock) for run, ev_run in ev.groupby('run'): # Warn about GTID jumps since we could be missing a potential flasher @@ -395,7 +566,7 @@ if __name__ == '__main__': # calculate the time difference between each event and the previous event # so we can tag retrigger events - ev['dt'] = ev.groupby(['run'],as_index=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values)))) + ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values)))) # This is a bit of a hack. It appears that many times the fit will # actually do much better by including a very low energy electron or @@ -462,22 +633,22 @@ if __name__ == '__main__': # the *second* event after the breakdown didn't get tagged correctly. If we # apply the data cleaning cuts first and then tag prompt events then this # event will get tagged as a prompt event. - ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True) + ev = ev.groupby('run',group_keys=False).apply(prompt_event) print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt)) # flasher follower cut - ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True) + ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut) # breakdown follower cut - ev = ev.groupby('run',as_index=False).apply(breakdown_follower_cut).reset_index(level=0,drop=True) + ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut) # retrigger cut - ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True) - - ev = ev[ev.prompt] + ev = ev.groupby('run',group_keys=False).apply(retrigger_cut) if args.dc: + ev = ev[ev.prompt] + ev.set_index(['run','gtid']) ev = pd.merge(fits,ev,how='inner',on=['run','gtid']) @@ -549,18 +720,20 @@ if __name__ == '__main__': # 2. Nhit >= 100 # 3. It must be > 800 ns and less than 20 microseconds from a prompt event # or a muon - michel = ev.groupby('run',as_index=False).apply(michel_cut).reset_index(level=0,drop=True) + michel = ev.groupby('run',group_keys=False).apply(michel_cut) + + print("number of michel events = %i" % len(michel)) # Tag atmospheric events. # # Note: We don't cut atmospheric events or muons yet because we still need # all the events in order to apply the muon follower cut. - ev = ev.groupby('run',as_index=False).apply(atmospheric_events).reset_index(level=0,drop=True) + ev = ev.groupby('run',group_keys=False).apply(atmospheric_events) print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm))) # remove events 200 microseconds after a muon - ev = ev.groupby('run',as_index=False).apply(muon_follower_cut).reset_index(level=0,drop=True) + ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut) # Get rid of muon events in our main event sample ev = ev[(ev.dc & DC_MUON) == 0] @@ -636,8 +809,8 @@ if __name__ == '__main__': muons = muons[muons.id == 22] # require r < 6 meters - prompt = prompt[np.sqrt(prompt.x.values**2 + prompt.y.values**2 + prompt.z.values**2) < AV_RADIUS] - atm = atm[np.sqrt(atm.x.values**2 + atm.y.values**2 + atm.z.values**2) < AV_RADIUS] + prompt = prompt[prompt.r_psup < 0.9] + atm = atm[atm.r_psup < 0.9] print("number of events after radius cut = %i" % len(prompt)) @@ -667,8 +840,40 @@ if __name__ == '__main__': # fit michel = michel[michel.id == 20] + stopping_muons = pd.merge(muons,michel,left_on=['run','gtid'],right_on=['run','muon_gtid'],suffixes=('','_michel')) + + # project muon to PSUP + stopping_muons['dx'] = stopping_muons.apply(get_dx,axis=1) + # energy based on distance travelled + stopping_muons['T_dx'] = dx_to_energy(stopping_muons.dx) + stopping_muons['dT'] = stopping_muons['energy1'] - stopping_muons['T_dx'] + + plt.figure() + plt.hist((stopping_muons['energy1']-stopping_muons['T_dx'])*100/stopping_muons['T_dx'], bins=np.linspace(-100,100,200), histtype='step') + plt.xlabel("Fractional energy difference (\%)") + plt.title("Fractional energy difference for Stopping Muons") + + # 100 bins between 50 MeV and 10 GeV + bins = np.arange(50,10000,1000) + + pd_bins = pd.cut(stopping_muons['energy1'],bins) + + T = (bins[1:] + bins[:-1])/2 + + dT = stopping_muons.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err]) + + plt.figure() + plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T) + plt.xlabel("Kinetic Energy (MeV)") + plt.ylabel(r"Energy bias (\%)") + + plt.figure() + plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T) + plt.xlabel("Kinetic Energy (MeV)") + plt.ylabel(r"Energy resolution (\%)") + plt.figure() - plt.hist(michel.ke.values, bins=np.linspace(20,100,100), histtype='step', label="My fitter") + plt.hist(michel.ke.values, bins=np.linspace(0,100,100), histtype='step', label="My fitter") if michel.size: plt.hist(michel[~np.isnan(michel.rsp_energy.values)].rsp_energy.values, bins=np.linspace(20,100,100), histtype='step',label="RSP") plt.xlabel("Energy (MeV)") |