diff options
Diffstat (limited to 'utils/plot-energy')
-rwxr-xr-x | utils/plot-energy | 157 |
1 files changed, 143 insertions, 14 deletions
diff --git a/utils/plot-energy b/utils/plot-energy index 66691e0..6044adb 100755 --- a/utils/plot-energy +++ b/utils/plot-energy @@ -15,15 +15,12 @@ # this program. If not, see <https://www.gnu.org/licenses/>. from __future__ import print_function, division -import yaml -try: - from yaml import CLoader as Loader -except ImportError: - from yaml.loader import SafeLoader as Loader import numpy as np -from scipy.stats import iqr +from scipy.stats import iqr, poisson from matplotlib.lines import Line2D +PSUP_RADIUS = 840.0 + # from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python class bcolors: HEADER = '\033[95m' @@ -41,6 +38,11 @@ class bcolors: import matplotlib matplotlib.use("Qt5Agg") +font = {'family':'serif', 'serif': ['computer modern roman']} +matplotlib.rc('font',**font) + +matplotlib.rc('text', usetex=True) + SNOMAN_MASS = { 20: 0.511, 21: 0.511, @@ -64,7 +66,7 @@ DC_FTS = 0x200 DC_ITC = 0x400 DC_BREAKDOWN = 0x800 -def plot_hist(df, title=None): +def plot_hist(df, title=None, muons=False): for id, df_id in sorted(df.groupby('id')): if id == 20: plt.subplot(3,4,1) @@ -85,8 +87,12 @@ def plot_hist(df, title=None): elif id == 222222: plt.subplot(3,4,12) - plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') - plt.xlabel("Energy (MeV)") + if muons: + plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step') + plt.xlabel("log10(Energy (GeV))") + else: + plt.hist(df_id.ke.values, bins=np.linspace(20,1000e3,100), histtype='step') + plt.xlabel("Energy (MeV)") plt.title(str(id)) if title: @@ -259,12 +265,71 @@ def gtid_sort(ev, first_gtid): if run not in first_gtid: print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) - first_gtid[run] = ev.gtid[0] + first_gtid[run] = ev.gtid.iloc[0] ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 return ev +def prompt_event(ev): + ev['prompt'] = (ev.nhit >= 100) + ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) + return ev + +def plot_corner_plot(ev, title, save=None): + variables = ['r_psup','psi','z','udotr'] + labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$'] + limits = [(0,1),(0,10),(-840,840),(-1,1)] + cuts = [0.9,6,0,-0.5] + + ev = ev.dropna(subset=variables) + + f = plt.figure(figsize=(8,8)) + for i in range(len(variables)): + for j in range(len(variables)): + if j > i: + continue + ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1) + if i == j: + plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step') + plt.gca().set_xlim(limits[i]) + else: + p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/len(ev) + p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/len(ev) + p_lolo = p_i_lo*p_j_lo + p_lohi = p_i_lo*(1-p_j_lo) + p_hilo = (1-p_i_lo)*p_j_lo + p_hihi = (1-p_i_lo)*(1-p_j_lo) + n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j])) + n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j])) + n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] < cuts[j])) + n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j])) + n = len(ev) + observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi]) + expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi]) + psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum() + psi /= np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum()) + plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5) + plt.gca().set_xlim(limits[j]) + plt.gca().set_ylim(limits[i]) + plt.title(r"$\psi = %.1f$" % psi) + if i == len(variables) - 1: + plt.xlabel(labels[j]) + else: + plt.setp(ax.get_xticklabels(),visible=False) + if j == 0: + plt.ylabel(labels[i]) + else: + plt.setp(ax.get_yticklabels(),visible=False) + plt.axvline(cuts[j],color='k',ls='--',alpha=0.5) + if i != j: + plt.axhline(cuts[i],color='k',ls='--',alpha=0.5) + + if save: + plt.savefig(save) + + plt.suptitle(title) + if __name__ == '__main__': import argparse import matplotlib.pyplot as plt @@ -275,6 +340,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") + parser.add_argument("--dc", action='store_true', default=False, help="plot corner plots for backgrounds") args = parser.parse_args() ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True) @@ -354,6 +420,11 @@ if __name__ == '__main__': # different parameters. fits['w'] = fits['n']*np.log(0.1*0.001) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi)) + # Apply a fudge factor to the Ockham factor of 100 for each extra particle + # FIXME: I chose 100 a while ago but didn't really investigate what the + # optimal value was or exactly why it was needed. Should do this. + fits['w'] -= fits['n']*100 + # Note: we index on the left hand site with loc to avoid a copy error # # See https://www.dataquest.io/blog/settingwithcopywarning/ @@ -362,12 +433,19 @@ if __name__ == '__main__': fits['fmin'] = fits['fmin'] - fits['w'] - fits['psi'] /= fits.merge(ev,on=['run','gtid'])['nhit'] - fits['ke'] = fits['energy1'] + # See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge + # for how to properly divide the psi column by nhit_cal which is in the ev + # dataframe before we actually merge + fits['psi'] /= fits.reset_index().merge(ev,on=['run','gtid']).set_index('index')['nhit_cal'] + fits.loc[fits['n'] == 1,'ke'] = fits['energy1'] + fits.loc[fits['n'] == 2,'ke'] = fits['energy1'] + fits['energy2'] + fits.loc[fits['n'] == 3,'ke'] = fits['energy1'] + fits['energy2'] + fits['energy3'] fits['id'] = fits['id1'] fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2'] fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3'] fits['theta'] = fits['theta1'] + fits['r'] = np.sqrt(fits.x**2 + fits.y**2 + fits.z**2) + fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3 print("number of events = %i" % len(ev)) @@ -384,8 +462,7 @@ if __name__ == '__main__': # the *second* event after the breakdown didn't get tagged correctly. If we # apply the data cleaning cuts first and then tag prompt events then this # event will get tagged as a prompt event. - ev['prompt'] = (ev.nhit >= 100) - ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) + ev = ev.groupby('run',as_index=False).apply(prompt_event).reset_index(level=0,drop=True) print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt)) @@ -398,6 +475,58 @@ if __name__ == '__main__': # retrigger cut ev = ev.groupby('run',as_index=False).apply(retrigger_cut).reset_index(level=0,drop=True) + ev = ev[ev.prompt] + + if args.dc: + ev.set_index(['run','gtid']) + + ev = pd.merge(fits,ev,how='inner',on=['run','gtid']) + ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)] + ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0) + ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0) + + ev['cos_theta'] = np.cos(ev['theta1']) + ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \ + np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \ + np.cos(ev_single_particle.theta1)*ev_single_particle.z + ev['udotr'] /= ev.r + + flashers = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN) == DC_FLASHER] + muon = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN | DC_MUON) == DC_MUON] + neck = ev[(ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_NECK)) == DC_NECK] + noise = ev[(ev.dc & (DC_ITC | DC_QVNHIT | DC_JUNK | DC_CRATE_ISOTROPY)) != 0] + breakdown = ev[ev.nhit >= 1000] + breakdown = breakdown[breakdown.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_NECK | DC_ITC) == 0] + breakdown = breakdown[breakdown.dc & (DC_FLASHER | DC_BREAKDOWN) != 0] + signal = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN | DC_MUON) == 0] + + with pd.option_context('display.max_rows', None, 'display.max_columns', None): + print("Noise events") + print(noise[['psi','x','y','z','id1','id2']]) + print("Muons") + print(muon[['psi','r','id1','id2','id3','energy1','energy2','energy3']]) + print("Neck") + print(neck[neck.psi < 6][['psi','r','id1','cos_theta']]) + print("Flashers") + print(flashers[flashers.udotr > 0]) + print("Signal") + print(signal) + + # save as PDF b/c EPS doesn't support alpha values + plot_corner_plot(breakdown,"Breakdowns",save="breakdown_corner_plot.pdf") + plot_corner_plot(muon,"Muons",save='muon_corner_plot.pdf') + plot_corner_plot(flashers,"Flashers",save="flashers_corner_plot.pdf") + plot_corner_plot(neck,"Neck",save="neck_corner_plot.pdf") + plot_corner_plot(noise,"Noise",save="noise_corner_plot.pdf") + plot_corner_plot(signal,"Signal",save="signal_corner_plot.pdf") + + plt.figure() + plot_hist(flashers,"Flashers") + plt.figure() + plot_hist(muon,"Muons",muons=True) + plt.show() + sys.exit(0) + # First, do basic data cleaning which is done for all events. ev = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN) == 0] |