#!/usr/bin/env python # Copyright (c) 2019, Anthony Latorre # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . from __future__ import print_function, division import numpy as np from scipy.stats import iqr, norm, beta from matplotlib.lines import Line2D import itertools IDP_E_MINUS = 20 IDP_MU_MINUS = 22 SNOMAN_MASS = { 20: 0.511, 21: 0.511, 22: 105.658, 23: 105.658 } def plot_hist(x, label=None): # determine the bin width using the Freedman Diaconis rule # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule h = 2*iqr(x)/len(x)**(1/3) n = max(int((np.max(x)-np.min(x))/h),10) bins = np.linspace(np.min(x),np.max(x),n) plt.hist(x, bins=bins, histtype='step', label=label) def plot_legend(n): plt.figure(n) ax = plt.gca() handles, labels = ax.get_legend_handles_labels() new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles] plt.legend(handles=new_handles,labels=labels) def get_stats(x): """ Returns a tuple (mean, error mean, std, error std) for the values in x. The formula for the standard error on the standard deviation comes from https://stats.stackexchange.com/questions/156518. """ mean = np.mean(x) std = np.std(x) n = len(x) u4 = np.mean((x-mean)**4) error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) return mean, std/np.sqrt(n), std, error def iqr_std_err(x): """ Returns the approximate standard deviation assuming the central part of the distribution is gaussian. """ x = x.dropna() n = len(x) if n == 0: return np.nan # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range std = iqr(x.values)/1.3489795 return 1.573*std/np.sqrt(n) def iqr_std(x): """ Returns the approximate standard deviation assuming the central part of the distribution is gaussian. """ x = x.dropna() n = len(x) if n == 0: return np.nan return iqr(x.values)/1.3489795 def quantile_error(x,q): """ Returns the standard error for the qth quantile of `x`. The error is computed using the Maritz-Jarrett method described here: https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. """ x = np.sort(x) n = len(x) m = int(q*n+0.5) A = m - 1 B = n - m i = np.arange(1,len(x)+1) w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) def q90_err(x): """ Returns the error on the 90th percentile for all the non NaN values in a Series `x`. """ x = x.dropna() n = len(x) if n == 0: return np.nan return quantile_error(x.values,0.9) def q90(x): """ Returns the 90th percentile for all the non NaN values in a Series `x`. """ x = x.dropna() n = len(x) if n == 0: return np.nan return np.percentile(x.values,90.0) def median(x): """ Returns the median for all the non NaN values in a Series `x`. """ x = x.dropna() n = len(x) if n == 0: return np.nan return np.median(x.values) def median_err(x): """ Returns the approximate error on the median for all the non NaN values in a Series `x`. The error on the median is approximated assuming the central part of the distribution is gaussian. """ x = x.dropna() n = len(x) if n == 0: return np.nan # First we estimate the standard deviation using the interquartile range. # Here we are essentially assuming the central part of the distribution is # gaussian. std = iqr(x.values)/1.3489795 median = np.median(x.values) # Now we estimate the error on the median for a gaussian # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) def std_err(x): x = x.dropna() mean = np.mean(x) std = np.std(x) n = len(x) if n == 0: return np.nan elif n == 1: return 0.0 u4 = np.mean((x-mean)**4) error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) return error # Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py def despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False): """Remove the top and right spines from plot(s). fig : matplotlib figure, optional Figure to despine all axes of, default uses current figure. ax : matplotlib axes, optional Specific axes object to despine. top, right, left, bottom : boolean, optional If True, remove that spine. offset : int or dict, optional Absolute distance, in points, spines should be moved away from the axes (negative values move spines inward). A single value applies to all spines; a dict can be used to set offset values per side. trim : bool, optional If True, limit spines to the smallest and largest major tick on each non-despined axis. Returns ------- None """ # Get references to the axes we want if fig is None and ax is None: axes = plt.gcf().axes elif fig is not None: axes = fig.axes elif ax is not None: axes = [ax] for ax_i in axes: for side in ["top", "right", "left", "bottom"]: # Toggle the spine objects is_visible = not locals()[side] ax_i.spines[side].set_visible(is_visible) if offset is not None and is_visible: try: val = offset.get(side, 0) except AttributeError: val = offset _set_spine_position(ax_i.spines[side], ('outward', val)) # Potentially move the ticks if left and not right: maj_on = any( t.tick1line.get_visible() for t in ax_i.yaxis.majorTicks ) min_on = any( t.tick1line.get_visible() for t in ax_i.yaxis.minorTicks ) ax_i.yaxis.set_ticks_position("right") for t in ax_i.yaxis.majorTicks: t.tick2line.set_visible(maj_on) for t in ax_i.yaxis.minorTicks: t.tick2line.set_visible(min_on) if bottom and not top: maj_on = any( t.tick1line.get_visible() for t in ax_i.xaxis.majorTicks ) min_on = any( t.tick1line.get_visible() for t in ax_i.xaxis.minorTicks ) ax_i.xaxis.set_ticks_position("top") for t in ax_i.xaxis.majorTicks: t.tick2line.set_visible(maj_on) for t in ax_i.xaxis.minorTicks: t.tick2line.set_visible(min_on) if trim: # clip off the parts of the spines that extend past major ticks xticks = ax_i.get_xticks() if xticks.size: firsttick = np.compress(xticks >= min(ax_i.get_xlim()), xticks)[0] lasttick = np.compress(xticks <= max(ax_i.get_xlim()), xticks)[-1] ax_i.spines['bottom'].set_bounds(firsttick, lasttick) ax_i.spines['top'].set_bounds(firsttick, lasttick) newticks = xticks.compress(xticks <= lasttick) newticks = newticks.compress(newticks >= firsttick) ax_i.set_xticks(newticks) yticks = ax_i.get_yticks() if yticks.size: firsttick = np.compress(yticks >= min(ax_i.get_ylim()), yticks)[0] lasttick = np.compress(yticks <= max(ax_i.get_ylim()), yticks)[-1] ax_i.spines['left'].set_bounds(firsttick, lasttick) ax_i.spines['right'].set_bounds(firsttick, lasttick) newticks = yticks.compress(yticks <= lasttick) newticks = newticks.compress(newticks >= firsttick) ax_i.set_yticks(newticks) if __name__ == '__main__': import argparse import numpy as np import h5py import pandas as pd parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") parser.add_argument("--save", action="store_true", default=False, help="save plots") args = parser.parse_args() # Read in all the data. # # Note: We have to add the filename as a column to each dataset since this # script is used to analyze MC data which all has the same run number. ev = pd.concat([pd.read_hdf(filename, "ev").assign(filename=filename) for filename in args.filenames],ignore_index=True) fits = pd.concat([pd.read_hdf(filename, "fits").assign(filename=filename) for filename in args.filenames],ignore_index=True) mcgn = pd.concat([pd.read_hdf(filename, "mcgn").assign(filename=filename) for filename in args.filenames],ignore_index=True) # get rid of 2nd events like Michel electrons ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).nth(0) # Now, we merge all three datasets together to produce a single # dataframe. To do so, we join the ev dataframe with the mcgn frame # on the evn column, and then join with the fits on the run and # gtid columns. # # At the end we will have a single dataframe with one row for each # fit, i.e. it will look like: # # >>> data # run gtid nhit, ... mcgn_x, mcgn_y, mcgn_z, ..., fit_id1, fit_x, fit_y, fit_z, ... # # Before merging, we prefix the primary seed track table with mcgn_ # and the fit table with fit_ just to make things easier. # Prefix track and fit frames mcgn = mcgn.add_prefix("mcgn_") fits = fits.add_prefix("fit_") # merge ev and mcgn on evn data = ev.merge(mcgn,left_on=['filename','evn'],right_on=['mcgn_filename','mcgn_evn']) # merge data and fits on run and gtid data = data.merge(fits,left_on=['filename','run','gtid'],right_on=['fit_filename','fit_run','fit_gtid']) # calculate true kinetic energy mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values] data['T'] = data['mcgn_energy'].values - mass data['dx'] = data['fit_x'].values - data['mcgn_x'].values data['dy'] = data['fit_y'].values - data['mcgn_y'].values data['dz'] = data['fit_z'].values - data['mcgn_z'].values data['dT'] = data['fit_energy1'].values - data['T'].values true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze() dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']), np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']), np.cos(data['fit_theta1']))).squeeze() data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1))) # only select fits which have at least 2 fits data = data.groupby(['filename','run','gtid']).filter(lambda x: len(x) > 1) data_true = data[data['fit_id1'] == data['mcgn_id']] data_e = data[data['fit_id1'] == IDP_E_MINUS] data_mu = data[data['fit_id1'] == IDP_MU_MINUS] data_true = data_true.set_index(['filename','run','gtid']) data_e = data_e.set_index(['filename','run','gtid']) data_mu = data_mu.set_index(['filename','run','gtid']) data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin'] data_true['te'] = data_e['fit_time'] data_true['tm'] = data_mu['fit_time'] data_true['Te'] = data_e['fit_energy1'] # 100 bins between 50 MeV and 1 GeV bins = np.arange(50,1000,100) markers = itertools.cycle(('o', 'v')) if args.save: # default \textwidth for a fullpage article in Latex is 16.50764 cm. # You can figure this out by compiling the following TeX document: # # \documentclass{article} # \usepackage{fullpage} # \usepackage{layouts} # \begin{document} # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth} # \end{document} width = 16.50764 width /= 2.54 # cm -> inches # According to this page: # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf, # Tufte suggests an aspect ratio of 1.5 - 1.6. height = width/1.5 FIGSIZE = (width,height) import matplotlib.pyplot as plt font = {'family':'serif', 'serif': ['computer modern roman']} plt.rc('font',**font) plt.rc('text', usetex=True) else: # on retina screens, the default plots are way too small # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 # Qt5 will scale everything using the dpi in ~/.Xresources import matplotlib matplotlib.use("Qt5Agg") import matplotlib.pyplot as plt # Default figure size. Currently set to my monitor width and height so that # things are properly formatted FIGSIZE = (13.78,7.48) # Make the defalt font bigger plt.rc('font', size=22) fig3, ax3 = plt.subplots(3,1,figsize=FIGSIZE,num=3,sharex=True) fig4, ax4 = plt.subplots(3,1,figsize=FIGSIZE,num=4,sharex=True) for id in [IDP_E_MINUS, IDP_MU_MINUS]: events = data_true[data_true['mcgn_id'] == id] pd_bins = pd.cut(events['T'],bins) dT = events.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err]) dx = events.groupby(pd_bins)['dx'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err]) dy = events.groupby(pd_bins)['dy'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err]) dz = events.groupby(pd_bins)['dz'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err]) theta = events.groupby(pd_bins)['theta'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err,q90,q90_err]) label = 'Muon' if id == IDP_MU_MINUS else 'Electron' T = (bins[1:] + bins[:-1])/2 marker = markers.next() plt.figure(1,figsize=FIGSIZE) plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T,fmt=marker,label=label) plt.figure(2,figsize=FIGSIZE) plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T,fmt=marker,label=label) ax3[0].errorbar(T,dx['median'],yerr=dx['median_err'],fmt=marker,label=label) ax3[1].errorbar(T,dy['median'],yerr=dy['median_err'],fmt=marker,label=label) ax3[2].errorbar(T,dz['median'],yerr=dz['median_err'],fmt=marker,label=label) ax4[0].errorbar(T,dx['iqr_std'],yerr=dx['iqr_std_err'],fmt=marker,label=label) ax4[1].errorbar(T,dy['iqr_std'],yerr=dy['iqr_std_err'],fmt=marker,label=label) ax4[2].errorbar(T,dz['iqr_std'],yerr=dz['iqr_std_err'],fmt=marker,label=label) plt.figure(5,figsize=FIGSIZE) plt.errorbar(T,theta['std'],yerr=theta['std_err'],fmt=marker,label=label) plt.figure(6,figsize=FIGSIZE) plt.scatter(events['Te'],events['ratio'],marker=marker,label=label) fig = plt.figure(1) despine(fig,trim=True) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel(r"Energy bias (\%)") plt.legend() plt.tight_layout() fig = plt.figure(2) despine(fig,trim=True) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel(r"Energy resolution (\%)") plt.legend() plt.tight_layout() ax3[0].set_ylabel("X") ax3[0].set_ylim((-5,5)) ax3[1].set_ylabel("Y") ax3[1].set_ylim((-5,5)) ax3[2].set_xlabel("Kinetic Energy (MeV)") ax3[2].set_ylabel("Z") ax3[2].set_ylim((-5,5)) despine(ax=ax3[0],trim=True) despine(ax=ax3[1],trim=True) despine(ax=ax3[2],trim=True) h,l = ax3[0].get_legend_handles_labels() fig3.legend(h,l,loc='upper right') fig3.subplots_adjust(right=0.75) fig3.tight_layout() fig3.subplots_adjust(top=0.9) ax4[0].set_ylabel("X") ax4[0].set_ylim((0,ax4[0].get_ylim()[1])) ax4[1].set_ylabel("Y") ax4[1].set_ylim((0,ax4[1].get_ylim()[1])) ax4[2].set_xlabel("Kinetic Energy (MeV)") ax4[2].set_ylabel("Z") ax4[2].set_ylim((0,ax4[2].get_ylim()[1])) despine(ax=ax4[0],trim=True) despine(ax=ax4[1],trim=True) despine(ax=ax4[2],trim=True) h,l = ax4[0].get_legend_handles_labels() fig4.legend(h,l,loc='upper right') fig4.subplots_adjust(right=0.75) fig4.tight_layout() fig4.subplots_adjust(top=0.9) fig = plt.figure(5) despine(fig,trim=True) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel("Angular resolution (deg)") plt.ylim((0,plt.gca().get_ylim()[1])) plt.legend() plt.tight_layout() fig = plt.figure(6) plt.xticks(range(0,1250,100)) plt.hlines(0,0,1200,color='k',linestyles='--',alpha=0.5) despine(fig,trim=True) plt.xlabel("Reconstructed Electron Energy (MeV)") plt.ylabel(r"Log Likelihood Ratio (e/$\mu$)") plt.legend() plt.tight_layout() if args.save: fig = plt.figure(1) plt.savefig("energy_bias.pdf") plt.savefig("energy_bias.eps") fig = plt.figure(2) plt.savefig("energy_resolution.pdf") plt.savefig("energy_resolution.eps") fig = plt.figure(3) plt.savefig("position_bias.pdf") plt.savefig("position_bias.eps") fig = plt.figure(4) plt.savefig("position_resolution.pdf") plt.savefig("position_resolution.eps") fig = plt.figure(5) plt.savefig("angular_resolution.pdf") plt.savefig("angular_resolution.eps") fig = plt.figure(6) plt.savefig("likelihood_ratio.pdf") plt.savefig("likelihood_ratio.eps") else: plt.figure(1) plt.title("Energy Bias") plt.figure(2) plt.title("Energy Resolution") plt.figure(3) fig3.suptitle("Position Bias (cm)") plt.figure(4) fig4.suptitle("Position Resolution (cm)") plt.figure(5) plt.title("Angular Resolution") plt.figure(6) plt.title("Log Likelihood Ratio vs Reconstructed Electron Energy") plt.show()