path: root/utils/plot-fit-results
author    tlatorre <tlatorre@uchicago.edu>  2019-11-22 10:22:19 -0600
committer tlatorre <tlatorre@uchicago.edu>  2019-11-22 10:22:19 -0600
commit    19adbadca5220babb99b6f0f8bbd36aaedee0049 (patch)
tree      ec0b9e673e0094dd985bab352840da5f858ffda7 /utils/plot-fit-results
parent    d6555d2dc164e9b2dcf246d86161b1485a408f8a (diff)
update plot-fit-results to use median and iqr instead of mean and std
This commit updates plot-fit-results to use the median when plotting the energy and position bias, and the interquartile range (divided by 1.349 to convert it to an equivalent gaussian standard deviation) when plotting the energy and position resolution. The reason is that single large outliers for higher energy muons were causing the mean-based energy bias and resolution to no longer represent the central part of the distribution well.
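For context (an illustration added here, not part of the commit): the interquartile range of a gaussian is about 1.349 sigma, so IQR/1.349 is a robust estimate of sigma, and the median is the corresponding robust estimate of the centre. A minimal sketch of how a single outlier affects the two sets of estimators:

import numpy as np
from scipy.stats import iqr

# Illustration only: a gaussian sample plus one large outlier, similar to
# the occasional badly reconstructed high energy muon described above.
rng = np.random.default_rng(0)
x = rng.normal(loc=0.0, scale=1.0, size=1000)
x[0] = 50.0

# The mean and standard deviation get pulled by the single outlier ...
print("mean = %.2f, std = %.2f" % (x.mean(), x.std()))
# ... while the median and IQR-based sigma still describe the bulk.
print("median = %.2f, iqr/1.349 = %.2f" % (np.median(x), iqr(x)/1.3489795))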
Diffstat (limited to 'utils/plot-fit-results')
-rwxr-xr-x  utils/plot-fit-results  321
 1 file changed, 268 insertions(+), 53 deletions(-)
diff --git a/utils/plot-fit-results b/utils/plot-fit-results
index ab08586..450de72 100755
--- a/utils/plot-fit-results
+++ b/utils/plot-fit-results
@@ -16,8 +16,9 @@
from __future__ import print_function, division
import numpy as np
-from scipy.stats import iqr
+from scipy.stats import iqr, norm
from matplotlib.lines import Line2D
+import itertools
# on retina screens, the default plots are way too small
# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
@@ -27,6 +28,12 @@ matplotlib.use("Qt5Agg")
matplotlib.rc('font', size=22)
+font = {'family':'serif', 'serif': ['computer modern roman']}
+matplotlib.rc('font',**font)
+
+matplotlib.rc('text', usetex=True)
+
+
IDP_E_MINUS = 20
IDP_MU_MINUS = 22
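Side note (an observation, not a change to the patch): setting text.usetex=True hands all text rendering to an external LaTeX installation, which must be available on the machine running the script. It is also why the percent signs in the axis labels added further down are escaped as \%, since % starts a comment in LaTeX.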
@@ -66,6 +73,59 @@ def get_stats(x):
error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
return mean, std/np.sqrt(n), std, error
+def iqr_std_err(x):
+ """
+    Returns the approximate error on the IQR-based standard deviation
+    estimate, assuming the central part of the distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
+ std = iqr(x.values)/1.3489795
+ return 1.573*std/np.sqrt(n)
+
+def iqr_std(x):
+ """
+ Returns the approximate standard deviation assuming the central part of the
+ distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return iqr(x.values)/1.3489795
+
+def median(x):
+ """
+ Returns the median for all the non NaN values in a Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return np.median(x.values)
+
+def median_err(x):
+ """
+ Returns the approximate error on the median for all the non NaN values in a
+ Series `x`. The error on the median is approximated assuming the central
+ part of the distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ # First we estimate the standard deviation using the interquartile range.
+ # Here we are essentially assuming the central part of the distribution is
+ # gaussian.
+ std = iqr(x.values)/1.3489795
+ median = np.median(x.values)
+ # Now we estimate the error on the median for a gaussian
+ # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
+ return 1/(2*np.sqrt(n)*norm.pdf(median,median,std))
+
def std_err(x):
x = x.dropna()
mean = np.mean(x)
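An aside on the error formulas added above (illustrative only; the seed and variable names below are made up): the asymptotic standard error of the sample median is 1/(2*sqrt(n)*f(m)), where f is the density at the median, which for a gaussian reduces to sigma*sqrt(pi/2)/sqrt(n), roughly 1.25*sigma/sqrt(n). A quick Monte Carlo check of that number:

import numpy as np
from scipy.stats import norm

# Compare the analytic standard error of the median of n unit gaussian
# samples against a brute force Monte Carlo estimate.
rng = np.random.default_rng(1)
n = 1000
medians = [np.median(rng.normal(size=n)) for _ in range(2000)]

analytic = 1/(2*np.sqrt(n)*norm.pdf(0, 0, 1))  # = sqrt(pi/2)/sqrt(n) here
print("analytic SE of the median:    %.4f" % analytic)
print("monte carlo SE of the median: %.4f" % np.std(medians))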
@@ -77,6 +137,108 @@ def std_err(x):
error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
return error
+# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
+def despine(fig=None, ax=None, top=True, right=True, left=False,
+ bottom=False, offset=None, trim=False):
+ """Remove the top and right spines from plot(s).
+
+ fig : matplotlib figure, optional
+ Figure to despine all axes of, default uses current figure.
+ ax : matplotlib axes, optional
+ Specific axes object to despine.
+ top, right, left, bottom : boolean, optional
+ If True, remove that spine.
+ offset : int or dict, optional
+ Absolute distance, in points, spines should be moved away
+ from the axes (negative values move spines inward). A single value
+ applies to all spines; a dict can be used to set offset values per
+ side.
+ trim : bool, optional
+ If True, limit spines to the smallest and largest major tick
+ on each non-despined axis.
+
+ Returns
+ -------
+ None
+
+ """
+ # Get references to the axes we want
+ if fig is None and ax is None:
+ axes = plt.gcf().axes
+ elif fig is not None:
+ axes = fig.axes
+ elif ax is not None:
+ axes = [ax]
+
+ for ax_i in axes:
+ for side in ["top", "right", "left", "bottom"]:
+ # Toggle the spine objects
+ is_visible = not locals()[side]
+ ax_i.spines[side].set_visible(is_visible)
+ if offset is not None and is_visible:
+ try:
+ val = offset.get(side, 0)
+ except AttributeError:
+ val = offset
+ _set_spine_position(ax_i.spines[side], ('outward', val))
+
+ # Potentially move the ticks
+ if left and not right:
+ maj_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.yaxis.majorTicks
+ )
+ min_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.yaxis.minorTicks
+ )
+ ax_i.yaxis.set_ticks_position("right")
+ for t in ax_i.yaxis.majorTicks:
+ t.tick2line.set_visible(maj_on)
+ for t in ax_i.yaxis.minorTicks:
+ t.tick2line.set_visible(min_on)
+
+ if bottom and not top:
+ maj_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.xaxis.majorTicks
+ )
+ min_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.xaxis.minorTicks
+ )
+ ax_i.xaxis.set_ticks_position("top")
+ for t in ax_i.xaxis.majorTicks:
+ t.tick2line.set_visible(maj_on)
+ for t in ax_i.xaxis.minorTicks:
+ t.tick2line.set_visible(min_on)
+
+ if trim:
+ # clip off the parts of the spines that extend past major ticks
+ xticks = ax_i.get_xticks()
+ if xticks.size:
+ firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
+ xticks)[0]
+ lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
+ xticks)[-1]
+ ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
+ ax_i.spines['top'].set_bounds(firsttick, lasttick)
+ newticks = xticks.compress(xticks <= lasttick)
+ newticks = newticks.compress(newticks >= firsttick)
+ ax_i.set_xticks(newticks)
+
+ yticks = ax_i.get_yticks()
+ if yticks.size:
+ firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
+ yticks)[0]
+ lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
+ yticks)[-1]
+ ax_i.spines['left'].set_bounds(firsttick, lasttick)
+ ax_i.spines['right'].set_bounds(firsttick, lasttick)
+ newticks = yticks.compress(yticks <= lasttick)
+ newticks = newticks.compress(newticks >= firsttick)
+ ax_i.set_yticks(newticks)
+
if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
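One caveat on the vendored despine() above (an observation, not a change to the patch): it references _set_spine_position, a private seaborn helper that is not copied in with it, so passing a non-None offset would raise a NameError. The script never does that; every call uses only fig/ax plus trim=True, as in this hypothetical usage sketch:

import matplotlib.pyplot as plt

# Hypothetical usage, assuming the despine() defined above is in scope.
# offset is left at its default, so the missing helper is never reached.
fig, ax = plt.subplots(2, 1, sharex=True)
ax[0].plot([0, 1, 2], [1, 4, 9])
ax[1].plot([0, 1, 2], [9, 4, 1])
despine(fig, trim=True)  # drop the top/right spines of every axes in fig
plt.show()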
@@ -137,6 +299,7 @@ if __name__ == '__main__':
data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1)))
+
# only select events which have at least 2 fits
data = data.groupby(['filename','run','gtid']).filter(lambda x: len(x) > 1)
data_true = data[data['fit_id1'] == data['mcgn_id']]
@@ -155,67 +318,119 @@ if __name__ == '__main__':
# 100 MeV wide bins between 50 MeV and 950 MeV
bins = np.arange(50,1000,100)
+ markers = itertools.cycle(('o', 'v'))
+
+ # Default figure size. Currently set to my monitor width and height so that
+ # things are properly formatted
+ FIGSIZE = (13.78,7.48)
+
+ fig3, ax3 = plt.subplots(3,1,figsize=FIGSIZE,num=3,sharex=True)
+ fig4, ax4 = plt.subplots(3,1,figsize=FIGSIZE,num=4,sharex=True)
+
for id in [IDP_E_MINUS, IDP_MU_MINUS]:
events = data_true[data_true['mcgn_id'] == id]
pd_bins = pd.cut(events['T'],bins)
- dT = events.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err])
- dx = events.groupby(pd_bins)['dx'].agg(['mean','sem','std',std_err])
- dy = events.groupby(pd_bins)['dy'].agg(['mean','sem','std',std_err])
- dz = events.groupby(pd_bins)['dz'].agg(['mean','sem','std',std_err])
- theta = events.groupby(pd_bins)['theta'].agg(['mean','sem','std',std_err])
+ dT = events.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+ dx = events.groupby(pd_bins)['dx'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+ dy = events.groupby(pd_bins)['dy'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+ dz = events.groupby(pd_bins)['dz'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
+ theta = events.groupby(pd_bins)['theta'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
label = 'Muon' if id == IDP_MU_MINUS else 'Electron'
T = (bins[1:] + bins[:-1])/2
- plt.figure(1)
- plt.errorbar(T,dT['mean']*100/T,yerr=dT['sem']*100/T,fmt='o',label=label)
- plt.xlabel("Kinetic Energy (MeV)")
- plt.ylabel("Energy bias (%)")
- plt.title("Energy Bias")
- plt.legend()
-
- plt.figure(2)
- plt.errorbar(T,dT['std']*100/T,yerr=dT['std_err']*100/T,fmt='o',label=label)
- plt.xlabel("Kinetic Energy (MeV)")
- plt.ylabel("Energy resolution (%)")
- plt.title("Energy Resolution")
- plt.legend()
-
- plt.figure(3)
- plt.errorbar(T,dx['mean'],yerr=dx['sem'],fmt='o',label='%s (x)' % label)
- plt.errorbar(T,dy['mean'],yerr=dy['sem'],fmt='o',label='%s (y)' % label)
- plt.errorbar(T,dz['mean'],yerr=dz['sem'],fmt='o',label='%s (z)' % label)
- plt.xlabel("Kinetic Energy (MeV)")
- plt.ylabel("Position bias (cm)")
- plt.title("Position Bias")
- plt.legend()
-
- plt.figure(4)
- plt.errorbar(T,dx['std'],yerr=dx['std_err'],fmt='o',label='%s (x)' % label)
- plt.errorbar(T,dy['std'],yerr=dy['std_err'],fmt='o',label='%s (y)' % label)
- plt.errorbar(T,dz['std'],yerr=dz['std_err'],fmt='o',label='%s (z)' % label)
- plt.xlabel("Kinetic Energy (MeV)")
- plt.ylabel("Position resolution (cm)")
- plt.title("Position Resolution")
- plt.ylim((0,plt.gca().get_ylim()[1]))
- plt.legend()
-
- plt.figure(5)
- plt.errorbar(T,theta['std'],yerr=theta['std_err'],fmt='o',label=label)
- plt.xlabel("Kinetic Energy (MeV)")
- plt.ylabel("Angular resolution (deg)")
- plt.title("Angular Resolution")
- plt.ylim((0,plt.gca().get_ylim()[1]))
- plt.legend()
-
- plt.figure(6)
- plt.scatter(events['Te'],events['ratio'],label=label)
- plt.xlabel("Reconstructed Electron Energy (MeV)")
- plt.ylabel(r"Log Likelihood Ratio (e/$\mu$)")
- plt.title("Log Likelihood Ratio vs Reconstructed Electron Energy")
- plt.legend()
+ marker = markers.next()
+
+ plt.figure(1,figsize=FIGSIZE)
+ plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T,fmt=marker,label=label)
+
+ plt.figure(2,figsize=FIGSIZE)
+ plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T,fmt=marker,label=label)
+
+ ax3[0].errorbar(T,dx['median'],yerr=dx['median_err'],fmt=marker,label=label)
+ ax3[1].errorbar(T,dy['median'],yerr=dy['median_err'],fmt=marker,label=label)
+ ax3[2].errorbar(T,dz['median'],yerr=dz['median_err'],fmt=marker,label=label)
+
+ ax4[0].errorbar(T,dx['iqr_std'],yerr=dx['iqr_std_err'],fmt=marker,label=label)
+ ax4[1].errorbar(T,dy['iqr_std'],yerr=dy['iqr_std_err'],fmt=marker,label=label)
+ ax4[2].errorbar(T,dz['iqr_std'],yerr=dz['iqr_std_err'],fmt=marker,label=label)
+
+ plt.figure(5,figsize=FIGSIZE)
+ plt.errorbar(T,theta['iqr_std'],yerr=theta['iqr_std_err'],fmt=marker,label=label)
+
+ plt.figure(6,figsize=FIGSIZE)
+ plt.scatter(events['Te'],events['ratio'],marker=marker,label=label)
+
+ fig = plt.figure(1)
+ despine(fig,trim=True)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel(r"Energy bias (\%)")
+ plt.title("Energy Bias")
+ plt.legend()
+ plt.tight_layout()
+
+ fig = plt.figure(2)
+ despine(fig,trim=True)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel(r"Energy resolution (\%)")
+ plt.title("Energy Resolution")
+ plt.legend()
+ plt.tight_layout()
+
+ ax3[0].set_ylabel("X")
+ ax3[0].set_ylim((-5,5))
+ ax3[1].set_ylabel("Y")
+ ax3[1].set_ylim((-5,5))
+ ax3[2].set_xlabel("Kinetic Energy (MeV)")
+ ax3[2].set_ylabel("Z")
+ ax3[2].set_ylim((-5,5))
+ despine(ax=ax3[0],trim=True)
+ despine(ax=ax3[1],trim=True)
+ despine(ax=ax3[2],trim=True)
+ fig3.suptitle("Position Bias (cm)")
+ h,l = ax3[0].get_legend_handles_labels()
+ fig3.legend(h,l,loc='upper right')
+ fig3.subplots_adjust(right=0.75)
+ fig3.tight_layout()
+ fig3.subplots_adjust(top=0.9)
+
+ ax4[0].set_ylabel("X")
+ ax4[0].set_ylim((0,ax4[0].get_ylim()[1]))
+ ax4[1].set_ylabel("Y")
+ ax4[1].set_ylim((0,ax4[1].get_ylim()[1]))
+ ax4[2].set_xlabel("Kinetic Energy (MeV)")
+ ax4[2].set_ylabel("Z")
+ ax4[2].set_ylim((0,ax4[2].get_ylim()[1]))
+ despine(ax=ax4[0],trim=True)
+ despine(ax=ax4[1],trim=True)
+ despine(ax=ax4[2],trim=True)
+ fig4.suptitle("Position Resolution (cm)")
+ h,l = ax4[0].get_legend_handles_labels()
+ fig4.legend(h,l,loc='upper right')
+ fig4.subplots_adjust(right=0.75)
+ fig4.tight_layout()
+ fig4.subplots_adjust(top=0.9)
+
+ fig = plt.figure(5)
+ despine(fig,trim=True)
+ plt.xlabel("Kinetic Energy (MeV)")
+ plt.ylabel("Angular resolution (deg)")
+ plt.title("Angular Resolution")
+ plt.ylim((0,plt.gca().get_ylim()[1]))
+ plt.legend()
+ plt.tight_layout()
+
+ fig = plt.figure(6)
+ plt.xticks(range(0,1250,100))
+ plt.hlines(0,0,1200,color='k',linestyles='--',alpha=0.5)
+ despine(fig,trim=True)
+ plt.xlabel("Reconstructed Electron Energy (MeV)")
+ plt.ylabel(r"Log Likelihood Ratio (e/$\mu$)")
+ plt.title("Log Likelihood Ratio vs Reconstructed Electron Energy")
+ plt.legend()
+ plt.tight_layout()
plt.show()
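Two closing remarks (editorial, not part of the patch). First, markers.next() relies on the Python 2 iterator protocol; under Python 3 the equivalent is next(markers). Second, all of the per-energy-bin statistics above come from the same pandas pattern of cutting on kinetic energy and aggregating with the custom estimators. A stripped-down sketch with fake data (it assumes median, median_err, iqr_std and iqr_std_err from this script are in scope):

import numpy as np
import pandas as pd

# Fake events: uniform kinetic energies and gaussian energy residuals.
rng = np.random.default_rng(2)
events = pd.DataFrame({'T': rng.uniform(50, 1000, size=10000),
                       'dT': rng.normal(0, 10, size=10000)})

# Same binning and aggregation pattern as in the script above.
bins = np.arange(50, 1000, 100)
pd_bins = pd.cut(events['T'], bins)
dT = events.groupby(pd_bins)['dT'].agg([median, median_err, iqr_std, iqr_std_err])
print(dT)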