diff options
author | tlatorre <tlatorre@uchicago.edu> | 2019-07-11 09:42:23 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2019-07-11 09:42:23 -0500 |
commit | 21491ca1ca2afd6951e9b5b1e74b1c919c602b36 (patch) | |
tree | b21b772612125c574928e4fb37221077d6a012d3 /utils/plot-fit-results | |
parent | 034253ab63f1029291fa046ce15760aae72ae5c5 (diff) | |
download | sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.gz sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.bz2 sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.zip |
switch from YAML output to HDF5 to speed things up
Diffstat (limited to 'utils/plot-fit-results')
-rwxr-xr-x | utils/plot-fit-results | 297 |
1 files changed, 101 insertions, 196 deletions
diff --git a/utils/plot-fit-results b/utils/plot-fit-results index 773a0dc..7115b81 100755 --- a/utils/plot-fit-results +++ b/utils/plot-fit-results @@ -15,11 +15,6 @@ # this program. If not, see <https://www.gnu.org/licenses/>. from __future__ import print_function, division -import yaml -try: - from yaml import CLoader as Loader -except ImportError: - from yaml.loader import SafeLoader as Loader import numpy as np from scipy.stats import iqr from matplotlib.lines import Line2D @@ -71,228 +66,137 @@ def get_stats(x): error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) return mean, std/np.sqrt(n), std, error +def std_err(x): + x = x.dropna() + mean = np.mean(x) + std = np.std(x) + n = len(x) + if n == 0: + return np.nan + u4 = np.mean((x-mean)**4) + error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) + return error + if __name__ == '__main__': import argparse import matplotlib.pyplot as plt import numpy as np + import h5py + import pandas as pd parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") args = parser.parse_args() - events = [] - - for filename in args.filenames: - print(filename) - with open(filename) as f: - data = yaml.load(f.read(),Loader=Loader) - - a = np.ma.empty(len(data['data']), - dtype=[('id',np.int), # particle id - ('T', np.double), # true energy - ('dx',np.double), # dx - ('dy',np.double), # dy - ('dz',np.double), # dz - ('dT',np.double), # dT - ('theta',np.double), # theta - ('ratio',np.double), # likelihood ratio - ('te',np.double), # time electron - ('tm',np.double), # time muon - ('Te',np.double)] # electron energy - ) - - for i, event in enumerate(data['data']): - # get the particle ID - id = event['mcgn'][0]['id'] - - a[i]['id'] = id - - if 'fit' not in event['ev'][0]: - # if nhit < 100 we don't fit the event - continue - - if id not in event['ev'][0]['fit']: - a[i] = np.ma.masked - continue - - mass = SNOMAN_MASS[id] - # for some reason it's the *second* track which seems to contain the - # initial energy - true_energy = event['mcgn'][0]['energy'] - # The MCTK bank has the particle's total energy (except for neutrons) - # so we need to convert it into kinetic energy - ke = true_energy - mass - - fit = event['ev'][0]['fit'] - - a[i]['T'] = ke - energy = fit[id]['energy'] - a[i]['dT'] = energy-ke - - # store the fit position residuals - true_posx = event['mcgn'][0]['posx'] - posx = fit[id]['posx'] - a[i]['dx'] = posx-true_posx - true_posy = event['mcgn'][0]['posy'] - posy = fit[id]['posy'] - a[i]['dy'] = posy-true_posy - true_posz = event['mcgn'][0]['posz'] - posz = fit[id]['posz'] - a[i]['dz'] = posz-true_posz - - # compute the angle between the fit direction and the true - # direction - dirx = event['mcgn'][0]['dirx'] - diry = event['mcgn'][0]['diry'] - dirz = event['mcgn'][0]['dirz'] - true_dir = [dirx,diry,dirz] - true_dir = np.array(true_dir)/np.linalg.norm(true_dir) - theta = fit[id]['theta'] - phi = fit[id]['phi'] - dir = [np.sin(theta)*np.cos(phi),np.sin(theta)*np.sin(phi),np.cos(theta)] - dir = np.array(dir)/np.linalg.norm(dir) - a[i]['theta'] = np.degrees(np.arccos(np.dot(true_dir,dir))) - - # compute the log likelihood ratio - if IDP_E_MINUS in fit and IDP_MU_MINUS in fit: - fmin_electron = fit[IDP_E_MINUS]['fmin'] - fmin_muon = fit[IDP_MU_MINUS]['fmin'] - a[i]['ratio'] = fmin_muon-fmin_electron - else: - a[i]['ratio'] = np.ma.masked - - # store the time taken for electron and muon fits - if IDP_E_MINUS in fit: - a[i]['te'] = fit[IDP_E_MINUS]['time'] - a[i]['Te'] = fit[IDP_E_MINUS]['energy'] - else: - a[i]['te'] = np.ma.masked - a[i]['Te'] = np.ma.masked - if IDP_MU_MINUS in fit: - a[i]['tm'] = fit[IDP_MU_MINUS]['time'] - else: - a[i]['tm'] = np.ma.masked - - events.append(a) - - a = np.ma.concatenate(events) - + # Read in all the data. + # + # Note: We have to add the filename as a column to each dataset since this + # script is used to analyze MC data which all has the same run number. + ev = pd.concat([pd.read_hdf(filename, "ev").assign(filename=filename) for filename in args.filenames]) + fits = pd.concat([pd.read_hdf(filename, "fits").assign(filename=filename) for filename in args.filenames]) + mcgn = pd.concat([pd.read_hdf(filename, "mcgn").assign(filename=filename) for filename in args.filenames]) + + # get rid of 2nd events like Michel electrons + ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).first() + + # Now, we merge all three datasets together to produce a single + # dataframe. To do so, we join the ev dataframe with the mcgn frame + # on the evn column, and then join with the fits on the run and + # gtid columns. + # + # At the end we will have a single dataframe with one row for each + # fit, i.e. it will look like: + # + # >>> data + # run gtid nhit, ... mcgn_x, mcgn_y, mcgn_z, ..., fit_id1, fit_x, fit_y, fit_z, ... + # + # Before merging, we prefix the primary seed track table with mcgn_ + # and the fit table with fit_ just to make things easier. + + # Prefix track and fit frames + mcgn = mcgn.add_prefix("mcgn_") + fits = fits.add_prefix("fit_") + + # merge ev and mcgn on evn + data = ev.merge(mcgn,left_on=['filename','evn'],right_on=['mcgn_filename','mcgn_evn']) + # merge data and fits on run and gtid + data = data.merge(fits,left_on=['filename','run','gtid'],right_on=['fit_filename','fit_run','fit_gtid']) + + # calculate true kinetic energy + mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values] + data['T'] = data['mcgn_energy'].values - mass + data['dx'] = data['fit_x'].values - data['mcgn_x'].values + data['dy'] = data['fit_y'].values - data['mcgn_y'].values + data['dz'] = data['fit_z'].values - data['mcgn_z'].values + data['dT'] = data['fit_energy1'].values - data['T'].values + + true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze() + dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']), + np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']), + np.cos(data['fit_theta1']))).squeeze() + + data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1))) + + # only select fits which have at least 2 fits + data = data.groupby(['filename','run','gtid']).filter(lambda x: len(x) > 1) + data_true = data[data['fit_id1'] == data['mcgn_id']] + data_e = data[data['fit_id1'] == IDP_E_MINUS] + data_mu = data[data['fit_id1'] == IDP_MU_MINUS] + + data_true = data_true.set_index(['filename','run','gtid']) + data_e = data_e.set_index(['filename','run','gtid']) + data_mu = data_mu.set_index(['filename','run','gtid']) + + data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin'] + data_true['te'] = data_e['fit_time'] + data_true['tm'] = data_mu['fit_time'] + data_true['Te'] = data_e['fit_energy1'] + + # 100 bins between 50 MeV and 1 GeV bins = np.arange(50,1000,100) - stats_array = np.ma.empty(len(bins)-1, - dtype=[('T', np.double), - ('dT', np.double), - ('dT_err', np.double), - ('dT_std', np.double), - ('dT_std_err', np.double), - ('dx', np.double), - ('dx_err', np.double), - ('dx_std', np.double), - ('dx_std_err', np.double), - ('dy', np.double), - ('dy_err', np.double), - ('dy_std', np.double), - ('dy_std_err', np.double), - ('dz', np.double), - ('dz_err', np.double), - ('dz_std', np.double), - ('dz_std_err', np.double), - ('theta', np.double), - ('theta_err', np.double), - ('theta_std', np.double), - ('theta_std_err', np.double)]) - - stats = {IDP_E_MINUS: stats_array, IDP_MU_MINUS: stats_array.copy()} - - for id in stats: - electron_events = a[a['id'] == id] - - for i, (ablah, b) in enumerate(zip(bins[:-1], bins[1:])): - events = electron_events[(electron_events['T'] >= ablah) & (electron_events['T'] < b)] - - if len(events) < 2: - stats[id][i] = np.ma.masked - continue - - stats[id][i]['T'] = (ablah+b)/2 - mean, mean_error, std, std_error = get_stats(events['dT'].compressed()) - stats[id][i]['dT'] = mean - stats[id][i]['dT_err'] = mean_error - stats[id][i]['dT_std'] = std - stats[id][i]['dT_std_err'] = std_error - mean, mean_error, std, std_error = get_stats(events['dx'].compressed()) - stats[id][i]['dx'] = mean - stats[id][i]['dx_err'] = mean_error - stats[id][i]['dx_std'] = std - stats[id][i]['dx_std_err'] = std_error - mean, mean_error, std, std_error = get_stats(events['dy'].compressed()) - stats[id][i]['dy'] = mean - stats[id][i]['dy_err'] = mean_error - stats[id][i]['dy_std'] = std - stats[id][i]['dy_std_err'] = std_error - mean, mean_error, std, std_error = get_stats(events['dz'].compressed()) - stats[id][i]['dz'] = mean - stats[id][i]['dz_err'] = mean_error - stats[id][i]['dz_std'] = std - stats[id][i]['dz_std_err'] = std_error - mean, mean_error, std, std_error = get_stats(events['theta'].compressed()) - stats[id][i]['theta'] = mean - stats[id][i]['theta_err'] = mean_error - stats[id][i]['theta_std'] = std - stats[id][i]['theta_std_err'] = std_error - - for id in stats: + for id in [IDP_E_MINUS, IDP_MU_MINUS]: + events = data_true[data_true['mcgn_id'] == id] + + pd_bins = pd.cut(events['T'],bins) + + dT = events.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err]) + dx = events.groupby(pd_bins)['dx'].agg(['mean','sem','std',std_err]) + dy = events.groupby(pd_bins)['dy'].agg(['mean','sem','std',std_err]) + dz = events.groupby(pd_bins)['dz'].agg(['mean','sem','std',std_err]) + theta = events.groupby(pd_bins)['theta'].agg(['mean','sem','std',std_err]) + label = 'Muon' if id == IDP_MU_MINUS else 'Electron' - T = stats[id]['T'] - dT = stats[id]['dT'] - dT_err = stats[id]['dT_err'] - std_dT = stats[id]['dT_std'] - std_dT_err = stats[id]['dT_std_err'] - dx = stats[id]['dx'] - dx_err = stats[id]['dx_err'] - std_dx = stats[id]['dx_std'] - std_dx_err = stats[id]['dx_std_err'] - dy = stats[id]['dy'] - dy_err = stats[id]['dy_err'] - std_dy = stats[id]['dy_std'] - std_dy_err = stats[id]['dy_std_err'] - dz = stats[id]['dz'] - dz_err = stats[id]['dz_err'] - std_dz = stats[id]['dz_std'] - std_dz_err = stats[id]['dz_std_err'] - theta = stats[id]['theta'] - theta_err = stats[id]['theta_err'] - std_theta = stats[id]['theta_std'] - std_theta_err = stats[id]['theta_std_err'] + T = (bins[1:] + bins[:-1])/2 plt.figure(1) - plt.errorbar(T,dT*100/T,yerr=dT_err*100/T,fmt='o',label=label) + plt.errorbar(T,dT['mean']*100/T,yerr=dT['sem']*100/T,fmt='o',label=label) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel("Energy bias (%)") plt.title("Energy Bias") plt.legend() plt.figure(2) - plt.errorbar(T,std_dT*100/T,yerr=std_dT_err*100/T,fmt='o',label=label) + plt.errorbar(T,dT['std']*100/T,yerr=dT['std_err']*100/T,fmt='o',label=label) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel("Energy resolution (%)") plt.title("Energy Resolution") plt.legend() plt.figure(3) - plt.errorbar(T,dx,yerr=dx_err,fmt='o',label='%s (x)' % label) - plt.errorbar(T,dy,yerr=dy_err,fmt='o',label='%s (y)' % label) - plt.errorbar(T,dz,yerr=dz_err,fmt='o',label='%s (z)' % label) + plt.errorbar(T,dx['mean'],yerr=dx['sem'],fmt='o',label='%s (x)' % label) + plt.errorbar(T,dy['mean'],yerr=dy['sem'],fmt='o',label='%s (y)' % label) + plt.errorbar(T,dz['mean'],yerr=dz['sem'],fmt='o',label='%s (z)' % label) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel("Position bias (cm)") plt.title("Position Bias") plt.legend() plt.figure(4) - plt.errorbar(T,std_dx,yerr=std_dx_err,fmt='o',label='%s (x)' % label) - plt.errorbar(T,std_dy,yerr=std_dy_err,fmt='o',label='%s (y)' % label) - plt.errorbar(T,std_dz,yerr=std_dz_err,fmt='o',label='%s (z)' % label) + plt.errorbar(T,dx['std'],yerr=dx['std_err'],fmt='o',label='%s (x)' % label) + plt.errorbar(T,dy['std'],yerr=dy['std_err'],fmt='o',label='%s (y)' % label) + plt.errorbar(T,dz['std'],yerr=dz['std_err'],fmt='o',label='%s (z)' % label) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel("Position resolution (cm)") plt.title("Position Resolution") @@ -300,7 +204,7 @@ if __name__ == '__main__': plt.legend() plt.figure(5) - plt.errorbar(T,std_theta,yerr=std_theta_err,fmt='o',label=label) + plt.errorbar(T,theta['std'],yerr=theta['std_err'],fmt='o',label=label) plt.xlabel("Kinetic Energy (MeV)") plt.ylabel("Angular resolution (deg)") plt.title("Angular Resolution") @@ -308,9 +212,10 @@ if __name__ == '__main__': plt.legend() plt.figure(6) - plt.scatter(a[a['id'] == id]['Te'],a[a['id'] == id]['ratio'],label=label) + plt.scatter(events['Te'],events['ratio'],label=label) plt.xlabel("Reconstructed Electron Energy (MeV)") plt.ylabel(r"Log Likelihood Ratio (e/$\mu$)") plt.title("Log Likelihood Ratio vs Reconstructed Electron Energy") plt.legend() + plt.show() |