#!/usr/bin/env python
# Copyright (c) 2019, Anthony Latorre
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function, division

if __name__ == '__main__':
    import argparse
    import numpy as np
    import h5py
    import pandas as pd
    import itertools
    from sddm import IDP_E_MINUS, IDP_MU_MINUS, SNOMAN_MASS
    from sddm.plot import plot_hist, plot_legend, get_stats, despine, iqr_std_err, iqr_std, quantile_error, q90_err, q90, median, median_err, std_err
    from sddm import setup_matplotlib

    parser = argparse.ArgumentParser("plot fit results")
    parser.add_argument("filenames", nargs='+', help="input files")
    parser.add_argument("-o", "--output", default=None, help="output file")
    parser.add_argument("--save", action="store_true", default=False, help="save plots")
    args = parser.parse_args()

    setup_matplotlib(args.save)

    import matplotlib.pyplot as plt

    # Read in all the data.
    #
    # Note: We have to add the filename as a column to each dataset since this
    # script is used to analyze MC data which all has the same run number.
    ev = pd.concat([pd.read_hdf(filename, "ev").assign(filename=filename) for filename in args.filenames],ignore_index=True)
    fits = pd.concat([pd.read_hdf(filename, "fits").assign(filename=filename) for filename in args.filenames],ignore_index=True)
    mcgn = pd.concat([pd.read_hdf(filename, "mcgn").assign(filename=filename) for filename in args.filenames],ignore_index=True)

    # get rid of 2nd events like Michel electrons
    ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).nth(0)

    # Now, we merge all three datasets together to produce a single
    # dataframe. To do so, we join the ev dataframe with the mcgn frame
    # on the evn column, and then join with the fits on the run and
    # gtid columns.
    #
    # At the end we will have a single dataframe with one row for each
    # fit, i.e. it will look like:
    #
    # >>> data
    #     run   gtid nhit, ... mcgn_x, mcgn_y, mcgn_z, ..., fit_id1, fit_x, fit_y, fit_z, ...
    #
    # Before merging, we prefix the primary seed track table with mcgn_
    # and the fit table with fit_ just to make things easier.
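    # As a minimal, hedged sketch of the add_prefix + merge pattern used
    # below (toy column names and values, not the real tables):
    #
    # >>> left = pd.DataFrame({'filename': ['a'], 'evn': [0], 'nhit': [10]})
    # >>> right = pd.DataFrame({'filename': ['a'], 'evn': [0], 'x': [1.0]}).add_prefix('mcgn_')
    # >>> left.merge(right, left_on=['filename','evn'], right_on=['mcgn_filename','mcgn_evn'])
    #
    # which yields a single row with the columns filename, evn, nhit,
    # mcgn_filename, mcgn_evn, and mcgn_x.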
    # Prefix track and fit frames
    mcgn = mcgn.add_prefix("mcgn_")
    fits = fits.add_prefix("fit_")

    # merge ev and mcgn on evn
    data = ev.merge(mcgn,left_on=['filename','evn'],right_on=['mcgn_filename','mcgn_evn'])
    # merge data and fits on run and gtid
    data = data.merge(fits,left_on=['filename','run','gtid'],right_on=['fit_filename','fit_run','fit_gtid'])

    # For this script, we only want the single particle fit results
    data = data[(data.fit_id2 == 0) & (data.fit_id3 == 0)]

    # Select only the best fit for a given run, gtid, and particle combo
    data = data.sort_values('fit_fmin').groupby(['filename','run','gtid','fit_id1','fit_id2','fit_id3'],as_index=False).nth(0).reset_index(level=0,drop=True)

    # calculate true kinetic energy
    mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values]
    data['T'] = data['mcgn_energy'].values - mass
    data['dx'] = data['fit_x'].values - data['mcgn_x'].values
    data['dy'] = data['fit_y'].values - data['mcgn_y'].values
    data['dz'] = data['fit_z'].values - data['mcgn_z'].values
    data['dT'] = data['fit_energy1'].values - data['T'].values

    true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze()
    dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']),
                     np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']),
                     np.cos(data['fit_theta1']))).squeeze()

    data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1)))

    # only select events which have at least 2 fits
    data = data.groupby(['filename','run','gtid']).filter(lambda x: len(x) > 1)

    data_true = data[data['fit_id1'] == data['mcgn_id']]
    data_e = data[data['fit_id1'] == IDP_E_MINUS]
    data_mu = data[data['fit_id1'] == IDP_MU_MINUS]

    data_true = data_true.set_index(['filename','run','gtid'])
    data_e = data_e.set_index(['filename','run','gtid'])
    data_mu = data_mu.set_index(['filename','run','gtid'])

    data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin']
    data_true['te'] = data_e['fit_time']
    data_true['tm'] = data_mu['fit_time']
    data_true['Te'] = data_e['fit_energy1']

    # bins of width 100 MeV between 50 MeV and 1 GeV
    bins = np.arange(50,1000,100)

    T = (bins[1:] + bins[:-1])/2

    markers = itertools.cycle(('o', 'v'))

    fig3, ax3 = plt.subplots(3,1,num=3,sharex=True)
    fig4, ax4 = plt.subplots(3,1,num=4,sharex=True)

    output = pd.DataFrame({'T':T})

    for id in [IDP_E_MINUS, IDP_MU_MINUS]:
        events = data_true[data_true['mcgn_id'] == id]

        pd_bins = pd.cut(events['T'],bins)

        dT = events.groupby(pd_bins)['dT'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
        dx = events.groupby(pd_bins)['dx'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
        dy = events.groupby(pd_bins)['dy'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
        dz = events.groupby(pd_bins)['dz'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err])
        theta = events.groupby(pd_bins)['theta'].agg(['mean','sem','std',std_err,median,median_err,iqr_std,iqr_std_err,q90,q90_err])

        label = 'Muon' if id == IDP_MU_MINUS else 'Electron'

        marker = next(markers)

        plt.figure(1)
        plt.errorbar(T,dT['median']*100/T,yerr=dT['median_err']*100/T,fmt=marker,label=label)

        plt.figure(2)
        plt.errorbar(T,dT['iqr_std']*100/T,yerr=dT['iqr_std_err']*100/T,fmt=marker,label=label)

        ax3[0].errorbar(T,dx['median'],yerr=dx['median_err'],fmt=marker,label=label)
        ax3[1].errorbar(T,dy['median'],yerr=dy['median_err'],fmt=marker,label=label)
        ax3[2].errorbar(T,dz['median'],yerr=dz['median_err'],fmt=marker,label=label)

        if id == IDP_E_MINUS:
            output['e_bias'] = (dT['median']/T).values
        else:
            output['u_bias'] = (dT['median']/T).values
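        # Figure 4 (below) shows the position resolution, estimated from the
        # interquartile-range based spread of dx, dy, and dz in each kinetic
        # energy bin, and figure 6 scatters the muon-minus-electron fit_fmin
        # difference against the reconstructed electron energy.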
        ax4[0].errorbar(T,dx['iqr_std'],yerr=dx['iqr_std_err'],fmt=marker,label=label)
        ax4[1].errorbar(T,dy['iqr_std'],yerr=dy['iqr_std_err'],fmt=marker,label=label)
        ax4[2].errorbar(T,dz['iqr_std'],yerr=dz['iqr_std_err'],fmt=marker,label=label)

        plt.figure(5)
        plt.errorbar(T,theta['std'],yerr=theta['std_err'],fmt=marker,label=label)

        plt.figure(6)
        plt.scatter(events['Te'],events['ratio'],marker=marker,label=label)

    if args.output:
        with h5py.File(args.output,"w") as f:
            f.create_dataset('energy_bias',data=output.to_records())

    fig = plt.figure(1)
    despine(fig,trim=True)
    plt.xlabel("Kinetic Energy (MeV)")
    plt.ylabel(r"Energy bias (\%)")
    plt.legend()
    plt.tight_layout()

    fig = plt.figure(2)
    despine(fig,trim=True)
    plt.xlabel("Kinetic Energy (MeV)")
    plt.ylabel(r"Energy resolution (\%)")
    plt.legend()
    plt.tight_layout()

    ax3[0].set_ylabel("X")
    ax3[0].set_ylim((-5,5))
    ax3[1].set_ylabel("Y")
    ax3[1].set_ylim((-5,5))
    ax3[2].set_xlabel("Kinetic Energy (MeV)")
    ax3[2].set_ylabel("Z")
    ax3[2].set_ylim((-5,5))
    despine(ax=ax3[0],trim=True)
    despine(ax=ax3[1],trim=True)
    despine(ax=ax3[2],trim=True)
    h,l = ax3[0].get_legend_handles_labels()
    fig3.legend(h,l,loc='upper right')
    fig3.subplots_adjust(right=0.75)
    fig3.tight_layout()
    fig3.subplots_adjust(top=0.9)

    ax4[0].set_ylabel("X")
    ax4[0].set_ylim((0,ax4[0].get_ylim()[1]))
    ax4[1].set_ylabel("Y")
    ax4[1].set_ylim((0,ax4[1].get_ylim()[1]))
    ax4[2].set_xlabel("Kinetic Energy (MeV)")
    ax4[2].set_ylabel("Z")
    ax4[2].set_ylim((0,ax4[2].get_ylim()[1]))
    despine(ax=ax4[0],trim=True)
    despine(ax=ax4[1],trim=True)
    despine(ax=ax4[2],trim=True)
    h,l = ax4[0].get_legend_handles_labels()
    fig4.legend(h,l,loc='upper right')
    fig4.subplots_adjust(right=0.75)
    fig4.tight_layout()
    fig4.subplots_adjust(top=0.9)

    fig = plt.figure(5)
    despine(fig,trim=True)
    plt.xlabel("Kinetic Energy (MeV)")
    plt.ylabel("Angular resolution (deg)")
    plt.ylim((0,plt.gca().get_ylim()[1]))
    plt.legend()
    plt.tight_layout()

    fig = plt.figure(6)
    plt.xticks(range(0,1250,100))
    plt.hlines(0,0,1200,color='k',linestyles='--',alpha=0.5)
    despine(fig,trim=True)
    plt.xlabel("Reconstructed Electron Energy (MeV)")
    plt.ylabel(r"Log Likelihood Ratio (e/$\mu$)")
    plt.legend()
    plt.tight_layout()

    if args.save:
        fig = plt.figure(1)
        plt.savefig("energy_bias.pdf")
        plt.savefig("energy_bias.eps")

        fig = plt.figure(2)
        plt.savefig("energy_resolution.pdf")
        plt.savefig("energy_resolution.eps")

        fig = plt.figure(3)
        plt.savefig("position_bias.pdf")
        plt.savefig("position_bias.eps")

        fig = plt.figure(4)
        plt.savefig("position_resolution.pdf")
        plt.savefig("position_resolution.eps")

        fig = plt.figure(5)
        plt.savefig("angular_resolution.pdf")
        plt.savefig("angular_resolution.eps")

        fig = plt.figure(6)
        plt.savefig("likelihood_ratio.pdf")
        plt.savefig("likelihood_ratio.eps")
    else:
        plt.figure(1)
        plt.title("Energy Bias")

        plt.figure(2)
        plt.title("Energy Resolution")

        plt.figure(3)
        fig3.suptitle("Position Bias (cm)")

        plt.figure(4)
        fig4.suptitle("Position Resolution (cm)")

        plt.figure(5)
        plt.title("Angular Resolution")

        plt.figure(6)
        plt.title("Log Likelihood Ratio vs Reconstructed Electron Energy")

        plt.show()
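# Example invocation (a sketch; the script and input file names below are
# hypothetical -- the inputs are HDF5 files containing the "ev", "fits", and
# "mcgn" tables read above):
#
#     ./plot-fit-results electron_mc.hdf5 muon_mc.hdf5 -o energy_bias.hdf5 --save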