diff options
author | tlatorre <tlatorre@uchicago.edu> | 2019-07-11 09:42:23 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2019-07-11 09:42:23 -0500 |
commit | 21491ca1ca2afd6951e9b5b1e74b1c919c602b36 (patch) | |
tree | b21b772612125c574928e4fb37221077d6a012d3 /utils/plot | |
parent | 034253ab63f1029291fa046ce15760aae72ae5c5 (diff) | |
download | sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.gz sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.bz2 sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.zip |
switch from YAML output to HDF5 to speed things up
Diffstat (limited to 'utils/plot')
-rwxr-xr-x | utils/plot | 171 |
1 files changed, 82 insertions, 89 deletions
@@ -74,6 +74,8 @@ if __name__ == '__main__': import argparse import matplotlib.pyplot as plt import numpy as np + import h5py + import pandas as pd parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") @@ -81,121 +83,113 @@ if __name__ == '__main__': for filename in args.filenames: print(filename) - with open(filename) as f: - data = yaml.load(f.read(),Loader=Loader) - - dx = [] - dy = [] - dz = [] - dT = [] - thetas = [] - likelihood_ratio = [] - t_electron = [] - t_muon = [] - psi = [] - for event in data['data']: - # get the particle ID - id = event['mcgn'][0]['id'] - - if 'ev' not in event: - continue - - if 'fit' not in event['ev'][0]: - # if nhit < 100 we don't fit the event - continue - - if id not in event['ev'][0]['fit']: - continue - - fit = event['ev'][0]['fit'] - - mass = SNOMAN_MASS[id] - # for some reason it's the *second* track which seems to contain the - # initial energy - true_energy = event['mcgn'][0]['energy'] - # The MCTK bank has the particle's total energy (except for neutrons) - # so we need to convert it into kinetic energy - ke = true_energy - mass - energy = fit[id]['energy'] - dT.append(energy-ke) - true_posx = event['mcgn'][0]['posx'] - posx = fit[id]['posx'] - dx.append(posx-true_posx) - true_posy = event['mcgn'][0]['posy'] - posy = fit[id]['posy'] - dy.append(posy-true_posy) - true_posz = event['mcgn'][0]['posz'] - posz = fit[id]['posz'] - dz.append(posz-true_posz) - dirx = event['mcgn'][0]['dirx'] - diry = event['mcgn'][0]['diry'] - dirz = event['mcgn'][0]['dirz'] - true_dir = [dirx,diry,dirz] - true_dir = np.array(true_dir)/np.linalg.norm(true_dir) - theta = fit[id]['theta'] - phi = fit[id]['phi'] - dir = [np.sin(theta)*np.cos(phi),np.sin(theta)*np.sin(phi),np.cos(theta)] - dir = np.array(dir)/np.linalg.norm(dir) - thetas.append(np.degrees(np.arccos(np.dot(true_dir,dir)))) - if IDP_E_MINUS in fit and IDP_MU_MINUS in fit: - fmin_electron = fit[IDP_E_MINUS]['fmin'] - fmin_muon = fit[IDP_MU_MINUS]['fmin'] - likelihood_ratio.append(fmin_muon-fmin_electron) - if IDP_E_MINUS in fit: - t_electron.append(fit[IDP_E_MINUS]['time']) - if IDP_MU_MINUS in fit: - t_muon.append(fit[IDP_MU_MINUS]['time']) - - if 'nhit' in event['ev'][0]: - nhit = event['ev'][0]['nhit'] - psi.append(fit[id]['psi']/nhit) - - if len(t_electron) < 2 or len(t_muon) < 2: + + with h5py.File(filename) as f: + ev = pd.read_hdf(filename, "ev") + mcgn = pd.read_hdf(filename, "mcgn") + fits = pd.read_hdf(filename, "fits") + + # get rid of 2nd events like Michel electrons + ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).first() + + # Now, we merge all three datasets together to produce a single + # dataframe. To do so, we join the ev dataframe with the mcgn frame + # on the evn column, and then join with the fits on the run and + # gtid columns. + # + # At the end we will have a single dataframe with one row for each + # fit, i.e. it will look like: + # + # >>> data + # run gtid nhit, ... mcgn_x, mcgn_y, mcgn_z, ..., fit_id1, fit_x, fit_y, fit_z, ... + # + # Before merging, we prefix the primary seed track table with mcgn_ + # and the fit table with fit_ just to make things easier. + + # Prefix track and fit frames + mcgn = mcgn.add_prefix("mcgn_") + fits = fits.add_prefix("fit_") + + # merge ev and mcgn on evn + data = ev.merge(mcgn,left_on=['evn'],right_on=['mcgn_evn']) + # merge data and fits on run and gtid + data = data.merge(fits,left_on=['run','gtid'],right_on=['fit_run','fit_gtid']) + + # calculate true kinetic energy + mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values] + data['T'] = data['mcgn_energy'].values - mass + data['dx'] = data['fit_x'].values - data['mcgn_x'].values + data['dy'] = data['fit_y'].values - data['mcgn_y'].values + data['dz'] = data['fit_z'].values - data['mcgn_z'].values + data['dT'] = data['fit_energy1'].values - data['T'].values + + true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze() + dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']), + np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']), + np.cos(data['fit_theta1']))).squeeze() + + data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1))) + + # only select fits which have at least 2 fits + data = data.groupby(['run','gtid']).filter(lambda x: len(x) > 1) + data_true = data[data['fit_id1'] == data['mcgn_id']] + data_e = data[data['fit_id1'] == IDP_E_MINUS] + data_mu = data[data['fit_id1'] == IDP_MU_MINUS] + + data_true = data_true.set_index(['run','gtid']) + data_e = data_e.set_index(['run','gtid']) + data_mu = data_mu.set_index(['run','gtid']) + + data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin'] + data_true['te'] = data_e['fit_time'] + data_true['tm'] = data_mu['fit_time'] + data_true['Te'] = data_e['fit_energy1'] + + if len(data_true) < 2: continue - mean, mean_error, std, std_error = get_stats(dT) + mean, mean_error, std, std_error = get_stats(data_true.dT) print("dT = %.2g +/- %.2g" % (mean, mean_error)) print("std(dT) = %.2g +/- %.2g" % (std, std_error)) - mean, mean_error, std, std_error = get_stats(dx) + mean, mean_error, std, std_error = get_stats(data_true.dx) print("dx = %4.2g +/- %.2g" % (mean, mean_error)) print("std(dx) = %4.2g +/- %.2g" % (std, std_error)) - mean, mean_error, std, std_error = get_stats(dy) + mean, mean_error, std, std_error = get_stats(data_true.dy) print("dy = %4.2g +/- %.2g" % (mean, mean_error)) print("std(dy) = %4.2g +/- %.2g" % (std, std_error)) - mean, mean_error, std, std_error = get_stats(dz) + mean, mean_error, std, std_error = get_stats(data_true.dz) print("dz = %4.2g +/- %.2g" % (mean, mean_error)) print("std(dz) = %4.2g +/- %.2g" % (std, std_error)) - mean, mean_error, std, std_error = get_stats(thetas) + mean, mean_error, std, std_error = get_stats(data_true.theta) print("std(theta) = %4.2g +/- %.2g" % (std, std_error)) plt.figure(1) - plot_hist(dT, label=filename) + plot_hist(data_true.dT, label=filename) plt.xlabel("Kinetic Energy difference (MeV)") plt.figure(2) - plot_hist(dx, label=filename) + plot_hist(data_true.dx, label=filename) plt.xlabel("X Position difference (cm)") plt.figure(3) - plot_hist(dy, label=filename) + plot_hist(data_true.dy, label=filename) plt.xlabel("Y Position difference (cm)") plt.figure(4) - plot_hist(dz, label=filename) + plot_hist(data_true.dz, label=filename) plt.xlabel("Z Position difference (cm)") plt.figure(5) - plot_hist(thetas, label=filename) + plot_hist(data_true.theta, label=filename) plt.xlabel(r"$\theta$ (deg)") plt.figure(6) - plot_hist(likelihood_ratio, label=filename) + plot_hist(data_true.ratio, label=filename) plt.xlabel(r"Log Likelihood Ratio ($e/\mu$)") plt.figure(7) - plot_hist(np.array(t_electron)/1e3/60.0, label=filename) + plot_hist(data_true.te/1e3/60.0, label=filename) plt.xlabel(r"Electron Fit time (minutes)") plt.figure(8) - plot_hist(np.array(t_muon)/1e3/60.0, label=filename) + plot_hist(data_true.tm/1e3/60.0, label=filename) plt.xlabel(r"Muon Fit time (minutes)") - if len(psi): - plt.figure(9) - plot_hist(psi, label=filename) - plt.xlabel(r"$\Psi$/Nhit") + plt.figure(9) + plot_hist(data_true.fit_psi/data_true.nhit, label=filename) + plt.xlabel(r"$\Psi$/Nhit") plot_legend(1) plot_legend(2) @@ -205,6 +199,5 @@ if __name__ == '__main__': plot_legend(6) plot_legend(7) plot_legend(8) - if len(psi): - plot_legend(9) + plot_legend(9) plt.show() |