switch from YAML output to HDF5 to speed things up

author: tlatorre <tlatorre@uchicago.edu> 2019-07-11 09:42:23 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2019-07-11 09:42:23 -0500
commit: 21491ca1ca2afd6951e9b5b1e74b1c919c602b36 (patch)
tree: b21b772612125c574928e4fb37221077d6a012d3 /utils/plot
parent: 034253ab63f1029291fa046ce15760aae72ae5c5 (diff)
download: sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.gz
sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.bz2
sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.zip
1 files changed, 82 insertions, 89 deletions
diff --git a/utils/plot b/utils/plot
index ac93eda..1bac500 100755
--- a/utils/plot
+++ b/utils/plot
@@ -74,6 +74,8 @@ if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
     import numpy as np
+    import h5py
+    import pandas as pd
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
@@ -81,121 +83,113 @@ if __name__ == '__main__':
 
     for filename in args.filenames:
         print(filename)
-        with open(filename) as f:
-            data = yaml.load(f.read(),Loader=Loader)
-
-        dx = []
-        dy = []
-        dz = []
-        dT = []
-        thetas = []
-        likelihood_ratio = []
-        t_electron = []
-        t_muon = []
-        psi = []
-        for event in data['data']:
-            # get the particle ID
-            id = event['mcgn'][0]['id']
-
-            if 'ev' not in event:
-                continue
-
-            if 'fit' not in event['ev'][0]:
-                # if nhit < 100 we don't fit the event
-                continue
-
-            if id not in event['ev'][0]['fit']:
-                continue
-
-            fit = event['ev'][0]['fit']
-
-            mass = SNOMAN_MASS[id]
-            # for some reason it's the *second* track which seems to contain the
-            # initial energy
-            true_energy = event['mcgn'][0]['energy']
-            # The MCTK bank has the particle's total energy (except for neutrons)
-            # so we need to convert it into kinetic energy
-            ke = true_energy - mass
-            energy = fit[id]['energy']
-            dT.append(energy-ke)
-            true_posx = event['mcgn'][0]['posx']
-            posx = fit[id]['posx']
-            dx.append(posx-true_posx)
-            true_posy = event['mcgn'][0]['posy']
-            posy = fit[id]['posy']
-            dy.append(posy-true_posy)
-            true_posz = event['mcgn'][0]['posz']
-            posz = fit[id]['posz']
-            dz.append(posz-true_posz)
-            dirx = event['mcgn'][0]['dirx']
-            diry = event['mcgn'][0]['diry']
-            dirz = event['mcgn'][0]['dirz']
-            true_dir = [dirx,diry,dirz]
-            true_dir = np.array(true_dir)/np.linalg.norm(true_dir)
-            theta = fit[id]['theta']
-            phi = fit[id]['phi']
-            dir = [np.sin(theta)*np.cos(phi),np.sin(theta)*np.sin(phi),np.cos(theta)]
-            dir = np.array(dir)/np.linalg.norm(dir)
-            thetas.append(np.degrees(np.arccos(np.dot(true_dir,dir))))
-            if IDP_E_MINUS in fit and IDP_MU_MINUS in fit:
-                fmin_electron = fit[IDP_E_MINUS]['fmin']
-                fmin_muon = fit[IDP_MU_MINUS]['fmin']
-                likelihood_ratio.append(fmin_muon-fmin_electron)
-            if IDP_E_MINUS in fit:
-                t_electron.append(fit[IDP_E_MINUS]['time'])
-            if IDP_MU_MINUS in fit:
-                t_muon.append(fit[IDP_MU_MINUS]['time'])
-
-            if 'nhit' in event['ev'][0]:
-                nhit = event['ev'][0]['nhit']
-                psi.append(fit[id]['psi']/nhit)
-
-        if len(t_electron) < 2 or len(t_muon) < 2:
+        
+        with h5py.File(filename) as f:
+            ev = pd.read_hdf(filename, "ev")
+            mcgn = pd.read_hdf(filename, "mcgn")
+            fits = pd.read_hdf(filename, "fits")
+
+            # get rid of 2nd events like Michel electrons
+            ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).first()
+
+            # Now, we merge all three datasets together to produce a single
+            # dataframe. To do so, we join the ev dataframe with the mcgn frame
+            # on the evn column, and then join with the fits on the run and
+            # gtid columns.
+            #
+            # At the end we will have a single dataframe with one row for each
+            # fit, i.e. it will look like:
+            #
+            # >>> data
+            #   run   gtid nhit, ... mcgn_x, mcgn_y, mcgn_z, ..., fit_id1, fit_x, fit_y, fit_z, ...
+            #
+            # Before merging, we prefix the primary seed track table with mcgn_
+            # and the fit table with fit_ just to make things easier.
+
+            # Prefix track and fit frames
+            mcgn = mcgn.add_prefix("mcgn_")
+            fits = fits.add_prefix("fit_")
+
+            # merge ev and mcgn on evn
+            data = ev.merge(mcgn,left_on=['evn'],right_on=['mcgn_evn'])
+            # merge data and fits on run and gtid
+            data = data.merge(fits,left_on=['run','gtid'],right_on=['fit_run','fit_gtid'])
+
+            # calculate true kinetic energy
+            mass = [SNOMAN_MASS[id] for id in data['mcgn_id'].values]
+            data['T'] = data['mcgn_energy'].values - mass
+            data['dx'] = data['fit_x'].values - data['mcgn_x'].values
+            data['dy'] = data['fit_y'].values - data['mcgn_y'].values
+            data['dz'] = data['fit_z'].values - data['mcgn_z'].values
+            data['dT'] = data['fit_energy1'].values - data['T'].values
+
+            true_dir = np.dstack((data['mcgn_dirx'],data['mcgn_diry'],data['mcgn_dirz'])).squeeze()
+            dir = np.dstack((np.sin(data['fit_theta1'])*np.cos(data['fit_phi1']),
+                             np.sin(data['fit_theta1'])*np.sin(data['fit_phi1']),
+                             np.cos(data['fit_theta1']))).squeeze()
+
+            data['theta'] = np.degrees(np.arccos((true_dir*dir).sum(axis=-1)))
+
+            # only select fits which have at least 2 fits
+            data = data.groupby(['run','gtid']).filter(lambda x: len(x) > 1)
+            data_true = data[data['fit_id1'] == data['mcgn_id']]
+            data_e = data[data['fit_id1'] == IDP_E_MINUS]
+            data_mu = data[data['fit_id1'] == IDP_MU_MINUS]
+
+            data_true = data_true.set_index(['run','gtid'])
+            data_e = data_e.set_index(['run','gtid'])
+            data_mu = data_mu.set_index(['run','gtid'])
+
+            data_true['ratio'] = data_mu['fit_fmin']-data_e['fit_fmin']
+            data_true['te'] = data_e['fit_time']
+            data_true['tm'] = data_mu['fit_time']
+            data_true['Te'] = data_e['fit_energy1']
+
+        if len(data_true) < 2:
             continue
 
-        mean, mean_error, std, std_error = get_stats(dT)
+        mean, mean_error, std, std_error = get_stats(data_true.dT)
         print("dT      = %.2g +/- %.2g" % (mean, mean_error))
         print("std(dT) = %.2g +/- %.2g" % (std, std_error))
-        mean, mean_error, std, std_error = get_stats(dx)
+        mean, mean_error, std, std_error = get_stats(data_true.dx)
         print("dx      = %4.2g +/- %.2g" % (mean, mean_error))
         print("std(dx) = %4.2g +/- %.2g" % (std, std_error))
-        mean, mean_error, std, std_error = get_stats(dy)
+        mean, mean_error, std, std_error = get_stats(data_true.dy)
         print("dy      = %4.2g +/- %.2g" % (mean, mean_error))
         print("std(dy) = %4.2g +/- %.2g" % (std, std_error))
-        mean, mean_error, std, std_error = get_stats(dz)
+        mean, mean_error, std, std_error = get_stats(data_true.dz)
         print("dz      = %4.2g +/- %.2g" % (mean, mean_error))
         print("std(dz) = %4.2g +/- %.2g" % (std, std_error))
-        mean, mean_error, std, std_error = get_stats(thetas)
+        mean, mean_error, std, std_error = get_stats(data_true.theta)
         print("std(theta) = %4.2g +/- %.2g" % (std, std_error))
 
         plt.figure(1)
-        plot_hist(dT, label=filename)
+        plot_hist(data_true.dT, label=filename)
         plt.xlabel("Kinetic Energy difference (MeV)")
         plt.figure(2)
-        plot_hist(dx, label=filename)
+        plot_hist(data_true.dx, label=filename)
         plt.xlabel("X Position difference (cm)")
         plt.figure(3)
-        plot_hist(dy, label=filename)
+        plot_hist(data_true.dy, label=filename)
         plt.xlabel("Y Position difference (cm)")
         plt.figure(4)
-        plot_hist(dz, label=filename)
+        plot_hist(data_true.dz, label=filename)
         plt.xlabel("Z Position difference (cm)")
         plt.figure(5)
-        plot_hist(thetas, label=filename)
+        plot_hist(data_true.theta, label=filename)
         plt.xlabel(r"$\theta$ (deg)")
         plt.figure(6)
-        plot_hist(likelihood_ratio, label=filename)
+        plot_hist(data_true.ratio, label=filename)
         plt.xlabel(r"Log Likelihood Ratio ($e/\mu$)")
         plt.figure(7)
-        plot_hist(np.array(t_electron)/1e3/60.0, label=filename)
+        plot_hist(data_true.te/1e3/60.0, label=filename)
         plt.xlabel(r"Electron Fit time (minutes)")
         plt.figure(8)
-        plot_hist(np.array(t_muon)/1e3/60.0, label=filename)
+        plot_hist(data_true.tm/1e3/60.0, label=filename)
         plt.xlabel(r"Muon Fit time (minutes)")
-        if len(psi):
-            plt.figure(9)
-            plot_hist(psi, label=filename)
-            plt.xlabel(r"$\Psi$/Nhit")
+        plt.figure(9)
+        plot_hist(data_true.fit_psi/data_true.nhit, label=filename)
+        plt.xlabel(r"$\Psi$/Nhit")
 
     plot_legend(1)
     plot_legend(2)
@@ -205,6 +199,5 @@ if __name__ == '__main__':
     plot_legend(6)
     plot_legend(7)
     plot_legend(8)
-    if len(psi):
-        plot_legend(9)
+    plot_legend(9)
     plt.show()
author	tlatorre <tlatorre@uchicago.edu>	2019-07-11 09:42:23 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2019-07-11 09:42:23 -0500
commit	21491ca1ca2afd6951e9b5b1e74b1c919c602b36 (patch)
tree	b21b772612125c574928e4fb37221077d6a012d3 /utils/plot
parent	034253ab63f1029291fa046ce15760aae72ae5c5 (diff)
download	sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.gz sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.tar.bz2 sddm-21491ca1ca2afd6951e9b5b1e74b1c919c602b36.zip