From 3c3720143201915729759478c7e7e36c71ac3bbd Mon Sep 17 00:00:00 2001
From: tlatorre <tlatorre@uchicago.edu>
Date: Fri, 12 Jul 2019 10:28:31 -0500
Subject: fix a couple of bugs in plot-energy

This commit fixes two small bugs in the plotting scripts. First, after the HDF5
commit, plot-energy was no longer correctly computing the combined particle ID
that I had been using before, which is needed in order to plot things
correctly. Second, I realized that the dataframe groupby function first() does
not return the first row of each group: it returns the first non-null value of
each column within the group, so the result can mix values from different rows.
What I really wanted was the first row from each group, so all instances of
.first() were updated to .nth(0).

See https://stackoverflow.com/questions/20067636/pandas-dataframe-get-first-row-of-each-group.
---
 utils/plot             |  2 +-
 utils/plot-energy      | 12 +++++++-----
 utils/plot-fit-results |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/utils/plot b/utils/plot
index 1bac500..fd5ea51 100755
--- a/utils/plot
+++ b/utils/plot
@@ -90,7 +90,7 @@ if __name__ == '__main__':
             fits = pd.read_hdf(filename, "fits")
 
             # get rid of 2nd events like Michel electrons
-            ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).first()
+            ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).nth(0)
 
             # Now, we merge all three datasets together to produce a single
             # dataframe. To do so, we join the ev dataframe with the mcgn frame
diff --git a/utils/plot-energy b/utils/plot-energy
index 4a8521b..969cc4b 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -149,7 +149,9 @@ if __name__ == '__main__':
 
     fits['psi'] /= fits.merge(ev,on=['run','gtid'])['nhit']
     fits['ke'] = fits['energy1']
-    fits['id'] = fits['id1'] + fits['id2']*100 + fits['id3']*10000
+    fits['id'] = fits['id1']
+    fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
+    fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
     fits['theta'] = fits['theta1']
 
     # Make sure events are in order. We use run number and GTID here which
@@ -347,10 +349,10 @@ if __name__ == '__main__':
     michel = michel[~nan_michel]
 
     # get the best fit
-    prompt = prompt.sort_values('fmin').groupby(['run','gtid']).first()
-    atm = atm.sort_values('fmin').groupby(['run','gtid']).first()
-    michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).first()
-    muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).first()
+    prompt = prompt.sort_values('fmin').groupby(['run','gtid']).nth(0)
+    atm = atm.sort_values('fmin').groupby(['run','gtid']).nth(0)
+    michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).nth(0)
+    muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).nth(0)
     muons = muons[muons.id == 22]
 
     # require r < 6 meters
diff --git a/utils/plot-fit-results b/utils/plot-fit-results
index 7115b81..cb90c8d 100755
--- a/utils/plot-fit-results
+++ b/utils/plot-fit-results
@@ -97,7 +97,7 @@ if __name__ == '__main__':
     mcgn = pd.concat([pd.read_hdf(filename, "mcgn").assign(filename=filename) for filename in args.filenames])
 
     # get rid of 2nd events like Michel electrons
-    ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).first()
+    ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).nth(0)
 
     # Now, we merge all three datasets together to produce a single
     # dataframe. To do so, we join the ev dataframe with the mcgn frame
-- 
cgit