From 3c3720143201915729759478c7e7e36c71ac3bbd Mon Sep 17 00:00:00 2001 From: tlatorre Date: Fri, 12 Jul 2019 10:28:31 -0500 Subject: fix a couple of bugs in plot-energy This commit fixes two small bugs in the plotting scripts. First, after the HDF5 commit I was no longer correctly computing the particle ID string that I had been using before, which is needed in order to plot things correctly. Second, I realized that the dataframe groupby function first() actually selects the first non-null value in each column from each group (so the result can mix values from different rows)! What I really wanted was the first row from each group, so all instances of .first() were updated to .nth(0). See https://stackoverflow.com/questions/20067636/pandas-dataframe-get-first-row-of-each-group. --- utils/plot | 2 +- utils/plot-energy | 12 +++++++----- utils/plot-fit-results | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/utils/plot b/utils/plot index 1bac500..fd5ea51 100755 --- a/utils/plot +++ b/utils/plot @@ -90,7 +90,7 @@ if __name__ == '__main__': fits = pd.read_hdf(filename, "fits") # get rid of 2nd events like Michel electrons - ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).first() + ev = ev.sort_values(['run','gtid']).groupby(['evn'],as_index=False).nth(0) # Now, we merge all three datasets together to produce a single # dataframe. To do so, we join the ev dataframe with the mcgn frame diff --git a/utils/plot-energy b/utils/plot-energy index 4a8521b..969cc4b 100755 --- a/utils/plot-energy +++ b/utils/plot-energy @@ -149,7 +149,9 @@ if __name__ == '__main__': fits['psi'] /= fits.merge(ev,on=['run','gtid'])['nhit'] fits['ke'] = fits['energy1'] - fits['id'] = fits['id1'] + fits['id2']*100 + fits['id3']*10000 + fits['id'] = fits['id1'] + fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2'] + fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3'] fits['theta'] = fits['theta1'] # Make sure events are in order. 
We use run number and GTID here which @@ -347,10 +349,10 @@ if __name__ == '__main__': michel = michel[~nan_michel] # get the best fit - prompt = prompt.sort_values('fmin').groupby(['run','gtid']).first() - atm = atm.sort_values('fmin').groupby(['run','gtid']).first() - michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).first() - muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).first() + prompt = prompt.sort_values('fmin').groupby(['run','gtid']).nth(0) + atm = atm.sort_values('fmin').groupby(['run','gtid']).nth(0) + michel_best_fit = michel.sort_values('fmin').groupby(['run','gtid']).nth(0) + muon_best_fit = muons.sort_values('fmin').groupby(['run','gtid']).nth(0) muons = muons[muons.id == 22] # require r < 6 meters diff --git a/utils/plot-fit-results b/utils/plot-fit-results index 7115b81..cb90c8d 100755 --- a/utils/plot-fit-results +++ b/utils/plot-fit-results @@ -97,7 +97,7 @@ if __name__ == '__main__': mcgn = pd.concat([pd.read_hdf(filename, "mcgn").assign(filename=filename) for filename in args.filenames]) # get rid of 2nd events like Michel electrons - ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).first() + ev = ev.sort_values(['run','gtid']).groupby(['filename','evn'],as_index=False).nth(0) # Now, we merge all three datasets together to produce a single # dataframe. To do so, we join the ev dataframe with the mcgn frame -- cgit