fix a SettingWithCopyWarning in plot-energy

author: tlatorre <tlatorre@uchicago.edu> 2019-09-30 16:23:47 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2019-09-30 16:23:47 -0500
commit: 2d16c41cea48bb4660469e94de9949fbc3880c74 (patch)
tree: 83b000c76230cc78b286e71bf6fe1c1ce7ebae2c /utils/plot-energy
parent: 33bea0e6a0c3b585b6feefc8465465fb0fc819e5 (diff)
download: sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.tar.gz
sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.tar.bz2
sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.zip
1 files changed, 33 insertions, 6 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 9000f4a..ce4fb89 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -235,6 +235,36 @@ def atmospheric_events(ev):
 
     return ev
 
+def gtid_sort(ev, first_gtid):
+    """
+    Adds 0x1000000 to the gtid_sort column for all events whose gtid is less
+    than the first gtid of their run. The gtid_sort column can then be used to
+    sort the events of a run sequentially.
+
+    Parameters:
+
+        ev: dataframe with the events of a single run, as handed to us by
+            groupby('run').apply(). It must have `gtid` and `gtid_sort`
+            columns. The run number is read from ev.name, which
+            groupby().apply() sets to the group key.
+        first_gtid: dictionary mapping run number -> first gtid of that run.
+            If the run is missing, a warning is printed and the gtid of the
+            first row of `ev` is stored in the dictionary (i.e. the
+            dictionary is updated in place) and used instead.
+
+    Returns a copy of `ev` with the gtid_sort column updated. The dataframe
+    passed in is not modified.
+
+    This function should be passed to ev.groupby('run').apply(). We use this
+    idiom instead of just looping over the groupby results since groupby()
+    makes a copy of the dataframe, i.e.
+
+        for run, ev_run in ev.groupby('run'):
+            ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    would produce a SettingWithCopyWarning, so instead we use:
+
+        ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)
+
+    which doesn't have this problem.
+    """
+    # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
+    #
+    # Note: this has to be read before copying the group below since the name
+    # attribute is set on the object handed to us by apply().
+    run = ev.name
+
+    # Functions passed to groupby().apply() shouldn't mutate the group they
+    # are given, so we work on an explicit copy and return that.
+    ev = ev.copy()
+
+    if run not in first_gtid:
+        print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
+        # Use iloc here since the group keeps the index labels of the full
+        # dataframe, so the label 0 is in general not part of this group.
+        first_gtid[run] = ev.gtid.iloc[0]
+
+    ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    return ev
+
 if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
@@ -274,11 +304,8 @@ if __name__ == '__main__':
     # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
     # in the event and sort on that. We get the first GTID from the RHDR bank.
     ev['gtid_sort'] = ev['gtid'].copy()
-    for run, ev_run in ev.groupby('run'):
-        if run not in first_gtid:
-            print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
-            first_gtid[run] = ev_run.gtid[0]
-        ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
 
     ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
 
@@ -377,7 +404,7 @@ if __name__ == '__main__':
     # 00-orphan cut
     ev = ev[(ev.gtid & 0xff) != 0]
 
-    print("number of events after data cleaning = %i" % len(ev))
+    print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))
 
     # Now, we select events tagged by the muon tag which should tag only
     # external muons. We keep the sample of muons since it's needed later to
author	tlatorre <tlatorre@uchicago.edu>	2019-09-30 16:23:47 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2019-09-30 16:23:47 -0500
commit	2d16c41cea48bb4660469e94de9949fbc3880c74 (patch)
tree	83b000c76230cc78b286e71bf6fe1c1ce7ebae2c /utils/plot-energy
parent	33bea0e6a0c3b585b6feefc8465465fb0fc819e5 (diff)
download	sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.tar.gz sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.tar.bz2 sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.zip