diff options
author | tlatorre <tlatorre@uchicago.edu> | 2019-09-30 16:23:47 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2019-09-30 16:23:47 -0500 |
commit | 2d16c41cea48bb4660469e94de9949fbc3880c74 (patch) | |
tree | 83b000c76230cc78b286e71bf6fe1c1ce7ebae2c /utils/plot-energy | |
parent | 33bea0e6a0c3b585b6feefc8465465fb0fc819e5 (diff) | |
download | sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.tar.gz sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.tar.bz2 sddm-2d16c41cea48bb4660469e94de9949fbc3880c74.zip |
fix a SettingWithCopyWarning in plot-energy
Diffstat (limited to 'utils/plot-energy')
-rwxr-xr-x | utils/plot-energy | 39 |
1 files changed, 33 insertions, 6 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 9000f4a..ce4fb89 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -235,6 +235,36 @@ def atmospheric_events(ev):
 
     return ev
 
+def gtid_sort(ev, first_gtid):
+    """
+    Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid
+    in a run, which should be passed as a dictionary. This column can then be
+    used to sort the events sequentially.
+
+    This function should be passed to ev.groupby('run').apply(). We use this
+    idiom instead of just looping over the groupby results since groupby()
+    makes a copy of the dataframe, i.e.
+
+        for run, ev_run in ev.groupby('run'):
+            ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    would produce a SettingWithCopyWarning, so instead we use:
+
+        ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)
+
+    which doesn't have this problem.
+    """
+    # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
+    run = ev.name
+
+    if run not in first_gtid:
+        print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
+        first_gtid[run] = ev.gtid.iloc[0]  # positional: the group keeps its original index labels
+
+    ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    return ev
+
 if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
@@ -274,11 +304,8 @@ if __name__ == '__main__':
     # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
     # in the event and sort on that. We get the first GTID from the RHDR bank.
     ev['gtid_sort'] = ev['gtid'].copy()
-    for run, ev_run in ev.groupby('run'):
-        if run not in first_gtid:
-            print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
-            first_gtid[run] = ev_run.gtid[0]
-        ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
 
     ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
 
@@ -377,7 +404,7 @@ if __name__ == '__main__':
     # 00-orphan cut
     ev = ev[(ev.gtid & 0xff) != 0]
 
-    print("number of events after data cleaning = %i" % len(ev))
+    print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))
 
     # Now, we select events tagged by the muon tag which should tag only
     # external muons. We keep the sample of muons since it's needed later to