aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-12-27 12:28:38 -0600
committertlatorre <tlatorre@uchicago.edu>2020-12-27 12:28:38 -0600
commitbb2580de2ce6cf90b703d327a46acf41f5bf62b5 (patch)
tree677da88e8f4ab4b12427fd9bc467847e41b26da7
parent5f79a1732be5753726ae08e94004cddcde423f37 (diff)
downloadsddm-bb2580de2ce6cf90b703d327a46acf41f5bf62b5.tar.gz
sddm-bb2580de2ce6cf90b703d327a46acf41f5bf62b5.tar.bz2
sddm-bb2580de2ce6cf90b703d327a46acf41f5bf62b5.zip
reduce memory usage
-rw-r--r--utils/sddm/plot_energy.py35
1 files changed, 32 insertions, 3 deletions
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index dae499b..c421db8 100644
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -479,7 +479,7 @@ def burst_cut(ev):
ev.loc[ev.prompt_50,'prompt_50'] &= np.concatenate(([True],np.diff(ev[ev.prompt_50].gtr.values) > 50e6))
burst = (ev.nhit >= 100)
burst.loc[burst] &= np.concatenate(([True],np.diff(ev[burst].gtr.values) > BURST_WINDOW))
- return ev.groupby(burst.cumsum()).filter(lambda ev: len(ev[ev.prompt_50]) <= BURST_MAX_EVENTS).reset_index()
+ return ev.groupby(burst.cumsum()).filter(lambda ev: len(ev[ev.prompt_50]) <= BURST_MAX_EVENTS).reset_index(drop=True)
def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
ev = pd.concat([read_hdf(filename, "ev").assign(filename=filename) for filename in filenames],ignore_index=True)
@@ -492,6 +492,14 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
mcgn['unique_id'] = mcgn.apply(get_unique_id,axis=1)
ev = pd.merge(ev,mcgn[['evn','unique_id']],on=['evn'],how='left')
+ # Delete columns we never use
+ del ev['ftk_energy']
+ if 'dte' in ev.columns:
+ del ev['dte']
+ del ev['hmsc']
+ del fits['time']
+ del fits['t0']
+
if nhit_thresh is not None:
ev = ev[ev.nhit_cal >= nhit_thresh]
@@ -535,6 +543,8 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
+ del ev['gtid_sort']
+
for run, ev_run in ev.groupby('run'):
# Warn about 50 MHz clock jumps since they could indicate that the
# events aren't in order.
@@ -583,6 +593,8 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
fits['fmin'] = fits['fmin'] - fits['w']
+ del fits['w']
+
# See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge
# for how to properly divide the psi column by nhit_cal which is in the ev
# dataframe before we actually merge
@@ -598,7 +610,7 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3
ev['ftp_r'] = np.sqrt(ev.ftp_x**2 + ev.ftp_y**2 + ev.ftp_z**2)
- ev['ftp_r_psup'] = (ev['ftp_r']/PSUP_RADIUS)**3
+ #ev['ftp_r_psup'] = (ev['ftp_r']/PSUP_RADIUS)**3
# Now, select prompt events.
#
@@ -628,8 +640,12 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
# to simulate
ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
+ del ev['gtid_diff']
+
ev = ev.groupby('run',group_keys=False).apply(burst_cut)
+ del ev['prompt_50']
+
ev['nhit_cal_over_nhit'] = ev['nhit_cal']/ev['nhit']
# Label instrumentals
@@ -642,6 +658,8 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
ev['signal'] &= ev['nhit_cal_over_nhit'] > 0.8
ev['instrumental'] = ~ev.signal
+ del ev['nhit_cal_over_nhit']
+
n_pulse_gt = np.count_nonzero((ev.trg_type & TRIG_PULSE_GT) != 0)
# Pulse GT went at approximately 5 Hz for the duration of SNO
time_pulse_gt = n_pulse_gt/5.0
@@ -652,6 +670,9 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
# Note: We don't take the max and min here because there might be bit flips
# in the 10 MHz clock
time_10_mhz = np.nanpercentile(t10,99.9) - np.nanpercentile(t10,0.1)
+ del ev['jdy']
+ del ev['ut1']
+ del ev['ut2']
else:
time_10_mhz = np.nan
@@ -667,6 +688,12 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
# Tag atmospheric events.
ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
+ del ev['rsp_energy']
+ del ev['ftp_x']
+ del ev['ftp_y']
+ del ev['ftp_z']
+ del ev['ftp_r']
+
# Cut on nhit_cal >= 100 here to reduce memory usage when merging fits. We
# only need the lower nhit events for neutrons.
ev = ev[ev.neutron | (ev.nhit_cal >= 100)]
@@ -675,8 +702,10 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
if not mc:
ev = ev[(ev.trg_type & (TRIG_NHIT_100_LO | TRIG_NHIT_100_MED | TRIG_NHIT_100_HI | TRIG_NHIT_20 | TRIG_NHIT_20_LB)) != 0]
+ del ev['trg_type']
+
# Require all 10 fits
- fits = fits.groupby(['run','gtid']).filter(lambda x: len(x) >= 10).reset_index()
+ fits = fits.groupby(['run','gtid']).filter(lambda x: len(x) >= 10).reset_index(drop=True)
if merge_fits:
ev = pd.merge(ev,fits,how='left',on=['run','gtid'])