update follower cuts to be more memory efficient by using ffill()

author: tlatorre <tlatorre@uchicago.edu> 2020-06-16 01:56:22 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-06-16 01:56:22 -0500
commit: c713bca9ea51cec82471bb04cf0eec87f9e36912 (patch)
tree: ad1338eccb7a1273ba25a0e030f647662d991254
parent: 0b70890d0462b56d54f6ba76d6df142f5f992ebe (diff)
download: sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.tar.gz
sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.tar.bz2
sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.zip
3 files changed, 37 insertions, 82 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index ede0613..4568bcc 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -136,16 +136,10 @@ if __name__ == '__main__':
     # 2. Nhit >= 100
     # 3. It must be > 800 ns and less than 20 microseconds from a prompt event
     #    or a muon
-    michel = ev.groupby('run',group_keys=False).apply(michel_cut)
+    michel = ev[ev.michel]
 
     print("number of michel events = %i" % len(michel))
 
-    # Tag atmospheric events.
-    #
-    # Note: We don't cut atmospheric events or muons yet because we still need
-    # all the events in order to apply the muon follower cut.
-    ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
-
     print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))
 
     # remove events 200 microseconds after a muon
diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py
index d3a1be4..4df4da8 100644
--- a/utils/sddm/__init__.py
+++ b/utils/sddm/__init__.py
@@ -182,4 +182,4 @@ def read_hdf(filename, df):
     """
     with h5py.File(filename) as f:
         data = f[df][:]
-    return pd.DataFrame.from_records(data)
+    return pd.DataFrame(data)
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 8027193..1971c8d 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -82,75 +82,34 @@ def breakdown_follower_cut(ev):
     """
     Cuts all events within 1 second of breakdown events.
     """
-    breakdowns = ev[ev.dc & DC_BREAKDOWN != 0]
-    return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \
-                      (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)]
+    time_since_last_breakdown = ev.gtr - ev.gtr.where(ev.dc & DC_BREAKDOWN != 0).ffill()
+    return ev[~((time_since_last_breakdown > 0) & (time_since_last_breakdown < 1e9))]
 
 def flasher_follower_cut(ev):
     """
     Cuts all events within 200 microseconds of flasher events.
     """
-    flashers = ev[ev.dc & DC_FLASHER != 0]
-    return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \
-                      (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)]
+    time_since_last_flasher = ev.gtr - ev.gtr.where(ev.dc & DC_FLASHER != 0).ffill()
+    return ev[~((time_since_last_flasher > 0) & (time_since_last_flasher < 200e3))]
 
 def muon_follower_cut(ev):
     """
     Cuts all events 200 microseconds after a muon.
     """
-    muons = ev[ev.dc & DC_MUON != 0]
-    return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \
-                      (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)]
-
-def michel_cut(ev):
-    """
-    Looks for Michel electrons after muons.
-    """
-    prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]
-
-    # Michel electrons and neutrons can be any event which is not a prompt
-    # event
-    follower = ev[~ev.prompt]
-
-    # require Michel events to pass more of the SNO data cleaning cuts
-    michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-
-    michel = michel[michel.nhit >= 100]
-
-    # Accept events which had a muon more than 800 nanoseconds but less than 20
-    # microseconds before them. The 800 nanoseconds cut comes from Richie's
-    # thesis. He also mentions that the In Time Channel Spread Cut is very
-    # effective at cutting electron events caused by muons, so I should
-    # implement that.
-    #
-    # Note: We currently don't look across run boundaries. This should be a
-    # *very* small effect, and the logic to do so would be very complicated
-    # since I would have to deal with 50 MHz clock rollovers, etc.
-    if prompt_plus_muons.size and michel.size:
-        mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
-               (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
-        michel = michel.iloc[np.any(mask,axis=0)]
-        michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
-                                        index=michel.index.values,
-                                        dtype=np.int32)
-        return michel
-    else:
-        # Return an empty slice since we need it to have the same datatype as
-        # the other dataframes
-        michel = ev[:0]
-        michel['muon_gtid'] = -1
-        return michel
+    time_since_last_muon = ev.gtr - ev.gtr.where(ev.dc & DC_MUON != 0).ffill()
+    return ev[~((time_since_last_muon > 0) & (time_since_last_muon < 200e3))]
 
 def tag_michels(ev):
     """
-    Tags michel events with the michel column and also adds a column linking to
-    the muon GTID. The muon also gets labelled with the stopping_muon column.
+    Looks for Michel electrons after muons.
     """
-    michels = michel_cut(ev)
-    ev['michel'] = np.zeros(len(ev),dtype=np.bool)
-    ev.loc[michels.index,'michel'] = True
-    ev['muon_gtid'] = -np.ones(len(ev),dtype=np.int)
-    ev.loc[michels.index,'muon_gtid'] = michels['muon_gtid']
+    time_since_last_prompt_plus_muon = ev.gtr - ev.gtr.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
+    ev['muon_gtid'] = ev.gtid.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
+    ev['michel'] = ~ev.prompt
+    ev['michel'] &= ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0
+    ev['michel'] &= ev.nhit >= 100
+    ev['michel'] &= (time_since_last_prompt_plus_muon > 800) & (time_since_last_prompt_plus_muon < 200e3)
+    ev.loc[~ev.michel,'muon_gtid'] = -1
     ev['stopping_muon'] = np.zeros(len(ev),dtype=np.bool)
     ev.loc[ev.gtid.isin(ev.muon_gtid[ev.muon_gtid > 0].values),'stopping_muon'] = 1
     return ev
@@ -159,25 +118,13 @@ def atmospheric_events(ev):
     """
     Tags atmospheric events which have a neutron follower.
     """
-    prompt = ev[ev.prompt]
-
-    # Michel electrons and neutrons can be any event which is not a prompt
-    # event
-    follower = ev[~ev.prompt]
-
-    ev['atm'] = np.zeros(len(ev),dtype=np.bool)
-
-    if prompt.size and follower.size:
-        # neutron followers have to obey stricter set of data cleaning cuts
-        neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-        neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
-        # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9?
-        neutron = neutron[neutron.ftp_r < AV_RADIUS]
-        neutron = neutron[neutron.rsp_energy > 4.0]
-
-        # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
-        ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
-                                         (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
+    ev['neutron'] = ~ev.prompt
+    ev['neutron'] &= ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0
+    ev['neutron'] &= ~np.isnan(ev.ftp_x) & ~np.isnan(ev.rsp_energy)
+    ev['neutron'] &= ev.ftp_r < AV_RADIUS
+    ev['neutron'] &= ev.rsp_energy > 4.0
+    time_till_next_neutron = ev.gtr.where(ev.neutron).bfill() - ev.gtr
+    ev['atm'] = ev.prompt & (time_till_next_neutron > 20e3) & (time_till_next_neutron < 250e6)
 
     return ev
 
@@ -595,8 +542,22 @@ def get_events(filenames, merge_fits=False):
     ev['signal'] = ~(ev.noise | ev.neck | ev.flasher | ev.breakdown | ev.muon)
     ev['instrumental'] = ~ev.signal
 
+    # Try to identify Michel electrons. Currently, the event selection is based
+    # on Richie's thesis. Here, we do the following:
+    #
+    # 1. Apply more data cleaning cuts to potential Michel electrons
+    # 2. Nhit >= 100
+    # 3. It must be > 800 ns and less than 20 microseconds from a prompt event
+    #    or a muon
     ev = ev.groupby('run',group_keys=False).apply(tag_michels)
 
+    # Tag atmospheric events.
+    ev = ev.groupby('run',group_keys=False).apply(atmospheric_events)
+
+    # Cut on nhit_cal >= 100 here to reduce memory usage when merging fits. We
+    # only need the lower nhit events for neutrons.
+    ev = ev[ev.nhit_cal >= 100]
+
     if merge_fits:
         ev = pd.merge(ev,fits,how='left',on=['run','gtid'])
         # Reset n, id1, id2, and id3 columns to integers
author	tlatorre <tlatorre@uchicago.edu>	2020-06-16 01:56:22 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2020-06-16 01:56:22 -0500
commit	c713bca9ea51cec82471bb04cf0eec87f9e36912 (patch)
tree	ad1338eccb7a1273ba25a0e030f647662d991254
parent	0b70890d0462b56d54f6ba76d6df142f5f992ebe (diff)
download	sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.tar.gz sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.tar.bz2 sddm-c713bca9ea51cec82471bb04cf0eec87f9e36912.zip