1 files changed, 108 insertions, 103 deletions
diff --git a/utils/plot-energy b/utils/plot-energy
index 896a6e5..49ede76 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -164,6 +164,77 @@ def flasher_follower_cut(ev):
     return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \
                       (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)]
 
+def muon_follower_cut(ev):
+    """
+    Cuts all events 200 microseconds after a muon.
+    """
+    muons = ev[ev.dc & DC_MUON != 0]
+    return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \
+                      (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)]
+
+def michel_cut(ev):
+    """
+    Looks for Michel electrons after muons.
+    """
+    prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]
+
+    # Michel electrons and neutrons can be any event which is not a prompt
+    # event
+    follower = ev[~ev.prompt]
+
+    # require Michel events to pass more of the SNO data cleaning cuts
+    michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
+
+    michel = michel[michel.nhit >= 100]
+
+    # Accept events which had a muon more than 800 nanoseconds but less
+    # than 20 microseconds before them. The 800 nanoseconds cut comes from
+    # Richie's thesis. He also mentions that the In Time Channel Spread Cut
+    # is very effective at cutting electrons events caused by muons, so I
+    # should implement that.
+    #
+    # Note: We currently don't look across run boundaries. This should be a
+    # *very* small effect, and the logic to do so would be very complicated
+    # since I would have to deal with 50 MHz clock rollovers, etc.
+    #
+    # Do a simple python for loop here over the runs since we create
+    # temporary arrays with shape (michel.size,prompt_plus_muons.size)
+    # which could be too big for the full dataset.
+    #
+    # There might be some clever way to do this without the loop in Pandas,
+    # but I don't know how.
+    if prompt_plus_muons.size and michel.size:
+        return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
+                             (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+    else:
+        return pd.DataFrame(columns=ev.columns)
+
+def atmospheric_events(ev):
+    """
+    Tags atmospheric events which have a neutron follower.
+    """
+    prompt = ev[ev.prompt]
+
+    # Michel electrons and neutrons can be any event which is not a prompt
+    # event
+    follower = ev[~ev.prompt]
+
+    ev['atm'] = np.zeros(len(ev),dtype=np.bool)
+
+    if prompt.size and follower.size:
+        # neutron followers have to obey stricter set of data cleaning cuts
+        neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
+        neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
+        r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2)
+        neutron = neutron[r < AV_RADIUS]
+        neutron = neutron[neutron.rsp_energy > 4.0]
+
+        # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
+        ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
+                                         (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
+
+    return ev
+
 if __name__ == '__main__':
     import argparse
     import matplotlib.pyplot as plt
@@ -204,8 +275,9 @@ if __name__ == '__main__':
         # Warn about 50 MHz clock jumps since they could indicate that the
         # events aren't in order.
         dt = np.diff(ev_run.gtr)
-        if np.count_nonzero(np.abs(dt) > 1e9):
-            print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % (np.count_nonzero(np.abs(dt) > 1e9),run))
+        if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
+            print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
+                (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
 
     # unwrap the 50 MHz clock within each run
     ev.gtr = ev.groupby(['run'],as_index=False)['gtr'].transform(unwrap_50_mhz_clock)
@@ -261,6 +333,24 @@ if __name__ == '__main__':
 
     print("number of events = %i" % len(ev))
 
+    # Now, select prompt events.
+    #
+    # We define a prompt event here as any event with an NHIT > 100 and whose
+    # previous > 100 nhit event was more than 250 ms ago
+    #
+    # Note: It's important we do this *before* applying the data cleaning cuts
+    # since otherwise we may have a prompt event identified only after the
+    # cuts.
+    #
+    # For example, suppose there was a breakdown and for whatever reason
+    # the *second* event after the breakdown didn't get tagged correctly. If we
+    # apply the data cleaning cuts first and then tag prompt events then this
+    # event will get tagged as a prompt event.
+    ev['prompt'] = (ev.nhit >= 100)
+    ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
+
+    print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
+
     # flasher follower cut
     ev = ev.groupby('run',as_index=False).apply(flasher_follower_cut).reset_index(level=0,drop=True)
 
@@ -274,7 +364,7 @@ if __name__ == '__main__':
     ev = ev[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN) == 0]
 
     # 00-orphan cut
-    ev = ev[~(ev.gtid & 0xff) == 0]
+    ev = ev[(ev.gtid & 0xff) != 0]
 
     print("number of events after data cleaning = %i" % len(ev))
 
@@ -285,119 +375,34 @@ if __name__ == '__main__':
 
     print("number of muons = %i" % len(muons))
 
-    # Now, select prompt events.
-    #
-    # We define a prompt event here as any event with an NHIT > 100 and whose
-    # previous > 100 nhit event was more than 250 ms ago
-    prompt = ev[ev.nhit >= 100]
-    prompt_mask = np.concatenate(([True],np.diff(prompt.gtr.values) > 250e6))
-
-    # Michel electrons and neutrons can be any event which is not a prompt
-    # event
-    follower = pd.concat([ev[ev.nhit < 100],prompt[~prompt_mask]])
-
-    # Apply the prompt mask
-    prompt = prompt[prompt_mask]
-
-    # Try to identify Michel electrons. Currenly, the event selection is based
+    # Try to identify Michel electrons. Currently, the event selection is based
     # on Richie's thesis. Here, we do the following:
     #
     # 1. Apply more data cleaning cuts to potential Michel electrons
     # 2. Nhit >= 100
     # 3. It must be > 800 ns and less than 20 microseconds from a prompt event
     #    or a muon
-    prompt_plus_muons = pd.concat([prompt,muons])
-
-    print("number of events after prompt nhit cut = %i" % len(prompt))
-
-    if prompt_plus_muons.size and follower.size:
-        # require Michel events to pass more of the SNO data cleaning cuts
-        michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-
-        michel = michel[michel.nhit >= 100]
-
-        # Accept events which had a muon more than 800 nanoseconds but less
-        # than 20 microseconds before them. The 800 nanoseconds cut comes from
-        # Richie's thesis. He also mentions that the In Time Channel Spread Cut
-        # is very effective at cutting electrons events caused by muons, so I
-        # should implement that.
-        #
-        # Note: We currently don't look across run boundaries. This should be a
-        # *very* small effect, and the logic to do so would be very complicated
-        # since I would have to deal with 50 MHz clock rollovers, etc.
-        #
-        # Do a simple python for loop here over the runs since we create
-        # temporary arrays with shape (michel.size,prompt_plus_muons.size)
-        # which could be too big for the full dataset.
-        #
-        # There might be some clever way to do this without the loop in Pandas,
-        # but I don't know how.
-        michel_sum = None
-        for run, michel_run in michel.groupby(['run']):
-            prompt_plus_muons_run = prompt_plus_muons[prompt_plus_muons['run'] == run]
-            michel_run = michel_run[np.any((michel_run.gtr.values > prompt_plus_muons_run.gtr.values[:,np.newaxis] + 800) & \
-                                           (michel_run.gtr.values < prompt_plus_muons_run.gtr.values[:,np.newaxis] + 20e3),axis=0)]
-
-            if michel_sum is None:
-                michel_sum = michel_run
-            else:
-                michel_sum = michel_sum.append(michel_run)
-
-        if michel_sum is not None:
-            michel = michel_sum
-        else:
-            michel = pd.DataFrame(columns=follower.columns)
-    else:
-        michel = pd.DataFrame(columns=follower.columns)
+    michel = ev.groupby('run',as_index=False).apply(michel_cut).reset_index(level=0,drop=True)
 
-    if prompt.size and follower.size:
-        # neutron followers have to obey stricter set of data cleaning cuts
-        neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-        neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
-        r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2)
-        neutron = neutron[r < AV_RADIUS]
-        neutron = neutron[neutron.rsp_energy > 4.0]
+    # Tag atmospheric events.
+    #
+    # Note: We don't cut atmospheric events or muons yet because we still need
+    # all the events in order to apply the muon follower cut.
+    ev = ev.groupby('run',as_index=False).apply(atmospheric_events).reset_index(level=0,drop=True)
 
-        # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
-        prompt_sum = None
-        atm_sum = None
-        for run, prompt_run in prompt.groupby(['run']):
-            neutron_run = neutron[neutron['run'] == run]
-
-            neutron_mask = np.any((neutron_run.gtr.values > prompt_run.gtr.values[:,np.newaxis] + 20e3) & \
-                                  (neutron_run.gtr.values < prompt_run.gtr.values[:,np.newaxis] + 250e6),axis=1)
-
-            if prompt_sum is None:
-                prompt_sum = prompt_run[~neutron_mask]
-            else:
-                prompt_sum = prompt_sum.append(prompt_run[~neutron_mask])
-
-            if atm_sum is None:
-                atm_sum = prompt_run[neutron_mask]
-            else:
-                atm_sum = atm_sum.append(prompt_run[neutron_mask])
-
-        atm = atm_sum
-        prompt = prompt_sum
-    else:
-        atm = pd.DataFrame(columns=prompt.columns)
+    print("number of events after neutron follower cut = %i" % np.count_nonzero(ev.prompt & (~ev.atm)))
 
-    print("number of events after neutron follower cut = %i" % len(prompt))
+    # remove events 200 microseconds after a muon
+    ev = ev.groupby('run',as_index=False).apply(muon_follower_cut).reset_index(level=0,drop=True)
 
     # Get rid of muon events in our main event sample
-    prompt = prompt[(prompt.dc & DC_MUON) == 0]
-    atm = atm[(atm.dc & DC_MUON) == 0]
+    ev = ev[(ev.dc & DC_MUON) == 0]
 
-    print("number of events after muon cut = %i" % len(prompt))
+    prompt = ev[ev.prompt & ~ev.atm]
 
-    if muons.size:
-        # remove events 200 microseconds after a muon
-        prompt = prompt[~np.any((prompt.run.values == muons.run.values[:,np.newaxis]) & \
-                                (prompt.gtr.values > muons.gtr.values[:,np.newaxis]) & \
-                                (prompt.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)]
-        atm = atm[~np.any((atm.run.values == muons.run.values[:,np.newaxis]) & \
-                          (atm.gtr.values > muons.gtr.values[:,np.newaxis]) & \
-                          (atm.gtr.values <= (muons.gtr.values[:,np.newaxis] + 200e3)),axis=0)]
+    atm = ev[ev.atm]
+
+    print("number of events after muon cut = %i" % len(prompt))
 
     # Check to see if there are any events with missing fit information
     atm_ra = atm[['run','gtid']].to_records(index=False)