diff options
| author | tlatorre <tlatorre@uchicago.edu> | 2020-06-14 02:29:26 -0500 | 
|---|---|---|
| committer | tlatorre <tlatorre@uchicago.edu> | 2020-06-14 02:29:26 -0500 | 
| commit | 83920fb80f48b0e7f4f2eea65b14f088102e4c24 (patch) | |
| tree | 5a9c85671b356d4310c794ef11eb637f79c711d9 /utils | |
| parent | 6ea901a252248477add3f686707ad827fa5ea1e2 (diff) | |
| download | sddm-83920fb80f48b0e7f4f2eea65b14f088102e4c24.tar.gz sddm-83920fb80f48b0e7f4f2eea65b14f088102e4c24.tar.bz2 sddm-83920fb80f48b0e7f4f2eea65b14f088102e4c24.zip  | |
tag instrumentals and michels in get_events()
- label instrumentals in get_events()
- tag michels and stopping muons in get_events()
- update submit-grid-jobs to use get_events()
Diffstat (limited to 'utils')
| -rwxr-xr-x | utils/sddm/plot_energy.py | 31 |
| -rwxr-xr-x | utils/submit-grid-jobs | 60 | 
2 files changed, 33 insertions(+), 58 deletions(-)
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py index 1c1b7e3..b4882d9 100755 --- a/utils/sddm/plot_energy.py +++ b/utils/sddm/plot_energy.py @@ -141,6 +141,20 @@ def michel_cut(ev):          michel['muon_gtid'] = -1          return michel +def tag_michels(ev): +    """ +    Tags michel events with the michel column and also adds a column linking to +    the muon GTID. The muon also gets labelled with the stopping_muon column. +    """ +    michels = michel_cut(ev) +    ev['michel'] = np.zeros(len(ev),dtype=np.bool) +    ev.loc[michels.index,'michel'] = True +    ev['muon_gtid'] = -np.ones(len(ev),dtype=np.int) +    ev.loc[michels.index,'muon_gtid'] = michels['muon_gtid'] +    ev['stopping_muon'] = np.zeros(len(ev),dtype=np.bool) +    ev.loc[ev.gtid.isin(ev.muon_gtid[ev.muon_gtid > 0].values),'stopping_muon'] = 1 +    return ev +  def atmospheric_events(ev):      """      Tags atmospheric events which have a neutron follower. @@ -452,6 +466,12 @@ def get_events(filenames, merge_fits=False):      fits = pd.concat([pd.read_hdf(filename, "fits") for filename in filenames],ignore_index=True)      rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True) +    if len(ev) == 0: +        if merge_fits: +            return ev +        else: +            return ev, fits +      first_gtid = rhdr.set_index('run').to_dict()['first_gtid']      # First, remove junk events since orphans won't have a 50 MHz clock and so @@ -566,6 +586,17 @@ def get_events(filenames, merge_fits=False):      # retrigger cut      ev = ev.groupby('run',group_keys=False).apply(retrigger_cut) +    # Label instrumentals +    ev['noise'] = ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_ITC | DC_ESUM) != 0 +    ev['neck'] = ((ev.dc & DC_NECK) != 0) & ~ev.noise +    ev['flasher'] = ((ev.dc & DC_FLASHER) != 0) & ~(ev.noise | ev.neck) & (ev.nhit < 1000) +    ev['breakdown'] = ((ev.dc & (DC_FLASHER | DC_BREAKDOWN)) != 0) & ~(ev.noise | ev.neck) & 
(ev.nhit >= 1000) +    ev['muon'] = ((ev.dc & DC_MUON) != 0) & ~(ev.noise | ev.neck | ev.flasher | ev.breakdown) +    ev['signal'] = ~(ev.noise | ev.neck | ev.flasher | ev.breakdown | ev.muon) +    ev['instrumental'] = ~ev.signal + +    ev = ev.groupby('run',group_keys=False).apply(tag_michels) +      if merge_fits:          ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])          # Set the index to (run, gtid) so we can set columns from the single particle results diff --git a/utils/submit-grid-jobs b/utils/submit-grid-jobs index f73a57b..c5f72b0 100755 --- a/utils/submit-grid-jobs +++ b/utils/submit-grid-jobs @@ -444,6 +444,7 @@ if __name__ == '__main__':      import pandas as pd      from sddm.dc import DC_MUON, DC_JUNK, DC_CRATE_ISOTROPY, DC_QVNHIT, DC_NECK, DC_FLASHER, DC_ESUM, DC_OWL, DC_OWL_TRIGGER, DC_FTS, DC_ITC, DC_BREAKDOWN      from sddm import read_hdf +    from sddm.plot_energy import get_events      parser = argparse.ArgumentParser("submit grid jobs", formatter_class=argparse.ArgumentDefaultsHelpFormatter)      parser.add_argument("filenames", nargs='*', help="input files") @@ -556,8 +557,7 @@ if __name__ == '__main__':              else:                  check_call([zdab_cat,filename,"-o",output.name],stderr=f) -        ev = read_hdf(output.name, "ev") -        rhdr = read_hdf(output.name, "rhdr") +        ev, fits = get_events([output.name], merge_fits=False)          if len(ev) == 0:              continue @@ -567,62 +567,6 @@ if __name__ == '__main__':              log.notice("Skipping %s because it's already in the database" % tail)              continue -        first_gtid = rhdr.set_index('run').to_dict()['first_gtid'] - -        # First, remove junk events since orphans won't have a 50 MHz clock and so -        # could screw up the 50 MHz clock unwrapping -        ev = ev[ev.dc & DC_JUNK == 0] - -        # We need the events to be in time order here in order to calculate the -        # delta t between events. 
It's not obvious exactly how to do this. You -        # could sort by GTID, but that wraps around. Similarly we can't sort by the -        # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort -        # by the 10 MHz clock since it can be unreliable. -        # -        # Update: Phil proposed a clever way to get the events in order using the -        # GTID: -        # -        # > The GTID rollover should be easy to handle because there should never -        # > be two identical GTID's in a run. So if you order the events by GTID, -        # > you can assume that events with GTID's that come logically before the -        # > first GTID in the run must have occurred after the other events. -        # -        # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID -        # in the event and sort on that. We get the first GTID from the RHDR bank. -        ev['gtid_sort'] = ev['gtid'].copy() - -        ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True) - -        ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort') - -        for run, ev_run in ev.groupby('run'): -            # Warn about 50 MHz clock jumps since they could indicate that the -            # events aren't in order. -            dt = np.diff(ev_run.gtr) -            if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)): -                log.warn("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" 
% \ -                    (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run)) - -        # unwrap the 50 MHz clock within each run -        ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock) - -        for run, ev_run in ev.groupby('run'): -            # Warn about GTID jumps since we could be missing a potential flasher -            # and/or breakdown, and we need all the events in order to do a -            # retrigger cut -            if np.count_nonzero(np.diff(ev_run.gtid) != 1): -                log.warn("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run)) - -        # calculate the time difference between each event and the previous event -        # so we can tag retrigger events -        ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values)))) - -        # Now, select prompt events. -        ev = ev.groupby('run',group_keys=False).apply(prompt_event) - -        ev['instrumental'] = False -        ev.loc[ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ITC | DC_BREAKDOWN | DC_MUON) != 0,'instrumental'] = True -          nevents = 0          njobs = 0          for index, event in ev.iterrows():  | 
