Diffstat (limited to 'utils/sddm/plot_energy.py')
-rwxr-xr-x | utils/sddm/plot_energy.py | 139 |
1 file changed, 139 insertions, 0 deletions
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 0c6dcf6..f496b0b 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -347,3 +347,142 @@ def michel_spectrum(T):
     y[mask] = GF**2*MUON_MASS**5*x[mask]**2*(6-4*x[mask]+FINE_STRUCTURE_CONSTANT*f(x[mask])/np.pi)/(192*np.pi**3)
     y *= 2*MUON_MASS
     return y
+
+def get_events(filenames, merge_fits=False):
+    ev = pd.concat([pd.read_hdf(filename, "ev") for filename in filenames],ignore_index=True)
+    fits = pd.concat([pd.read_hdf(filename, "fits") for filename in filenames],ignore_index=True)
+    rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True)
+
+    first_gtid = rhdr.set_index('run').to_dict()['first_gtid']
+
+    # First, remove junk events since orphans won't have a 50 MHz clock and so
+    # could screw up the 50 MHz clock unwrapping
+    ev = ev[ev.dc & DC_JUNK == 0]
+
+    # We need the events to be in time order here in order to calculate the
+    # delta t between events. It's not obvious exactly how to do this. You
+    # could sort by GTID, but that wraps around. Similarly we can't sort by the
+    # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort
+    # by the 10 MHz clock since it can be unreliable.
+    #
+    # Update: Phil proposed a clever way to get the events in order using the
+    # GTID:
+    #
+    # > The GTID rollover should be easy to handle because there should never
+    # > be two identical GTID's in a run. So if you order the events by GTID,
+    # > you can assume that events with GTID's that come logically before the
+    # > first GTID in the run must have occurred after the other events.
+    #
+    # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
+    # in the run and sort on that. We get the first GTID from the RHDR bank.
+    ev['gtid_sort'] = ev['gtid'].copy()
+
+    ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
+
+    ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
+
+    for run, ev_run in ev.groupby('run'):
+        # Warn about 50 MHz clock jumps since they could indicate that the
+        # events aren't in order.
+        dt = np.diff(ev_run.gtr)
+        if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
+            print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
+                (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
+
+    # unwrap the 50 MHz clock within each run
+    ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
+
+    for run, ev_run in ev.groupby('run'):
+        # Warn about GTID jumps since we could be missing a potential flasher
+        # and/or breakdown, and we need all the events in order to do a
+        # retrigger cut
+        if np.count_nonzero(np.diff(ev_run.gtid) != 1):
+            print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
+
+    # calculate the time difference between each event and the previous event
+    # so we can tag retrigger events
+    ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
+
+    # Calculate the approximate Ockham factor.
+    # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
+    #
+    # Note: This is a really approximate form by assuming that the shape of
+    # the likelihood space is equal to the average uncertainty in the
+    # different parameters.
+    fits['w'] = fits['n']*np.log(0.05/10e3) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
+
+    # Apply a fudge factor to the Ockham factor of 100 for each extra particle
+    # FIXME: I chose 100 a while ago but didn't really investigate what the
+    # optimal value was or exactly why it was needed. Should do this.
+    fits['w'] -= fits['n']*100
+
+    # Note: we index on the left hand side with loc to avoid a copy error
+    #
+    # See https://www.dataquest.io/blog/settingwithcopywarning/
+    fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
+    fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
+
+    fits['fmin'] = fits['fmin'] - fits['w']
+
+    # See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge
+    # for how to properly divide the psi column by nhit_cal which is in the ev
+    # dataframe before we actually merge
+    fits['psi'] /= fits.reset_index().merge(ev,on=['run','gtid']).set_index('index')['nhit_cal']
+    fits.loc[fits['n'] == 1,'ke'] = fits['energy1']
+    fits.loc[fits['n'] == 2,'ke'] = fits['energy1'] + fits['energy2']
+    fits.loc[fits['n'] == 3,'ke'] = fits['energy1'] + fits['energy2'] + fits['energy3']
+    fits['id'] = fits['id1']
+    fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
+    fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
+    fits['theta'] = fits['theta1']
+    fits['r'] = np.sqrt(fits.x**2 + fits.y**2 + fits.z**2)
+    fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3
+
+    ev['ftp_r'] = np.sqrt(ev.ftp_x**2 + ev.ftp_y**2 + ev.ftp_z**2)
+    ev['ftp_r_psup'] = (ev['ftp_r']/PSUP_RADIUS)**3
+
+    # Now, select prompt events.
+    #
+    # We define a prompt event here as any event with an NHIT > 100 and whose
+    # previous > 100 nhit event was more than 250 ms ago
+    #
+    # Note: It's important we do this *before* applying the data cleaning cuts
+    # since otherwise we may have a prompt event identified only after the
+    # cuts.
+    #
+    # For example, suppose there was a breakdown and for whatever reason
+    # the *second* event after the breakdown didn't get tagged correctly. If we
+    # apply the data cleaning cuts first and then tag prompt events then this
+    # event will get tagged as a prompt event.
+    ev = ev.groupby('run',group_keys=False).apply(prompt_event)
+
+    print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
+
+    # flasher follower cut
+    ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
+
+    # breakdown follower cut
+    ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
+
+    # retrigger cut
+    ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
+
+    if merge_fits:
+        ev.set_index(['run','gtid'])
+
+        ev = pd.merge(fits,ev,how='inner',on=['run','gtid'])
+        ev_single_particle = ev[(ev.id2 == 0) & (ev.id3 == 0)]
+        ev_single_particle = ev_single_particle.sort_values('fmin').groupby(['run','gtid']).nth(0)
+        ev = ev.sort_values('fmin').groupby(['run','gtid']).nth(0)
+        ev['psi'] /= ev.nhit_cal
+
+        ev['cos_theta'] = np.cos(ev_single_particle['theta1'])
+        ev['r'] = np.sqrt(ev.x**2 + ev.y**2 + ev.z**2)
+        ev['udotr'] = np.sin(ev_single_particle.theta1)*np.cos(ev_single_particle.phi1)*ev_single_particle.x + \
+                      np.sin(ev_single_particle.theta1)*np.sin(ev_single_particle.phi1)*ev_single_particle.y + \
+                      np.cos(ev_single_particle.theta1)*ev_single_particle.z
+        ev['udotr'] /= ev.r
+
+        return ev
+
+    return ev, fits
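
For reference, a minimal usage sketch of the new function (hypothetical, not part of this commit; the command-line filenames, the `ev.prompt` selection, and the 0.9 fiducial cut value are illustrative assumptions, though the `prompt` and `r_psup` columns are created by the code above):

    # Hypothetical driver script; assumes plot_energy.py and its helpers
    # are importable and the HDF5 files contain "ev", "fits", and "rhdr".
    import sys

    # With merge_fits=True, each prompt event is merged with its best fit,
    # i.e. the fit with the lowest fmin after the Ockham factor penalty.
    ev = get_events(sys.argv[1:], merge_fits=True)

    # With merge_fits=False (the default), events and fits are returned
    # separately and can be joined later on (run, gtid):
    # ev, fits = get_events(sys.argv[1:])

    # Example downstream selection: prompt events reconstructed inside a
    # fiducial volume (the 0.9 cut on the cubed radius is illustrative only).
    prompt = ev[ev.prompt & (ev.r_psup < 0.9)]
    print("prompt events in fiducial volume = %i" % len(prompt))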