author    tlatorre <tlatorre@uchicago.edu>    2020-05-12 11:34:47 -0500
committer tlatorre <tlatorre@uchicago.edu>    2020-05-12 11:34:47 -0500
commit    764bf1b496de0d3d3a22b988a0634ea68434bb26 (patch)
tree      83dde566645f6b7f1abef44ae9654bb62482a0ac /utils/plot-energy
parent    c24438b1fa9d368f2b05d623c7a2cb0d27852cfc (diff)
add a script to do a closure test on the contamination analysis
Diffstat (limited to 'utils/plot-energy')
-rwxr-xr-x  utils/plot-energy  120
1 file changed, 1 insertion(+), 119 deletions(-)
diff --git a/utils/plot-energy b/utils/plot-energy
index 4cd116b..6997f3b 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -117,125 +117,7 @@ if __name__ == '__main__':
import matplotlib.pyplot as plt
- ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
- fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True)
- rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True)
-
- first_gtid = rhdr.set_index('run').to_dict()['first_gtid']
-
- # First, remove junk events since orphans won't have a 50 MHz clock and so
- # could screw up the 50 MHz clock unwrapping
- ev = ev[ev.dc & DC_JUNK == 0]
-
- # We need the events to be in time order here in order to calculate the
- # delta t between events. It's not obvious exactly how to do this. You
- # could sort by GTID, but that wraps around. Similarly we can't sort by the
- # 50 MHz clock because it also wraps around. Finally, I'm hesitant to sort
- # by the 10 MHz clock since it can be unreliable.
- #
- # Update: Phil proposed a clever way to get the events in order using the
- # GTID:
- #
- # > The GTID rollover should be easy to handle because there should never
- # > be two identical GTID's in a run. So if you order the events by GTID,
- # > you can assume that events with GTID's that come logically before the
- # > first GTID in the run must have occurred after the other events.
- #
- # Therefore, we can just add 0x1000000 to all GTIDs before the first GTID
- # in the run and sort on that. We get the first GTID from the RHDR bank.
- ev['gtid_sort'] = ev['gtid'].copy()
-
- ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid).reset_index(level=0,drop=True)
-
- ev = ev.sort_values(by=['run','gtid_sort'],kind='mergesort')
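-
- # A sketch of roughly what gtid_sort does (the real helper is defined
- # earlier in this script, so treat this as an illustration, not the
- # exact implementation): GTIDs roll over at 0x1000000, matching the
- # constant in the comment above, so any GTID below the run's first
- # GTID must come from after a rollover and gets bumped past the wrap
- # before sorting.
- #
- # def gtid_sort(ev, first_gtid):
- #     mask = ev.gtid_sort < first_gtid[ev.run.iloc[0]]
- #     ev.loc[mask, 'gtid_sort'] += 0x1000000
- #     return ev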
-
- for run, ev_run in ev.groupby('run'):
- # Warn about 50 MHz clock jumps since they could indicate that the
- # events aren't in order.
- dt = np.diff(ev_run.gtr)
- if np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)):
- print_warning("Warning: %i 50 MHz clock jumps in run %i. Are the events in order?" % \
- (np.count_nonzero((np.abs(dt) > 1e9) & (dt > -0x7ffffffffff*20.0/2)),run))
-
- # unwrap the 50 MHz clock within each run
- ev.gtr = ev.groupby(['run'],group_keys=False)['gtr'].transform(unwrap_50_mhz_clock)
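-
- # A sketch of the idea behind unwrap_50_mhz_clock (the real helper is
- # defined earlier in this script; the rollover constant and 20 ns tick
- # here are taken from the warning check above):
- #
- # def unwrap_50_mhz_clock(gtr):
- #     # each wraparound shows up as a negative diff; add one full
- #     # clock period per wrap seen so far
- #     gtr = gtr.values
- #     period = 0x7ffffffffff*20.0
- #     return gtr + period*np.cumsum(np.concatenate(([0], np.diff(gtr) < 0)))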
-
- for run, ev_run in ev.groupby('run'):
- # Warn about GTID jumps since we could be missing a potential flasher
- # and/or breakdown, and we need all the events in order to do a
- # retrigger cut
- if np.count_nonzero(np.diff(ev_run.gtid) != 1):
- print_warning("Warning: %i GTID jumps in run %i" % (np.count_nonzero(np.diff(ev_run.gtid) != 1),run))
-
- # calculate the time difference between each event and the previous event
- # so we can tag retrigger events
- ev['dt'] = ev.groupby(['run'],group_keys=False)['gtr'].transform(lambda x: np.concatenate(([1e9],np.diff(x.values))))
-
- # Calculate the approximate Ockham factor.
- # See Chapter 20 in "Probability Theory: The Logic of Science" by Jaynes
- #
- # Note: This is a really approximate form by assuming that the shape of
- # the likelihood space is equal to the average uncertainty in the
- # different parameters.
- fits['w'] = fits['n']*np.log(0.05/10e3) + np.log(fits['energy1']) + fits['n']*np.log(1e-4/(4*np.pi))
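- # Reading of the line above: each of the n particles contributes three
- # terms, a log(0.05/10e3) resolution-over-prior-range factor, a
- # log(1e-4/(4*pi)) factor for its direction, and a log(energy_i)
- # factor for its energy (the energy2/energy3 terms are added for
- # multi-particle fits below).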
-
- # Apply a fudge factor to the Ockham factor of 100 for each extra particle
- # FIXME: I chose 100 a while ago but didn't really investigate what the
- # optimal value was or exactly why it was needed. Should do this.
- fits['w'] -= fits['n']*100
-
- # Note: we index on the left hand side with loc to avoid a copy error
- #
- # See https://www.dataquest.io/blog/settingwithcopywarning/
- fits.loc[fits['n'] > 1, 'w'] += np.log(fits[fits['n'] > 1]['energy2'])
- fits.loc[fits['n'] > 2, 'w'] += np.log(fits[fits['n'] > 2]['energy3'])
-
- fits['fmin'] = fits['fmin'] - fits['w']
-
- # See https://stackoverflow.com/questions/11976503/how-to-keep-index-when-using-pandas-merge
- # for how to keep the fits index intact while dividing the psi column by
- # nhit_cal, which lives in the ev dataframe
- fits['psi'] /= fits.reset_index().merge(ev,on=['run','gtid']).set_index('index')['nhit_cal']
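-
- # Toy illustration of the reset_index/merge/set_index idiom used above
- # (made-up values, not part of the analysis): a plain merge would
- # discard the fits index, so the division would misalign rows.
- #
- # left = pd.DataFrame({'run': [1, 1], 'gtid': [10, 11], 'psi': [4.0, 6.0]}, index=[7, 9])
- # right = pd.DataFrame({'run': [1, 1], 'gtid': [10, 11], 'nhit_cal': [2.0, 3.0]})
- # merged = left.reset_index().merge(right, on=['run', 'gtid']).set_index('index')
- # left['psi'] /= merged['nhit_cal']   # aligns on the original index [7, 9] -> [2.0, 2.0]
-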
- fits.loc[fits['n'] == 1,'ke'] = fits['energy1']
- fits.loc[fits['n'] == 2,'ke'] = fits['energy1'] + fits['energy2']
- fits.loc[fits['n'] == 3,'ke'] = fits['energy1'] + fits['energy2'] + fits['energy3']
- fits['id'] = fits['id1']
- fits.loc[fits['n'] == 2, 'id'] = fits['id1']*100 + fits['id2']
- fits.loc[fits['n'] == 3, 'id'] = fits['id1']*10000 + fits['id2']*100 + fits['id3']
- fits['theta'] = fits['theta1']
- fits['r'] = np.sqrt(fits.x**2 + fits.y**2 + fits.z**2)
- fits['r_psup'] = (fits['r']/PSUP_RADIUS)**3
-
- ev['ftp_r'] = np.sqrt(ev.ftp_x**2 + ev.ftp_y**2 + ev.ftp_z**2)
- ev['ftp_r_psup'] = (ev['ftp_r']/PSUP_RADIUS)**3
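- # Since the fraction of the detector volume inside radius r is
- # (r/PSUP_RADIUS)**3, events distributed uniformly in volume are flat
- # in r_psup, and external background piles up near 1.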
-
- print("number of events = %i" % len(ev))
-
- # Now, select prompt events.
- #
- # We define a prompt event here as any event with nhit > 100 whose
- # previous nhit > 100 event occurred more than 250 ms earlier.
- #
- # Note: It's important we do this *before* applying the data cleaning cuts
- # since otherwise we may have a prompt event identified only after the
- # cuts.
- #
- # For example, suppose there was a breakdown and for whatever reason
- # the *second* event after the breakdown didn't get tagged correctly. If we
- # apply the data cleaning cuts first, the breakdown and its correctly
- # tagged followers are removed, so this untagged event looks isolated
- # and gets tagged as a prompt event.
- ev = ev.groupby('run',group_keys=False).apply(prompt_event)
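-
- # A sketch of what prompt_event might look like given the definition
- # above (the real helper is defined earlier in this script; the column
- # name `nhit` and the use of the unwrapped gtr clock in ns are
- # assumptions):
- #
- # def prompt_event(ev):
- #     ev['prompt'] = ev.nhit > 100
- #     # keep candidates more than 250 ms (250e6 ns) after the previous
- #     # candidate; the first candidate in the run is always kept
- #     ev.loc[ev.prompt, 'prompt'] &= np.concatenate(
- #         ([True], np.diff(ev[ev.prompt].gtr.values) > 250e6))
- #     return ev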
-
- print("number of events after prompt nhit cut = %i" % np.count_nonzero(ev.prompt))
-
- # flasher follower cut
- ev = ev.groupby('run',group_keys=False).apply(flasher_follower_cut)
-
- # breakdown follower cut
- ev = ev.groupby('run',group_keys=False).apply(breakdown_follower_cut)
-
- # retrigger cut
- ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
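-
- # A sketch of the kind of cut retrigger_cut could apply using the dt
- # column computed above (the real helper and its threshold are defined
- # earlier in this script; the 1 us value here is only illustrative):
- #
- # def retrigger_cut(ev):
- #     return ev[ev.dt > 1e3]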
+ ev, fits = get_events(args.filenames)
if args.dc:
ev = ev[ev.prompt]