1 files changed, 27 insertions, 0 deletions
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index 0533cd5..b1c9314 100644
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -24,6 +24,7 @@ import pandas as pd
 from scipy.special import logsumexp
 from os.path import exists, join
 from os import environ
+import hashlib
 
 EPSILON = 1e-10
 
@@ -44,6 +45,26 @@ TRIG_NHIT_20      =      0x00000008
 TRIG_NHIT_20_LB   =      0x00000010
 TRIG_PULSE_GT     =      0x00000400
 
+def get_unique_id(df):
+    """
+    Create a unique ID field here by hashing the position of the event.
+    This field is later used to merge the GENIE systematics with the
+    Monte Carlo. Ideally we would have just used the run number and GENIE
+    event number but we can't do so because:
+
+        1. A significant fraction of the Monte Carlo failed to run the first
+           time and so I had to rerun it through SNOMAN which means the SNOMAN
+           event numbers no longer correspond to the GENIE event number for
+           these events.
+        2. Even so, the GENIE event number is still stored as the interaction
+           code in the MCPL file which is copied over to a pre-source MCVX
+           vertex object. However, unfortunately the pruner was set up to
+           delete these objects
+
+    Note: This has to match up with the calculation in read_mcpl_df().
+    """
+    return hashlib.sha1(np.array([df['x'],df['y'],df['z']]).astype(int).view(np.uint8)).hexdigest()
+
 def unwrap(p, delta, axis=-1):
     """
     A modified version of np.unwrap() useful for unwrapping the 50 MHz clock.
@@ -463,6 +484,12 @@ def get_events(filenames, merge_fits=False, nhit_thresh=None, mc=False):
     ev = pd.concat([read_hdf(filename, "ev").assign(filename=filename) for filename in filenames],ignore_index=True)
     fits = pd.concat([read_hdf(filename, "fits") for filename in filenames],ignore_index=True)
     rhdr = pd.concat([read_hdf(filename, "rhdr") for filename in filenames],ignore_index=True)
+    if mc:
+        mcgn = pd.concat([read_hdf(filename, "mcgn") for filename in filenames],ignore_index=True)
+        if len(mcgn):
+            mcgn = mcgn.groupby("evn").first().reset_index()
+            mcgn['unique_id'] = mcgn.apply(get_unique_id,axis=1)
+            ev = pd.merge(ev,mcgn[['evn','unique_id']],on=['evn'],how='left')
 
     if nhit_thresh is not None:
         ev = ev[ev.nhit_cal >= nhit_thresh]