update chi2 to correct for energy bias

This commit updates the chi2 script to correct for the energy bias of the reconstruction relative to Monte Carlo.
author: tlatorre <tlatorre@uchicago.edu> 2020-09-06 12:27:45 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-09-06 12:27:45 -0500
commit: 9e26e8d2176af803988b1b11310975c9af1fe06d (patch)
tree: ee4a6ca41aae5aa82efde3977c05ecfe936fa8b7 /utils
parent: 0fe166726f6b93ab51ee7190596670930363068d (diff)
download: sddm-9e26e8d2176af803988b1b11310975c9af1fe06d.tar.gz
sddm-9e26e8d2176af803988b1b11310975c9af1fe06d.tar.bz2
sddm-9e26e8d2176af803988b1b11310975c9af1fe06d.zip
2 files changed, 56 insertions, 3 deletions
diff --git a/utils/chi2 b/utils/chi2
index f969498..dc78648 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -295,7 +295,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
         mc_hists[id] += muon_hists[id]*x[7]
     return mc_hists
 
-def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=99.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000):
     """
     Returns the p-value that the histogram of the data is drawn from the MC
     histogram.
@@ -421,6 +421,42 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False):
 
     return xopt, samples
 
+# Energy bias of reconstruction relative to Monte Carlo.
+#
+# Note: You can recreate this array using:
+#
+#     ./plot-fit-results -o [filename]
+ENERGY_BIAS = np.array([
+    (100.0,  0.050140,  0.078106),
+    (200.0,  0.058666,  0.078106),
+    (300.0,  0.049239, -0.000318),
+    (400.0,  0.045581, -0.020932),
+    (500.0,  0.050757, -0.028394),
+    (600.0,  0.048310, -0.029017),
+    (700.0,  0.052434, -0.020770),
+    (800.0,  0.032920, -0.019298),
+    (900.0,  0.040963, -0.015354)],
+    dtype=[('T',np.float32),('e_bias',np.float32),('u_bias',np.float32)])
+
+def correct_energy_bias(df):
+    """
+    Corrects for the energy bias of the reconstruction relative to the true
+    Monte Carlo energy.
+    """
+    # Note: We subtract here since the values in the energy_bias array are MC
+    # reconstruction relative to truth. So, for example if we have an energy
+    # bias of -2% at 100 MeV, then the array will contain -0.02. In this case,
+    # our reconstruction is low relative to the truth, so we need to *increase*
+    # our estimate.
+    df.loc[df['id1'] == 20,'energy1'] -= df.loc[df['id1'] == 20,'energy1']*np.interp(df.loc[df['id1'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
+    df.loc[df['id2'] == 20,'energy2'] -= df.loc[df['id2'] == 20,'energy2']*np.interp(df.loc[df['id2'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
+    df.loc[df['id3'] == 20,'energy3'] -= df.loc[df['id3'] == 20,'energy3']*np.interp(df.loc[df['id3'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
+    df.loc[df['id1'] == 22,'energy1'] -= df.loc[df['id1'] == 22,'energy1']*np.interp(df.loc[df['id1'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
+    df.loc[df['id2'] == 22,'energy2'] -= df.loc[df['id2'] == 22,'energy2']*np.interp(df.loc[df['id2'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
+    df.loc[df['id3'] == 22,'energy3'] -= df.loc[df['id3'] == 22,'energy3']*np.interp(df.loc[df['id3'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
+    df['ke'] = df['energy1'].fillna(0) + df['energy2'].fillna(0) + df['energy3'].fillna(0)
+    return df
+
 if __name__ == '__main__':
     import argparse
     import numpy as np
@@ -457,10 +493,15 @@ if __name__ == '__main__':
         evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
     ev = pd.concat(evs)
 
+    ev = correct_energy_bias(ev)
+
     ev_mc = get_events(args.mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
     muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
     weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)
 
+    ev_mc = correct_energy_bias(ev_mc)
+    muon_mc = correct_energy_bias(muon_mc)
+
     # Set all prompt events in the MC to be muons
     muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True
 
diff --git a/utils/plot-fit-results b/utils/plot-fit-results
index c90c13a..20c3c68 100755
--- a/utils/plot-fit-results
+++ b/utils/plot-fit-results
@@ -28,6 +28,7 @@ if __name__ == '__main__':
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
+    parser.add_argument("-o", "--output", default=None, help="output file")
     parser.add_argument("--save", action="store_true", default=False, help="save plots")
     args = parser.parse_args()
 
@@ -110,11 +111,15 @@ if __name__ == '__main__':
     # 100 bins between 50 MeV and 1 GeV
     bins = np.arange(50,1000,100)
 
+    T = (bins[1:] + bins[:-1])/2
+
     markers = itertools.cycle(('o', 'v')) 
 
     fig3, ax3 = plt.subplots(3,1,num=3,sharex=True)
     fig4, ax4 = plt.subplots(3,1,num=4,sharex=True)
 
+    output = pd.DataFrame({'T':T})
+
     for id in [IDP_E_MINUS, IDP_MU_MINUS]:
         events = data_true[data_true['mcgn_id'] == id]
 
@@ -128,8 +133,6 @@ if __name__ == '__main__':
 
         label = 'Muon' if id == IDP_MU_MINUS else 'Electron'
 
-        T = (bins[1:] + bins[:-1])/2
-
         marker = markers.next()
 
         plt.figure(1)
@@ -142,6 +145,11 @@ if __name__ == '__main__':
         ax3[1].errorbar(T,dy['median'],yerr=dy['median_err'],fmt=marker,label=label)
         ax3[2].errorbar(T,dz['median'],yerr=dz['median_err'],fmt=marker,label=label)
 
+        if id == IDP_E_MINUS:
+            output['e_bias'] = (dT['median']/T).values
+        else:
+            output['u_bias'] = (dT['median']/T).values
+
         ax4[0].errorbar(T,dx['iqr_std'],yerr=dx['iqr_std_err'],fmt=marker,label=label)
         ax4[1].errorbar(T,dy['iqr_std'],yerr=dy['iqr_std_err'],fmt=marker,label=label)
         ax4[2].errorbar(T,dz['iqr_std'],yerr=dz['iqr_std_err'],fmt=marker,label=label)
@@ -152,6 +160,10 @@ if __name__ == '__main__':
         plt.figure(6)
         plt.scatter(events['Te'],events['ratio'],marker=marker,label=label)
 
+    if args.output:
+        with h5py.File(args.output,"w") as f:
+            f.create_dataset('energy_bias',data=output.to_records())
+
     fig = plt.figure(1)
     despine(fig,trim=True)
     plt.xlabel("Kinetic Energy (MeV)")
author	tlatorre <tlatorre@uchicago.edu>	2020-09-06 12:27:45 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2020-09-06 12:27:45 -0500
commit	9e26e8d2176af803988b1b11310975c9af1fe06d (patch)
tree	ee4a6ca41aae5aa82efde3977c05ecfe936fa8b7 /utils
parent	0fe166726f6b93ab51ee7190596670930363068d (diff)
download	sddm-9e26e8d2176af803988b1b11310975c9af1fe06d.tar.gz sddm-9e26e8d2176af803988b1b11310975c9af1fe06d.tar.bz2 sddm-9e26e8d2176af803988b1b11310975c9af1fe06d.zip