update how the energy bias is applied in chi2 and plot-michels

This commit updates how the energy bias is applied when we correct for the energy bias in correct_energy_bias(). The correct way to apply this correction is to compute: T_corrected = T_reco/(1+bias) whereas previously we were multiplying by (1-bias).
author: tlatorre <tlatorre@uchicago.edu> 2020-10-05 11:15:33 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-10-05 11:15:33 -0500
commit: 1cd79549ee8ceb0f5e64e06611d891098905c05a (patch)
tree: 326a84888fd7ae44846c206846e5be24982bd092
parent: 1cc5fc8014ab75ac55672aa962228ac54ae8d9db (diff)
download: sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.tar.gz
sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.tar.bz2
sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.zip
2 files changed, 16 insertions, 14 deletions
diff --git a/utils/chi2 b/utils/chi2
index 23c8560..205037b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -489,17 +489,19 @@ def correct_energy_bias(df):
     Corrects for the energy bias of the reconstruction relative to the true
     Monte Carlo energy.
     """
-    # Note: We subtract here since the values in the energy_bias array are MC
-    # reconstruction relative to truth. So, for example if we have an energy
-    # bias of -2% at 100 MeV, then the array will contain -0.02. In this case,
-    # our reconstruction is low relative to the truth, so we need to *increase*
-    # our estimate.
-    df.loc[df['id1'] == 20,'energy1'] -= df.loc[df['id1'] == 20,'energy1']*np.interp(df.loc[df['id1'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
-    df.loc[df['id2'] == 20,'energy2'] -= df.loc[df['id2'] == 20,'energy2']*np.interp(df.loc[df['id2'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
-    df.loc[df['id3'] == 20,'energy3'] -= df.loc[df['id3'] == 20,'energy3']*np.interp(df.loc[df['id3'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
-    df.loc[df['id1'] == 22,'energy1'] -= df.loc[df['id1'] == 22,'energy1']*np.interp(df.loc[df['id1'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
-    df.loc[df['id2'] == 22,'energy2'] -= df.loc[df['id2'] == 22,'energy2']*np.interp(df.loc[df['id2'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
-    df.loc[df['id3'] == 22,'energy3'] -= df.loc[df['id3'] == 22,'energy3']*np.interp(df.loc[df['id3'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
+    # Note: We divide here since we define the bias as:
+    #
+    #     bias = (T_reco - T_true)/T_true
+    #
+    # Therefore,
+    #
+    #     T_true = T_reco/(1+bias)
+    df.loc[df['id1'] == 20,'energy1'] /= (1+np.interp(df.loc[df['id1'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias']))
+    df.loc[df['id2'] == 20,'energy2'] /= (1+np.interp(df.loc[df['id2'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias']))
+    df.loc[df['id3'] == 20,'energy3'] /= (1+np.interp(df.loc[df['id3'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias']))
+    df.loc[df['id1'] == 22,'energy1'] /= (1+np.interp(df.loc[df['id1'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias']))
+    df.loc[df['id2'] == 22,'energy2'] /= (1+np.interp(df.loc[df['id2'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias']))
+    df.loc[df['id3'] == 22,'energy3'] /= (1+np.interp(df.loc[df['id3'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias']))
     df['ke'] = df['energy1'].fillna(0) + df['energy2'].fillna(0) + df['energy3'].fillna(0)
     return df
 
diff --git a/utils/plot-michels b/utils/plot-michels
index bec9e17..b313c3e 100755
--- a/utils/plot-michels
+++ b/utils/plot-michels
@@ -65,16 +65,16 @@ def print_particle_probs(data):
         print("P(%s) = %.2f +/- %.2f" % (particle_id_str,mode[i]*100,std[i]*100))
 
 def make_nll(data, mc, bins, print_nll=False):
+    data_hist = np.histogram(data.ke.values,bins=bins)[0]
+
     def nll(x, grad=None):
         if any(x[i] < 0 for i in (0,2)):
             return np.inf
 
-        data_hist = np.histogram(data.ke.values*(1+x[1]),bins=bins)[0]
-
         # Get the Monte Carlo histograms. We need to do this within the
         # likelihood function since we apply the energy resolution parameters
         # to the Monte Carlo.
-        cdf = norm.cdf(bins[:,np.newaxis],mc.ke.values,mc.ke.values*x[2])
+        cdf = norm.cdf(bins[:,np.newaxis],mc.ke.values*(1+x[1]),mc.ke.values*x[2])
         mc_hist = np.sum(cdf[1:] - cdf[:-1],axis=-1)*x[0]
 
         # Calculate the negative log of the likelihood of observing the data
author	tlatorre <tlatorre@uchicago.edu>	2020-10-05 11:15:33 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2020-10-05 11:15:33 -0500
commit	1cd79549ee8ceb0f5e64e06611d891098905c05a (patch)
tree	326a84888fd7ae44846c206846e5be24982bd092
parent	1cc5fc8014ab75ac55672aa962228ac54ae8d9db (diff)
download	sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.tar.gz sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.tar.bz2 sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.zip