aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-10-05 11:15:33 -0500
committertlatorre <tlatorre@uchicago.edu>2020-10-05 11:15:33 -0500
commit1cd79549ee8ceb0f5e64e06611d891098905c05a (patch)
tree326a84888fd7ae44846c206846e5be24982bd092
parent1cc5fc8014ab75ac55672aa962228ac54ae8d9db (diff)
downloadsddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.tar.gz
sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.tar.bz2
sddm-1cd79549ee8ceb0f5e64e06611d891098905c05a.zip
update how the energy bias is applied in chi2 and plot-michels
This commit updates how the energy bias is applied when we correct for the energy bias in correct_energy_bias(). The correct way to apply this correction is to compute: T_corrected = T_reco/(1+bias) whereas previously we were multiplying by (1-bias).
-rwxr-xr-xutils/chi224
-rwxr-xr-xutils/plot-michels6
2 files changed, 16 insertions, 14 deletions
diff --git a/utils/chi2 b/utils/chi2
index 23c8560..205037b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -489,17 +489,19 @@ def correct_energy_bias(df):
Corrects for the energy bias of the reconstruction relative to the true
Monte Carlo energy.
"""
- # Note: We subtract here since the values in the energy_bias array are MC
- # reconstruction relative to truth. So, for example if we have an energy
- # bias of -2% at 100 MeV, then the array will contain -0.02. In this case,
- # our reconstruction is low relative to the truth, so we need to *increase*
- # our estimate.
- df.loc[df['id1'] == 20,'energy1'] -= df.loc[df['id1'] == 20,'energy1']*np.interp(df.loc[df['id1'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
- df.loc[df['id2'] == 20,'energy2'] -= df.loc[df['id2'] == 20,'energy2']*np.interp(df.loc[df['id2'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
- df.loc[df['id3'] == 20,'energy3'] -= df.loc[df['id3'] == 20,'energy3']*np.interp(df.loc[df['id3'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias'])
- df.loc[df['id1'] == 22,'energy1'] -= df.loc[df['id1'] == 22,'energy1']*np.interp(df.loc[df['id1'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
- df.loc[df['id2'] == 22,'energy2'] -= df.loc[df['id2'] == 22,'energy2']*np.interp(df.loc[df['id2'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
- df.loc[df['id3'] == 22,'energy3'] -= df.loc[df['id3'] == 22,'energy3']*np.interp(df.loc[df['id3'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias'])
+ # Note: We divide here since we define the bias as:
+ #
+ # bias = (T_reco - T_true)/T_true
+ #
+ # Therefore,
+ #
+ # T_true = T_reco/(1+bias)
+ df.loc[df['id1'] == 20,'energy1'] /= (1+np.interp(df.loc[df['id1'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias']))
+ df.loc[df['id2'] == 20,'energy2'] /= (1+np.interp(df.loc[df['id2'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias']))
+ df.loc[df['id3'] == 20,'energy3'] /= (1+np.interp(df.loc[df['id3'] == 20,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['e_bias']))
+ df.loc[df['id1'] == 22,'energy1'] /= (1+np.interp(df.loc[df['id1'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias']))
+ df.loc[df['id2'] == 22,'energy2'] /= (1+np.interp(df.loc[df['id2'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias']))
+ df.loc[df['id3'] == 22,'energy3'] /= (1+np.interp(df.loc[df['id3'] == 22,'energy1'],ENERGY_BIAS['T'],ENERGY_BIAS['u_bias']))
df['ke'] = df['energy1'].fillna(0) + df['energy2'].fillna(0) + df['energy3'].fillna(0)
return df
diff --git a/utils/plot-michels b/utils/plot-michels
index bec9e17..b313c3e 100755
--- a/utils/plot-michels
+++ b/utils/plot-michels
@@ -65,16 +65,16 @@ def print_particle_probs(data):
print("P(%s) = %.2f +/- %.2f" % (particle_id_str,mode[i]*100,std[i]*100))
def make_nll(data, mc, bins, print_nll=False):
+ data_hist = np.histogram(data.ke.values,bins=bins)[0]
+
def nll(x, grad=None):
if any(x[i] < 0 for i in (0,2)):
return np.inf
- data_hist = np.histogram(data.ke.values*(1+x[1]),bins=bins)[0]
-
# Get the Monte Carlo histograms. We need to do this within the
# likelihood function since we apply the energy resolution parameters
# to the Monte Carlo.
- cdf = norm.cdf(bins[:,np.newaxis],mc.ke.values,mc.ke.values*x[2])
+ cdf = norm.cdf(bins[:,np.newaxis],mc.ke.values*(1+x[1]),mc.ke.values*x[2])
mc_hist = np.sum(cdf[1:] - cdf[:-1],axis=-1)*x[0]
# Calculate the negative log of the likelihood of observing the data