update chi2

This commit updates the chi2 analysis with two big changes: - I now apply the energy bias correction in the likelihood per particle and not per fit. So there is no longer an energy bias parameter for electron + muon fits, instead we just apply the energy bias correction for electrons to the electron and the energy bias correction for muons to the muon and then add the two kinetic energies. - I now apply the energy bias correction terms to the data instead of the Monte Carlo. This does introduce an issue with discontinuities in the likelihood but it makes everything easier to interpret. The discontinuities *should* be correctly taken into account by the MCMC.
author: tlatorre <tlatorre@uchicago.edu> 2020-09-07 13:39:53 -0500
committer: tlatorre <tlatorre@uchicago.edu> 2020-09-07 13:39:53 -0500
commit: 96220c7a7e9a07456d64915f88e46f3468c3d2bc (patch)
tree: 0bebc9360959f1f855c54a5701f3bda675192ca0
parent: 8c2dba79a018e91dc8cf607499cea4d975776cd1 (diff)
download: sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.tar.gz
sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.tar.bz2
sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.zip
1 files changed, 80 insertions, 37 deletions
diff --git a/utils/chi2 b/utils/chi2
index dc78648..b3875d1 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -47,10 +47,10 @@ def printoptions(*args, **kwargs):
 # Uncertainty on the energy scale
 # FIXME: These are just placeholders! Should get real number from stopping
 # muons.
-ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0}
-ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 1.1, 'eu': 0.1}
+ENERGY_SCALE_MEAN = {'e': 0.0, 'u': -0.066}
+ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.011}
 ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0}
-ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1}
+ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.014, 'eu': 0.1}
 
 # Absolute tolerance for the minimizer.
 # Since we're minimizing the negative log likelihood, we really only care about
@@ -139,14 +139,11 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None):
         ke = ke_dict[id]
 
         if id in (20,2020):
-            ke = ke*x[1]
             scale = bincenters2*max(EPSILON,x[2])
         elif id in (22,2222):
-            ke = ke*x[3]
             scale = bincenters2*max(EPSILON,x[4])
         elif id == 2022:
-            ke = ke*x[5]
-            scale = bincenters2*max(EPSILON,x[6])
+            scale = bincenters2*max(EPSILON,x[5])
 
         if weights_dict:
             hist = np.histogram(ke,bins=bins2,weights=weights_dict[id])[0]
@@ -159,15 +156,32 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None):
             mc_hists[id] *= x[0]
     return mc_hists
 
-def get_data_hists(data,bins,scale=1.0):
+def get_data_hists_fast(ke_dict,x,bins,scale=1.0):
+    data_hists = {}
+    for id in (20,22,2020,2022,2222):
+        if id in (20,2020):
+            data_hists[id] = np.histogram(ke_dict[id]*(1+x[1]),bins=bins)[0]*scale
+        elif id in (22,2222):
+            data_hists[id] = np.histogram(ke_dict[id]*(1+x[3]),bins=bins)[0]*scale
+        elif id == 2022:
+            data_hists[id] = np.histogram((ke_dict[id]*[1+x[1],1+x[3]]).sum(axis=-1),bins=bins)[0]*scale
+    return data_hists
+
+def get_data_hists(data,x,bins,scale=1.0):
     """
     Returns the data histogrammed into `bins`.
     """
-    data_hists = {}
+    ke_dict = {}
     for id in (20,22,2020,2022,2222):
-        df_id = data[data.id == id]
-        data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale
-    return data_hists
+        if id in (20,2020):
+            ke_dict[id] = data[data.id == id].ke.values
+        elif id in (22,2222):
+            ke_dict[id] = data[data.id == id].ke.values
+        elif id == 2022:
+            ke_dict[id] = np.array([data.loc[data.id == id,'energy1'].values,
+                                    data.loc[data.id == id,'energy2'].values]).T
+
+    return get_data_hists_fast(ke_dict,x,bins,scale)
 
 # Likelihood Fit Parameters
 # 0 - Atmospheric Neutrino Flux Scale
@@ -175,9 +189,8 @@ def get_data_hists(data,bins,scale=1.0):
 # 2 - Electron energy resolution
 # 3 - Muon energy bias
 # 4 - Muon energy resolution
-# 5 - Electron + Muon energy bias
-# 6 - Electron + Muon energy resolution
-# 7 - External Muon scale
+# 5 - Electron + Muon energy resolution
+# 6 - External Muon scale
 
 FIT_PARS = [
     'Atmospheric Neutrino Flux Scale',
@@ -185,13 +198,10 @@ FIT_PARS = [
     'Electron energy resolution',
     'Muon energy bias',
     'Muon energy resolution',
-    'Electron + Muon energy bias',
     'Electron + Muon energy resolution',
     'External Muon scale']
 
 def make_nll(data, muons, mc, bins, print_nll=False):
-    data_hists = get_data_hists(data,bins)
-
     ke_dict = {}
     for id in (20,22,2020,2022,2222):
         ke_dict[id] = mc[mc.id == id].ke.values
@@ -200,10 +210,22 @@ def make_nll(data, muons, mc, bins, print_nll=False):
     for id in (20,22,2020,2022,2222):
         ke_dict_muon[id] = muons[muons.id == id].ke.values
 
+    ke_dict_data = {}
+    for id in (20,22,2020,2022,2222):
+        if id in (20,2020):
+            ke_dict_data[id] = data[data.id == id].ke.values
+        elif id in (22,2222):
+            ke_dict_data[id] = data[data.id == id].ke.values
+        elif id == 2022:
+            ke_dict_data[id] = np.array([data.loc[data.id == id,'energy1'].values,
+                                         data.loc[data.id == id,'energy2'].values]).T
+
     def nll(x, grad=None):
         if any(x[i] < 0 for i in range(len(x))):
             return np.inf
 
+        data_hists = get_data_hists_fast(ke_dict_data,x,bins)
+
         # Get the Monte Carlo histograms. We need to do this within the
         # likelihood function since several of the parameters like the energy
         # bias and resolution affect the histograms.
@@ -245,17 +267,16 @@ def make_nll(data, muons, mc, bins, print_nll=False):
         nll = 0
         for id in data_hists:
             oi = data_hists[id]
-            ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON
+            ei = mc_hists[id] + muon_hists[id]*x[6] + EPSILON
             N = ei.sum()
             nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))
 
         # Add the priors
         nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e'])
         nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u'])
-        nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu'])
         nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e'])
         nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u'])
-        nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu'])
+        nll -= norm.logpdf(x[5],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu'])
 
         if print_nll:
             # Print the result
@@ -292,7 +313,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
     for id in (20,22,2020,2022,2222):
         mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
         # FIXME: does the orering of when we add the muons matter here?
-        mc_hists[id] += muon_hists[id]*x[7]
+        mc_hists[id] += muon_hists[id]*x[6]
     return mc_hists
 
 def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000):
@@ -315,8 +336,19 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
         bins: bins used to bin the mc histogram
         size: number of values to compute
     """
-    data_hists = get_data_hists(data,bins)
-    muon_hists = get_data_hists(data_muon,bins)
+    ke_dict_muon = {}
+    for _id in (20,22,2020,2022,2222):
+        ke_dict_muon[_id] = data_muon[data_muon.id == _id].ke.values
+
+    ke_dict_data = {}
+    for _id in (20,22,2020,2022,2222):
+        if _id in (20,2020):
+            ke_dict_data[_id] = data[data.id == _id].ke.values
+        elif _id in (22,2222):
+            ke_dict_data[_id] = data[data.id == _id].ke.values
+        elif _id == 2022:
+            ke_dict_data[_id] = np.array([data.loc[data.id == _id,'energy1'].values,
+                                          data.loc[data.id == _id,'energy2'].values]).T
 
     # Get the total number of "universes" simulated in the GENIE reweight tool
     if len(data_mc):
@@ -327,6 +359,10 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
     ps = []
     for i in range(size):
         x = x_samples[np.random.randint(x_samples.shape[0])]
+
+        data_hists = get_data_hists_fast(ke_dict_data,x,bins)
+        muon_hists = get_mc_hists_fast(ke_dict_muon,x,bins,apply_norm=False)
+
         if nuniverses > 0:
             universe = np.random.randint(nuniverses)
         else:
@@ -379,11 +415,17 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False):
     """
     nll = make_nll(data,muon,data_mc,bins,print_nll)
 
-    x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON])
+    x0 = np.array([1.0,0.0,EPSILON,0.0,EPSILON,EPSILON,EPSILON])
     opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
     opt.set_min_objective(nll)
-    low = np.array([EPSILON]*len(x0))
-    high = np.array([10]*len(x0))
+    low = np.array([EPSILON,-1e9,EPSILON,-1e9,EPSILON,EPSILON,EPSILON])
+    high = np.array([1e9]*len(x0))
+    # Fix the energy bias parameters for the minimization, since they will
+    # cause discontinuities in the likelihood function.
+    low[1] = 0.0
+    low[3] = 0.0
+    high[1] = 0.0
+    high[3] = 0.0
     opt.set_lower_bounds(low)
     opt.set_upper_bounds(high)
     opt.set_ftol_abs(FTOL_ABS)
@@ -394,6 +436,12 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False):
     nll_xopt = nll(xopt)
     print("nll(xopt) = ", nll(xopt))
 
+    # Now, we allow them to float
+    low[1] = -1e9
+    low[3] = -1e9
+    high[1] = 1e9
+    high[3] = 1e9
+
     stepsizes = estimate_errors(nll,xopt,low,high)
 
     with printoptions(precision=3, suppress=True):
@@ -719,16 +767,11 @@ if __name__ == '__main__':
     plt.gca().get_yaxis().set_visible(False)
     plt.subplot(3,3,6)
     plt.hist(samples[:,5],bins=100,histtype='step')
-    plt.xlabel("Electron + Muon Energy Scale")
+    plt.xlabel("Electron + Muon Energy Resolution")
     despine(ax=plt.gca(),left=True,trim=True)
     plt.gca().get_yaxis().set_visible(False)
     plt.subplot(3,3,7)
     plt.hist(samples[:,6],bins=100,histtype='step')
-    plt.xlabel("Electron + Muon Energy Resolution")
-    despine(ax=plt.gca(),left=True,trim=True)
-    plt.gca().get_yaxis().set_visible(False)
-    plt.subplot(3,3,8)
-    plt.hist(samples[:,7],bins=100,histtype='step')
     plt.xlabel("Muon Scale")
     despine(ax=plt.gca(),left=True,trim=True)
     plt.gca().get_yaxis().set_visible(False)
@@ -746,8 +789,8 @@ if __name__ == '__main__':
     labels = ('Data','Monte Carlo','External Muons')
 
     fig = plt.figure()
-    hists = get_data_hists(data,bins)
-    hists_muon = get_data_hists(muon,bins,scale=xopt[7])
+    hists = get_data_hists(data,xopt,bins)
+    hists_muon = get_data_hists(muon,xopt,bins,scale=xopt[6])
     hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True)
     plot_hist2(hists,bins=bins,color='C0')
     plot_hist2(hists_mc,bins=bins,color='C1')
@@ -773,8 +816,8 @@ if __name__ == '__main__':
     else:
         plt.suptitle("Without Neutron Follower")
     fig = plt.figure()
-    hists = get_data_hists(data_atm,bins)
-    hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7])
+    hists = get_data_hists(data_atm,xopt,bins)
+    hists_muon = get_data_hists(muon_atm,xopt,bins,scale=xopt[6])
     hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True)
     plot_hist2(hists,bins=bins,color='C0')
     plot_hist2(hists_mc,bins=bins,color='C1')
author	tlatorre <tlatorre@uchicago.edu>	2020-09-07 13:39:53 -0500
committer	tlatorre <tlatorre@uchicago.edu>	2020-09-07 13:39:53 -0500
commit	96220c7a7e9a07456d64915f88e46f3468c3d2bc (patch)
tree	0bebc9360959f1f855c54a5701f3bda675192ca0
parent	8c2dba79a018e91dc8cf607499cea4d975776cd1 (diff)
download	sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.tar.gz sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.tar.bz2 sddm-96220c7a7e9a07456d64915f88e46f3468c3d2bc.zip