diff options
Diffstat (limited to 'utils/chi2')
-rwxr-xr-x | utils/chi2 | 117 |
1 file changed, 80 insertions, 37 deletions
@@ -47,10 +47,10 @@ def printoptions(*args, **kwargs): # Uncertainty on the energy scale # FIXME: These are just placeholders! Should get real number from stopping # muons. -ENERGY_SCALE_MEAN = {'e': 1.0, 'u': 1.0, 'eu': 1.0} -ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 1.1, 'eu': 0.1} +ENERGY_SCALE_MEAN = {'e': 0.0, 'u': -0.066} +ENERGY_SCALE_UNCERTAINTY = {'e':0.1, 'u': 0.011} ENERGY_RESOLUTION_MEAN = {'e': 0.0, 'u': 0.0, 'eu': 0.0} -ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.1, 'eu': 0.1} +ENERGY_RESOLUTION_UNCERTAINTY = {'e':0.1, 'u': 0.014, 'eu': 0.1} # Absolute tolerance for the minimizer. # Since we're minimizing the negative log likelihood, we really only care about @@ -139,14 +139,11 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None): ke = ke_dict[id] if id in (20,2020): - ke = ke*x[1] scale = bincenters2*max(EPSILON,x[2]) elif id in (22,2222): - ke = ke*x[3] scale = bincenters2*max(EPSILON,x[4]) elif id == 2022: - ke = ke*x[5] - scale = bincenters2*max(EPSILON,x[6]) + scale = bincenters2*max(EPSILON,x[5]) if weights_dict: hist = np.histogram(ke,bins=bins2,weights=weights_dict[id])[0] @@ -159,15 +156,32 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None): mc_hists[id] *= x[0] return mc_hists -def get_data_hists(data,bins,scale=1.0): +def get_data_hists_fast(ke_dict,x,bins,scale=1.0): + data_hists = {} + for id in (20,22,2020,2022,2222): + if id in (20,2020): + data_hists[id] = np.histogram(ke_dict[id]*(1+x[1]),bins=bins)[0]*scale + elif id in (22,2222): + data_hists[id] = np.histogram(ke_dict[id]*(1+x[3]),bins=bins)[0]*scale + elif id == 2022: + data_hists[id] = np.histogram((ke_dict[id]*[1+x[1],1+x[3]]).sum(axis=-1),bins=bins)[0]*scale + return data_hists + +def get_data_hists(data,x,bins,scale=1.0): """ Returns the data histogrammed into `bins`. 
""" - data_hists = {} + ke_dict = {} for id in (20,22,2020,2022,2222): - df_id = data[data.id == id] - data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale - return data_hists + if id in (20,2020): + ke_dict[id] = data[data.id == id].ke.values + elif id in (22,2222): + ke_dict[id] = data[data.id == id].ke.values + elif id == 2022: + ke_dict[id] = np.array([data.loc[data.id == id,'energy1'].values, + data.loc[data.id == id,'energy2'].values]).T + + return get_data_hists_fast(ke_dict,x,bins,scale) # Likelihood Fit Parameters # 0 - Atmospheric Neutrino Flux Scale @@ -175,9 +189,8 @@ def get_data_hists(data,bins,scale=1.0): # 2 - Electron energy resolution # 3 - Muon energy bias # 4 - Muon energy resolution -# 5 - Electron + Muon energy bias -# 6 - Electron + Muon energy resolution -# 7 - External Muon scale +# 5 - Electron + Muon energy resolution +# 6 - External Muon scale FIT_PARS = [ 'Atmospheric Neutrino Flux Scale', @@ -185,13 +198,10 @@ FIT_PARS = [ 'Electron energy resolution', 'Muon energy bias', 'Muon energy resolution', - 'Electron + Muon energy bias', 'Electron + Muon energy resolution', 'External Muon scale'] def make_nll(data, muons, mc, bins, print_nll=False): - data_hists = get_data_hists(data,bins) - ke_dict = {} for id in (20,22,2020,2022,2222): ke_dict[id] = mc[mc.id == id].ke.values @@ -200,10 +210,22 @@ def make_nll(data, muons, mc, bins, print_nll=False): for id in (20,22,2020,2022,2222): ke_dict_muon[id] = muons[muons.id == id].ke.values + ke_dict_data = {} + for id in (20,22,2020,2022,2222): + if id in (20,2020): + ke_dict_data[id] = data[data.id == id].ke.values + elif id in (22,2222): + ke_dict_data[id] = data[data.id == id].ke.values + elif id == 2022: + ke_dict_data[id] = np.array([data.loc[data.id == id,'energy1'].values, + data.loc[data.id == id,'energy2'].values]).T + def nll(x, grad=None): if any(x[i] < 0 for i in range(len(x))): return np.inf + data_hists = get_data_hists_fast(ke_dict_data,x,bins) + # Get the Monte Carlo 
histograms. We need to do this within the # likelihood function since several of the parameters like the energy # bias and resolution affect the histograms. @@ -245,17 +267,16 @@ def make_nll(data, muons, mc, bins, print_nll=False): nll = 0 for id in data_hists: oi = data_hists[id] - ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON + ei = mc_hists[id] + muon_hists[id]*x[6] + EPSILON N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) # Add the priors nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e']) nll -= norm.logpdf(x[3],ENERGY_SCALE_MEAN['u'],ENERGY_SCALE_UNCERTAINTY['u']) - nll -= norm.logpdf(x[5],ENERGY_SCALE_MEAN['eu'],ENERGY_SCALE_UNCERTAINTY['eu']) nll -= norm.logpdf(x[2],ENERGY_RESOLUTION_MEAN['e'],ENERGY_RESOLUTION_UNCERTAINTY['e']) nll -= norm.logpdf(x[4],ENERGY_RESOLUTION_MEAN['u'],ENERGY_RESOLUTION_UNCERTAINTY['u']) - nll -= norm.logpdf(x[6],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) + nll -= norm.logpdf(x[5],ENERGY_RESOLUTION_MEAN['eu'],ENERGY_RESOLUTION_UNCERTAINTY['eu']) if print_nll: # Print the result @@ -292,7 +313,7 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins): for id in (20,22,2020,2022,2222): mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]) # FIXME: does the orering of when we add the muons matter here? 
- mc_hists[id] += muon_hists[id]*x[7] + mc_hists[id] += muon_hists[id]*x[6] return mc_hists def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=50.0, size=10000): @@ -315,8 +336,19 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti bins: bins used to bin the mc histogram size: number of values to compute """ - data_hists = get_data_hists(data,bins) - muon_hists = get_data_hists(data_muon,bins) + ke_dict_muon = {} + for _id in (20,22,2020,2022,2222): + ke_dict_muon[_id] = data_muon[data_muon.id == _id].ke.values + + ke_dict_data = {} + for _id in (20,22,2020,2022,2222): + if _id in (20,2020): + ke_dict_data[_id] = data[data.id == _id].ke.values + elif _id in (22,2222): + ke_dict_data[_id] = data[data.id == _id].ke.values + elif _id == 2022: + ke_dict_data[_id] = np.array([data.loc[data.id == _id,'energy1'].values, + data.loc[data.id == _id,'energy2'].values]).T # Get the total number of "universes" simulated in the GENIE reweight tool if len(data_mc): @@ -327,6 +359,10 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] + + data_hists = get_data_hists_fast(ke_dict_data,x,bins) + muon_hists = get_mc_hists_fast(ke_dict_muon,x,bins,apply_norm=False) + if nuniverses > 0: universe = np.random.randint(nuniverses) else: @@ -379,11 +415,17 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False): """ nll = make_nll(data,muon,data_mc,bins,print_nll) - x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON]) + x0 = np.array([1.0,0.0,EPSILON,0.0,EPSILON,EPSILON,EPSILON]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) - low = np.array([EPSILON]*len(x0)) - high = np.array([10]*len(x0)) + low = np.array([EPSILON,-1e9,EPSILON,-1e9,EPSILON,EPSILON,EPSILON]) + high = np.array([1e9]*len(x0)) + # Fix the energy bias parameters for the minimization, since they will + # cause 
discontinuities in the likelihood function. + low[1] = 0.0 + low[3] = 0.0 + high[1] = 0.0 + high[3] = 0.0 opt.set_lower_bounds(low) opt.set_upper_bounds(high) opt.set_ftol_abs(FTOL_ABS) @@ -394,6 +436,12 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False): nll_xopt = nll(xopt) print("nll(xopt) = ", nll(xopt)) + # Now, we allow them to float + low[1] = -1e9 + low[3] = -1e9 + high[1] = 1e9 + high[3] = 1e9 + stepsizes = estimate_errors(nll,xopt,low,high) with printoptions(precision=3, suppress=True): @@ -719,16 +767,11 @@ if __name__ == '__main__': plt.gca().get_yaxis().set_visible(False) plt.subplot(3,3,6) plt.hist(samples[:,5],bins=100,histtype='step') - plt.xlabel("Electron + Muon Energy Scale") + plt.xlabel("Electron + Muon Energy Resolution") despine(ax=plt.gca(),left=True,trim=True) plt.gca().get_yaxis().set_visible(False) plt.subplot(3,3,7) plt.hist(samples[:,6],bins=100,histtype='step') - plt.xlabel("Electron + Muon Energy Resolution") - despine(ax=plt.gca(),left=True,trim=True) - plt.gca().get_yaxis().set_visible(False) - plt.subplot(3,3,8) - plt.hist(samples[:,7],bins=100,histtype='step') plt.xlabel("Muon Scale") despine(ax=plt.gca(),left=True,trim=True) plt.gca().get_yaxis().set_visible(False) @@ -746,8 +789,8 @@ if __name__ == '__main__': labels = ('Data','Monte Carlo','External Muons') fig = plt.figure() - hists = get_data_hists(data,bins) - hists_muon = get_data_hists(muon,bins,scale=xopt[7]) + hists = get_data_hists(data,xopt,bins) + hists_muon = get_data_hists(muon,xopt,bins,scale=xopt[6]) hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') @@ -773,8 +816,8 @@ if __name__ == '__main__': else: plt.suptitle("Without Neutron Follower") fig = plt.figure() - hists = get_data_hists(data_atm,bins) - hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7]) + hists = get_data_hists(data_atm,xopt,bins) + hists_muon = get_data_hists(muon_atm,xopt,bins,scale=xopt[6]) 
hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') |