aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-10-09 11:33:36 -0500
committertlatorre <tlatorre@uchicago.edu>2020-10-09 11:33:36 -0500
commit6b9de6e4c01e71fd74494eb6782b99527adffe26 (patch)
tree7e7a72e5b0d46fb676182babbd4a491f4b92593e
parent4521e01bd9e6dde09aa1f3ccb4300a47e875c2c1 (diff)
downloadsddm-6b9de6e4c01e71fd74494eb6782b99527adffe26.tar.gz
sddm-6b9de6e4c01e71fd74494eb6782b99527adffe26.tar.bz2
sddm-6b9de6e4c01e71fd74494eb6782b99527adffe26.zip
update chi2 with scale factors for the MC
This commit updates the chi2 script to add scale factors for the atmospheric neutrino scale and muon scale parameters. The reason for this is that these parameters should now have a much easier interpretation than before. Now, the atmospheric neutrino scale parameter is relative to the expected atmospheric neutrino flux (i.e. we expect the fit to return something close to 1), and the muon scale parameter is the total number of expected muons in our sample, which is exactly what the data cleaning analysis gives us.
-rwxr-xr-xutils/chi273
1 file changed, 38 insertions, 35 deletions
diff --git a/utils/chi2 b/utils/chi2
index 30a0fbe..be99626 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -71,21 +71,21 @@ FIT_PARS = [
# - For the electron + muon energy resolution, I don't have any real good way
# to estimate this, so we are conservative and set the error to 10%.
PRIORS = [
- 0.01, # Atmospheric Neutrino Scale
+ 1.0, # Atmospheric Neutrino Scale
0.0, # Electron energy scale
0.0, # Electron energy resolution
0.066, # Muon energy scale
0.0, # Muon energy resolution
- 0.01, # Muon scale
+ 0.0, # Muon scale
]
PRIOR_UNCERTAINTIES = [
- 0.002, # Atmospheric Neutrino Scale
+ 0.2 , # Atmospheric Neutrino Scale
0.1, # Electron energy scale
0.1, # Electron energy resolution
0.011, # Muon energy scale
0.014, # Muon energy resolution
- 0.001, # Muon scale
+ 10.0, # Muon scale
]
# Lower bounds for the fit parameters
@@ -201,7 +201,7 @@ def get_data_hists(data,bins,scale=1.0):
data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale
return data_hists
-def make_nll(data, muons, mc, bins, print_nll=False):
+def make_nll(data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False):
df_dict = {}
for id in (20,22,2020,2022,2222):
df_dict[id] = mc[mc.id == id]
@@ -219,8 +219,8 @@ def make_nll(data, muons, mc, bins, print_nll=False):
# Get the Monte Carlo histograms. We need to do this within the
# likelihood function since we apply the energy resolution parameters
# to the Monte Carlo.
- mc_hists = get_mc_hists_fast(df_dict,x,bins)
- muon_hists = get_mc_hists_fast(df_dict_muon,x,bins)
+ mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor)
+ muon_hists = get_mc_hists_fast(df_dict_muon,x,bins,scale=1/muon_scale_factor)
# Calculate the negative log of the likelihood of observing the data
# given the fit parameters
@@ -242,7 +242,7 @@ def make_nll(data, muons, mc, bins, print_nll=False):
return nll
return nll
-def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
+def get_mc_hists_posterior(data_mc,muon_hists,data_hists,atmo_scale_factor,muon_scale_factor,x,bins):
"""
Returns the posterior on the Monte Carlo histograms.
@@ -268,12 +268,12 @@ def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
"""
mc_hists = get_mc_hists(data_mc,x,bins,reweight=True)
for id in (20,22,2020,2022,2222):
- mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
+ mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]/atmo_scale_factor)
# FIXME: does the ordering of when we add the muons matter here?
- mc_hists[id] += muon_hists[id]*x[5]
+ mc_hists[id] += muon_hists[id]*x[5]/muon_scale_factor
return mc_hists
-def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins, percentile=50.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, weights, atmo_scale_factor, muon_scale_factor, id, x_samples, bins, percentile=50.0, size=10000):
"""
Returns the p-value that the histogram of the data is drawn from the MC
histogram.
@@ -318,7 +318,7 @@ def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins,
else:
data_mc_with_weights = data_mc.copy()
data_mc['weight'] = 1.0
- mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,x,bins)[id]
+ mc = get_mc_hists_posterior(data_mc_with_weights,muon_hists,data_hists,atmo_scale_factor,muon_scale_factor,x,bins)[id]
N = mc.sum()
# Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
mc = mc + 1e-10
@@ -340,14 +340,14 @@ def get_multinomial_prob(data, data_muon, data_mc, weights, id, x_samples, bins,
ps.append(np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples))
return np.percentile(ps,percentile)
-def get_prob(data,muon,mc,weights,samples,bins,size):
+def get_prob(data,muon,mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size):
prob = {}
for id in (20,22,2020,2022,2222):
- prob[id] = get_multinomial_prob(data,muon,mc,weights,id,samples,bins,size=size)
+ prob[id] = get_multinomial_prob(data,muon,mc,weights,atmo_scale_factor,muon_scale_factor,id,samples,bins,size=size)
print(id, prob[id])
return prob
-def do_fit(data,muon,data_mc,bins,steps,print_nll=False,walkers=100,thin=10):
+def do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
"""
Run the fit and return the minimum along with samples from running an MCMC
starting near the minimum.
@@ -364,7 +364,7 @@ def do_fit(data,muon,data_mc,bins,steps,print_nll=False,walkers=100,thin=10):
Returns a tuple (xopt, samples) where samples is an array of shape (steps,
number of parameters).
"""
- nll = make_nll(data,muon,data_mc,bins,print_nll)
+ nll = make_nll(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll)
pos = np.empty((walkers, len(PRIORS)),dtype=np.double)
for i in range(pos.shape[0]):
@@ -487,6 +487,9 @@ if __name__ == '__main__':
bins = np.logspace(np.log10(20),np.log10(10e3),21)
+ atmo_scale_factor = 100.0
+ muon_scale_factor = len(muon) + len(muon_atm)
+
if args.coverage:
p_values = {id: [] for id in (20,22,2020,2022,2222)}
p_values_atm = {id: [] for id in (20,22,2020,2022,2222)}
@@ -501,10 +504,10 @@ if __name__ == '__main__':
for i in range(args.coverage):
# Calculate expected number of events
- N = len(data_mc)*xtrue[0]
- N_atm = len(data_atm_mc)*xtrue[0]
- N_muon = len(muon)*xtrue[5]
- N_muon_atm = len(muon_atm)*xtrue[5]
+ N = len(data_mc)*xtrue[0]/atmo_scale_factor
+ N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
+ N_muon = len(muon)*xtrue[5]/muon_scale_factor
+ N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
# Calculate observed number of events
n = np.random.poisson(N)
@@ -529,10 +532,10 @@ if __name__ == '__main__':
data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
- xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
- prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
for id in (20,22,2020,2022,2222):
p_values[id].append(prob[id])
@@ -596,10 +599,10 @@ if __name__ == '__main__':
xtrue = truncnorm_scaled(PRIORS_LOW,PRIORS_HIGH,PRIORS,PRIOR_UNCERTAINTIES)
# Calculate expected number of events
- N = len(data_mc)*xtrue[0]
- N_atm = len(data_atm_mc)*xtrue[0]
- N_muon = len(muon)*xtrue[5]
- N_muon_atm = len(muon_atm)*xtrue[5]
+ N = len(data_mc)*xtrue[0]/atmo_scale_factor
+ N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
+ N_muon = len(muon)*xtrue[5]/muon_scale_factor
+ N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
# Calculate observed number of events
n = np.random.poisson(N)
@@ -624,7 +627,7 @@ if __name__ == '__main__':
data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
- xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
for i in range(len(FIT_PARS)):
# The "pull plots" we make here are actually produced via a
@@ -652,10 +655,10 @@ if __name__ == '__main__':
sys.exit(0)
- xopt, samples = do_fit(data,muon,data_mc,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
- prob = get_prob(data,muon,data_mc,weights,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,weights,atmo_scale_factor,muon_scale_factor,samples,bins,size=args.multinomial_prob_size)
plt.figure()
for i in range(len(FIT_PARS)):
@@ -679,8 +682,8 @@ if __name__ == '__main__':
fig = plt.figure()
hists = get_data_hists(data,bins)
- hists_muon = get_mc_hists(muon,xopt,bins,scale=xopt[5])
- hists_mc = get_mc_hists(data_mc,xopt,bins,scale=xopt[0])
+ hists_muon = get_mc_hists(muon,xopt,bins,scale=xopt[5]/muon_scale_factor)
+ hists_mc = get_mc_hists(data_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor)
plot_hist2(hists,bins=bins,color='C0')
plot_hist2(hists_mc,bins=bins,color='C1')
plot_hist2(hists_muon,bins=bins,color='C2')
@@ -706,8 +709,8 @@ if __name__ == '__main__':
plt.suptitle("Without Neutron Follower")
fig = plt.figure()
hists = get_data_hists(data_atm,bins)
- hists_muon = get_mc_hists(muon_atm,xopt,bins,scale=xopt[5])
- hists_mc = get_mc_hists(data_atm_mc,xopt,bins,scale=xopt[0])
+ hists_muon = get_mc_hists(muon_atm,xopt,bins,scale=xopt[5]/muon_scale_factor)
+ hists_mc = get_mc_hists(data_atm_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor)
plot_hist2(hists,bins=bins,color='C0')
plot_hist2(hists_mc,bins=bins,color='C1')
plot_hist2(hists_muon,bins=bins,color='C2')