1 files changed, 50 insertions, 2 deletions
diff --git a/utils/chi2 b/utils/chi2
index 18799ff..d493741 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -338,7 +338,7 @@ def get_prob(data,muon,mc,atmo_scale_factor,muon_scale_factor,samples,bins,size)
         print(id, prob[id])
     return prob
 
-def do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
+def do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10,refit=True):
     """
     Run the fit and return the minimum along with samples from running an MCMC
     starting near the minimum.
@@ -410,6 +410,54 @@ def do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,st
 
     universe = np.argmin(nlls)
 
+    if refit:
+        data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn'])
+        data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0)
+
+        # Create a new negative log likelihood function with the weighted Monte Carlo.
+        nll = make_nll(data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,bins,reweight=True,print_nll=print_nll)
+
+        # Now, we refit with the Monte Carlo weighted by the most likely GENIE
+        # systematics.
+        pos = np.empty((walkers, len(PRIORS)),dtype=np.double)
+        for i in range(pos.shape[0]):
+            pos[i] = truncnorm_scaled(PRIORS_LOW,PRIORS_HIGH,PRIORS,PRIOR_UNCERTAINTIES)
+
+        nwalkers, ndim = pos.shape
+
+        # We use the KDEMove here because I think it should sample the likelihood
+        # better. Because we have energy scale parameters and we are doing a binned
+        # likelihood, the likelihood is discontinuous. There can also be several
+        # local minima. The author of emcee recommends using the KDEMove with a lot
+        # of walkers to try and properly sample a multimodal distribution. In
+        # addition, I've found that the autocorrelation time for the KDEMove is
+        # much better than the other moves.
+        sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x), moves=emcee.moves.KDEMove())
+        with np.errstate(invalid='ignore'):
+            sampler.run_mcmc(pos, steps)
+
+        print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
+
+        try:
+            print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True))
+        except Exception as e:
+            print(e)
+
+        samples = sampler.get_chain(flat=True,thin=thin)
+
+        # Now, we use nlopt to find the best set of parameters. We start at the
+        # best starting point from the MCMC and then run the SBPLX routine.
+        x0 = sampler.get_chain(flat=True)[sampler.get_log_prob(flat=True).argmax()]
+        opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
+        opt.set_min_objective(nll)
+        low = np.array(PRIORS_LOW)
+        high = np.array(PRIORS_HIGH)
+        opt.set_lower_bounds(low)
+        opt.set_upper_bounds(high)
+        opt.set_ftol_abs(1e-10)
+        opt.set_initial_step([0.01]*len(x0))
+        xopt = opt.optimize(x0)
+
     return xopt, universe, samples
 
 if __name__ == '__main__':
@@ -672,7 +720,7 @@ if __name__ == '__main__':
             data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
             data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
 
-            xopt, universe, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+            xopt, universe, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin,refit=False)
 
             for i in range(len(FIT_PARS)):
                 # The "pull plots" we make here are actually produced via a