diff options
-rwxr-xr-x | utils/chi2 | 2 |
-rwxr-xr-x | utils/dm-search | 128 |
2 files changed, 67 insertions, 63 deletions
@@ -418,6 +418,8 @@ def do_fit(data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,st universe = np.argmin(nlls) + print("universe = ", universe) + if refit: data_mc_with_weights = pd.merge(data_mc,weights[universe],how='left',on=['run','unique_id']) data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) diff --git a/utils/dm-search b/utils/dm-search index 9e1a136..d5d3ec7 100755 --- a/utils/dm-search +++ b/utils/dm-search @@ -279,7 +279,7 @@ def make_nll(dm_particle_id, dm_mass, dm_energy, data, muons, mc, atmo_scale_fac return nll return nll -def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10,refit=True): +def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10,refit=True,universe=None): """ Run the fit and return the minimum along with samples from running an MCMC starting near the minimum. @@ -297,67 +297,68 @@ def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale Returns a tuple (xopt, samples) where samples is an array of shape (steps, number of parameters). """ - dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy) - - nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll,dm_sample=dm_sample) - - pos = np.empty((walkers, len(PRIORS)),dtype=np.double) - for i in range(pos.shape[0]): - pos[i] = sample_priors() - - nwalkers, ndim = pos.shape - - # We use the KDEMove here because I think it should sample the likelihood - # better. Because we have energy scale parameters and we are doing a binned - # likelihood, the likelihood is discontinuous. There can also be several - # local minima. The author of emcee recommends using the KDEMove with a lot - # of workers to try and properly sample a multimodal distribution. 
In - # addition, I've found that the autocorrelation time for the KDEMove is - # much better than the other moves. - sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x), moves=emcee.moves.KDEMove()) - with np.errstate(invalid='ignore'): - sampler.run_mcmc(pos, steps) - - print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) - - try: - print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True)) - except Exception as e: - print(e) - - samples = sampler.get_chain(flat=True,thin=thin) - - # Now, we use nlopt to find the best set of parameters. We start at the - # best starting point from the MCMC and then run the SBPLX routine. - x0 = sampler.get_chain(flat=True)[sampler.get_log_prob(flat=True).argmax()] - opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) - opt.set_min_objective(nll) - low = np.array(PRIORS_LOW) - high = np.array(PRIORS_HIGH) - if refit: - # If we are refitting, we want to do the first fit assuming no dark - # matter to make sure we get the best GENIE systematics for the null - # hypothesis. 
- x0[6] = low[6] - high[6] = low[6] - opt.set_lower_bounds(low) - opt.set_upper_bounds(high) - opt.set_ftol_abs(1e-10) - opt.set_initial_step([0.01]*len(x0)) - xopt = opt.optimize(x0) - - # Get the total number of "universes" simulated in the GENIE reweight tool - nuniverses = max(weights.keys())+1 - - nlls = [] - for universe in range(nuniverses): - data_mc_with_weights = pd.merge(data_mc,weights[universe],how='left',on=['run','unique_id']) - data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) + if universe is None: + dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy) - nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,bins,reweight=True,print_nll=print_nll,dm_sample=dm_sample) - nlls.append(nll(xopt)) + nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll,dm_sample=dm_sample) + + pos = np.empty((walkers, len(PRIORS)),dtype=np.double) + for i in range(pos.shape[0]): + pos[i] = sample_priors() + + nwalkers, ndim = pos.shape + + # We use the KDEMove here because I think it should sample the likelihood + # better. Because we have energy scale parameters and we are doing a binned + # likelihood, the likelihood is discontinuous. There can also be several + # local minima. The author of emcee recommends using the KDEMove with a lot + # of workers to try and properly sample a multimodal distribution. In + # addition, I've found that the autocorrelation time for the KDEMove is + # much better than the other moves. 
+ sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x), moves=emcee.moves.KDEMove()) + with np.errstate(invalid='ignore'): + sampler.run_mcmc(pos, steps) + + print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) + + try: + print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True)) + except Exception as e: + print(e) + + samples = sampler.get_chain(flat=True,thin=thin) + + # Now, we use nlopt to find the best set of parameters. We start at the + # best starting point from the MCMC and then run the SBPLX routine. + x0 = sampler.get_chain(flat=True)[sampler.get_log_prob(flat=True).argmax()] + opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) + opt.set_min_objective(nll) + low = np.array(PRIORS_LOW) + high = np.array(PRIORS_HIGH) + if refit: + # If we are refitting, we want to do the first fit assuming no dark + # matter to make sure we get the best GENIE systematics for the null + # hypothesis. + x0[6] = low[6] + high[6] = low[6] + opt.set_lower_bounds(low) + opt.set_upper_bounds(high) + opt.set_ftol_abs(1e-10) + opt.set_initial_step([0.01]*len(x0)) + xopt = opt.optimize(x0) + + # Get the total number of "universes" simulated in the GENIE reweight tool + nuniverses = max(weights.keys())+1 + + nlls = [] + for universe in range(nuniverses): + data_mc_with_weights = pd.merge(data_mc,weights[universe],how='left',on=['run','unique_id']) + data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) + + nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,bins,reweight=True,print_nll=print_nll,dm_sample=dm_sample) + nlls.append(nll(xopt)) - universe = np.argmin(nlls) + universe = np.argmin(nlls) if refit: data_mc_with_weights = pd.merge(data_mc,weights[universe],how='left',on=['run','unique_id']) @@ -452,7 +453,7 @@ def get_dm_sample(n,dm_particle_id,dm_mass,dm_energy): return pd.DataFrame(data) -def 
get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin): +def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin,universe=None): limits = {} best_fit = {} discovery_array = {} @@ -466,7 +467,7 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b m1 = SNOMAN_MASS[id1] m2 = SNOMAN_MASS[id2] dm_energy = dm_mass - xopt, universe, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin) + xopt, universe, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin,universe) data_mc_with_weights = pd.merge(data_mc,weights[universe],how='left',on=['run','unique_id']) data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) @@ -554,6 +555,7 @@ if __name__ == '__main__': parser.add_argument("--run-list", default=None, help="run list") parser.add_argument("--mcpl", nargs='+', required=True, help="GENIE MCPL files") parser.add_argument("--run-info", required=True, help="run_info.log autosno file") + parser.add_argument("--universe", default=None, help="GENIE universe for systematics") args = parser.parse_args() setup_matplotlib(args.save) @@ -836,7 +838,7 @@ if __name__ == '__main__': sys.exit(0) - limits, best_fit, discovery_array = get_limits(DM_MASSES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin) + limits, best_fit, discovery_array = get_limits(DM_MASSES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin,args.universe) fig = plt.figure() for color, dm_particle_id in zip(('C0','C1'),(2020,2222)): |