From 834df6cfc5737db2b2f4c0a1432c97960ed65e76 Mon Sep 17 00:00:00 2001 From: tlatorre Date: Wed, 16 Dec 2020 13:43:25 -0600 Subject: use a hash to merge weights with MC data --- utils/dm-search | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'utils/dm-search') diff --git a/utils/dm-search b/utils/dm-search index 91d9b87..dd4946b 100755 --- a/utils/dm-search +++ b/utils/dm-search @@ -350,7 +350,7 @@ def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale nlls = [] for universe in range(nuniverses): - data_mc_with_weights = pd.merge(data_mc,weights_dict[universe],how='left',on=['run','evn']) + data_mc_with_weights = pd.merge(data_mc,weights_dict[universe],how='left',on=['run','unique_id']) data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc_with_weights,atmo_scale_factor,muon_scale_factor,bins,reweight=True,print_nll=print_nll,dm_sample=dm_sample) @@ -359,7 +359,7 @@ def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale universe = np.argmin(nlls) if refit: - data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','unique_id']) data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) # Create a new negative log likelihood function with the weighted Monte Carlo. @@ -467,7 +467,7 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b dm_energy = dm_mass xopt, universe, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,weights,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin) - data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','evn']) + data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == universe],how='left',on=['run','unique_id']) data_mc_with_weights.weight = data_mc_with_weights.weight.fillna(1.0) limit = np.percentile(samples[:,6],90) @@ -609,6 +609,11 @@ if __name__ == '__main__': mcpl = load_mcpl_files(args.mcpl) ev_mc = renormalize_data(ev_mc.reset_index(),mcpl) + # Merge weights with MCPL dataframe to get the unique id column in the + # weights dataframe since that is what we use to merge with the Monte + # Carlo. + weights = pd.merge(weights,mcpl[['run','evn','unique_id']],on=['run','evn'],how='left') + # There are a handful of weights which turn out to be slightly negative for # some reason. For example: # @@ -776,8 +781,8 @@ if __name__ == '__main__': # Set the random seed so we get reproducible results here np.random.seed(0) - data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == 0],how='left',on=['run','evn']) - data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == 0],how='left',on=['run','evn']) + data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == 0],how='left',on=['run','unique_id']) + data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == 0],how='left',on=['run','unique_id']) discoveries = 0 -- cgit