-rwxr-xr-x  utils/chi2       | 60
-rwxr-xr-x  utils/dm-search  | 37
2 files changed, 67 insertions, 30 deletions
diff --git a/utils/chi2 b/utils/chi2
--- a/utils/chi2
+++ b/utils/chi2
@@ -122,14 +122,17 @@ def plot_hist2(hists, bins, color=None):
         elif id == 2222:
             plt.subplot(2,3,6)
 
-        bincenters = (bins[1:] + bins[:-1])/2
-        plt.hist(bincenters, bins=bins, histtype='step', weights=hists[id],color=color)
+        bincenters = (bins[id][1:] + bins[id][:-1])/2
+        plt.hist(bincenters, bins=bins[id], histtype='step', weights=hists[id],color=color)
         plt.gca().set_xscale("log")
         major = np.array([10,100,1000,10000])
         minor = np.unique(list(chain(*list(range(i,i*10,i) for i in major[:-1]))))
         minor = np.setdiff1d(minor,major)
+        major = major[major <= bins[id][-1]]
+        minor = minor[minor <= bins[id][-1]]
         plt.gca().set_xticks(major)
         plt.gca().set_xticks(minor,minor=True)
+        plt.gca().set_xlim(10,10000)
         plt.xlabel("Energy (MeV)")
         plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
 
@@ -188,9 +191,9 @@ def get_mc_hists_fast(df_dict,x,bins,scale=1.0,reweight=False):
         resolution = np.sqrt((df.energy1.values*max(EPSILON,x[2]))**2 + (df.energy2.values*max(EPSILON,x[4]))**2)
 
         if reweight:
-            cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)*df.weight.values
+            cdf = fast_cdf(bins[id][:,np.newaxis],ke,resolution)*df.weight.values
         else:
-            cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)
+            cdf = fast_cdf(bins[id][:,np.newaxis],ke,resolution)
 
         if 'flux_weight' in df.columns:
             cdf *= df.flux_weight.values
@@ -205,7 +208,7 @@ def get_data_hists(data,bins,scale=1.0):
     """
     data_hists = {}
     for id in (20,22,2020,2022,2222):
-        data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale
+        data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins[id])[0]*scale
     return data_hists
 
 def make_nll(data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, reweight=False, print_nll=False):
@@ -479,7 +482,7 @@ if __name__ == '__main__':
     import nlopt
     from sddm.renormalize import *
 
-    parser = argparse.ArgumentParser("plot fit results")
+    parser = argparse.ArgumentParser("run null hypothesis test")
     parser.add_argument("filenames", nargs='+', help="input files")
     parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
     parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
@@ -501,16 +504,17 @@ if __name__ == '__main__':
 
     import matplotlib.pyplot as plt
 
+    if args.run_list is not None:
+        run_list = np.genfromtxt(args.run_list)
+
     # Loop over runs to prevent using too much memory
     evs = []
     rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
     for run, df in rhdr.groupby('run'):
+        if args.run_list and run not in run_list:
+            continue
         evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
-    ev = pd.concat(evs)
-
-    if args.run_list is not None:
-        runs = np.genfromtxt(args.run_list)
-        ev = ev[ev.run.isin(runs)]
+    ev = pd.concat(evs).reset_index()
 
     ev = correct_energy_bias(ev)
 
@@ -521,10 +525,25 @@ if __name__ == '__main__':
     ev_mcs = []
     for filename in args.mc:
         ev_mcs.append(get_events([filename], merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True))
-    ev_mc = pd.concat(ev_mcs)
+    ev_mc = pd.concat(ev_mcs).reset_index()
     muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True)
     weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)
 
+    # The next two things we have to do are reweight the Monte Carlo data since
+    # I accidentally simulated the muon neutrino flux instead of the tau
+    # neutrino flux and load in the GENIE systematics weights.
+    #
+    # Both of these are a bit tricky because of the fact that I had to
+    # resimulate some MC events since they failed to simulate (there was a
+    # packer error and occasionally a geometry error that was causing ~10% of
+    # the MC events to fail). Since I had to resimulate them, it's not possible
+    # to connect the GENIE weights to the MC events by just using the event
+    # number.
+    #
+    # Therefore, I decided to use a "unique_id" field which I compute by
+    # hashing the position of the event. This unique_id along with the run
+    # should completely specify a unique mapping between the events.
+
     # Add the "flux_weight" column to the ev_mc data since I stupidly simulated
     # the muon neutrino flux for the tau neutrino flux in GENIE. Doh!
     mcpl = load_mcpl_files(args.mcpl)
 
@@ -605,9 +624,9 @@ if __name__ == '__main__':
     #muon = ev[ev.muon & ev.prompt & ~ev.atm]
     #muon_atm = ev[ev.muon & ev.prompt & ev.atm]
 
-    if (~rhdr.run.isin(ev_mc.run)).any():
+    if (~ev.run.isin(ev_mc.run)).any():
         print_warning("Error! The following runs have no Monte Carlo: %s" % \
-            rhdr.run[~rhdr.run.isin(ev_mc.run)].values)
+            np.unique(ev.run[~ev.run.isin(ev_mc.run)].values))
         sys.exit(1)
 
     if not args.pull and not args.coverage:
 
@@ -616,7 +635,11 @@ if __name__ == '__main__':
     data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
     data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
 
-    bins = np.logspace(np.log10(20),np.log10(10e3),21)
+    bins = {20:np.logspace(np.log10(20),np.log10(10e3),21),
+            22:np.logspace(np.log10(20),np.log10(10e3),21)[:-5],
+            2020:np.logspace(np.log10(20),np.log10(10e3),21),
+            2022:np.logspace(np.log10(20),np.log10(10e3),21)[:-5],
+            2222:np.logspace(np.log10(20),np.log10(10e3),21)[:-5]}
 
     atmo_scale_factor = 100.0
     muon_scale_factor = len(muon) + len(muon_atm)
 
@@ -633,6 +656,9 @@ if __name__ == '__main__':
         data_mc_with_weights = pd.merge(data_mc,weights[weights.universe == 0],how='left',on=['run','unique_id'])
         data_atm_mc_with_weights = pd.merge(data_atm_mc,weights[weights.universe == 0],how='left',on=['run','unique_id'])
 
+        data_mc_with_weights.weight *= data_mc_with_weights.flux_weight
+        data_atm_mc_with_weights.weight *= data_atm_mc_with_weights.flux_weight
+
         for i in range(args.coverage):
             # Calculate expected number of events
             N = len(data_mc)*xtrue[0]/atmo_scale_factor
 
@@ -748,8 +774,8 @@ if __name__ == '__main__':
             n_muon_atm = np.random.poisson(N_muon_atm)
 
             # Sample data from Monte Carlo
-            data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True)))
-            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
+            data = pd.concat((data_mc.sample(n=n,weights='flux_weight',replace=True), muon.sample(n=n_muon,replace=True)))
+            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,weights='flux_weight',replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
 
             # Smear the energies by the additional energy resolution
             data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
diff --git a/utils/dm-search b/utils/dm-search
index dd4946b..12d2a4b 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -139,14 +139,17 @@ def plot_hist2(hists, bins, color=None):
         elif id == 2222:
             plt.subplot(2,3,6)
 
-        bincenters = (bins[1:] + bins[:-1])/2
-        plt.hist(bincenters, bins=bins, histtype='step', weights=hists[id],color=color)
+        bincenters = (bins[id][1:] + bins[id][:-1])/2
+        plt.hist(bincenters, bins=bins[id], histtype='step', weights=hists[id],color=color)
         plt.gca().set_xscale("log")
         major = np.array([10,100,1000,10000])
         minor = np.unique(list(chain(*list(range(i,i*10,i) for i in major[:-1]))))
         minor = np.setdiff1d(minor,major)
+        major = major[major <= bins[id][-1]]
+        minor = minor[minor <= bins[id][-1]]
         plt.gca().set_xticks(major)
         plt.gca().set_xticks(minor,minor=True)
+        plt.gca().set_xlim(10,10000)
         plt.xlabel("Energy (MeV)")
         plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
 
@@ -205,9 +208,9 @@ def get_mc_hists_fast(df_dict,x,bins,scale=1.0,reweight=False):
         resolution = np.sqrt((df.energy1.values*max(EPSILON,x[2]))**2 + (df.energy2.values*max(EPSILON,x[4]))**2)
 
         if reweight:
-            cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)*df.weight.values
+            cdf = fast_cdf(bins[id][:,np.newaxis],ke,resolution)*df.weight.values
         else:
-            cdf = fast_cdf(bins[:,np.newaxis],ke,resolution)
+            cdf = fast_cdf(bins[id][:,np.newaxis],ke,resolution)
 
         if 'flux_weight' in df.columns:
             cdf *= df.flux_weight.values
@@ -222,7 +225,7 @@ def get_data_hists(data,bins,scale=1.0):
     """
     data_hists = {}
     for id in (20,22,2020,2022,2222):
-        data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale
+        data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins[id])[0]*scale
     return data_hists
 
 def make_nll(dm_particle_id, dm_mass, dm_energy, data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, reweight=False, print_nll=False, dm_sample=None):
@@ -559,17 +562,18 @@ if __name__ == '__main__':
 
     import matplotlib.pyplot as plt
 
+    if args.run_list is not None:
+        run_list = np.genfromtxt(args.run_list)
+
     # Loop over runs to prevent using too much memory
     evs = []
     rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
     for run, df in rhdr.groupby('run'):
+        if args.run_list and run not in run_list:
+            continue
         evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
     ev = pd.concat(evs).reset_index()
 
-    if args.run_list is not None:
-        runs = np.genfromtxt(args.run_list)
-        ev = ev[ev.run.isin(runs)]
-
     livetime = 0.0
     livetime_pulse_gt = 0.0
     for _ev in evs:
 
@@ -600,7 +604,7 @@ if __name__ == '__main__':
     ev_mcs = []
     for filename in args.mc:
         ev_mcs.append(get_events([filename], merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True))
-    ev_mc = pd.concat(ev_mcs)
+    ev_mc = pd.concat(ev_mcs).reset_index()
     muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh, mc=True)
     weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)
 
@@ -697,7 +701,11 @@ if __name__ == '__main__':
     data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
     data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
 
-    bins = np.logspace(np.log10(20),np.log10(10e3),21)
+    bins = {20:np.logspace(np.log10(20),np.log10(10e3),21),
+            22:np.logspace(np.log10(20),np.log10(10e3),21)[:-5],
+            2020:np.logspace(np.log10(20),np.log10(10e3),21),
+            2022:np.logspace(np.log10(20),np.log10(10e3),21)[:-5],
+            2222:np.logspace(np.log10(20),np.log10(10e3),21)[:-5]}
 
     atmo_scale_factor = 100.0
     muon_scale_factor = len(muon) + len(muon_atm)
 
@@ -730,8 +738,8 @@ if __name__ == '__main__':
             dm_energy = dm_mass
 
             # Sample data from Monte Carlo
-            data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
-            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
+            data = pd.concat((data_mc.sample(n=n,weights='flux_weight',replace=True), muon.sample(n=n_muon,replace=True)))
+            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,weights='flux_weight',replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
 
             # Smear the energies by the additional energy resolution
             data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
@@ -786,6 +794,9 @@ if __name__ == '__main__':
 
     discoveries = 0
 
+    data_mc_with_weights.weight *= data_mc_with_weights.flux_weight
+    data_atm_mc_with_weights.weight *= data_atm_mc_with_weights.flux_weight
+
     for i in range(args.test):
         xtrue = sample_priors()
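
Note on the "unique_id" mapping described in the comment block above: the sketch
below illustrates the idea of hashing an event's generated position into a
stable key and joining GENIE weights on (run, unique_id), mirroring the
pd.merge(..., on=['run','unique_id']) calls in the diff. This is not the
repository's actual implementation; the function name make_unique_id, the
choice of SHA-1, and the rounding precision are assumptions for illustration.

    import hashlib
    import pandas as pd

    def make_unique_id(x, y, z, digits=2):
        # Hash the (rounded) event position into a stable identifier.
        # Rounding keeps the id identical across resimulations, where
        # event numbers change but generated positions do not.
        # (Hash and precision are illustrative assumptions.)
        key = f"{x:.{digits}f},{y:.{digits}f},{z:.{digits}f}"
        return int(hashlib.sha1(key.encode()).hexdigest()[:15], 16)

    # Toy MC events keyed by position hash rather than event number.
    ev_mc = pd.DataFrame({"run": [10000, 10000],
                          "x": [12.34, -87.01],
                          "y": [0.50, 4.25],
                          "z": [100.00, -3.30]})
    ev_mc["unique_id"] = [make_unique_id(x, y, z)
                          for x, y, z in zip(ev_mc.x, ev_mc.y, ev_mc.z)]

    # Toy GENIE systematics weights for the nominal universe.
    weights = pd.DataFrame({"run": [10000, 10000],
                            "unique_id": ev_mc.unique_id.values,
                            "universe": [0, 0],
                            "weight": [0.98, 1.07]})

    # Same join as in the diff: (run, unique_id) pins down each event.
    merged = pd.merge(ev_mc, weights[weights.universe == 0],
                      how="left", on=["run", "unique_id"])
    print(merged)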
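Note on the fake-data sampling change: passing weights='flux_weight' to
DataFrame.sample makes pandas draw each Monte Carlo event in proportion to
that column instead of uniformly, so the sampled fake data follows the
flux-corrected (tau-neutrino) spectrum rather than the accidentally simulated
muon-neutrino one. A minimal sketch of that pandas behavior; the column names
match the diff, but the distributions and counts here are toy values:

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)

    # Toy MC: an energy and a per-event flux correction
    # (ratio of the intended flux to the simulated flux).
    mc = pd.DataFrame({"ke": rng.uniform(20, 10e3, size=1000),
                       "flux_weight": rng.uniform(0.5, 1.5, size=1000)})

    # Poisson-fluctuate the expected count, then sample with replacement,
    # weighting each event by its flux_weight.
    n = rng.poisson(500)
    fake_data = mc.sample(n=n, weights="flux_weight", replace=True)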