author | tlatorre <tlatorre@uchicago.edu> | 2020-11-16 07:55:17 -0600
committer | tlatorre <tlatorre@uchicago.edu> | 2020-11-16 07:55:17 -0600
commit | fbcdf03f205840c5cd550689e3f60f5168f8ecca (patch)
tree | 8733e490193a7243a87dda563a86597a58a96289 /utils/dm-search
parent | 31bf8df0a8d222310bbbd772067f336140c24baa (diff)
download | sddm-fbcdf03f205840c5cd550689e3f60f5168f8ecca.tar.gz, sddm-fbcdf03f205840c5cd550689e3f60f5168f8ecca.tar.bz2, sddm-fbcdf03f205840c5cd550689e3f60f5168f8ecca.zip
update dm-search script
Diffstat (limited to 'utils/dm-search')
-rwxr-xr-x | utils/dm-search | 250
1 file changed, 220 insertions, 30 deletions
diff --git a/utils/dm-search b/utils/dm-search
index 323aabc..1d14d05 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -33,6 +33,8 @@ from sddm.dc import estimate_errors, EPSILON, truncnorm_scaled
 import emcee
 from sddm import printoptions
 from sddm.utils import fast_cdf, correct_energy_bias
+from scipy.integrate import quad
+from sddm.dm import *
 
 # Likelihood Fit Parameters
 # 0 - Atmospheric Neutrino Flux Scale
@@ -43,6 +45,15 @@ from sddm.utils import fast_cdf, correct_energy_bias
 # 5 - External Muon scale
 # 6 - Dark Matter Scale
 
+# Number of events to use in the dark matter Monte Carlo histogram when fitting.
+# Ideally this would be as big as possible, but the bigger it is, the more time
+# the fit takes.
+DM_SAMPLES = 10000
+
+DM_ENERGIES = np.logspace(np.log10(20),np.log10(10e3),10)
+
+DISCOVERY_P_VALUE = 0.05
+
 # Energy resolution as a fraction of energy for dark matter signal
 DM_ENERGY_RESOLUTION = 0.1
 
@@ -75,7 +86,7 @@ PRIORS = [
     1.0, # Atmospheric Neutrino Scale
     0.015, # Electron energy scale
     0.0, # Electron energy resolution
-    0.054, # Muon energy scale
+    0.053, # Muon energy scale
     0.0, # Muon energy resolution
     0.0, # Muon scale
     0.0, # Dark Matter Scale
@@ -85,8 +96,8 @@ PRIOR_UNCERTAINTIES = [
     0.2, # Atmospheric Neutrino Scale
     0.03, # Electron energy scale
     0.05, # Electron energy resolution
-    0.011, # Muon energy scale
-    0.014, # Muon energy resolution
+    0.01, # Muon energy scale
+    0.013, # Muon energy resolution
     10.0, # Muon scale
     np.inf,# Dark Matter Scale
 ]
@@ -206,7 +217,7 @@ def get_data_hists(data,bins,scale=1.0):
         data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale
     return data_hists
 
-def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False):
+def make_nll(dm_particle_id, dm_mass, dm_energy, data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False):
     df_dict = {}
     for id in (20,22,2020,2022,2222):
         df_dict[id] = mc[mc.id == id]
@@ -217,6 +228,12 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
 
     data_hists = get_data_hists(data,bins)
 
+    dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy)
+
+    df_dict_dm = {}
+    for id in (20,22,2020,2022,2222):
+        df_dict_dm[id] = dm_sample[dm_sample.id == id]
+
     def nll(x, grad=None):
         if any(x[i] < 0 for i in (0,2,4,5,6)):
             return np.inf
@@ -226,6 +243,7 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
         # to the Monte Carlo.
         mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor)
         muon_hists = get_mc_hists_fast(df_dict_muon,x,bins,scale=1/muon_scale_factor)
+        dm_hists = get_mc_hists_fast(df_dict_dm,x,bins,scale=1/len(dm_sample))
 
         # Calculate the negative log of the likelihood of observing the data
         # given the fit parameters
@@ -233,11 +251,7 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
         nll = 0
         for id in data_hists:
             oi = data_hists[id]
-            ei = mc_hists[id]*x[0] + muon_hists[id]*x[5] + EPSILON
-            if id == dm_particle_id:
-                cdf = fast_cdf(bins,dm_energy,dm_energy*DM_ENERGY_RESOLUTION)
-                dm_hist = np.sum(cdf[1:] - cdf[:-1])
-                ei += dm_hist*x[6]
+            ei = mc_hists[id]*x[0] + muon_hists[id]*x[5] + dm_hists[id]*x[6] + EPSILON
             N = ei.sum()
             nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))
 
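The expected counts in each bin are now a sum of three scaled templates (atmospheric neutrinos, external muons, and the dark matter sample), and the fit minimizes a binned extended Poisson likelihood. A minimal sketch of the quantity being accumulated, with illustrative `observed`/`expected` arrays standing in for the script's `oi` and `ei`:

    import numpy as np
    from scipy.special import gammaln

    def binned_poisson_nll(observed, expected):
        # -log L for independent Poisson bins:
        #   -log prod_i e^{-e_i} e_i^{o_i} / o_i!
        #    = sum_i e_i - o_i*log(e_i) + log(o_i!)
        observed = np.asarray(observed, dtype=float)
        expected = np.asarray(expected, dtype=float)
        return np.sum(expected - observed*np.log(expected) + gammaln(observed + 1))

    rng = np.random.default_rng(0)
    expected = np.array([10.0, 5.0, 2.0]) + 1e-10  # like EPSILON, keeps log() finite
    observed = rng.poisson(expected)
    print(binned_poisson_nll(observed, expected))

This is algebraically the same quantity the nll closure accumulates with `nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))`, since `N = ei.sum()`.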
@@ -251,7 +265,7 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
             return nll
     return nll
 
-def do_fit(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
+def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
     """
     Run the fit and return the minimum along with samples from running an MCMC
     starting near the minimum.
@@ -268,7 +282,7 @@ def do_fit(dm_particle_id,dm_energy,data,muon,data_mc,atmo_sca
     Returns a tuple (xopt, samples) where samples is an array of shape (steps,
     number of parameters).
     """
-    nll = make_nll(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll)
+    nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll)
 
     pos = np.empty((walkers, len(PRIORS)),dtype=np.double)
     for i in range(pos.shape[0]):
@@ -306,11 +320,121 @@ def sample_priors():
     """
     return np.concatenate((truncnorm_scaled(PRIORS_LOW[:6],PRIORS_HIGH[:6],PRIORS[:6],PRIOR_UNCERTAINTIES[:6]),[np.random.uniform(PRIORS_LOW[6],PRIORS_HIGH[6])]))
 
-def get_dm_sample(n,dm_particle_id,dm_energy,dm_resolution):
-    ke = norm.rvs(dm_energy,dm_energy*dm_resolution)
-    id = np.tile(dm_particle_id,n)
-    data = pd.DataFrame(np.concatenate((ke,id)).T,columns=['ke','id'])
-    return data
+def get_dm_sample(n,dm_particle_id,dm_mass,dm_energy):
+    """
+    Returns a dataframe containing events from a dark matter particle.
+
+    Args:
+
+        - n: int
+            number of events
+        - dm_particle_id: int
+            the particle id of the DM particle (2020 or 2222)
+        - dm_mass: float
+            The mass of the DM particle
+        - dm_energy: float
+            The total kinetic energy of the DM particle
+    """
+    id1 = dm_particle_id//100
+    id2 = dm_particle_id % 100
+    m1 = SNOMAN_MASS[id1]
+    m2 = SNOMAN_MASS[id2]
+    data = np.empty(n,dtype=[('energy1',np.double),('energy2',np.double),('ke',np.double),('id1',np.int),('id2',np.int),('id',np.int)])
+    for i, (v1, v2) in enumerate(islice(gen_decay(dm_mass,dm_energy,m1,m2),n)):
+        pos = rand_ball(PSUP_RADIUS)
+        E1 = v1[0]
+        E2 = v2[0]
+        p1 = np.linalg.norm(v1[1:])
+        p2 = np.linalg.norm(v2[1:])
+        T1 = E1 - m1
+        T2 = E2 - m2
+        # FIXME: Get electron and muon resolution
+        T1 += norm.rvs(scale=T1*0.05)
+        T2 += norm.rvs(scale=T2*0.05)
+        data[i] = T1, T2, T1 + T2, id1, id2, dm_particle_id
+    return pd.DataFrame(data)
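get_dm_sample now generates the signal from two-body decay kinematics rather than a single Gaussian-smeared total energy. gen_decay, rand_ball, SNOMAN_MASS, and PSUP_RADIUS come from sddm.dm and are not shown in this diff; presumably gen_decay yields the two daughters' four-vectors (E, px, py, pz). A sketch of the rest-frame kinematics such a generator is built on (natural units, MeV):

    import numpy as np

    def two_body_decay(M, m1, m2):
        # For X -> 1 + 2 at rest, energy-momentum conservation fixes the
        # daughter energies and the common (back-to-back) momentum magnitude.
        E1 = (M**2 + m1**2 - m2**2)/(2*M)
        E2 = (M**2 + m2**2 - m1**2)/(2*M)
        p = np.sqrt(E1**2 - m1**2)
        return E1, E2, p

    # Example: a 100 MeV particle decaying to e+ e- (m_e ~ 0.511 MeV)
    E1, E2, p = two_body_decay(100.0, 0.511, 0.511)
    print(E1, E2, p)  # each daughter carries half the parent's mass energy

A moving parent (dm_energy above dm_mass) would add a Lorentz boost on top of this, which is presumably what gen_decay handles.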
+ """ + id1 = dm_particle_id//100 + id2 = dm_particle_id % 100 + m1 = SNOMAN_MASS[id1] + m2 = SNOMAN_MASS[id2] + energy1 = [] + data = np.empty(n,dtype=[('energy1',np.double),('energy2',np.double),('ke',np.double),('id1',np.int),('id2',np.int),('id',np.int)]) + for i, (v1, v2) in enumerate(islice(gen_decay(dm_mass,dm_energy,m1,m2),n)): + pos = rand_ball(PSUP_RADIUS) + E1 = v1[0] + E2 = v2[0] + p1 = np.linalg.norm(v1[1:]) + p2 = np.linalg.norm(v2[1:]) + T1 = E1 - m1 + T2 = E2 - m2 + # FIXME: Get electron and muon resolution + T1 += norm.rvs(scale=T1*0.05) + T2 += norm.rvs(scale=T2*0.05) + data[i] = T1, T2, T1 + T2, id1, id2, dm_particle_id + return pd.DataFrame(data) + +def integrate_mc_hist(mc, muons, id, x, a, b, atmo_scale_factor, muon_scale_factor): + """ + Integrate the Monte Carlo expected histograms from a to b. + + Args: + - mc: DataFrame + Pandas dataframe of atmospheric Monte Carlo events. + - muons: DataFrame + Pandas dataframe of external muon Monte Carlo events. + - id: int + The particle ID histogram to integrate. + - x: array + The fit parameters. + - a, b: float + Bounds of the integration. + - atmo_scale_factor, muon_scale_factor: float + Scale factors for the fit parameters. + """ + mc_hists = get_mc_hists(mc,x,bins,scale=x[0]/atmo_scale_factor) + muon_hists = get_mc_hists(muons,x,bins,scale=x[5]/muon_scale_factor) + + def total_hist(x): + if x < bins[0] or x > bins[-1]: + return 0 + i = np.digitize(x,bins) + if i == len(bins): + i = len(bins) - 1 + return mc_hists[id][i-1] + muon_hists[id][i-1] + + # Yes, this is a stupid simple integration that I don't need quad for, but + # I don't want to have to code all the corner cases including when a and b + # are in the same bin, etc. + return quad(total_hist,a,b,points=bins[(bins >= a) & (bins <= b)])[0] + +def get_limits(dm_energies,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin): + limits = {} + best_fit = {} + discovery_array = {} + for dm_particle_id in (2020,2222): + limits[dm_particle_id] = [] + best_fit[dm_particle_id] = [] + discovery_array[dm_particle_id] = [] + for dm_energy in dm_energies: + dm_mass = dm_energy + xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin) + + limit = np.percentile(samples[:,6],90) + limits[dm_particle_id].append(limit) + + # Here, to determine if there is a discovery we make an approximate + # calculation of the number of events which would be significant. + # + # We expect the likelihood to be approximately that of a Poisson + # distribution with n background events and we are searching for a + # signal s. n is constrained by the rest of the histograms, and so + # we can treat is as being approximately fixed. In this case, the + # likelihood looks approximately like: + # + # P(s) = e^(-(s+n))(s+n)**i/i! + # + # Where i is the actual number of events. Under the null hypothesis + # (i.e. no dark matter), we expect i to be Poisson distributed with + # mean n. Therefore s should have the same distribution but offset + # by n. Therefore, to determine the threshold, we simply look for + # the threshold we expect in n and then subtract n. + a = dm_energy - 2*dm_energy*DM_ENERGY_RESOLUTION + b = dm_energy + 2*dm_energy*DM_ENERGY_RESOLUTION + n = integrate_mc_hist(data_mc, muon, dm_particle_id, xopt, a, b, atmo_scale_factor, muon_scale_factor) + # Set our discovery threshold to the p-value we want divided by the + # number of bins. 
+
+def get_limits(dm_energies,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin):
+    limits = {}
+    best_fit = {}
+    discovery_array = {}
+    for dm_particle_id in (2020,2222):
+        limits[dm_particle_id] = []
+        best_fit[dm_particle_id] = []
+        discovery_array[dm_particle_id] = []
+        for dm_energy in dm_energies:
+            dm_mass = dm_energy
+            xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin)
+
+            limit = np.percentile(samples[:,6],90)
+            limits[dm_particle_id].append(limit)
+
+            # Here, to determine if there is a discovery we make an approximate
+            # calculation of the number of events which would be significant.
+            #
+            # We expect the likelihood to be approximately that of a Poisson
+            # distribution with n background events, and we are searching for a
+            # signal s. n is constrained by the rest of the histograms, and so
+            # we can treat it as being approximately fixed. In this case, the
+            # likelihood looks approximately like:
+            #
+            #     P(s) = e^(-(s+n))(s+n)**i/i!
+            #
+            # where i is the actual number of events. Under the null hypothesis
+            # (i.e. no dark matter), we expect i to be Poisson distributed with
+            # mean n. Therefore s should have the same distribution but offset
+            # by n. Therefore, to determine the threshold, we simply look for
+            # the threshold we expect in n and then subtract n.
+            a = dm_energy - 2*dm_energy*DM_ENERGY_RESOLUTION
+            b = dm_energy + 2*dm_energy*DM_ENERGY_RESOLUTION
+            n = integrate_mc_hist(data_mc, muon, dm_particle_id, xopt, a, b, atmo_scale_factor, muon_scale_factor)
+            # Set our discovery threshold to the p-value we want divided by the
+            # number of bins. The idea here is that the number of bins is
+            # approximately equal to the number of trials, so we need to
+            # increase our discovery threshold to account for the look
+            # elsewhere effect.
+            threshold = DISCOVERY_P_VALUE/(len(bins)-1)
+            discovery = poisson.ppf(1-threshold,n) + 1 - n
+            discovery_array[dm_particle_id].append(discovery)
+            best_fit[dm_particle_id].append(xopt[6])
+
+    return limits, best_fit, discovery_array
 
 if __name__ == '__main__':
     import argparse
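The discovery criterion above is a Bonferroni-style correction: divide the global p-value by the number of bins (a proxy for the number of independent places a fluctuation could appear), then find the smallest excess over the expected background n that would be that improbable under a background-only Poisson. Stand-alone, with illustrative numbers:

    from scipy.stats import poisson

    DISCOVERY_P_VALUE = 0.05

    def discovery_threshold(n_background, n_bins):
        # Per-bin (look-elsewhere-corrected) p-value.
        local_p = DISCOVERY_P_VALUE/n_bins
        # poisson.ppf(1 - local_p, n) is the largest count still consistent
        # with background; one more event than that counts as a discovery,
        # quoted here as an excess above the background expectation.
        return poisson.ppf(1 - local_p, n_background) + 1 - n_background

    print(discovery_threshold(n_background=10.0, n_bins=50))  # roughly a dozen extra events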
@@ -335,6 +459,7 @@
     parser.add_argument("--print-nll", action='store_true', default=False, help="print nll values")
     parser.add_argument("--walkers", type=int, default=100, help="number of walkers")
     parser.add_argument("--thin", type=int, default=10, help="number of steps to thin")
+    parser.add_argument("--test", type=int, default=0, help="run tests to check discovery threshold")
     args = parser.parse_args()
 
     setup_matplotlib(args.save)
@@ -439,10 +564,11 @@
         n_dm = np.random.poisson(N_dm)
 
         dm_particle_id = np.random.choice([2020,2222])
-        dm_energy = np.random.uniform(20,10e3)
+        dm_mass = np.random.uniform(20,10e3)
+        dm_energy = dm_mass
 
         # Sample data from Monte Carlo
-        data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_energy,DM_ENERGY_RESOLUTION)))
+        data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
         data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
 
         # Smear the energies by the additional energy resolution
@@ -458,7 +584,7 @@
         data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
         data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
 
-        xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+        xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
 
         for i in range(len(FIT_PARS)):
             # The "pull plots" we make here are actually produced via a
@@ -470,7 +596,7 @@
         fig = plt.figure()
         axes = []
         for i, name in enumerate(FIT_PARS):
-            axes.append(plt.subplot(3,2,i+1))
+            axes.append(plt.subplot(4,2,i+1))
             n, bins, patches = plt.hist(pull[i],bins=np.linspace(0,100,11),histtype='step')
             expected = len(pull[i])/(len(bins)-1)
             plt.axhline(expected,color='k',ls='--',alpha=0.25)
@@ -489,19 +615,66 @@
         sys.exit(0)
 
-    dm_energies = np.logspace(np.log10(20),np.log10(10e3),10)
-    limits = {}
-    for dm_particle_id in (2020,2222):
-        limits[dm_particle_id] = []
-        for dm_energy in dm_energies:
-            xopt, samples = do_fit(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+    if args.test:
+        # Set the random seed so we get reproducible results here
+        np.random.seed(0)
 
-            limit = np.percentile(samples[:,6],90)
-            limits[dm_particle_id].append(limit)
+        discoveries = 0
+
+        for i in range(args.test):
+            xtrue = sample_priors()
+
+            # Calculate expected number of events
+            N = len(data_mc)*xtrue[0]/atmo_scale_factor
+            N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
+            N_muon = len(muon)*xtrue[5]/muon_scale_factor
+            N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
+            N_dm = xtrue[6]
+
+            # Calculate observed number of events
+            n = np.random.poisson(N)
+            n_atm = np.random.poisson(N_atm)
+            n_muon = np.random.poisson(N_muon)
+            n_muon_atm = np.random.poisson(N_muon_atm)
+            n_dm = np.random.poisson(N_dm)
+
+            dm_particle_id = np.random.choice([2020,2222])
+            dm_mass = np.random.uniform(20,10e3)
+            dm_energy = dm_mass
+
+            # Sample data from Monte Carlo
+            data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
+            data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
+
+            # Smear the energies by the additional energy resolution
+            data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
+            data.loc[data.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id1 == 22))*xtrue[4])
+            data.loc[data.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id2 == 20))*xtrue[2])
+            data.loc[data.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id2 == 22))*xtrue[4])
+            data['ke'] = data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energy3'].fillna(0)
+
+            data_atm.loc[data_atm.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id1 == 20))*xtrue[2])
+            data_atm.loc[data_atm.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id1 == 22))*xtrue[4])
+            data_atm.loc[data_atm.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id2 == 20))*xtrue[2])
+            data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
+            data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
+
+            limits, best_fit, discovery_array = get_limits(DM_ENERGIES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+
+            for id in (2020,2222):
+                if (np.array(best_fit[id]) > np.array(discovery_array[id])).any():
+                    discoveries += 1
+
+        print("expected %.2f discoveries" % DISCOVERY_P_VALUE)
+        print("actually got %i/%i = %.2f discoveries" % (discoveries,args.test,discoveries/args.test))
+
+        sys.exit(0)
+
+    limits, best_fit, discovery_array = get_limits(DM_ENERGIES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
 
     fig = plt.figure()
-    for dm_particle_id in (2020,2222):
-        plt.plot(dm_energies,limits[dm_particle_id],label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
+    for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
+        plt.plot(DM_ENERGIES,limits[dm_particle_id],color=color,label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
     plt.gca().set_xscale("log")
     despine(fig,trim=True)
     plt.xlabel("Energy (MeV)")
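With --test N the script runs N pseudo-experiments and counts how often the best-fit dark matter scale exceeds the discovery threshold, then compares the observed fraction with DISCOVERY_P_VALUE. Whether an observed count is consistent with that rate can be judged with a simple binomial test; a sketch, not part of the script, assuming each pseudo-experiment independently yields a false discovery with probability DISCOVERY_P_VALUE under the null, with illustrative numbers:

    from scipy.stats import binom

    n_trials = 100   # e.g. --test 100
    observed = 8     # discoveries counted by the loop above
    p = 0.05         # DISCOVERY_P_VALUE

    # Two-sided binomial p-value for seeing `observed` discoveries when
    # each trial fires with probability p under the null hypothesis.
    p_two_sided = 2*min(binom.cdf(observed, n_trials, p),
                        binom.sf(observed - 1, n_trials, p))
    print(p_two_sided)  # small values suggest the threshold is miscalibrated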
@@ -514,4 +687,21 @@
         plt.savefig("dm_search_limit.eps")
     else:
         plt.suptitle("Dark Matter Limits")
+
+    fig = plt.figure()
+    for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
+        plt.plot(DM_ENERGIES,best_fit[dm_particle_id],color=color,label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
+        plt.plot(DM_ENERGIES,discovery_array[dm_particle_id],color=color,ls='--')
+    plt.gca().set_xscale("log")
+    despine(fig,trim=True)
+    plt.xlabel("Energy (MeV)")
+    plt.ylabel("Event Rate Limit (events)")
+    plt.legend()
+    plt.tight_layout()
+
+    if args.save:
+        plt.savefig("dm_best_fit_with_discovery_threshold.pdf")
+        plt.savefig("dm_best_fit_with_discovery_threshold.eps")
+    else:
+        plt.suptitle("Best Fit Dark Matter")
     plt.show()