commit fbcdf03f205840c5cd550689e3f60f5168f8ecca
tree 8733e490193a7243a87dda563a86597a58a96289
parent 31bf8df0a8d222310bbbd772067f336140c24baa
author tlatorre <tlatorre@uchicago.edu> 2020-11-16 07:55:17 -0600
committer tlatorre <tlatorre@uchicago.edu> 2020-11-16 07:55:17 -0600
update dm-search script
Diffstat (limited to 'utils/dm-search')
 utils/dm-search | 250 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 220 insertions(+), 30 deletions(-)
diff --git a/utils/dm-search b/utils/dm-search
index 323aabc..1d14d05 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -33,6 +33,8 @@ from sddm.dc import estimate_errors, EPSILON, truncnorm_scaled
import emcee
from sddm import printoptions
from sddm.utils import fast_cdf, correct_energy_bias
+from scipy.integrate import quad
+from sddm.dm import *
# Likelihood Fit Parameters
# 0 - Atmospheric Neutrino Flux Scale
@@ -43,6 +45,15 @@ from sddm.utils import fast_cdf, correct_energy_bias
# 5 - External Muon scale
# 6 - Dark Matter Scale
+# Number of events to use in the dark matter Monte Carlo histogram when
+# fitting. Ideally this would be as big as possible, since the statistical
+# uncertainty on each histogram bin scales like 1/sqrt(DM_SAMPLES), but the
+# bigger it is, the more time the fit takes.
+DM_SAMPLES = 10000
+
+DM_ENERGIES = np.logspace(np.log10(20),np.log10(10e3),10)
+
+DISCOVERY_P_VALUE = 0.05
+
# Energy resolution as a fraction of energy for dark matter signal
DM_ENERGY_RESOLUTION = 0.1
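
For intuition, the dark matter signal shape in energy is a mono-energetic peak smeared by this resolution; the fraction of the peak landing in each bin is a difference of Gaussian CDFs, which is what the fast_cdf-based code removed further down computed. A standalone sketch, assuming fast_cdf behaves like a normal CDF, with made-up bin edges:

    import numpy as np
    from scipy.stats import norm

    DM_ENERGY_RESOLUTION = 0.1
    bins = np.linspace(0, 1000, 51)   # illustrative 20 MeV wide bins
    dm_energy = 500.0

    # fraction of the smeared peak expected in each energy bin
    cdf = norm.cdf(bins, dm_energy, dm_energy*DM_ENERGY_RESOLUTION)
    bin_fractions = cdf[1:] - cdf[:-1]
    print(bin_fractions.sum())        # ~1.0 when the peak is well inside the range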
@@ -75,7 +86,7 @@ PRIORS = [
1.0, # Atmospheric Neutrino Scale
0.015, # Electron energy scale
0.0, # Electron energy resolution
- 0.054, # Muon energy scale
+ 0.053, # Muon energy scale
0.0, # Muon energy resolution
0.0, # Muon scale
0.0, # Dark Matter Scale
@@ -85,8 +96,8 @@ PRIOR_UNCERTAINTIES = [
0.2, # Atmospheric Neutrino Scale
0.03, # Electron energy scale
0.05, # Electron energy resolution
- 0.011, # Muon energy scale
- 0.014, # Muon energy resolution
+ 0.01, # Muon energy scale
+ 0.013, # Muon energy resolution
10.0, # Muon scale
np.inf,# Dark Matter Scale
]
@@ -206,7 +217,7 @@ def get_data_hists(data,bins,scale=1.0):
data_hists[id] = np.histogram(data[data.id == id].ke.values,bins=bins)[0]*scale
return data_hists
-def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False):
+def make_nll(dm_particle_id, dm_mass, dm_energy, data, muons, mc, atmo_scale_factor, muon_scale_factor, bins, print_nll=False):
df_dict = {}
for id in (20,22,2020,2022,2222):
df_dict[id] = mc[mc.id == id]
@@ -217,6 +228,12 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
data_hists = get_data_hists(data,bins)
+ dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy)
+
+ df_dict_dm = {}
+ for id in (20,22,2020,2022,2222):
+ df_dict_dm[id] = dm_sample[dm_sample.id == id]
+
def nll(x, grad=None):
if any(x[i] < 0 for i in (0,2,4,5,6)):
return np.inf
@@ -226,6 +243,7 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
# to the Monte Carlo.
mc_hists = get_mc_hists_fast(df_dict,x,bins,scale=1/atmo_scale_factor)
muon_hists = get_mc_hists_fast(df_dict_muon,x,bins,scale=1/muon_scale_factor)
+ dm_hists = get_mc_hists_fast(df_dict_dm,x,bins,scale=1/len(dm_sample))
# Calculate the negative log of the likelihood of observing the data
# given the fit parameters
@@ -233,11 +251,7 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
nll = 0
for id in data_hists:
oi = data_hists[id]
- ei = mc_hists[id]*x[0] + muon_hists[id]*x[5] + EPSILON
- if id == dm_particle_id:
- cdf = fast_cdf(bins,dm_energy,dm_energy*DM_ENERGY_RESOLUTION)
- dm_hist = np.sum(cdf[1:] - cdf[:-1])
- ei += dm_hist*x[6]
+ ei = mc_hists[id]*x[0] + muon_hists[id]*x[5] + dm_hists[id]*x[6] + EPSILON
N = ei.sum()
nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))
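
Each histogram contributes the negative log of a product of independent Poisson bin probabilities, P(o_i) = e^(-e_i) e_i^(o_i) / o_i!. Since N = sum(e_i), the update above is equivalent to this standalone sketch (names here are illustrative, not from the script):

    import numpy as np
    from scipy.special import gammaln

    def poisson_nll(observed, expected):
        # -log L = sum_i [e_i + log(o_i!) - o_i*log(e_i)], with log(o_i!) = gammaln(o_i+1)
        observed = np.asarray(observed, dtype=float)
        expected = np.asarray(expected, dtype=float)
        return np.sum(expected + gammaln(observed + 1) - observed*np.log(expected))

    print(poisson_nll([5, 2, 0], [4.2, 2.5, 0.3]))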
@@ -251,7 +265,7 @@ def make_nll(dm_particle_id, dm_energy, data, muons, mc, atmo_scale_factor, muon
return nll
return nll
-def do_fit(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
+def do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll=False,walkers=100,thin=10):
"""
Run the fit and return the minimum along with samples from running an MCMC
starting near the minimum.
@@ -268,7 +282,7 @@ def do_fit(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_sca
Returns a tuple (xopt, samples) where samples is an array of shape (steps,
number of parameters).
"""
- nll = make_nll(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll)
+ nll = make_nll(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,print_nll)
pos = np.empty((walkers, len(PRIORS)),dtype=np.double)
for i in range(pos.shape[0]):
@@ -306,11 +320,121 @@ def sample_priors():
"""
return np.concatenate((truncnorm_scaled(PRIORS_LOW[:6],PRIORS_HIGH[:6],PRIORS[:6],PRIOR_UNCERTAINTIES[:6]),[np.random.uniform(PRIORS_LOW[6],PRIORS_HIGH[6])]))
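
sample_priors draws the six nuisance parameters from truncated normal priors and the dark matter scale from a flat prior. truncnorm_scaled comes from sddm.dc; a minimal sketch of what it presumably does, using scipy's truncnorm (the bounds and values below are made up for illustration):

    import numpy as np
    from scipy.stats import truncnorm

    def truncnorm_scaled_sketch(low, high, mu, sigma):
        # scipy parametrizes the truncation bounds in units of sigma
        low, high, mu, sigma = map(np.asarray, (low, high, mu, sigma))
        a = (low - mu)/sigma
        b = (high - mu)/sigma
        return truncnorm.rvs(a, b, loc=mu, scale=sigma)

    # e.g. an atmospheric scale prior of 1.0 +/- 0.2, truncated to be non-negative
    print(truncnorm_scaled_sketch([0.0], [10.0], [1.0], [0.2]))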
-def get_dm_sample(n,dm_particle_id,dm_energy,dm_resolution):
- ke = norm.rvs(dm_energy,dm_energy*dm_resolution)
- id = np.tile(dm_particle_id,n)
- data = pd.DataFrame(np.concatenate((ke,id)).T,columns=['ke','id'])
- return data
+def get_dm_sample(n,dm_particle_id,dm_mass,dm_energy):
+ """
+ Returns a dataframe containing events from a dark matter particle.
+
+ Args:
+
+ - n: int
+ number of events
+ - dm_particle_id: int
+ the particle id of the DM particle (2020 or 2222)
+ - dm_energy: float
+ The total kinetic energy of the DM particle
+ - dm_resolution: float
+ The fractional energy resolution of the dark matter particle, i.e.
+ the actual energy resolution will be dm_energy*dm_resolution.
+ """
+    id1 = dm_particle_id//100
+    id2 = dm_particle_id % 100
+    m1 = SNOMAN_MASS[id1]
+    m2 = SNOMAN_MASS[id2]
+    data = np.empty(n,dtype=[('energy1',np.double),('energy2',np.double),('ke',np.double),('id1',int),('id2',int),('id',int)])
+    for i, (v1, v2) in enumerate(islice(gen_decay(dm_mass,dm_energy,m1,m2),n)):
+        pos = rand_ball(PSUP_RADIUS)  # event vertex; currently unused below
+        E1 = v1[0]
+        E2 = v2[0]
+        p1 = np.linalg.norm(v1[1:])   # daughter momenta, currently unused
+        p2 = np.linalg.norm(v2[1:])
+        T1 = E1 - m1
+        T2 = E2 - m2
+        # FIXME: Get electron and muon resolution
+        T1 += norm.rvs(scale=T1*0.05)
+        T2 += norm.rvs(scale=T2*0.05)
+        data[i] = T1, T2, T1 + T2, id1, id2, dm_particle_id
+    return pd.DataFrame(data)
+
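
gen_decay, rand_ball, PSUP_RADIUS, and SNOMAN_MASS come from the sddm packages. Presumably gen_decay yields lab-frame four-momenta (E, px, py, pz) for the two decay products; a self-contained sketch of the standard two-body decay kinematics it would implement, assuming an isotropic decay in the parent rest frame and a parent boosted along z:

    import numpy as np

    def gen_decay_sketch(M, E, m1, m2, rng=np.random):
        # Yields (E,px,py,pz) four-momenta of the daughters of a parent with
        # mass M and total lab energy E decaying isotropically to m1 + m2.
        p_lab = np.sqrt(E**2 - M**2)            # parent momentum (along +z)
        beta, gamma = p_lab/E, E/M
        # rest-frame energies fixed by energy-momentum conservation
        E1 = (M**2 + m1**2 - m2**2)/(2*M)
        E2 = (M**2 + m2**2 - m1**2)/(2*M)
        p = np.sqrt(E1**2 - m1**2)
        while True:
            costheta = rng.uniform(-1, 1)
            phi = rng.uniform(0, 2*np.pi)
            sintheta = np.sqrt(1 - costheta**2)
            n = np.array([sintheta*np.cos(phi), sintheta*np.sin(phi), costheta])
            v1 = np.array([E1, *(p*n)])
            v2 = np.array([E2, *(-p*n)])
            for v in (v1, v2):                  # boost into the lab frame
                v[0], v[3] = gamma*(v[0] + beta*v[3]), gamma*(v[3] + beta*v[0])
            yield v1, v2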
+def integrate_mc_hist(mc, muons, id, x, a, b, atmo_scale_factor, muon_scale_factor, bins):
+    """
+    Integrate the Monte Carlo expected histograms from a to b.
+
+    Args:
+        - mc: DataFrame
+            Pandas dataframe of atmospheric Monte Carlo events.
+        - muons: DataFrame
+            Pandas dataframe of external muon Monte Carlo events.
+        - id: int
+            The particle ID histogram to integrate.
+        - x: array
+            The fit parameters.
+        - a, b: float
+            Bounds of the integration.
+        - atmo_scale_factor, muon_scale_factor: float
+            Scale factors for the fit parameters.
+        - bins: array
+            The energy bin edges of the histograms.
+    """
+    mc_hists = get_mc_hists(mc,x,bins,scale=x[0]/atmo_scale_factor)
+    muon_hists = get_mc_hists(muons,x,bins,scale=x[5]/muon_scale_factor)
+
+    def total_hist(e):
+        if e < bins[0] or e > bins[-1]:
+            return 0
+        i = np.digitize(e,bins)
+        if i == len(bins):
+            i = len(bins) - 1
+        return mc_hists[id][i-1] + muon_hists[id][i-1]
+
+    # Yes, this is a stupid simple integration that I don't need quad for, but
+    # I don't want to have to code all the corner cases including when a and b
+    # are in the same bin, etc.
+    return quad(total_hist,a,b,points=bins[(bins >= a) & (bins <= b)])[0]
+
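
Since the integrand above is piecewise constant, the same integral can also be computed directly by clipping each bin to [a, b] and summing, which handles the corner cases (including a and b falling in the same bin) in two lines. A sketch for comparison, with illustrative inputs:

    import numpy as np

    def integrate_step_hist(hist, bins, a, b):
        # integral over [a,b] of the step function equal to hist[i] on [bins[i], bins[i+1])
        lo = np.clip(bins[:-1], a, b)
        hi = np.clip(bins[1:], a, b)
        return np.sum(hist*(hi - lo))

    bins = np.linspace(0, 10, 11)
    hist = np.ones(10)
    print(integrate_step_hist(hist, bins, 2.5, 4.5))   # -> 2.0
    print(integrate_step_hist(hist, bins, 3.1, 3.2))   # same-bin case -> 0.1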
+def get_limits(dm_energies,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin):
+ limits = {}
+ best_fit = {}
+ discovery_array = {}
+ for dm_particle_id in (2020,2222):
+ limits[dm_particle_id] = []
+ best_fit[dm_particle_id] = []
+ discovery_array[dm_particle_id] = []
+ for dm_energy in dm_energies:
+ dm_mass = dm_energy
+ xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin)
+
+ limit = np.percentile(samples[:,6],90)
+ limits[dm_particle_id].append(limit)
+
+ # Here, to determine if there is a discovery we make an approximate
+ # calculation of the number of events which would be significant.
+ #
+ # We expect the likelihood to be approximately that of a Poisson
+ # distribution with n background events and we are searching for a
+ # signal s. n is constrained by the rest of the histograms, and so
+            # we can treat it as being approximately fixed. In this case, the
+ # likelihood looks approximately like:
+ #
+ # P(s) = e^(-(s+n))(s+n)**i/i!
+ #
+ # Where i is the actual number of events. Under the null hypothesis
+ # (i.e. no dark matter), we expect i to be Poisson distributed with
+            # mean n. Therefore s should follow the same distribution offset
+            # by n, so to find the discovery threshold we take the Poisson
+            # threshold for i under the null hypothesis and subtract n.
+ a = dm_energy - 2*dm_energy*DM_ENERGY_RESOLUTION
+ b = dm_energy + 2*dm_energy*DM_ENERGY_RESOLUTION
+            n = integrate_mc_hist(data_mc, muon, dm_particle_id, xopt, a, b, atmo_scale_factor, muon_scale_factor, bins)
+ # Set our discovery threshold to the p-value we want divided by the
+ # number of bins. The idea here is that the number of bins is
+ # approximately equal to the number of trials so we need to
+ # increase our discovery threshold to account for the look
+ # elsewhere effect.
+ threshold = DISCOVERY_P_VALUE/(len(bins)-1)
+ discovery = poisson.ppf(1-threshold,n) + 1 - n
+ discovery_array[dm_particle_id].append(discovery)
+ best_fit[dm_particle_id].append(xopt[6])
+
+ return limits, best_fit, discovery_array
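
A quick numeric illustration of the threshold calculation: if the signal window contains n expected background events, the discovery threshold on the signal is the (1 - p/nbins) Poisson quantile minus n. The numbers here are made up:

    import numpy as np
    from scipy.stats import poisson

    DISCOVERY_P_VALUE = 0.05
    n_bins = 100     # illustrative number of energy bins
    n = 25.0         # illustrative background expectation in the +/- 2 sigma window

    threshold = DISCOVERY_P_VALUE/n_bins               # look elsewhere correction
    discovery = poisson.ppf(1 - threshold, n) + 1 - n  # signal events above background
    print("a signal of %.0f events above n = %.0f background would be a discovery" % (discovery, n))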
if __name__ == '__main__':
import argparse
@@ -335,6 +459,7 @@ if __name__ == '__main__':
parser.add_argument("--print-nll", action='store_true', default=False, help="print nll values")
parser.add_argument("--walkers", type=int, default=100, help="number of walkers")
parser.add_argument("--thin", type=int, default=10, help="number of steps to thin")
+ parser.add_argument("--test", type=int, default=0, help="run tests to check discovery threshold")
args = parser.parse_args()
setup_matplotlib(args.save)
@@ -439,10 +564,11 @@ if __name__ == '__main__':
n_dm = np.random.poisson(N_dm)
dm_particle_id = np.random.choice([2020,2222])
- dm_energy = np.random.uniform(20,10e3)
+ dm_mass = np.random.uniform(20,10e3)
+ dm_energy = dm_mass
# Sample data from Monte Carlo
- data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_energy,DM_ENERGY_RESOLUTION)))
+ data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
# Smear the energies by the additional energy resolution
@@ -458,7 +584,7 @@ if __name__ == '__main__':
data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
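
The smearing applied to the fake data above implements a simple linear response model: each reconstructed energy is scaled by (1 + scale) and smeared with a fractional resolution. A standalone sketch of the model (parameter values illustrative):

    import numpy as np

    def smear(energy, scale, resolution, rng=np.random):
        # E -> E*(1 + scale + N(0,1)*resolution), the model behind xtrue[1:5]
        energy = np.asarray(energy, dtype=float)
        return energy*(1 + scale + rng.randn(len(energy))*resolution)

    print(smear([100.0, 200.0], 0.015, 0.03))   # e.g. electron scale/resolution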
- xopt, samples = do_fit(data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
for i in range(len(FIT_PARS)):
# The "pull plots" we make here are actually produced via a
@@ -470,7 +596,7 @@ if __name__ == '__main__':
fig = plt.figure()
axes = []
for i, name in enumerate(FIT_PARS):
- axes.append(plt.subplot(3,2,i+1))
+ axes.append(plt.subplot(4,2,i+1))
n, bins, patches = plt.hist(pull[i],bins=np.linspace(0,100,11),histtype='step')
expected = len(pull[i])/(len(bins)-1)
plt.axhline(expected,color='k',ls='--',alpha=0.25)
@@ -489,19 +615,66 @@ if __name__ == '__main__':
sys.exit(0)
- dm_energies = np.logspace(np.log10(20),np.log10(10e3),10)
- limits = {}
- for dm_particle_id in (2020,2222):
- limits[dm_particle_id] = []
- for dm_energy in dm_energies:
- xopt, samples = do_fit(dm_particle_id,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+ if args.test:
+ # Set the random seed so we get reproducible results here
+ np.random.seed(0)
- limit = np.percentile(samples[:,6],90)
- limits[dm_particle_id].append(limit)
+ discoveries = 0
+
+ for i in range(args.test):
+ xtrue = sample_priors()
+
+ # Calculate expected number of events
+ N = len(data_mc)*xtrue[0]/atmo_scale_factor
+ N_atm = len(data_atm_mc)*xtrue[0]/atmo_scale_factor
+ N_muon = len(muon)*xtrue[5]/muon_scale_factor
+ N_muon_atm = len(muon_atm)*xtrue[5]/muon_scale_factor
+ N_dm = xtrue[6]
+
+ # Calculate observed number of events
+ n = np.random.poisson(N)
+ n_atm = np.random.poisson(N_atm)
+ n_muon = np.random.poisson(N_muon)
+ n_muon_atm = np.random.poisson(N_muon_atm)
+ n_dm = np.random.poisson(N_dm)
+
+ dm_particle_id = np.random.choice([2020,2222])
+ dm_mass = np.random.uniform(20,10e3)
+ dm_energy = dm_mass
+
+ # Sample data from Monte Carlo
+ data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True),get_dm_sample(n_dm,dm_particle_id,dm_mass,dm_energy)))
+ data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
+
+ # Smear the energies by the additional energy resolution
+ data.loc[data.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id1 == 20))*xtrue[2])
+ data.loc[data.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id1 == 22))*xtrue[4])
+ data.loc[data.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data.id2 == 20))*xtrue[2])
+ data.loc[data.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data.id2 == 22))*xtrue[4])
+ data['ke'] = data['energy1'].fillna(0) + data['energy2'].fillna(0) + data['energy3'].fillna(0)
+
+ data_atm.loc[data_atm.id1 == 20,'energy1'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id1 == 20))*xtrue[2])
+ data_atm.loc[data_atm.id1 == 22,'energy1'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id1 == 22))*xtrue[4])
+ data_atm.loc[data_atm.id2 == 20,'energy2'] *= (1+xtrue[1]+np.random.randn(np.count_nonzero(data_atm.id2 == 20))*xtrue[2])
+ data_atm.loc[data_atm.id2 == 22,'energy2'] *= (1+xtrue[3]+np.random.randn(np.count_nonzero(data_atm.id2 == 22))*xtrue[4])
+ data_atm['ke'] = data_atm['energy1'].fillna(0) + data_atm['energy2'].fillna(0) + data_atm['energy3'].fillna(0)
+
+ limits, best_fit, discovery_array = get_limits(DM_ENERGIES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
+
+            for id in (2020,2222):
+                if np.any(np.array(best_fit[id]) > np.array(discovery_array[id])):
+                    discoveries += 1
+
+        print("expected a false discovery in %.2f of the trials" % DISCOVERY_P_VALUE)
+        print("actually got %i/%i = %.2f" % (discoveries,args.test,discoveries/args.test))
+
+ sys.exit(0)
+
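
With args.test independent trials, the observed discovery fraction can be checked against DISCOVERY_P_VALUE using a binomial standard error; a small sketch of such a check (names and numbers illustrative):

    import numpy as np

    def check_false_discovery_rate(discoveries, trials, p_expected):
        # normal approximation to the binomial uncertainty on the observed fraction
        p_observed = discoveries/trials
        sigma = np.sqrt(p_expected*(1 - p_expected)/trials)
        print("observed %.3f, expected %.3f (%+.1f sigma)" %
              (p_observed, p_expected, (p_observed - p_expected)/sigma))

    check_false_discovery_rate(7, 100, 0.05)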
+ limits, best_fit, discovery_array = get_limits(DM_ENERGIES,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,args.steps,args.print_nll,args.walkers,args.thin)
fig = plt.figure()
- for dm_particle_id in (2020,2222):
- plt.plot(dm_energies,limits[dm_particle_id],label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
+ for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
+ plt.plot(DM_ENERGIES,limits[dm_particle_id],color=color,label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
plt.gca().set_xscale("log")
despine(fig,trim=True)
plt.xlabel("Energy (MeV)")
@@ -514,4 +687,21 @@ if __name__ == '__main__':
plt.savefig("dm_search_limit.eps")
else:
plt.suptitle("Dark Matter Limits")
+
+ fig = plt.figure()
+ for color, dm_particle_id in zip(('C0','C1'),(2020,2222)):
+ plt.plot(DM_ENERGIES,best_fit[dm_particle_id],color=color,label='$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(dm_particle_id),2)]) + '$')
+ plt.plot(DM_ENERGIES,discovery_array[dm_particle_id],color=color,ls='--')
+ plt.gca().set_xscale("log")
+ despine(fig,trim=True)
+ plt.xlabel("Energy (MeV)")
+ plt.ylabel("Event Rate Limit (events)")
+ plt.legend()
+ plt.tight_layout()
+
+ if args.save:
+ plt.savefig("dm_best_fit_with_discovery_threshold.pdf")
+ plt.savefig("dm_best_fit_with_discovery_threshold.eps")
+ else:
+ plt.suptitle("Best Fit Dark Matter")
plt.show()