author | tlatorre <tlatorre@uchicago.edu> | 2020-12-08 09:02:16 -0600 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2020-12-08 09:02:16 -0600 |
commit | 5d8906508262d326450752204ac93119f355db88 (patch) | |
tree | 983f82312909c17c630962a259250e49fffa236d /utils/dm-search | |
parent | ed6302abb7bf4c3304f2e8f7cb6c7462c7caaf4f (diff) | |
update dm-search to fix bug
This commit updates dm-search to fix a bug where I was returning lists
from get_limits() but then comparing the best fit and discovery lists as
if they were numpy arrays.
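For context, the failure mode is the standard Python pitfall: comparing two plain lists produces a single lexicographic result rather than the element-wise comparison numpy arrays give. A minimal sketch (the values and the comparison below are hypothetical stand-ins, not the script's actual downstream code):

```python
import numpy as np

# Hypothetical per-mass values; stand-ins for the real fit output.
best_fit = [3.2, 0.1, 7.5]    # best-fit number of dark matter events
discovery = [5.0, 5.0, 5.0]   # discovery thresholds

# Plain lists compare lexicographically, yielding one bool for the whole list.
print(best_fit > discovery)   # False

# numpy arrays compare element-wise, which is what array-style code expects.
print(np.asarray(best_fit) > np.asarray(discovery))   # [False False  True]
```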
I also updated how I calculate the expected number of events: it is now
based on a histogram of a toy MC dark matter sample rather than integrating
the MC histograms over a fixed energy window.
Diffstat (limited to 'utils/dm-search')
-rwxr-xr-x | utils/dm-search | 74 |
1 file changed, 30 insertions(+), 44 deletions(-)
diff --git a/utils/dm-search b/utils/dm-search
index 1e9c987..5b9cbaa 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -377,49 +377,15 @@ def get_dm_sample(n,dm_particle_id,dm_mass,dm_energy):
         data[i] = T1, T2, T1 + T2, id1, id2, dm_particle_id
     return pd.DataFrame(data)
 
-def integrate_mc_hist(mc, muons, id, x, a, b, atmo_scale_factor, muon_scale_factor):
-    """
-    Integrate the Monte Carlo expected histograms from a to b.
-
-    Args:
-        - mc: DataFrame
-            Pandas dataframe of atmospheric Monte Carlo events.
-        - muons: DataFrame
-            Pandas dataframe of external muon Monte Carlo events.
-        - id: int
-            The particle ID histogram to integrate.
-        - x: array
-            The fit parameters.
-        - a, b: float
-            Bounds of the integration.
-        - atmo_scale_factor, muon_scale_factor: float
-            Scale factors for the fit parameters.
-    """
-    mc_hists = get_mc_hists(mc,x,bins,scale=x[0]/atmo_scale_factor)
-    muon_hists = get_mc_hists(muons,x,bins,scale=x[5]/muon_scale_factor)
-
-    def total_hist(x):
-        if x < bins[0] or x > bins[-1]:
-            return 0
-        i = np.digitize(x,bins)
-        if i == len(bins):
-            i = len(bins) - 1
-        return (mc_hists[id][i-1] + muon_hists[id][i-1])/(bins[i]-bins[i-1])
-
-    # Yes, this is a stupid simple integration that I don't need quad for, but
-    # I don't want to have to code all the corner cases including when a and b
-    # are in the same bin, etc.
-    return quad(total_hist,a,b,points=bins[(bins >= a) & (bins <= b)])[0]
-
 def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin):
     limits = {}
     best_fit = {}
     discovery_array = {}
     for dm_particle_id in (2020,2222):
-        limits[dm_particle_id] = []
-        best_fit[dm_particle_id] = []
-        discovery_array[dm_particle_id] = []
-        for dm_mass in dm_masses[dm_particle_id]:
+        limits[dm_particle_id] = np.empty(len(dm_masses[dm_particle_id]))
+        best_fit[dm_particle_id] = np.empty(len(dm_masses[dm_particle_id]))
+        discovery_array[dm_particle_id] = np.empty(len(dm_masses[dm_particle_id]))
+        for i, dm_mass in enumerate(dm_masses[dm_particle_id]):
             id1 = dm_particle_id//100
             id2 = dm_particle_id % 100
             m1 = SNOMAN_MASS[id1]
@@ -428,7 +394,7 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b
             xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin)
 
             limit = np.percentile(samples[:,6],90)
-            limits[dm_particle_id].append(limit)
+            limits[dm_particle_id][i] = limit
 
             # Here, to determine if there is a discovery we make an approximate
             # calculation of the number of events which would be significant.
@@ -447,9 +413,22 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b
             # by n. Therefore, to determine the threshold, we simply look for
             # the threshold we expect in n and then subtract n.
             dm_kinetic_energy = dm_energy - m1 - m2
-            a = dm_kinetic_energy - 2*dm_kinetic_energy*DM_ENERGY_RESOLUTION
-            b = dm_kinetic_energy + 2*dm_kinetic_energy*DM_ENERGY_RESOLUTION
-            n = integrate_mc_hist(data_mc, muon, dm_particle_id, xopt, a, b, atmo_scale_factor, muon_scale_factor)
+
+            dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy)
+
+            # To calculate `n` we approximately want the number of events in
+            # the bin which most of the dark matter events will fall. However,
+            # to smoothly transition between bins, we multiply the normalized
+            # dark matter histogram with the expected MC histogram and then
+            # take the sum. In the case that the dark matter events all fall
+            # into a single bin, this gives us that bin, but smoothly
+            # interpolates between the bins.
+            dm_hists = get_mc_hists(dm_sample,xopt,bins,scale=1/len(dm_sample))
+            frac = dm_hists[dm_particle_id].sum()
+            dm_hists[dm_particle_id] /= frac
+            mc_hists = get_mc_hists(data_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor)
+            muon_hists = get_mc_hists(muon,xopt,bins,scale=xopt[5]/muon_scale_factor)
+            n = (dm_hists[dm_particle_id]*(mc_hists[dm_particle_id] + muon_hists[dm_particle_id])).sum()
             # Set our discovery threshold to the p-value we want divided by the
             # number of bins. The idea here is that the number of bins is
             # approximately equal to the number of trials so we need to
@@ -457,8 +436,15 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b
             # elsewhere effect.
             threshold = DISCOVERY_P_VALUE/(len(bins)-1)
             discovery = poisson.ppf(1-threshold,n) + 1 - n
-            discovery_array[dm_particle_id].append(discovery)
-            best_fit[dm_particle_id].append(xopt[6])
+            # Here, we scale the discovery threshold by the fraction of the
+            # dark matter hist in the histogram range. The idea is that if only
+            # a small fraction of the dark matter histogram falls into the
+            # histogram range, the total number of dark matter events returned
+            # by the fit can be larger by this amount. I noticed this when
+            # testing under the null hypothesis that the majority of the
+            # "discoveries" were on the edge of the histogram.
+            discovery_array[dm_particle_id][i] = discovery/frac
+            best_fit[dm_particle_id][i] = xopt[6]
 
     return limits, best_fit, discovery_array
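As a rough illustration of the new calculation of `n` and the `discovery/frac` scaling, here is a minimal numpy/scipy sketch; the bin edges, histogram counts, and `DISCOVERY_P_VALUE` below are made-up stand-ins rather than the script's real values:

```python
import numpy as np
from scipy.stats import poisson

DISCOVERY_P_VALUE = 0.05            # stand-in for the script's constant
bins = np.linspace(20, 10000, 11)   # stand-in energy bin edges -> 10 bins

# Toy dark matter sample histogram (95 of 100 toy events land in range)
# and a stand-in for the combined atmospheric + muon expectation.
dm_hist = np.array([0, 0, 0, 5, 90, 0, 0, 0, 0, 0], dtype=float)
mc_plus_muon = np.array([120, 80, 60, 45, 30, 20, 12, 8, 5, 3], dtype=float)

# Fraction of the toy dark matter events inside the histogram range, then
# normalize the in-range histogram so it sums to 1.
frac = dm_hist.sum() / 100.0
weights = dm_hist / dm_hist.sum()

# Expected background "under the dark matter peak": a weighted sum over bins.
# If all dark matter events fall in one bin this is just that bin's
# expectation; otherwise it interpolates smoothly between bins.
n = (weights * mc_plus_muon).sum()

# Trials-corrected discovery threshold, scaled up by 1/frac to account for
# dark matter events that fall outside the histogram range.
threshold = DISCOVERY_P_VALUE / (len(bins) - 1)
discovery = (poisson.ppf(1 - threshold, n) + 1 - n) / frac

print(f"n = {n:.1f}, discovery threshold = {discovery:.1f} events")
```

The division by `frac` only matters near the edges of the energy range, where part of the dark matter peak falls outside the histogram and the fitted number of dark matter events can exceed the in-range count by that factor.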