| author | tlatorre <tlatorre@uchicago.edu> | 2020-12-08 09:02:16 -0600 |
|---|---|---|
| committer | tlatorre <tlatorre@uchicago.edu> | 2020-12-08 09:02:16 -0600 |
| commit | 5d8906508262d326450752204ac93119f355db88 (patch) | |
| tree | 983f82312909c17c630962a259250e49fffa236d /utils/dm-search | |
| parent | ed6302abb7bf4c3304f2e8f7cb6c7462c7caaf4f (diff) | |
update dm-search to fix bug
This commit updates dm-search to fix a bug where I was returning lists
from get_limits() but then comparing the best fit and discovery lists as
if they were numpy arrays.
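For context, a minimal sketch of the failure mode (illustrative values, not the actual dm-search data): comparing plain Python lists with `>` is lexicographic and yields a single bool, while the calling code assumed an elementwise numpy comparison.

```python
import numpy as np

# Illustrative values only, not the actual dm-search output.
best_fit = [1.0, 5.0, 2.0]    # what get_limits() used to return (lists)
discovery = [3.0, 3.0, 3.0]

# Lists compare lexicographically: this is a single bool (False here,
# since 1.0 > 3.0 fails on the first element), not an elementwise test.
print(best_fit > discovery)                           # False

# numpy arrays broadcast the comparison elementwise, which is what the
# downstream code assumed. Returning arrays from get_limits() fixes this.
print(np.asarray(best_fit) > np.asarray(discovery))   # [False  True False]
```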
I also updated how I calculate the expected number of events: instead of
integrating the expected Monte Carlo histograms, I now compute it from a toy
MC of the dark matter signal (see the sketch below).
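A self-contained sketch of the new weighted-sum calculation (the full change is in the diff below; the toy histograms here are hypothetical stand-ins for the real `get_dm_sample()` and `get_mc_hists()` output):

```python
import numpy as np

# Hypothetical stand-ins for the real inputs: in dm-search the dark matter
# histogram comes from histogramming DM_SAMPLES toy events drawn with
# get_dm_sample(), and the background histogram from get_mc_hists() on the
# atmospheric and muon Monte Carlo.
bins = np.linspace(20, 10e3, 47)
n_toys = 1000
toy_energies = np.random.normal(5e3, 250, n_toys)   # toy dark matter events
dm_hist, _ = np.histogram(toy_energies, bins=bins)
bkg_hist = np.full(len(bins) - 1, 10.0)             # expected background/bin

# Fraction of the toy events that land inside the histogram range.
frac = dm_hist.sum() / n_toys

# Normalize the dark matter histogram, then take the weighted sum with the
# background expectation. If all toys fall in a single bin, this returns
# that bin's expected count; otherwise it smoothly interpolates between bins.
n = (dm_hist / dm_hist.sum() * bkg_hist).sum()
```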
Diffstat (limited to 'utils/dm-search')
| -rwxr-xr-x | utils/dm-search | 74 | 
1 file changed, 30 insertions, 44 deletions
```diff
diff --git a/utils/dm-search b/utils/dm-search
index 1e9c987..5b9cbaa 100755
--- a/utils/dm-search
+++ b/utils/dm-search
@@ -377,49 +377,15 @@ def get_dm_sample(n,dm_particle_id,dm_mass,dm_energy):
         data[i] = T1, T2, T1 + T2, id1, id2, dm_particle_id
     return pd.DataFrame(data)
 
-def integrate_mc_hist(mc, muons, id, x, a, b, atmo_scale_factor, muon_scale_factor):
-    """
-    Integrate the Monte Carlo expected histograms from a to b.
-
-    Args:
-        - mc: DataFrame
-            Pandas dataframe of atmospheric Monte Carlo events.
-        - muons: DataFrame
-            Pandas dataframe of external muon Monte Carlo events.
-        - id: int
-            The particle ID histogram to integrate.
-        - x: array
-            The fit parameters.
-        - a, b: float
-            Bounds of the integration.
-        - atmo_scale_factor, muon_scale_factor: float
-            Scale factors for the fit parameters.
-    """
-    mc_hists = get_mc_hists(mc,x,bins,scale=x[0]/atmo_scale_factor)
-    muon_hists = get_mc_hists(muons,x,bins,scale=x[5]/muon_scale_factor)
-
-    def total_hist(x):
-        if x < bins[0] or x > bins[-1]:
-            return 0
-        i = np.digitize(x,bins)
-        if i == len(bins):
-            i = len(bins) - 1
-        return (mc_hists[id][i-1] + muon_hists[id][i-1])/(bins[i]-bins[i-1])
-
-    # Yes, this is a stupid simple integration that I don't need quad for, but
-    # I don't want to have to code all the corner cases including when a and b
-    # are in the same bin, etc.
-    return quad(total_hist,a,b,points=bins[(bins >= a) & (bins <= b)])[0]
-
 def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin):
     limits = {}
     best_fit = {}
     discovery_array = {}
     for dm_particle_id in (2020,2222):
-        limits[dm_particle_id] = []
-        best_fit[dm_particle_id] = []
-        discovery_array[dm_particle_id] = []
-        for dm_mass in dm_masses[dm_particle_id]:
+        limits[dm_particle_id] = np.empty(len(dm_masses[dm_particle_id]))
+        best_fit[dm_particle_id] = np.empty(len(dm_masses[dm_particle_id]))
+        discovery_array[dm_particle_id] = np.empty(len(dm_masses[dm_particle_id]))
+        for i, dm_mass in enumerate(dm_masses[dm_particle_id]):
             id1 = dm_particle_id//100
             id2 = dm_particle_id % 100
             m1 = SNOMAN_MASS[id1]
@@ -428,7 +394,7 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b
             xopt, samples = do_fit(dm_particle_id,dm_mass,dm_energy,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,bins,steps,print_nll,walkers,thin)
 
             limit = np.percentile(samples[:,6],90)
-            limits[dm_particle_id].append(limit)
+            limits[dm_particle_id][i] = limit
 
             # Here, to determine if there is a discovery we make an approximate
             # calculation of the number of events which would be significant.
@@ -447,9 +413,22 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b
             # by n. Therefore, to determine the threshold, we simply look for
             # the threshold we expect in n and then subtract n.
             dm_kinetic_energy = dm_energy - m1 - m2
-            a = dm_kinetic_energy - 2*dm_kinetic_energy*DM_ENERGY_RESOLUTION
-            b = dm_kinetic_energy + 2*dm_kinetic_energy*DM_ENERGY_RESOLUTION
-            n = integrate_mc_hist(data_mc, muon, dm_particle_id, xopt, a, b, atmo_scale_factor, muon_scale_factor)
+
+            dm_sample = get_dm_sample(DM_SAMPLES,dm_particle_id,dm_mass,dm_energy)
+
+            # To calculate `n` we approximately want the number of events in
+            # the bin which most of the dark matter events will fall. However,
+            # to smoothly transition between bins, we multiply the normalized
+            # dark matter histogram with the expected MC histogram and then
+            # take the sum. In the case that the dark matter events all fall
+            # into a single bin, this gives us that bin, but smoothly
+            # interpolates between the bins.
+            dm_hists = get_mc_hists(dm_sample,xopt,bins,scale=1/len(dm_sample))
+            frac = dm_hists[dm_particle_id].sum()
+            dm_hists[dm_particle_id] /= frac
+            mc_hists = get_mc_hists(data_mc,xopt,bins,scale=xopt[0]/atmo_scale_factor)
+            muon_hists = get_mc_hists(muon,xopt,bins,scale=xopt[5]/muon_scale_factor)
+            n = (dm_hists[dm_particle_id]*(mc_hists[dm_particle_id] + muon_hists[dm_particle_id])).sum()
             # Set our discovery threshold to the p-value we want divided by the
             # number of bins. The idea here is that the number of bins is
             # approximately equal to the number of trials so we need to
@@ -457,8 +436,15 @@ def get_limits(dm_masses,data,muon,data_mc,atmo_scale_factor,muon_scale_factor,b
             # elsewhere effect.
             threshold = DISCOVERY_P_VALUE/(len(bins)-1)
             discovery = poisson.ppf(1-threshold,n) + 1 - n
-            discovery_array[dm_particle_id].append(discovery)
-            best_fit[dm_particle_id].append(xopt[6])
+            # Here, we scale the discovery threshold by the fraction of the
+            # dark matter hist in the histogram range. The idea is that if only
+            # a small fraction of the dark matter histogram falls into the
+            # histogram range, the total number of dark matter events returned
+            # by the fit can be larger by this amount. I noticed this when
+            # testing under the null hypothesis that the majority of the
+            # "discoveries" were on the edge of the histogram.
+            discovery_array[dm_particle_id][i] = discovery/frac
+            best_fit[dm_particle_id][i] = xopt[6]
     return limits, best_fit, discovery_array
```
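To make the threshold logic above concrete, here is a short sketch of the discovery calculation after this commit, with hypothetical numbers standing in for `n`, `frac`, the p-value, and the bin count:

```python
from scipy.stats import poisson

# Hypothetical values; in dm-search these come from the fit and the toy MC.
DISCOVERY_P_VALUE = 0.05
n = 12.5      # expected background events weighted by the dark matter hist
frac = 0.8    # fraction of the dark matter histogram inside the fit range
nbins = 46    # len(bins) - 1

# Poor man's trials factor: divide the target p-value by the number of bins,
# treating each bin as an independent trial.
threshold = DISCOVERY_P_VALUE / nbins

# Smallest excess above the background expectation n whose Poisson p-value
# is below the corrected threshold.
discovery = poisson.ppf(1 - threshold, n) + 1 - n

# Scale by 1/frac: the fit counts all dark matter events, including the
# fraction outside the histogram range, so the fitted number can exceed the
# in-range excess by this factor.
print(discovery / frac)
```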
