-rwxr-xr-x  utils/chi2        66
-rwxr-xr-x  utils/sddm/dc.py   7
2 files changed, 53 insertions, 20 deletions
diff --git a/utils/chi2 b/utils/chi2
index b1c3c17..0ae7cb8 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -84,7 +84,7 @@ def plot_hist2(hists, bins, color=None):
if len(df):
plt.tight_layout()
-def get_mc_hists(data,x,bins,apply_norm=False):
+def get_mc_hists(data,x,bins,apply_norm=False,reweight=False):
"""
Returns the expected Monte Carlo histograms for the atmospheric neutrino
background.
@@ -113,9 +113,17 @@ def get_mc_hists(data,x,bins,apply_norm=False):
ke_dict = {}
for id in (20,22,2020,2022,2222):
ke_dict[id] = data[data.id == id].ke.values
- return get_mc_hists_fast(ke_dict,x,bins,apply_norm)
-def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False):
+ if reweight:
+ ke_weights_dict = {}
+ for id in (20,22,2020,2022,2222):
+ ke_weights_dict[id] = data[data.id == id].weight.values
+ else:
+ ke_weights_dict = None
+
+ return get_mc_hists_fast(ke_dict,x,bins,apply_norm,weights_dict=ke_weights_dict)
+
+def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False,weights_dict=None):
"""
Same as get_mc_hists() but the first argument is a dictionary mapping
particle id -> kinetic energy array. This is much faster than selecting the
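The new `reweight` flag just threads a parallel dictionary of per-event GENIE weights through to the histogramming. A minimal sketch of the lookup, assuming `data` is a pandas DataFrame with `id`, `ke`, and `weight` columns as in the hunk above:

```python
import pandas as pd

PARTICLE_IDS = (20, 22, 2020, 2022, 2222)

def split_by_id(data, reweight=False):
    # Kinetic energies per particle id combo.
    ke_dict = {id: data[data.id == id].ke.values for id in PARTICLE_IDS}
    weights_dict = None
    if reweight:
        # Same boolean mask as above, so each weights array stays aligned
        # with the corresponding kinetic energy array.
        weights_dict = {id: data[data.id == id].weight.values
                        for id in PARTICLE_IDS}
    return ke_dict, weights_dict
```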
@@ -140,7 +148,10 @@ def get_mc_hists_fast(ke_dict,x,bins,apply_norm=False):
ke = ke*x[5]
scale = bincenters2*max(EPSILON,x[6])
- hist = np.histogram(ke,bins=bins2)[0]
+ if weights_dict:
+ hist = np.histogram(ke,bins=bins2,weights=weights_dict[id])[0]
+ else:
+ hist = np.histogram(ke,bins=bins2)[0]
cdf = norm.cdf(bins[:,np.newaxis],bincenters2,scale)*hist
mc_hists[id] = np.sum(cdf[1:] - cdf[:-1],axis=-1)
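Passing `weights=` to `np.histogram` sums the per-event weights in each bin instead of counting events; the Gaussian smearing onto the coarse bins is unchanged. A self-contained sketch of that step (the fine/coarse binning scheme follows the code above):

```python
import numpy as np
from scipy.stats import norm

def smeared_hist(ke, bins, bins2, bincenters2, scale, weights=None):
    # With weights=None this is a plain count per fine bin; otherwise it
    # is the sum of the per-event weights per fine bin.
    hist = np.histogram(ke, bins=bins2, weights=weights)[0]
    # Smear each fine bin center by a Gaussian of width `scale` and
    # integrate the result over the coarse analysis bins.
    cdf = norm.cdf(bins[:, np.newaxis], bincenters2, scale) * hist
    return np.sum(cdf[1:] - cdf[:-1], axis=-1)
```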
@@ -252,7 +263,7 @@ def make_nll(data, muons, mc, bins):
return nll
return nll
-def get_mc_hists_posterior(ke_dict,muon_hists,data_hists,x,bins):
+def get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins):
"""
Returns the posterior on the Monte Carlo histograms.
@@ -276,7 +287,7 @@ def get_mc_hists_posterior(ke_dict,muon_hists,data_hists,x,bins):
Returns a dictionary mapping particle id combo -> histogram.
"""
- mc_hists = get_mc_hists_fast(ke_dict,x,bins)
+ mc_hists = get_mc_hists(data_mc,x,bins,reweight=True)
for id in (20,22,2020,2022,2222):
mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
    # FIXME: does the ordering of when we add the muons matter here?
@@ -306,14 +317,20 @@ def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percenti
data_hists = get_data_hists(data,bins)
muon_hists = get_data_hists(data_muon,bins)
- ke_dict = {}
- for _id in (20,22,2020,2022,2222):
- ke_dict[_id] = data_mc[data_mc.id == _id].ke.values
+ # Get the total number of "universes" simulated in the GENIE reweight tool
+ if len(data_mc):
+ nuniverses = data_mc['universe'].max()+1
+ else:
+ nuniverses = 0
ps = []
for i in range(size):
x = x_samples[np.random.randint(x_samples.shape[0])]
- mc = get_mc_hists_posterior(ke_dict,muon_hists,data_hists,x,bins)[id]
+ if nuniverses > 0:
+ universe = np.random.randint(nuniverses)
+ else:
+ universe = 0
+ mc = get_mc_hists_posterior(data_mc[data_mc.universe == universe],muon_hists,data_hists,x,bins)[id]
N = mc.sum()
        # Work around a bug in scipy. See https://github.com/scipy/scipy/issues/8235 (I think).
mc = mc + 1e-10
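Each p value draw now pairs a random posterior sample with a randomly chosen GENIE universe. A sketch of the universe selection, assuming the merged Monte Carlo table carries an integer `universe` column numbered from 0:

```python
import numpy as np

def sample_universe(data_mc):
    # Total number of "universes" simulated by the GENIE reweight tool.
    nuniverses = data_mc['universe'].max() + 1 if len(data_mc) else 0
    universe = np.random.randint(nuniverses) if nuniverses > 0 else 0
    # Only events reweighted under the chosen universe enter the
    # posterior histograms.
    return data_mc[data_mc.universe == universe]
```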
@@ -376,7 +393,7 @@ def do_fit(data,muon,data_mc,bins,steps):
nll_xopt = nll(xopt)
print("nll(xopt) = ", nll(xopt))
- stepsizes = estimate_errors(nll,xopt,low,high,constraints)
+ stepsizes = estimate_errors(nll,xopt,low,high)
with printoptions(precision=3, suppress=True):
print("Errors: ", stepsizes)
@@ -424,6 +441,7 @@ if __name__ == '__main__':
parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
parser.add_argument("--multinomial-prob-size", type=int, default=10000, help="number of p values to compute")
parser.add_argument("--coverage", type=int, default=0, help="plot p value coverage")
+ parser.add_argument("--weights", nargs='+', required=True, help="GENIE reweight HDF5 files")
args = parser.parse_args()
setup_matplotlib(args.save)
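With `nargs='+'` and `required=True`, argparse collects one or more filenames into a list and errors out if the flag is missing. For example (filenames hypothetical):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--weights", nargs='+', required=True,
                    help="GENIE reweight HDF5 files")

args = parser.parse_args(["--weights", "weights_1.hdf5", "weights_2.hdf5"])
assert args.weights == ["weights_1.hdf5", "weights_2.hdf5"]
```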
@@ -439,6 +457,7 @@ if __name__ == '__main__':
ev_mc = get_events(args.mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
+ weights = pd.concat([read_hdf(filename, "weights") for filename in args.weights],ignore_index=True)
# Set all prompt events in the MC to be muons
muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True
@@ -484,6 +503,19 @@ if __name__ == '__main__':
data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
+ # Merge the reweight scales only after we've done all the data cleaning and
+ # high level cuts
+ data_mc_with_weights = pd.merge(data_mc,weights,how='left',on=['run','evn'])
+ data_atm_mc_with_weights = pd.merge(data_atm_mc,weights,how='left',on=['run','evn'])
+
+ # Make sure we are only using data with weights
+ #
+ # FIXME: There is probably a better way to do this. For example, we could
+ # use the full dataset for the fit and then scale the normalization factor
+ # by the ratio of events with weights to the total number of events.
+ data_mc = data_mc_with_weights[data_mc_with_weights.universe == 0]
+ data_atm_mc = data_atm_mc_with_weights[data_atm_mc_with_weights.universe == 0]
+
bins = np.logspace(np.log10(20),np.log10(10e3),21)
if args.coverage:
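The left merge fans each Monte Carlo event out into one row per universe, and the `universe == 0` cut then restores one row per event for the fit. A toy example of the shapes involved (the column layout is assumed from how the table is used here):

```python
import pandas as pd

data_mc = pd.DataFrame({"run": [1, 1], "evn": [7, 8], "ke": [150., 420.]})
weights = pd.DataFrame({"run": [1, 1, 1, 1],
                        "evn": [7, 7, 8, 8],
                        "universe": [0, 1, 0, 1],
                        "weight": [1.02, 0.97, 0.88, 1.10]})

# Four rows: each (run, evn) pair appears once per universe. Events with
# no entry in `weights` get NaN for 'universe' and 'weight'.
merged = pd.merge(data_mc, weights, how='left', on=['run', 'evn'])

# NaN != 0, so events without weights are dropped here, which is what
# enforces the "only use data with weights" requirement above.
data_mc_u0 = merged[merged.universe == 0]
```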
@@ -519,8 +551,8 @@ if __name__ == '__main__':
n_muon_atm = np.random.poisson(N_muon_atm)
# Sample data from Monte Carlo
- data = pd.concat((data_mc.sample(n=n,replace=True), muon.sample(n=n_muon,replace=True)))
- data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True), muon_atm.sample(n=n_muon_atm,replace=True)))
+ data = pd.concat((data_mc.sample(n=n,replace=True,weights='weights'), muon.sample(n=n_muon,replace=True)))
+ data_atm = pd.concat((data_atm_mc.sample(n=n_atm,replace=True,weights='weights'), muon_atm.sample(n=n_muon_atm,replace=True)))
# Smear the energies by the additional energy resolution
data.ke += np.random.randn(len(data.ke))*data.ke*energy_resolution
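`DataFrame.sample` accepts a column label for `weights`, so the fake datasets are now bootstrapped with probability proportional to each event's GENIE weight. A toy draw (the `weights` column name is taken from the call above):

```python
import pandas as pd

df = pd.DataFrame({"ke": [100., 200., 300.],
                   "weights": [0.5, 1.0, 2.0]})

# Rows are drawn with probability proportional to their weight, so the
# resampled spectrum follows the reweighted one on average.
fake = df.sample(n=1000, replace=True, weights='weights')
```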
@@ -533,8 +565,8 @@ if __name__ == '__main__':
std = np.std(samples[:,i])
pull[i].append((mean - true_values[i])/std)
- prob = get_prob(data,muon,data_mc,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
for id in (20,22,2020,2022,2222):
p_values[id].append(prob[id])
@@ -613,8 +645,8 @@ if __name__ == '__main__':
xopt, samples = do_fit(data,muon,data_mc,bins,args.steps)
- prob = get_prob(data,muon,data_mc,samples,bins,size=args.multinomial_prob_size)
- prob_atm = get_prob(data_atm,muon_atm,data_atm_mc,samples,bins,size=args.multinomial_prob_size)
+ prob = get_prob(data,muon,data_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
+ prob_atm = get_prob(data_atm,muon_atm,data_atm_mc_with_weights,samples,bins,size=args.multinomial_prob_size)
plt.figure()
plt.subplot(3,3,1)
diff --git a/utils/sddm/dc.py b/utils/sddm/dc.py
index 1146213..461769e 100755
--- a/utils/sddm/dc.py
+++ b/utils/sddm/dc.py
@@ -93,7 +93,7 @@ def get_proposal_func(stepsizes, low, high):
return x, log_p_x0_given_x - log_p_x_given_x0
return proposal
-def estimate_errors(nll, xopt, low, high, constraints):
+def estimate_errors(nll, xopt, low, high, constraints=None):
"""
Return approximate 1 sigma errors for each parameter assuming all
parameters are uncorrelated.
@@ -115,11 +115,12 @@ def estimate_errors(nll, xopt, low, high, constraints):
errors = np.empty_like(xopt)
for i in range(len(xopt)):
- if np.sign(root(low[i],xopt,i)) == np.sign(nll_xopt):
+ if np.sign(root(low[i],xopt,i)) == np.sign(root(xopt[i],xopt,i)):
xlo = xopt[i] - low[i]
else:
xlo = brentq(root,low[i],xopt[i],args=(xopt,i))
- if np.sign(root(high[i],xopt,i)) == np.sign(nll_xopt):
+
+ if np.sign(root(high[i],xopt,i)) == np.sign(root(xopt[i],xopt,i)):
xhi = high[i] - xopt[i]
else:
xhi = brentq(root,xopt[i],high[i],args=(xopt,i))
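The fix here addresses the bracketing check: `brentq` needs a sign change across its interval, so the right test is whether `root` changes sign between the best fit point and the boundary, not whether the boundary value shares a sign with the absolute NLL (whose sign says nothing about a bracket). A toy version of the scan; the exact form of `root` in dc.py is an assumption, but for 1 sigma errors it should cross zero where the NLL rises by 0.5, and the error is written here as a distance from the best fit:

```python
import numpy as np
from scipy.optimize import brentq

def nll(x):
    # Toy parabolic NLL with minimum at x = 1 and sigma = 0.1.
    return 0.5 * np.sum(((x - 1.0) / 0.1) ** 2)

def root(xi, xopt, i):
    x = np.array(xopt, dtype=float)
    x[i] = xi
    # Zero where the NLL has risen by 0.5, i.e. the 1 sigma point.
    return nll(x) - nll(xopt) - 0.5

xopt, low, high = np.array([1.0]), np.array([0.0]), np.array([2.0])
i = 0
if np.sign(root(high[i], xopt, i)) == np.sign(root(xopt[i], xopt, i)):
    # No sign change over [xopt, high]: the NLL never rises by 0.5 before
    # the bound, so brentq has no bracket. Fall back to the distance to
    # the bound.
    xhi = high[i] - xopt[i]
else:
    xhi = brentq(root, xopt[i], high[i], args=(xopt, i)) - xopt[i]

print(xhi)  # ~0.1, the toy sigma
```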