aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xutils/chi2113
1 files changed, 62 insertions, 51 deletions
diff --git a/utils/chi2 b/utils/chi2
index 444638b..18c162b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -93,11 +93,11 @@ def get_mc_hists(data,x,bins,apply_norm=False):
mc_hists[id] *= x[0]
return mc_hists
-def get_data_hists(data,bins):
+def get_data_hists(data,bins,scale=1.0):
data_hists = {}
for id in (20,22,2020,2022,2222):
df_id = data[data.id == id]
- data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale
return data_hists
# Likelihood Fit Parameters
@@ -108,9 +108,11 @@ def get_data_hists(data,bins):
# 4 - Muon energy resolution
# 5 - Electron + Muon energy bias
# 6 - Electron + Muon energy resolution
+# 7 - External Muon scale
-def make_nll(data, mc, bins):
+def make_nll(data, muons, mc, bins):
data_hists = get_data_hists(data,bins)
+ muon_hists = get_data_hists(muons,bins)
def nll(x, grad=None):
if any(x[i] < 0 for i in range(len(x))):
@@ -121,7 +123,7 @@ def make_nll(data, mc, bins):
nll = 0
for id in data_hists:
oi = data_hists[id]
- ei = mc_hists[id] + EPSILON
+ ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON
N = ei.sum()
nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))
nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e'])
@@ -134,16 +136,17 @@ def make_nll(data, mc, bins):
return nll
return nll
-def get_mc_hists_posterior(data,data_hists,x,bins):
+def get_mc_hists_posterior(data,muon_hists,data_hists,x,bins):
mc_hists = get_mc_hists(data,x,bins)
for id in (20,22,2020,2022,2222):
mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
+ mc_hists[id] += muon_hists[id]*x[7]
return mc_hists
def get_data_hist(data,x,bins):
return np.histogram(data,bins=bins)[0]
-def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=99.0, size=10000):
"""
Returns the p-value that the histogram of the data is drawn from the MC
histogram.
@@ -164,11 +167,12 @@ def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, si
size: number of values to compute
"""
data_hists = get_data_hists(data,bins)
+ muon_hists = get_data_hists(data_muon,bins)
ps = []
for i in range(size):
x = x_samples[np.random.randint(x_samples.shape[0])]
- mc = get_mc_hists_posterior(data_mc,data_hists,x,bins)[id]
+ mc = get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins)[id]
N = mc.sum()
# Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
mc = mc + 1e-10
@@ -206,8 +210,10 @@ if __name__ == '__main__':
parser.add_argument("filenames", nargs='+', help="input files")
parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
+ parser.add_argument("--muon-mc", nargs='+', required=True, help="muon MC files")
parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
+ parser.add_argument("--multinomial-prob-size", type=int, default=10000, help="number of p values to compute")
args = parser.parse_args()
setup_matplotlib(args.save)
@@ -221,62 +227,58 @@ if __name__ == '__main__':
evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
ev = pd.concat(evs)
- ev_mc = get_events(args.mc, merge_fits=True)
+ ev_mc = get_events(args.mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
+ muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
- ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
+ # Set all prompt events in the MC to be muons
+ muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True
ev = ev.reset_index()
ev_mc = ev_mc.reset_index()
-
- # First, do basic data cleaning which is done for all events.
- ev = ev[ev.signal]
- ev_mc = ev_mc[ev_mc.signal]
+ muon_mc = muon_mc.reset_index()
# 00-orphan cut
ev = ev[(ev.gtid & 0xff) != 0]
ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0]
-
- print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))
+ muon_mc = muon_mc[(muon_mc.gtid & 0xff) != 0]
# remove events 200 microseconds after a muon
ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
- # Get rid of muon events in our main event sample
- ev = ev[(ev.dc & DC_MUON) == 0]
-
+ # Get rid of events which don't have a successful fit
ev = ev[~np.isnan(ev.fmin)]
ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
-
- prompt = ev[ev.prompt & ~ev.atm & ~ev.muon]
- atm = ev[ev.atm]
-
- prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon]
- atm_mc = ev_mc[ev_mc.atm]
+ muon_mc = muon_mc[~np.isnan(muon_mc.fmin)]
# require (r/r_psup)^3 < 0.9
- prompt = prompt[prompt.r_psup < 0.9]
- atm = atm[atm.r_psup < 0.9]
-
- prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9]
- atm_mc = atm_mc[atm_mc.r_psup < 0.9]
+ ev = ev[ev.r_psup < 0.9]
+ ev_mc = ev_mc[ev_mc.r_psup < 0.9]
+ muon_mc = muon_mc[muon_mc.r_psup < 0.9]
# require psi < 6
- prompt = prompt[prompt.psi < 6]
- atm = atm[atm.psi < 6]
+ ev = ev[ev.psi < 6]
+ ev_mc = ev_mc[ev_mc.psi < 6]
+ muon_mc = muon_mc[muon_mc.psi < 6]
- prompt_mc = prompt_mc[prompt_mc.psi < 6]
- atm_mc = atm_mc[atm_mc.psi < 6]
+ data = ev[ev.signal & ev.prompt & ~ev.atm]
+ data_atm = ev[ev.signal & ev.prompt & ev.atm]
- print("number of events after psi cut = %i" % len(prompt))
+ # Right now we use the muon Monte Carlo in the fit. If you want to use the
+ # actual data, you can comment the next two lines and then uncomment the
+ # two after that.
+ muon = muon_mc[muon_mc.muon & muon_mc.prompt & ~muon_mc.atm]
+ muon_atm = muon_mc[muon_mc.muon & muon_mc.prompt & muon_mc.atm]
+ #muon = ev[ev.muon & ev.prompt & ~ev.atm]
+ #muon_atm = ev[ev.muon & ev.prompt & ev.atm]
- data = prompt
- mc = prompt_mc
+ data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
+ data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
bins = np.logspace(np.log10(20),np.log10(10e3),21)
- nll = make_nll(data,mc,bins)
+ nll = make_nll(data,muon,data_mc,bins)
- x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON])
+ x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON])
opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
opt.set_min_objective(nll)
low = np.array([EPSILON]*len(x0))
@@ -319,40 +321,47 @@ if __name__ == '__main__':
plt.hist(samples[:,1],bins=100,histtype='step')
plt.xlabel("Electron Energy Scale")
plt.subplot(3,3,3)
- plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.hist(samples[:,2],bins=100,histtype='step')
plt.xlabel("Electron Energy Resolution")
plt.subplot(3,3,4)
- plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.hist(samples[:,3],bins=100,histtype='step')
plt.xlabel("Muon Energy Scale")
plt.subplot(3,3,5)
- plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.hist(samples[:,4],bins=100,histtype='step')
plt.xlabel("Muon Energy Resolution")
plt.subplot(3,3,6)
- plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.hist(samples[:,5],bins=100,histtype='step')
plt.xlabel("Electron + Muon Energy Scale")
plt.subplot(3,3,7)
- plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.hist(samples[:,6],bins=100,histtype='step')
plt.xlabel("Electron + Muon Energy Resolution")
+ plt.subplot(3,3,8)
+ plt.hist(samples[:,7],bins=100,histtype='step')
+ plt.xlabel("Muon Scale")
+ plt.tight_layout()
prob = {}
for id in (20,22,2020,2022,2222):
- prob[id] = get_multinomial_prob(data,mc,id,samples,bins)
+ prob[id] = get_multinomial_prob(data,muon,data_mc,id,samples,bins,size=args.multinomial_prob_size)
print(id, prob[id])
prob_atm = {}
for id in (20,22,2020,2022,2222):
- prob_atm[id] = get_multinomial_prob(atm,atm_mc,id,samples,bins)
+ prob_atm[id] = get_multinomial_prob(data_atm,muon_atm,data_atm_mc,id,samples,bins,size=args.multinomial_prob_size)
print(id, prob_atm[id])
handles = [Line2D([0], [0], color='C0'),
- Line2D([0], [0], color='C1')]
- labels = ('Data','Monte Carlo')
+ Line2D([0], [0], color='C1'),
+ Line2D([0], [0], color='C2')]
+ labels = ('Data','Monte Carlo','External Muons')
fig = plt.figure()
- hists = get_data_hists(prompt,bins)
- hists_mc = get_mc_hists(prompt_mc,xopt,bins,apply_norm=True)
+ hists = get_data_hists(data,bins)
+ hists_muon = get_data_hists(muon,bins,scale=xopt[7])
+ hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True)
plot_hist2(hists,bins=bins,color='C0')
plot_hist2(hists_mc,bins=bins,color='C1')
+ plot_hist2(hists_muon,bins=bins,color='C2')
for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)
@@ -374,10 +383,12 @@ if __name__ == '__main__':
else:
plt.suptitle("Without Neutron Follower")
fig = plt.figure()
- hists = get_data_hists(atm,bins)
- hists_mc = get_mc_hists(atm_mc,xopt,bins,apply_norm=True)
+ hists = get_data_hists(data_atm,bins)
+ hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7])
+ hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True)
plot_hist2(hists,bins=bins,color='C0')
plot_hist2(hists_mc,bins=bins,color='C1')
+ plot_hist2(hists_muon,bins=bins,color='C1')
for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)