1 files changed, 62 insertions, 51 deletions
diff --git a/utils/chi2 b/utils/chi2
index 444638b..18c162b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -93,11 +93,11 @@ def get_mc_hists(data,x,bins,apply_norm=False):
             mc_hists[id] *= x[0]
     return mc_hists
 
-def get_data_hists(data,bins):
+def get_data_hists(data,bins,scale=1.0):
     data_hists = {}
     for id in (20,22,2020,2022,2222):
         df_id = data[data.id == id]
-        data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+        data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale
     return data_hists
 
 # Likelihood Fit Parameters
@@ -108,9 +108,11 @@ def get_data_hists(data,bins):
 # 4 - Muon energy resolution
 # 5 - Electron + Muon energy bias
 # 6 - Electron + Muon energy resolution
+# 7 - External Muon scale
 
-def make_nll(data, mc, bins):
+def make_nll(data, muons, mc, bins):
     data_hists = get_data_hists(data,bins)
+    muon_hists = get_data_hists(muons,bins)
 
     def nll(x, grad=None):
         if any(x[i] < 0 for i in range(len(x))):
@@ -121,7 +123,7 @@ def make_nll(data, mc, bins):
         nll = 0
         for id in data_hists:
             oi = data_hists[id]
-            ei = mc_hists[id] + EPSILON
+            ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON
             N = ei.sum()
             nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei))
         nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e'])
@@ -134,16 +136,17 @@ def make_nll(data, mc, bins):
         return nll
     return nll
 
-def get_mc_hists_posterior(data,data_hists,x,bins):
+def get_mc_hists_posterior(data,muon_hists,data_hists,x,bins):
     mc_hists = get_mc_hists(data,x,bins)
     for id in (20,22,2020,2022,2222):
         mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0])
+        mc_hists[id] += muon_hists[id]*x[7]
     return mc_hists
 
 def get_data_hist(data,x,bins):
     return np.histogram(data,bins=bins)[0]
 
-def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, size=10000):
+def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=99.0, size=10000):
     """
     Returns the p-value that the histogram of the data is drawn from the MC
     histogram.
@@ -164,11 +167,12 @@ def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, si
         size: number of values to compute
     """
     data_hists = get_data_hists(data,bins)
+    muon_hists = get_data_hists(data_muon,bins)
 
     ps = []
     for i in range(size):
         x = x_samples[np.random.randint(x_samples.shape[0])]
-        mc = get_mc_hists_posterior(data_mc,data_hists,x,bins)[id]
+        mc = get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins)[id]
         N = mc.sum()
         # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
         mc = mc + 1e-10
@@ -206,8 +210,10 @@ if __name__ == '__main__':
     parser.add_argument("filenames", nargs='+', help="input files")
     parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
     parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
+    parser.add_argument("--muon-mc", nargs='+', required=True, help="muon MC files")
     parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
     parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
+    parser.add_argument("--multinomial-prob-size", type=int, default=10000, help="number of p values to compute")
     args = parser.parse_args()
 
     setup_matplotlib(args.save)
@@ -221,62 +227,58 @@ if __name__ == '__main__':
         evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
     ev = pd.concat(evs)
 
-    ev_mc = get_events(args.mc, merge_fits=True)
+    ev_mc = get_events(args.mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
+    muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh)
 
-    ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
+    # Set all prompt events in the MC to be muons
+    muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True
 
     ev = ev.reset_index()
     ev_mc = ev_mc.reset_index()
-
-    # First, do basic data cleaning which is done for all events.
-    ev = ev[ev.signal]
-    ev_mc = ev_mc[ev_mc.signal]
+    muon_mc = muon_mc.reset_index()
 
     # 00-orphan cut
     ev = ev[(ev.gtid & 0xff) != 0]
     ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0]
-
-    print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt))
+    muon_mc = muon_mc[(muon_mc.gtid & 0xff) != 0]
 
     # remove events 200 microseconds after a muon
     ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut)
 
-    # Get rid of muon events in our main event sample
-    ev = ev[(ev.dc & DC_MUON) == 0]
-
+    # Get rid of events which don't have a successful fit
     ev = ev[~np.isnan(ev.fmin)]
     ev_mc = ev_mc[~np.isnan(ev_mc.fmin)]
-
-    prompt = ev[ev.prompt & ~ev.atm & ~ev.muon]
-    atm = ev[ev.atm]
-
-    prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon]
-    atm_mc = ev_mc[ev_mc.atm]
+    muon_mc = muon_mc[~np.isnan(muon_mc.fmin)]
 
     # require (r/r_psup)^3 < 0.9
-    prompt = prompt[prompt.r_psup < 0.9]
-    atm = atm[atm.r_psup < 0.9]
-
-    prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9]
-    atm_mc = atm_mc[atm_mc.r_psup < 0.9]
+    ev = ev[ev.r_psup < 0.9]
+    ev_mc = ev_mc[ev_mc.r_psup < 0.9]
+    muon_mc = muon_mc[muon_mc.r_psup < 0.9]
 
     # require psi < 6
-    prompt = prompt[prompt.psi < 6]
-    atm = atm[atm.psi < 6]
+    ev = ev[ev.psi < 6]
+    ev_mc = ev_mc[ev_mc.psi < 6]
+    muon_mc = muon_mc[muon_mc.psi < 6]
 
-    prompt_mc = prompt_mc[prompt_mc.psi < 6]
-    atm_mc = atm_mc[atm_mc.psi < 6]
+    data = ev[ev.signal & ev.prompt & ~ev.atm]
+    data_atm = ev[ev.signal & ev.prompt & ev.atm]
 
-    print("number of events after psi cut = %i" % len(prompt))
+    # Right now we use the muon Monte Carlo in the fit. If you want to use the
+    # actual data, you can comment the next two lines and then uncomment the
+    # two after that.
+    muon = muon_mc[muon_mc.muon & muon_mc.prompt & ~muon_mc.atm]
+    muon_atm = muon_mc[muon_mc.muon & muon_mc.prompt & muon_mc.atm]
+    #muon = ev[ev.muon & ev.prompt & ~ev.atm]
+    #muon_atm = ev[ev.muon & ev.prompt & ev.atm]
 
-    data = prompt
-    mc = prompt_mc
+    data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm]
+    data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm]
 
     bins = np.logspace(np.log10(20),np.log10(10e3),21)
 
-    nll = make_nll(data,mc,bins)
+    nll = make_nll(data,muon,data_mc,bins)
 
-    x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON])
+    x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON])
     opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
     opt.set_min_objective(nll)
     low = np.array([EPSILON]*len(x0))
@@ -319,40 +321,47 @@ if __name__ == '__main__':
     plt.hist(samples[:,1],bins=100,histtype='step')
     plt.xlabel("Electron Energy Scale")
     plt.subplot(3,3,3)
-    plt.hist(samples[:,1],bins=100,histtype='step')
+    plt.hist(samples[:,2],bins=100,histtype='step')
     plt.xlabel("Electron Energy Resolution")
     plt.subplot(3,3,4)
-    plt.hist(samples[:,1],bins=100,histtype='step')
+    plt.hist(samples[:,3],bins=100,histtype='step')
     plt.xlabel("Muon Energy Scale")
     plt.subplot(3,3,5)
-    plt.hist(samples[:,1],bins=100,histtype='step')
+    plt.hist(samples[:,4],bins=100,histtype='step')
     plt.xlabel("Muon Energy Resolution")
     plt.subplot(3,3,6)
-    plt.hist(samples[:,1],bins=100,histtype='step')
+    plt.hist(samples[:,5],bins=100,histtype='step')
     plt.xlabel("Electron + Muon Energy Scale")
     plt.subplot(3,3,7)
-    plt.hist(samples[:,1],bins=100,histtype='step')
+    plt.hist(samples[:,6],bins=100,histtype='step')
     plt.xlabel("Electron + Muon Energy Resolution")
+    plt.subplot(3,3,8)
+    plt.hist(samples[:,7],bins=100,histtype='step')
+    plt.xlabel("Muon Scale")
+    plt.tight_layout()
 
     prob = {}
     for id in (20,22,2020,2022,2222):
-        prob[id] = get_multinomial_prob(data,mc,id,samples,bins)
+        prob[id] = get_multinomial_prob(data,muon,data_mc,id,samples,bins,size=args.multinomial_prob_size)
         print(id, prob[id])
 
     prob_atm = {}
     for id in (20,22,2020,2022,2222):
-        prob_atm[id] = get_multinomial_prob(atm,atm_mc,id,samples,bins)
+        prob_atm[id] = get_multinomial_prob(data_atm,muon_atm,data_atm_mc,id,samples,bins,size=args.multinomial_prob_size)
         print(id, prob_atm[id])
 
     handles = [Line2D([0], [0], color='C0'),
-               Line2D([0], [0], color='C1')]
-    labels = ('Data','Monte Carlo')
+               Line2D([0], [0], color='C1'),
+               Line2D([0], [0], color='C2')]
+    labels = ('Data','Monte Carlo','External Muons')
 
     fig = plt.figure()
-    hists = get_data_hists(prompt,bins)
-    hists_mc = get_mc_hists(prompt_mc,xopt,bins,apply_norm=True)
+    hists = get_data_hists(data,bins)
+    hists_muon = get_data_hists(muon,bins,scale=xopt[7])
+    hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True)
     plot_hist2(hists,bins=bins,color='C0')
     plot_hist2(hists_mc,bins=bins,color='C1')
+    plot_hist2(hists_muon,bins=bins,color='C2')
     for id in (20,22,2020,2022,2222):
         if id == 20:
             plt.subplot(2,3,1)
@@ -374,10 +383,12 @@ if __name__ == '__main__':
     else:
         plt.suptitle("Without Neutron Follower")
     fig = plt.figure()
-    hists = get_data_hists(atm,bins)
-    hists_mc = get_mc_hists(atm_mc,xopt,bins,apply_norm=True)
+    hists = get_data_hists(data_atm,bins)
+    hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7])
+    hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True)
     plot_hist2(hists,bins=bins,color='C0')
     plot_hist2(hists_mc,bins=bins,color='C1')
+    plot_hist2(hists_muon,bins=bins,color='C1')
     for id in (20,22,2020,2022,2222):
         if id == 20:
             plt.subplot(2,3,1)