diff options
author | tlatorre <tlatorre@uchicago.edu> | 2020-08-20 14:25:44 -0500 |
---|---|---|
committer | tlatorre <tlatorre@uchicago.edu> | 2020-08-20 14:25:44 -0500 |
commit | aedf61a2bb1ab7479f5d27cebd2c83d0c107260f (patch) | |
tree | 51e86702ee8a8f9dba7c4a151da42552fcd5a867 | |
parent | c98657689b3e14417d4e3ff7067e339b7a8bf14e (diff) | |
download | sddm-aedf61a2bb1ab7479f5d27cebd2c83d0c107260f.tar.gz sddm-aedf61a2bb1ab7479f5d27cebd2c83d0c107260f.tar.bz2 sddm-aedf61a2bb1ab7479f5d27cebd2c83d0c107260f.zip |
add external muons to the chi2 fit
-rwxr-xr-x | utils/chi2 | 113 |
1 files changed, 62 insertions, 51 deletions
@@ -93,11 +93,11 @@ def get_mc_hists(data,x,bins,apply_norm=False): mc_hists[id] *= x[0] return mc_hists -def get_data_hists(data,bins): +def get_data_hists(data,bins,scale=1.0): data_hists = {} for id in (20,22,2020,2022,2222): df_id = data[data.id == id] - data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0] + data_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]*scale return data_hists # Likelihood Fit Parameters @@ -108,9 +108,11 @@ def get_data_hists(data,bins): # 4 - Muon energy resolution # 5 - Electron + Muon energy bias # 6 - Electron + Muon energy resolution +# 7 - External Muon scale -def make_nll(data, mc, bins): +def make_nll(data, muons, mc, bins): data_hists = get_data_hists(data,bins) + muon_hists = get_data_hists(muons,bins) def nll(x, grad=None): if any(x[i] < 0 for i in range(len(x))): @@ -121,7 +123,7 @@ def make_nll(data, mc, bins): nll = 0 for id in data_hists: oi = data_hists[id] - ei = mc_hists[id] + EPSILON + ei = mc_hists[id] + muon_hists[id]*x[7] + EPSILON N = ei.sum() nll -= -N - np.sum(gammaln(oi+1)) + np.sum(oi*np.log(ei)) nll -= norm.logpdf(x[1],ENERGY_SCALE_MEAN['e'],ENERGY_SCALE_UNCERTAINTY['e']) @@ -134,16 +136,17 @@ def make_nll(data, mc, bins): return nll return nll -def get_mc_hists_posterior(data,data_hists,x,bins): +def get_mc_hists_posterior(data,muon_hists,data_hists,x,bins): mc_hists = get_mc_hists(data,x,bins) for id in (20,22,2020,2022,2222): mc_hists[id] = get_mc_hist_posterior(mc_hists[id],data_hists[id],norm=x[0]) + mc_hists[id] += muon_hists[id]*x[7] return mc_hists def get_data_hist(data,x,bins): return np.histogram(data,bins=bins)[0] -def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, size=10000): +def get_multinomial_prob(data, data_muon, data_mc, id, x_samples, bins, percentile=99.0, size=10000): """ Returns the p-value that the histogram of the data is drawn from the MC histogram. @@ -164,11 +167,12 @@ def get_multinomial_prob(data, data_mc, id, x_samples, bins, percentile=99.0, si size: number of values to compute """ data_hists = get_data_hists(data,bins) + muon_hists = get_data_hists(data_muon,bins) ps = [] for i in range(size): x = x_samples[np.random.randint(x_samples.shape[0])] - mc = get_mc_hists_posterior(data_mc,data_hists,x,bins)[id] + mc = get_mc_hists_posterior(data_mc,muon_hists,data_hists,x,bins)[id] N = mc.sum() # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think). mc = mc + 1e-10 @@ -206,8 +210,10 @@ if __name__ == '__main__': parser.add_argument("filenames", nargs='+', help="input files") parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds") parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files") + parser.add_argument("--muon-mc", nargs='+', required=True, help="muon MC files") parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)") parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain") + parser.add_argument("--multinomial-prob-size", type=int, default=10000, help="number of p values to compute") args = parser.parse_args() setup_matplotlib(args.save) @@ -221,62 +227,58 @@ if __name__ == '__main__': evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh)) ev = pd.concat(evs) - ev_mc = get_events(args.mc, merge_fits=True) + ev_mc = get_events(args.mc, merge_fits=True, nhit_thresh=args.nhit_thresh) + muon_mc = get_events(args.muon_mc, merge_fits=True, nhit_thresh=args.nhit_thresh) - ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh) + # Set all prompt events in the MC to be muons + muon_mc.loc[muon_mc.prompt & muon_mc.filename.str.contains("cosmic"),'muon'] = True ev = ev.reset_index() ev_mc = ev_mc.reset_index() - - # First, do basic data cleaning which is done for all events. - ev = ev[ev.signal] - ev_mc = ev_mc[ev_mc.signal] + muon_mc = muon_mc.reset_index() # 00-orphan cut ev = ev[(ev.gtid & 0xff) != 0] ev_mc = ev_mc[(ev_mc.gtid & 0xff) != 0] - - print("number of events after data cleaning = %i" % np.count_nonzero(ev.prompt)) + muon_mc = muon_mc[(muon_mc.gtid & 0xff) != 0] # remove events 200 microseconds after a muon ev = ev.groupby('run',group_keys=False).apply(muon_follower_cut) - # Get rid of muon events in our main event sample - ev = ev[(ev.dc & DC_MUON) == 0] - + # Get rid of events which don't have a successful fit ev = ev[~np.isnan(ev.fmin)] ev_mc = ev_mc[~np.isnan(ev_mc.fmin)] - - prompt = ev[ev.prompt & ~ev.atm & ~ev.muon] - atm = ev[ev.atm] - - prompt_mc = ev_mc[ev_mc.prompt & ~ev_mc.atm & ~ev_mc.muon] - atm_mc = ev_mc[ev_mc.atm] + muon_mc = muon_mc[~np.isnan(muon_mc.fmin)] # require (r/r_psup)^3 < 0.9 - prompt = prompt[prompt.r_psup < 0.9] - atm = atm[atm.r_psup < 0.9] - - prompt_mc = prompt_mc[prompt_mc.r_psup < 0.9] - atm_mc = atm_mc[atm_mc.r_psup < 0.9] + ev = ev[ev.r_psup < 0.9] + ev_mc = ev_mc[ev_mc.r_psup < 0.9] + muon_mc = muon_mc[muon_mc.r_psup < 0.9] # require psi < 6 - prompt = prompt[prompt.psi < 6] - atm = atm[atm.psi < 6] + ev = ev[ev.psi < 6] + ev_mc = ev_mc[ev_mc.psi < 6] + muon_mc = muon_mc[muon_mc.psi < 6] - prompt_mc = prompt_mc[prompt_mc.psi < 6] - atm_mc = atm_mc[atm_mc.psi < 6] + data = ev[ev.signal & ev.prompt & ~ev.atm] + data_atm = ev[ev.signal & ev.prompt & ev.atm] - print("number of events after psi cut = %i" % len(prompt)) + # Right now we use the muon Monte Carlo in the fit. If you want to use the + # actual data, you can comment the next two lines and then uncomment the + # two after that. + muon = muon_mc[muon_mc.muon & muon_mc.prompt & ~muon_mc.atm] + muon_atm = muon_mc[muon_mc.muon & muon_mc.prompt & muon_mc.atm] + #muon = ev[ev.muon & ev.prompt & ~ev.atm] + #muon_atm = ev[ev.muon & ev.prompt & ev.atm] - data = prompt - mc = prompt_mc + data_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ~ev_mc.atm] + data_atm_mc = ev_mc[ev_mc.signal & ev_mc.prompt & ev_mc.atm] bins = np.logspace(np.log10(20),np.log10(10e3),21) - nll = make_nll(data,mc,bins) + nll = make_nll(data,muon,data_mc,bins) - x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON]) + x0 = np.array([1.0,1.0,EPSILON,1.0,EPSILON,1.0,EPSILON,EPSILON]) opt = nlopt.opt(nlopt.LN_SBPLX, len(x0)) opt.set_min_objective(nll) low = np.array([EPSILON]*len(x0)) @@ -319,40 +321,47 @@ if __name__ == '__main__': plt.hist(samples[:,1],bins=100,histtype='step') plt.xlabel("Electron Energy Scale") plt.subplot(3,3,3) - plt.hist(samples[:,1],bins=100,histtype='step') + plt.hist(samples[:,2],bins=100,histtype='step') plt.xlabel("Electron Energy Resolution") plt.subplot(3,3,4) - plt.hist(samples[:,1],bins=100,histtype='step') + plt.hist(samples[:,3],bins=100,histtype='step') plt.xlabel("Muon Energy Scale") plt.subplot(3,3,5) - plt.hist(samples[:,1],bins=100,histtype='step') + plt.hist(samples[:,4],bins=100,histtype='step') plt.xlabel("Muon Energy Resolution") plt.subplot(3,3,6) - plt.hist(samples[:,1],bins=100,histtype='step') + plt.hist(samples[:,5],bins=100,histtype='step') plt.xlabel("Electron + Muon Energy Scale") plt.subplot(3,3,7) - plt.hist(samples[:,1],bins=100,histtype='step') + plt.hist(samples[:,6],bins=100,histtype='step') plt.xlabel("Electron + Muon Energy Resolution") + plt.subplot(3,3,8) + plt.hist(samples[:,7],bins=100,histtype='step') + plt.xlabel("Muon Scale") + plt.tight_layout() prob = {} for id in (20,22,2020,2022,2222): - prob[id] = get_multinomial_prob(data,mc,id,samples,bins) + prob[id] = get_multinomial_prob(data,muon,data_mc,id,samples,bins,size=args.multinomial_prob_size) print(id, prob[id]) prob_atm = {} for id in (20,22,2020,2022,2222): - prob_atm[id] = get_multinomial_prob(atm,atm_mc,id,samples,bins) + prob_atm[id] = get_multinomial_prob(data_atm,muon_atm,data_atm_mc,id,samples,bins,size=args.multinomial_prob_size) print(id, prob_atm[id]) handles = [Line2D([0], [0], color='C0'), - Line2D([0], [0], color='C1')] - labels = ('Data','Monte Carlo') + Line2D([0], [0], color='C1'), + Line2D([0], [0], color='C2')] + labels = ('Data','Monte Carlo','External Muons') fig = plt.figure() - hists = get_data_hists(prompt,bins) - hists_mc = get_mc_hists(prompt_mc,xopt,bins,apply_norm=True) + hists = get_data_hists(data,bins) + hists_muon = get_data_hists(muon,bins,scale=xopt[7]) + hists_mc = get_mc_hists(data_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') + plot_hist2(hists_muon,bins=bins,color='C2') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) @@ -374,10 +383,12 @@ if __name__ == '__main__': else: plt.suptitle("Without Neutron Follower") fig = plt.figure() - hists = get_data_hists(atm,bins) - hists_mc = get_mc_hists(atm_mc,xopt,bins,apply_norm=True) + hists = get_data_hists(data_atm,bins) + hists_muon = get_data_hists(muon_atm,bins,scale=xopt[7]) + hists_mc = get_mc_hists(data_atm_mc,xopt,bins,apply_norm=True) plot_hist2(hists,bins=bins,color='C0') plot_hist2(hists_mc,bins=bins,color='C1') + plot_hist2(hists_muon,bins=bins,color='C1') for id in (20,22,2020,2022,2222): if id == 20: plt.subplot(2,3,1) |