aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-06-29 07:52:43 -0500
committertlatorre <tlatorre@uchicago.edu>2020-06-29 07:52:43 -0500
commit21420a805f214203819dd19ffc45fca9451a10bd (patch)
tree864b651123c9c0347b763fc501992a50f497dbe6
parent83a96dec3aec3d794dcf9b2d566d0cfb6547a0f5 (diff)
downloadsddm-21420a805f214203819dd19ffc45fca9451a10bd.tar.gz
sddm-21420a805f214203819dd19ffc45fca9451a10bd.tar.bz2
sddm-21420a805f214203819dd19ffc45fca9451a10bd.zip
add energy scale to chi2 fit
-rwxr-xr-xutils/chi2132
1 files changed, 102 insertions, 30 deletions
diff --git a/utils/chi2 b/utils/chi2
index 8c9e0e3..0460993 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -33,9 +33,14 @@ from scipy.stats import iqr, norm, beta
from scipy.special import spence
from itertools import izip_longest
+# Uncertainty on the energy scale
+# FIXME: Should get real number from stopping muons
+ENERGY_SCALE_MEAN = 1.0
+ENERGY_SCALE_UNCERTAINTY = 0.1
+
particle_id = {20: 'e', 22: r'\mu'}
-def plot_hist2(df, muons=False, norm=1.0, color=None):
+def plot_hist2(df, norm=1.0, scale=1.0, color=None):
for id, df_id in sorted(df.groupby('id')):
if id == 20:
plt.subplot(2,3,1)
@@ -48,14 +53,10 @@ def plot_hist2(df, muons=False, norm=1.0, color=None):
elif id == 2222:
plt.subplot(2,3,6)
- if muons:
- plt.hist(np.log10(df_id.ke.values/1000), bins=np.linspace(0,4.5,100), histtype='step')
- plt.xlabel("log10(Energy (GeV))")
- else:
- bins = np.logspace(np.log10(20),np.log10(10e3),21)
- plt.hist(df_id.ke.values, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color)
- plt.gca().set_xscale("log")
- plt.xlabel("Energy (MeV)")
+ bins = np.logspace(np.log10(20),np.log10(10e3),21)
+ plt.hist(df_id.ke.values*scale, bins=bins, histtype='step', weights=np.tile(norm,len(df_id.ke.values)),color=color)
+ plt.gca().set_xscale("log")
+ plt.xlabel("Energy (MeV)")
plt.title('$' + ''.join([particle_id[int(''.join(x))] for x in grouper(str(id),2)]) + '$')
if len(df):
@@ -93,8 +94,19 @@ def plot_hist(df, muons=False):
if len(df):
plt.tight_layout()
-def make_nll(data_hists, mc_hists):
+def make_nll(data, mc_hists):
def nll(x, grad=None):
+ if x[0] < 0 or x[1] < 0:
+ return np.inf
+
+ data_hists = {}
+ for id in (20,22,2020,2022,2222):
+ df_id = data[data.id == id]
+ if len(df_id):
+ data_hists[id] = np.histogram(df_id.ke.values*x[1],bins=bins)[0]
+ else:
+ data_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
+
nll = 0
for id in data_hists:
N = data_hists[id].sum()
@@ -105,24 +117,53 @@ def make_nll(data_hists, mc_hists):
p += 1e-10
p /= p.sum()
nll -= multinomial.logpmf(data_hists[id],N,p)
- return nll
+ return nll - norm.logpdf(x[1],ENERGY_SCALE_MEAN,ENERGY_SCALE_UNCERTAINTY)
return nll
def chi2(samples,expected):
return np.sum((samples-expected)**2/expected,axis=-1)
-def get_multinomial_prob(data, mc, size=1000):
- N = mc.sum()
- # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
- mc = mc + 1e-10
- p = mc/mc.sum()
- chi2_data = chi2(data,mc)
+def get_mc_hist(data,x,bins):
+ if len(data):
+ return np.histogram(data,bins=bins)[0]*x[0]/100.0
+ else:
+ return np.zeros(len(bins)-1,dtype=np.int)
+
+def get_data_hist(data,x,bins):
+ if len(data):
+ return np.histogram(data*x[1],bins=bins)[0]
+ else:
+ return np.zeros(len(bins)-1,dtype=np.int)
+
+def get_multinomial_prob(data, data_mc, x_samples, bins, size=10000):
+ """
+ Returns the p-value that the histogram of the data is drawn from the MC
+ histogram.
+
+ Arguments:
+
+ data: 1D array of KE values
+ data_mc: 1D array of MC KE values
+ x_samples: MCMC samples of the floated parameters in the fit
+ bins: bins used to bin the mc histogram
+ size: number of values to compute
+ """
+ chi2_data = []
chi2_samples = []
for i in range(size):
+ x = x_samples[np.random.randint(x_samples.shape[0])]
+ mc = get_mc_hist(data_mc,x,bins)
+ N = mc.sum()
+ # Fix a bug in scipy(). See https://github.com/scipy/scipy/issues/8235 (I think).
+ mc = mc + 1e-10
+ p = mc/mc.sum()
+ data_hist = get_data_hist(data,x,bins)
+ chi2_data.append(chi2(data_hist,mc))
n = np.random.poisson(N)
samples = multinomial.rvs(n,p)
chi2_samples.append(chi2(samples,mc))
chi2_samples = np.array(chi2_samples)
+ chi2_data = np.array(chi2_data)
return np.count_nonzero(chi2_samples >= chi2_data)/len(chi2_samples)
if __name__ == '__main__':
@@ -135,12 +176,14 @@ if __name__ == '__main__':
from sddm.plot import *
from sddm import setup_matplotlib
import nlopt
+ import emcee
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
parser.add_argument("--mc", nargs='+', required=True, help="atmospheric MC files")
parser.add_argument("--nhit-thresh", type=int, default=None, help="nhit threshold to apply to events before processing (should only be used for testing to speed things up)")
+ parser.add_argument("--steps", type=int, default=1000, help="number of steps in the MCMC chain")
args = parser.parse_args()
setup_matplotlib(args.save)
@@ -210,7 +253,7 @@ if __name__ == '__main__':
for id in (20,22,2020,2022,2222):
df_id = mc[mc.id == id]
if len(df_id):
- mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ mc_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100
else:
mc_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
@@ -226,34 +269,63 @@ if __name__ == '__main__':
for id in (20,22,2020,2022,2222):
df_id = atm_mc[atm_mc.id == id]
if len(df_id):
- mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]
+ mc_atm_hists[id] = np.histogram(df_id.ke.values,bins=bins)[0]/100
else:
mc_atm_hists[id] = np.zeros(len(bins)-1,dtype=np.int)
- nll = make_nll(data_hists,mc_hists)
+ nll = make_nll(data,mc_hists)
- x0 = np.array([1.0])
+ x0 = np.array([1.0,1.0])
opt = nlopt.opt(nlopt.LN_SBPLX, len(x0))
opt.set_min_objective(nll)
- low = np.array([1e-10])
- high = np.array([10])
+ low = np.array([1e-10,1e-10])
+ high = np.array([10,10])
opt.set_lower_bounds(low)
opt.set_upper_bounds(high)
opt.set_ftol_abs(1e-10)
- opt.set_initial_step([0.01])
+ opt.set_initial_step([0.01,0.01])
xopt = opt.optimize(x0)
+ print("xopt = ", xopt)
nll_xopt = nll(xopt)
print("nll(xopt) = ", nll(xopt))
+ pos = np.empty((10, len(x0)),dtype=np.double)
+ for i in range(pos.shape[0]):
+ pos[i] = xopt + np.random.randn(len(x0))*0.1
+ pos[i,:] = np.clip(pos[i,:],1e-10,10)
+
+ nwalkers, ndim = pos.shape
+
+ sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x: -nll(x))
+ with np.errstate(invalid='ignore'):
+ sampler.run_mcmc(pos, args.steps)
+
+ print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
+
+ try:
+ print("autocorrelation time: ", sampler.get_autocorr_time(quiet=True))
+ except Exception as e:
+ print(e)
+
+ samples = sampler.chain.reshape((-1,len(x0)))
+
+ plt.figure()
+ plt.subplot(2,2,1)
+ plt.hist(samples[:,0],bins=100,histtype='step')
+ plt.xlabel("Atmospheric Flux Scale")
+ plt.subplot(2,2,2)
+ plt.hist(samples[:,1],bins=100,histtype='step')
+ plt.xlabel("Energy Scale")
+
prob = {}
for id in (20,22,2020,2022,2222):
- prob[id] = get_multinomial_prob(data_hists[id],mc_hists[id]*xopt[0])
+ prob[id] = get_multinomial_prob(data[data.id == id].ke.values,mc[mc.id == id].ke.values,samples,bins)
print(id, prob[id])
prob_atm = {}
for id in (20,22,2020,2022,2222):
- prob_atm[id] = get_multinomial_prob(data_atm_hists[id],mc_atm_hists[id]*xopt[0])
+ prob_atm[id] = get_multinomial_prob(atm[atm.id == id].ke.values,atm_mc[atm_mc.id == id].ke.values,samples,bins)
print(id, prob_atm[id])
handles = [Line2D([0], [0], color='C0'),
@@ -261,8 +333,8 @@ if __name__ == '__main__':
labels = ('Data','Monte Carlo')
fig = plt.figure()
- plot_hist2(prompt,color='C0')
- plot_hist2(prompt_mc,norm=xopt[0],color='C1')
+ plot_hist2(prompt,scale=xopt[1],color='C0')
+ plot_hist2(prompt_mc,norm=xopt[0]/100,color='C1')
for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)
@@ -284,8 +356,8 @@ if __name__ == '__main__':
else:
plt.suptitle("Without Neutron Follower")
fig = plt.figure()
- plot_hist2(atm,color='C0')
- plot_hist2(atm_mc,norm=xopt[0],color='C1')
+ plot_hist2(atm,scale=xopt[1],color='C0')
+ plot_hist2(atm_mc,norm=xopt[0]/100,color='C1')
for id in (20,22,2020,2022,2222):
if id == 20:
plt.subplot(2,3,1)