aboutsummaryrefslogtreecommitdiff
path: root/utils/chi2
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-07-06 09:58:35 -0500
committertlatorre <tlatorre@uchicago.edu>2020-07-06 09:58:35 -0500
commite5a66e48d690a6e49c909e797097d6374ff95feb (patch)
treebb44cc5d518e141be27257df3f3852ac77a8ddeb /utils/chi2
parent5c2653e210f5685350c20be0a551749c39994b84 (diff)
downloadsddm-e5a66e48d690a6e49c909e797097d6374ff95feb.tar.gz
sddm-e5a66e48d690a6e49c909e797097d6374ff95feb.tar.bz2
sddm-e5a66e48d690a6e49c909e797097d6374ff95feb.zip
add sddm/stats.py
This commit adds the new file sddm/stats.py to and adds a function to correctly sample a Monte Carlo histogram when computing p-values. In particular, I now take into account the uncertainty on the total number of expected events by drawing from a gamma distribution, which is the posterior of the Poisson likelihood function with a prior of 1/lambda.
Diffstat (limited to 'utils/chi2')
-rwxr-xr-xutils/chi222
1 files changed, 13 insertions, 9 deletions
diff --git a/utils/chi2 b/utils/chi2
index 0460993..72ad89b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -32,6 +32,7 @@ from matplotlib.lines import Line2D
from scipy.stats import iqr, norm, beta
from scipy.special import spence
from itertools import izip_longest
+from sddm.stats import *
# Uncertainty on the energy scale
# FIXME: Should get real number from stopping muons
@@ -124,16 +125,11 @@ def chi2(samples,expected):
return np.sum((samples-expected)**2/expected,axis=-1)
def get_mc_hist(data,x,bins):
- if len(data):
- return np.histogram(data,bins=bins)[0]*x[0]/100.0
- else:
- return np.zeros(len(bins)-1,dtype=np.int)
+ hist = np.histogram(data,bins=bins)[0]
+ return sample_mc_hist(hist,norm=x[0]/100.0)
def get_data_hist(data,x,bins):
- if len(data):
- return np.histogram(data*x[1],bins=bins)[0]
- else:
- return np.zeros(len(bins)-1,dtype=np.int)
+ return np.histogram(data*x[1],bins=bins)[0]
def get_multinomial_prob(data, data_mc, x_samples, bins, size=10000):
"""
@@ -190,7 +186,15 @@ if __name__ == '__main__':
import matplotlib.pyplot as plt
- ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh)
+ # Loop over runs to prevent using too much memory
+ evs = []
+ rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
+ for run, df in rhdr.groupby('run'):
+ evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
+ ev = pd.concat(evs)
+
+ ev_mc = get_events(args.mc, merge_fits=True)
+
ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
ev = ev.reset_index()