author     tlatorre <tlatorre@uchicago.edu>    2020-07-06 09:58:35 -0500
committer  tlatorre <tlatorre@uchicago.edu>    2020-07-06 09:58:35 -0500
commit     e5a66e48d690a6e49c909e797097d6374ff95feb (patch)
tree       bb44cc5d518e141be27257df3f3852ac77a8ddeb /utils/chi2
parent     5c2653e210f5685350c20be0a551749c39994b84 (diff)
add sddm/stats.py
This commit adds the new file sddm/stats.py, which provides a function to
correctly sample a Monte Carlo histogram when computing p-values. In
particular, I now take into account the uncertainty on the total number
of expected events by drawing from a gamma distribution, which is the
posterior of the Poisson likelihood function with a prior of 1/lambda.
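
For illustration, here is a minimal sketch of the kind of sampler the commit message describes. The helper name `sample_mc_hist_sketch` and its exact behavior are assumptions made for this example; the real `sample_mc_hist` lives in sddm/stats.py, which is outside this diff and may differ.

```python
import numpy as np

def sample_mc_hist_sketch(hist, norm=1.0):
    """Hypothetical stand-in for sddm.stats.sample_mc_hist.

    Draws the total number of expected events from a Gamma(N, 1)
    distribution -- the posterior of a Poisson likelihood with a
    1/lambda prior -- and rescales the MC histogram so that its shape
    is kept but its overall normalization fluctuates accordingly.
    """
    hist = np.asarray(hist, dtype=float)
    N = hist.sum()
    if N == 0:
        return np.zeros_like(hist)
    total = np.random.gamma(N, 1.0)   # posterior draw for the total number of events
    return hist*(total/N)*norm        # preserve the MC shape, vary the normalization
```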
Diffstat (limited to 'utils/chi2')
-rwxr-xr-x  utils/chi2  22
1 file changed, 13 insertions(+), 9 deletions(-)
@@ -32,6 +32,7 @@ from matplotlib.lines import Line2D
 from scipy.stats import iqr, norm, beta
 from scipy.special import spence
 from itertools import izip_longest
+from sddm.stats import *
 
 # Uncertainty on the energy scale
 # FIXME: Should get real number from stopping muons
@@ -124,16 +125,11 @@ def chi2(samples,expected):
     return np.sum((samples-expected)**2/expected,axis=-1)
 
 def get_mc_hist(data,x,bins):
-    if len(data):
-        return np.histogram(data,bins=bins)[0]*x[0]/100.0
-    else:
-        return np.zeros(len(bins)-1,dtype=np.int)
+    hist = np.histogram(data,bins=bins)[0]
+    return sample_mc_hist(hist,norm=x[0]/100.0)
 
 def get_data_hist(data,x,bins):
-    if len(data):
-        return np.histogram(data*x[1],bins=bins)[0]
-    else:
-        return np.zeros(len(bins)-1,dtype=np.int)
+    return np.histogram(data*x[1],bins=bins)[0]
 
 def get_multinomial_prob(data, data_mc, x_samples, bins, size=10000):
     """
@@ -190,7 +186,15 @@ if __name__ == '__main__':
     import matplotlib.pyplot as plt
 
-    ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh)
+    # Loop over runs to prevent using too much memory
+    evs = []
+    rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
+    for run, df in rhdr.groupby('run'):
+        evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
+    ev = pd.concat(evs)
+
+    ev_mc = get_events(args.mc, merge_fits=True)
+    ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
 
     ev = ev.reset_index()
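
To see how such resampled MC histograms could enter a p-value, here is a hedged toy example built only on the pieces visible in this diff (the `chi2` statistic) plus the hypothetical `sample_mc_hist_sketch` helper sketched above. The actual procedure in `get_multinomial_prob` is not shown in this hunk and may differ; the histograms below are made-up numbers.

```python
import numpy as np

def chi2(samples, expected):
    # Same statistic as defined in utils/chi2
    return np.sum((samples - expected)**2/expected, axis=-1)

def toy_pvalue(data_hist, mc_hist, norm, size=10000):
    """Toy p-value: fraction of pseudo-experiments whose chi2 is at least
    as large as the observed one, where each pseudo-experiment resamples
    the MC normalization (gamma posterior) and then Poisson-fluctuates it."""
    expected = np.asarray(mc_hist, dtype=float)*norm
    observed = chi2(np.asarray(data_hist, dtype=float), expected)
    count = 0
    for _ in range(size):
        sampled = sample_mc_hist_sketch(mc_hist, norm)   # hypothetical helper above
        fake_data = np.random.poisson(sampled)           # fluctuate like real data
        count += chi2(fake_data, sampled) >= observed
    return count/float(size)

# Purely illustrative histograms with non-empty bins
mc_hist = np.array([30, 50, 40, 20])
data_hist = np.array([28, 55, 35, 25])
print(toy_pvalue(data_hist, mc_hist, norm=1.0, size=1000))
```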