author     tlatorre <tlatorre@uchicago.edu>    2020-07-06 09:58:35 -0500
committer  tlatorre <tlatorre@uchicago.edu>    2020-07-06 09:58:35 -0500
commit     e5a66e48d690a6e49c909e797097d6374ff95feb (patch)
tree       bb44cc5d518e141be27257df3f3852ac77a8ddeb /utils/chi2
parent     5c2653e210f5685350c20be0a551749c39994b84 (diff)
add sddm/stats.py
This commit adds the new file sddm/stats.py, which provides a function to
correctly sample a Monte Carlo histogram when computing p-values. In
particular, I now take into account the uncertainty on the total number
of expected events by drawing from a gamma distribution, which is the
posterior of the Poisson likelihood function with a prior of 1/lambda.
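
For illustration, here is a minimal sketch of the kind of sampler the commit message describes. The helper name `sample_mc_hist_sketch` and its exact behavior are assumptions made for this example; the real `sample_mc_hist` lives in sddm/stats.py, which is outside this diff and may differ.

```python
import numpy as np

def sample_mc_hist_sketch(hist, norm=1.0):
    """Hypothetical stand-in for sddm.stats.sample_mc_hist.

    Draws the total number of expected events from a Gamma(N, 1)
    distribution -- the posterior of a Poisson likelihood with a
    1/lambda prior -- and rescales the MC histogram so that its shape
    is kept but its overall normalization fluctuates accordingly.
    """
    hist = np.asarray(hist, dtype=float)
    N = hist.sum()
    if N == 0:
        return np.zeros_like(hist)
    total = np.random.gamma(N, 1.0)   # posterior draw for the total number of events
    return hist*(total/N)*norm        # preserve the MC shape, vary the normalization
```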
Diffstat (limited to 'utils/chi2')
-rwxr-xr-x  utils/chi2  22
1 file changed, 13 insertions(+), 9 deletions(-)
@@ -32,6 +32,7 @@ from matplotlib.lines import Line2D
 from scipy.stats import iqr, norm, beta
 from scipy.special import spence
 from itertools import izip_longest
+from sddm.stats import *
 
 # Uncertainty on the energy scale
 # FIXME: Should get real number from stopping muons
@@ -124,16 +125,11 @@ def chi2(samples,expected):
     return np.sum((samples-expected)**2/expected,axis=-1)
 
 def get_mc_hist(data,x,bins):
-    if len(data):
-        return np.histogram(data,bins=bins)[0]*x[0]/100.0
-    else:
-        return np.zeros(len(bins)-1,dtype=np.int)
+    hist = np.histogram(data,bins=bins)[0]
+    return sample_mc_hist(hist,norm=x[0]/100.0)
 
 def get_data_hist(data,x,bins):
-    if len(data):
-        return np.histogram(data*x[1],bins=bins)[0]
-    else:
-        return np.zeros(len(bins)-1,dtype=np.int)
+    return np.histogram(data*x[1],bins=bins)[0]
 
 def get_multinomial_prob(data, data_mc, x_samples, bins, size=10000):
     """
@@ -190,7 +186,15 @@ if __name__ == '__main__':
     import matplotlib.pyplot as plt
 
-    ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh)
+    # Loop over runs to prevent using too much memory
+    evs = []
+    rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
+    for run, df in rhdr.groupby('run'):
+        evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
+    ev = pd.concat(evs)
+
+    ev_mc = get_events(args.mc, merge_fits=True)
+    ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
 
     ev = ev.reset_index()
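
To see how such resampled MC histograms could enter a p-value, here is a hedged toy example built only on the pieces visible in this diff (the `chi2` statistic) plus the hypothetical `sample_mc_hist_sketch` helper sketched above. The actual procedure in `get_multinomial_prob` is not shown in this hunk and may differ; the histograms below are made-up numbers.

```python
import numpy as np

def chi2(samples, expected):
    # Same statistic as defined in utils/chi2
    return np.sum((samples - expected)**2/expected, axis=-1)

def toy_pvalue(data_hist, mc_hist, norm, size=10000):
    """Toy p-value: fraction of pseudo-experiments whose chi2 is at least
    as large as the observed one, where each pseudo-experiment resamples
    the MC normalization (gamma posterior) and then Poisson-fluctuates it."""
    expected = np.asarray(mc_hist, dtype=float)*norm
    observed = chi2(np.asarray(data_hist, dtype=float), expected)
    count = 0
    for _ in range(size):
        sampled = sample_mc_hist_sketch(mc_hist, norm)   # hypothetical helper above
        fake_data = np.random.poisson(sampled)           # fluctuate like real data
        count += chi2(fake_data, sampled) >= observed
    return count/float(size)

# Purely illustrative histograms with non-empty bins
mc_hist = np.array([30, 50, 40, 20])
data_hist = np.array([28, 55, 35, 25])
print(toy_pvalue(data_hist, mc_hist, norm=1.0, size=1000))
```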