3 files changed, 25 insertions, 9 deletions
diff --git a/utils/chi2 b/utils/chi2
index 0460993..72ad89b 100755
--- a/utils/chi2
+++ b/utils/chi2
@@ -32,6 +32,7 @@ from matplotlib.lines import Line2D
 from scipy.stats import iqr, norm, beta
 from scipy.special import spence
 from itertools import izip_longest
+from sddm.stats import *
 
 # Uncertainty on the energy scale
 # FIXME: Should get real number from stopping muons
@@ -124,16 +125,11 @@ def chi2(samples,expected):
     return np.sum((samples-expected)**2/expected,axis=-1)
 
 def get_mc_hist(data,x,bins):
-    if len(data):
-        return np.histogram(data,bins=bins)[0]*x[0]/100.0
-    else:
-        return np.zeros(len(bins)-1,dtype=np.int)
+    hist = np.histogram(data,bins=bins)[0]
+    return sample_mc_hist(hist,norm=x[0]/100.0)
 
 def get_data_hist(data,x,bins):
-    if len(data):
-        return np.histogram(data*x[1],bins=bins)[0]
-    else:
-        return np.zeros(len(bins)-1,dtype=np.int)
+    return np.histogram(data*x[1],bins=bins)[0]
 
 def get_multinomial_prob(data, data_mc, x_samples, bins, size=10000):
     """
@@ -190,7 +186,15 @@ if __name__ == '__main__':
 
     import matplotlib.pyplot as plt
 
-    ev = get_events(args.filenames,merge_fits=True,nhit_thresh=args.nhit_thresh)
+    # Loop over runs to prevent using too much memory
+    evs = []
+    rhdr = pd.concat([read_hdf(filename, "rhdr").assign(filename=filename) for filename in args.filenames],ignore_index=True)
+    for run, df in rhdr.groupby('run'):
+        evs.append(get_events(df.filename.values, merge_fits=True, nhit_thresh=args.nhit_thresh))
+    ev = pd.concat(evs)
+
+    ev_mc = get_events(args.mc, merge_fits=True)
+
     ev_mc = get_events(args.mc,merge_fits=True,nhit_thresh=args.nhit_thresh)
 
     ev = ev.reset_index()
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
index a504d39..90c4498 100755
--- a/utils/sddm/plot_energy.py
+++ b/utils/sddm/plot_energy.py
@@ -111,11 +111,13 @@ def tag_michels(ev):
     """
     time_since_last_prompt_plus_muon = ev.gtr - ev.gtr.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
     ev['muon_gtid'] = ev.gtid.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
+    ev['muon_nhit'] = ev.nhit_cal.where(ev.prompt | ((ev.dc & DC_MUON) != 0)).ffill()
     ev['michel'] = ~ev.prompt
     ev['michel'] &= ev.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0
     ev['michel'] &= ev.nhit >= 100
     ev['michel'] &= (time_since_last_prompt_plus_muon > 800) & (time_since_last_prompt_plus_muon < 200e3)
     ev.loc[~ev.michel,'muon_gtid'] = -1
+    ev.loc[~ev.michel,'muon_nhit'] = -1
     ev['stopping_muon'] = np.zeros(len(ev),dtype=np.bool)
     ev.loc[ev.gtid.isin(ev.muon_gtid[ev.muon_gtid > 0].values),'stopping_muon'] = 1
     return ev
diff --git a/utils/sddm/stats.py b/utils/sddm/stats.py
new file mode 100644
index 0000000..96ca87a
--- /dev/null
+++ b/utils/sddm/stats.py
@@ -0,0 +1,10 @@
+import numpy as np
+from scipy.stats import gamma, dirichlet
+
+def chi2(samples,expected):
+    return np.sum((samples-expected)**2/expected,axis=-1)
+
+def sample_mc_hist(hist, norm):
+    alpha = np.ones_like(hist) + hist
+    N = gamma.rvs(np.sum(hist)+1e-10,scale=1)
+    return dirichlet.rvs(alpha)[0]*N*norm