aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authortlatorre <tlatorre@uchicago.edu>2020-05-11 10:30:39 -0500
committertlatorre <tlatorre@uchicago.edu>2020-05-11 10:30:39 -0500
commit15fc972c89a4366a06755daeedaac52f91762ecd (patch)
tree9a5dbea7787cef9946473787e9a3996f24cd2898
parent651cbe5d261a6d29b4dec7c38b65c0eac5431363 (diff)
downloadsddm-15fc972c89a4366a06755daeedaac52f91762ecd.tar.gz
sddm-15fc972c89a4366a06755daeedaac52f91762ecd.tar.bz2
sddm-15fc972c89a4366a06755daeedaac52f91762ecd.zip
update utils/ folder to make a python package called sddm
This commit adds an sddm python package to the utils/ folder. This allows me to consolidate code used across all the various scripts. This package is now installed by default to /home/tlatorre/local/lib/python2.7/site-packages so you should add the following to your .bashrc file: export PYTHONPATH=$HOME/local/lib/python2.7/site-packages/:$PYTHONPATH before using the scripts installed to ~/local/bin.
-rw-r--r--utils/.gitignore1
-rw-r--r--utils/Makefile1
-rwxr-xr-xutils/calculate_limits.py103
-rwxr-xr-xutils/cat-grid-jobs157
-rwxr-xr-xutils/dc595
-rwxr-xr-xutils/plot50
-rwxr-xr-xutils/plot-atmospheric-fluxes128
-rwxr-xr-xutils/plot-energy675
-rwxr-xr-xutils/plot-fit-results332
-rwxr-xr-xutils/plot-root-results104
-rw-r--r--utils/sddm/.gitignore1
-rw-r--r--utils/sddm/__init__.py139
-rwxr-xr-xutils/sddm/dc.py366
-rw-r--r--utils/sddm/logger.py70
-rwxr-xr-xutils/sddm/plot.py257
-rwxr-xr-xutils/sddm/plot_energy.py352
-rwxr-xr-xutils/setup.py10
-rwxr-xr-xutils/submit-grid-jobs161
18 files changed, 1342 insertions, 2160 deletions
diff --git a/utils/.gitignore b/utils/.gitignore
index e307f16..cea117c 100644
--- a/utils/.gitignore
+++ b/utils/.gitignore
@@ -1,2 +1,3 @@
*.eps
*.pdf
+build/
diff --git a/utils/Makefile b/utils/Makefile
index 6ad38c5..c8690d6 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -17,5 +17,6 @@ install:
$(INSTALL) cat-grid-jobs $(INSTALL_BIN)
$(INSTALL) plot-energy $(INSTALL_BIN)
$(INSTALL) zdab-reprocess $(INSTALL_BIN)
+ ./setup.py install --prefix=$(PREFIX)
.PHONY: install
diff --git a/utils/calculate_limits.py b/utils/calculate_limits.py
index 3737850..c8698c1 100755
--- a/utils/calculate_limits.py
+++ b/utils/calculate_limits.py
@@ -23,6 +23,7 @@ from numpy import pi
from collections import namedtuple
from functools import update_wrapper
from threading import RLock
+from sddm.plot import despine
# Backport of lru_cache from http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
@@ -487,108 +488,6 @@ def get_event_rate_xenon(m, cs, A, threshold):
return rate
-# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
-def despine(fig=None, ax=None, top=True, right=True, left=False,
- bottom=False, offset=None, trim=False):
- """Remove the top and right spines from plot(s).
-
- fig : matplotlib figure, optional
- Figure to despine all axes of, default uses current figure.
- ax : matplotlib axes, optional
- Specific axes object to despine.
- top, right, left, bottom : boolean, optional
- If True, remove that spine.
- offset : int or dict, optional
- Absolute distance, in points, spines should be moved away
- from the axes (negative values move spines inward). A single value
- applies to all spines; a dict can be used to set offset values per
- side.
- trim : bool, optional
- If True, limit spines to the smallest and largest major tick
- on each non-despined axis.
-
- Returns
- -------
- None
-
- """
- # Get references to the axes we want
- if fig is None and ax is None:
- axes = plt.gcf().axes
- elif fig is not None:
- axes = fig.axes
- elif ax is not None:
- axes = [ax]
-
- for ax_i in axes:
- for side in ["top", "right", "left", "bottom"]:
- # Toggle the spine objects
- is_visible = not locals()[side]
- ax_i.spines[side].set_visible(is_visible)
- if offset is not None and is_visible:
- try:
- val = offset.get(side, 0)
- except AttributeError:
- val = offset
- _set_spine_position(ax_i.spines[side], ('outward', val))
-
- # Potentially move the ticks
- if left and not right:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.minorTicks
- )
- ax_i.yaxis.set_ticks_position("right")
- for t in ax_i.yaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.yaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if bottom and not top:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.minorTicks
- )
- ax_i.xaxis.set_ticks_position("top")
- for t in ax_i.xaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.xaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if trim:
- # clip off the parts of the spines that extend past major ticks
- xticks = ax_i.get_xticks()
- if xticks.size:
- firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
- xticks)[0]
- lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
- xticks)[-1]
- ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
- ax_i.spines['top'].set_bounds(firsttick, lasttick)
- newticks = xticks.compress(xticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_xticks(newticks)
-
- yticks = ax_i.get_yticks()
- if yticks.size:
- firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
- yticks)[0]
- lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
- yticks)[-1]
- ax_i.spines['left'].set_bounds(firsttick, lasttick)
- ax_i.spines['right'].set_bounds(firsttick, lasttick)
- newticks = yticks.compress(yticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_yticks(newticks)
-
if __name__ == '__main__':
import argparse
diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs
index 62dcd42..0755774 100755
--- a/utils/cat-grid-jobs
+++ b/utils/cat-grid-jobs
@@ -35,164 +35,11 @@ from datetime import datetime
import h5py
from os.path import join, split
from subprocess import check_call
-
-DEBUG = 0
-VERBOSE = 1
-NOTICE = 2
-WARNING = 3
-
-class Logger(object):
- """
- Simple logger class that I wrote for the SNO+ DAQ. Very easy to use:
-
- log = Logger()
- log.set_logfile("test.log")
- log.notice("blah")
- log.warn("foo")
-
- The log file format is taken from the Redis log file format which is really
- nice since it shows the exact time and severity of each log message.
- """
- def __init__(self):
- self.logfile = sys.stdout
- # by default, we log everything
- self.verbosity = DEBUG
-
- def set_verbosity(self, level):
- if isinstance(level, int):
- self.verbosity = level
- elif isinstance(level, basestring):
- if level == 'debug':
- self.verbosity = DEBUG
- elif level == 'verbose':
- self.verbosity = VERBOSE
- elif level == 'notice':
- self.verbosity = NOTICE
- elif level == 'warning':
- self.verbosity = WARNING
- else:
- raise ValueError("unknown loglevel '%s'" % level)
- else:
- raise TypeError("level must be a string or integer")
-
- def set_logfile(self, filename):
- self.logfile = open(filename, 'a')
-
- def debug(self, msg):
- self.log(DEBUG, msg)
-
- def verbose(self, msg):
- self.log(VERBOSE, msg)
-
- def notice(self, msg):
- self.log(NOTICE, msg)
-
- def warn(self, msg):
- self.log(WARNING, msg)
-
- def log(self, level, msg):
- if level < self.verbosity:
- return
-
- c = '.-*#'
- pid = os.getpid()
- now = datetime.now()
- buf = now.strftime('%d %b %H:%M:%S.%f')[:-3]
-
- self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg))
- self.logfile.flush()
+from sddm import splitext, which
+from sddm.logger import Logger
log = Logger()
-# Check that a given file can be accessed with the correct mode.
-# Additionally check that `file` is not a directory, as on Windows
-# directories pass the os.access check.
-def _access_check(fn, mode):
- return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn))
-
-def which(cmd, mode=os.F_OK | os.X_OK, path=None):
- """Given a command, mode, and a PATH string, return the path which
- conforms to the given mode on the PATH, or None if there is no such
- file.
- `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
- of os.environ.get("PATH"), or can be overridden with a custom search
- path.
- """
- # If we're given a path with a directory part, look it up directly rather
- # than referring to PATH directories. This includes checking relative to the
- # current directory, e.g. ./script
- if os.path.dirname(cmd):
- if _access_check(cmd, mode):
- return cmd
- return None
-
- if path is None:
- path = os.environ.get("PATH", None)
- if path is None:
- try:
- path = os.confstr("CS_PATH")
- except (AttributeError, ValueError):
- # os.confstr() or CS_PATH is not available
- path = os.defpath
- # bpo-35755: Don't use os.defpath if the PATH environment variable is
- # set to an empty string
-
- # PATH='' doesn't match, whereas PATH=':' looks in the current directory
- if not path:
- return None
-
- path = path.split(os.pathsep)
-
- if sys.platform == "win32":
- # The current directory takes precedence on Windows.
- curdir = os.curdir
- if curdir not in path:
- path.insert(0, curdir)
-
- # PATHEXT is necessary to check on Windows.
- pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
- # See if the given file matches any of the expected path extensions.
- # This will allow us to short circuit when given "python.exe".
- # If it does match, only test that one, otherwise we have to try
- # others.
- if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
- files = [cmd]
- else:
- files = [cmd + ext for ext in pathext]
- else:
- # On other platforms you don't have things like PATHEXT to tell you
- # what file suffixes are executable, so just pass on cmd as-is.
- files = [cmd]
-
- seen = set()
- for dir in path:
- normdir = os.path.normcase(dir)
- if not normdir in seen:
- seen.add(normdir)
- for thefile in files:
- name = os.path.join(dir, thefile)
- if _access_check(name, mode):
- return name
- return None
-
-def splitext(path):
- """
- Like os.path.splitext() except it returns the full extension if the
- filename has multiple extensions, for example:
-
- splitext('foo.tar.gz') -> 'foo', '.tar.gz'
- """
- full_root, full_ext = os.path.splitext(path)
- while True:
- root, ext = os.path.splitext(full_root)
- if ext:
- full_ext = ext + full_ext
- full_root = root
- else:
- break
-
- return full_root, full_ext
-
def cat_grid_jobs(conn, output_dir):
zdab_cat = which("zdab-cat")
diff --git a/utils/dc b/utils/dc
index 56decf9..b551925 100755
--- a/utils/dc
+++ b/utils/dc
@@ -26,6 +26,10 @@ import emcee
from scipy.optimize import brentq
from scipy.stats import truncnorm
from matplotlib.lines import Line2D
+from sddm.dc import get_proposal_func, estimate_errors, metropolis_hastings, EPSILON
+from sddm.plot import despine
+from sddm.dc import *
+from sddm.plot_energy import *
try:
from emcee import moves
@@ -33,238 +37,6 @@ except ImportError:
print("emcee version 2.2.1 is required",file=sys.stderr)
sys.exit(1)
-# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
-def despine(fig=None, ax=None, top=True, right=True, left=False,
- bottom=False, offset=None, trim=False):
- """Remove the top and right spines from plot(s).
-
- fig : matplotlib figure, optional
- Figure to despine all axes of, default uses current figure.
- ax : matplotlib axes, optional
- Specific axes object to despine.
- top, right, left, bottom : boolean, optional
- If True, remove that spine.
- offset : int or dict, optional
- Absolute distance, in points, spines should be moved away
- from the axes (negative values move spines inward). A single value
- applies to all spines; a dict can be used to set offset values per
- side.
- trim : bool, optional
- If True, limit spines to the smallest and largest major tick
- on each non-despined axis.
-
- Returns
- -------
- None
-
- """
- # Get references to the axes we want
- if fig is None and ax is None:
- axes = plt.gcf().axes
- elif fig is not None:
- axes = fig.axes
- elif ax is not None:
- axes = [ax]
-
- for ax_i in axes:
- for side in ["top", "right", "left", "bottom"]:
- # Toggle the spine objects
- is_visible = not locals()[side]
- ax_i.spines[side].set_visible(is_visible)
- if offset is not None and is_visible:
- try:
- val = offset.get(side, 0)
- except AttributeError:
- val = offset
- _set_spine_position(ax_i.spines[side], ('outward', val))
-
- # Potentially move the ticks
- if left and not right:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.minorTicks
- )
- ax_i.yaxis.set_ticks_position("right")
- for t in ax_i.yaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.yaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if bottom and not top:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.minorTicks
- )
- ax_i.xaxis.set_ticks_position("top")
- for t in ax_i.xaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.xaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if trim:
- # clip off the parts of the spines that extend past major ticks
- xticks = ax_i.get_xticks()
- if xticks.size:
- firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
- xticks)[0]
- lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
- xticks)[-1]
- ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
- ax_i.spines['top'].set_bounds(firsttick, lasttick)
- newticks = xticks.compress(xticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_xticks(newticks)
-
- yticks = ax_i.get_yticks()
- if yticks.size:
- firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
- yticks)[0]
- lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
- yticks)[-1]
- ax_i.spines['left'].set_bounds(firsttick, lasttick)
- ax_i.spines['right'].set_bounds(firsttick, lasttick)
- newticks = yticks.compress(yticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_yticks(newticks)
-
-# Lower bound for probabilities in fit. We set this to be nonzero to prevent
-# nans in case an expected value is predicted to be zero.
-EPSILON = 1e-10
-
-def get_proposal_func(stepsizes, low, high):
- """
- Returns a function which produces proposal steps for a Metropolis Hastings
- MCMC. This function can then be passed to emcee.moves.MHMove().
-
- Steps are proposed according to a truncated normal distribution with mean
- at the current position and with standard deviations given by stepsizes.
- The bounds for each step are given by the two arrays low and high.
- """
- stepsizes = np.atleast_1d(stepsizes)
- low = np.atleast_1d(low)
- high = np.atleast_1d(high)
- def proposal(x0, random):
- """
- Returns a tuple of proposed positions and the log-ratio of the proposal
- probabilities.
-
- x0 should be a (K,ndim) dimensional numpy array where K is the number
- of walkers and ndim is the number of dimensions in the likelihood function.
- """
- # calculate a and b needed for truncnorm
- # Note: These are not just low and high b/c
- #
- # The standard form of this distribution is a standard normal
- # truncated to the range [a, b] --- notice that a and b are defined
- # over the domain of the standard normal. To convert clip values
- # for a specific mean and standard deviation, use::
- #
- # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
- a, b = (low - x0)/stepsizes, (high - x0)/stepsizes
- x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random)
- # Note: Should be able to do this:
- #
- # log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1)
- #
- # but I think there is a bug in truncnorm.logpdf() which barfs when
- # passed 2D arrays, so instead we just loop over the first axis.
- log_p_x_given_x0 = np.empty(x0.shape[0])
- for i in range(x0.shape[0]):
- log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum()
- a, b = (low - x)/stepsizes, (high - x)/stepsizes
- # Note: Should be able to do this:
- #
- # log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1)
- #
- # but I think there is a bug in truncnorm.logpdf() which barfs when
- # passed 2D arrays, so instead we just loop over the first axis.
- log_p_x0_given_x = np.empty(x0.shape[0])
- for i in range(x0.shape[0]):
- log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum()
- return x, log_p_x0_given_x - log_p_x_given_x0
- return proposal
-
-def estimate_errors(nll, xopt, constraints):
- """
- Return approximate 1 sigma errors for each parameter assuming all
- parameters are uncorrelated.
- """
- nll_xopt = nll(xopt)
-
- def root(x, xopt, i):
- """
- Function to bisect where the negative log likelihood reaches 0.5 from the minimum.
- """
- xtest = xopt.copy()
- xtest[i] = x
- for constraint in constraints:
- if constraint(xtest) >= 0:
- xtest = constraint.renormalize(xtest,i)
- return nll(xtest) - (nll_xopt + 0.5)
-
- errors = np.empty_like(xopt)
-
- for i in range(len(xopt)):
- if i < 6:
- xlo = brentq(root,0,xopt[i],args=(xopt,i))
- xhi = brentq(root,xopt[i],1e9,args=(xopt,i))
- else:
- xlo = brentq(root,0,xopt[i],args=(xopt,i))
- xhi = brentq(root,xopt[i],1,args=(xopt,i))
-
- errors[i] = max(xopt[i]-xlo,xhi-xopt[i])
-
- return errors
-
-def metropolis_hastings(nll, x0, stepsizes=None, size=1000):
- x = np.asarray(x0)
-
- if stepsizes is None:
- stepsizes = np.ones_like(x)
- else:
- stepsizes = np.asarray(stepsizes)
-
- accept = 0
-
- nll_old = nll(x)
-
- samples = np.empty((size,len(x0)),dtype=np.double)
-
- try:
- for i in range(size):
- if i % 100 == 0:
- sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1)))
- sys.stdout.flush()
-
- xnew = x + np.random.randn(len(x))*stepsizes
-
- nll_new = nll(xnew)
-
- if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new):
- x = xnew
- nll_new = nll_old
- accept += 1
-
- samples[i] = x
- except KeyboardInterrupt:
- sys.stdout.write("\n")
- sys.stdout.flush()
- print("ctrl-c caught. quitting...")
- samples.resize((i,len(x)))
- else:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
- return samples
-
# from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre
@contextlib.contextmanager
def printoptions(*args, **kwargs):
@@ -275,250 +47,6 @@ def printoptions(*args, **kwargs):
finally:
np.set_printoptions(**original)
-# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python
-class bcolors:
- HEADER = '\033[95m'
- OKBLUE = '\033[94m'
- OKGREEN = '\033[92m'
- WARNING = '\033[93m'
- FAIL = '\033[91m'
- ENDC = '\033[0m'
- BOLD = '\033[1m'
- UNDERLINE = '\033[4m'
-
-SNOMAN_MASS = {
- 20: 0.511,
- 21: 0.511,
- 22: 105.658,
- 23: 105.658
-}
-
-AV_RADIUS = 600.0
-PSUP_RADIUS = 840.0
-
-# Data cleaning bitmasks.
-DC_MUON = 0x1
-DC_JUNK = 0x2
-DC_CRATE_ISOTROPY = 0x4
-DC_QVNHIT = 0x8
-DC_NECK = 0x10
-DC_FLASHER = 0x20
-DC_ESUM = 0x40
-DC_OWL = 0x80
-DC_OWL_TRIGGER = 0x100
-DC_FTS = 0x200
-DC_ITC = 0x400
-DC_BREAKDOWN = 0x800
-
-def plot_hist(df, title=None, muons=False):
- for id, df_id in sorted(df.groupby('id')):
- if id == 20:
- plt.subplot(3,4,1)
- elif id == 22:
- plt.subplot(3,4,2)
- elif id == 2020:
- plt.subplot(3,4,5)
- elif id == 2022:
- plt.subplot(3,4,6)
- elif id == 2222:
- plt.subplot(3,4,7)
- elif id == 202020:
- plt.subplot(3,4,9)
- elif id == 202022:
- plt.subplot(3,4,10)
- elif id == 202222:
- plt.subplot(3,4,11)
- elif id == 222222:
- plt.subplot(3,4,12)
-
- if muons:
- plt.hist(df_id.ke.values, bins=np.linspace(20,1000e3,100), histtype='step')
- else:
- plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step')
- plt.xlabel("Energy (MeV)")
- plt.title(str(id))
-
- if title:
- plt.suptitle(title)
-
- if len(df):
- plt.tight_layout()
-
-def chunks(l, n):
- """Yield successive n-sized chunks from l."""
- for i in range(0, len(l), n):
- yield l[i:i + n]
-
-def print_warning(msg):
- print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr)
-
-def unwrap(p, delta, axis=-1):
- """
- A modified version of np.unwrap() useful for unwrapping the 50 MHz clock.
- It unwraps discontinuities bigger than delta/2 by delta.
-
- Example:
-
- >>> a = np.arange(10) % 5
- >>> a
- array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
- >>> unwrap(a,5)
- array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
-
- In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0.
- """
- p = np.asarray(p)
- nd = p.ndim
- dd = np.diff(p, axis=axis)
- slice1 = [slice(None, None)]*nd # full slices
- slice1[axis] = slice(1, None)
- slice1 = tuple(slice1)
- ddmod = np.mod(dd + delta/2, delta) - delta/2
- np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0))
- ph_correct = ddmod - dd
- np.copyto(ph_correct, 0, where=abs(dd) < delta/2)
- up = np.array(p, copy=True, dtype='d')
- up[slice1] = p[slice1] + ph_correct.cumsum(axis)
- return up
-
-def unwrap_50_mhz_clock(gtr):
- """
- Unwrap an array with 50 MHz clock times. These times should all be in
- nanoseconds and come from the KEV_GTR variable in the EV bank.
-
- Note: We assume here that the events are already ordered contiguously by
- GTID, so you shouldn't pass an array with multiple runs!
- """
- return unwrap(gtr,0x7ffffffffff*20.0)
-
-def retrigger_cut(ev):
- """
- Cuts all retrigger events.
- """
- return ev[ev.dt > 500]
-
-def breakdown_follower_cut(ev):
- """
- Cuts all events within 1 second of breakdown events.
- """
- breakdowns = ev[ev.dc & DC_BREAKDOWN != 0]
- return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)]
-
-def flasher_follower_cut(ev):
- """
- Cuts all events within 200 microseconds of flasher events.
- """
- flashers = ev[ev.dc & DC_FLASHER != 0]
- return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)]
-
-def muon_follower_cut(ev):
- """
- Cuts all events 200 microseconds after a muon.
- """
- muons = ev[ev.dc & DC_MUON != 0]
- return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)]
-
-def michel_cut(ev):
- """
- Looks for Michel electrons after muons.
- """
- prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]
-
- # Michel electrons and neutrons can be any event which is not a prompt
- # event
- follower = ev[~ev.prompt]
-
- # require Michel events to pass more of the SNO data cleaning cuts
- michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-
- michel = michel[michel.nhit >= 100]
-
- # Accept events which had a muon more than 800 nanoseconds but less
- # than 20 microseconds before them. The 800 nanoseconds cut comes from
- # Richie's thesis. He also mentions that the In Time Channel Spread Cut
- # is very effective at cutting electrons events caused by muons, so I
- # should implement that.
- #
- # Note: We currently don't look across run boundaries. This should be a
- # *very* small effect, and the logic to do so would be very complicated
- # since I would have to deal with 50 MHz clock rollovers, etc.
- #
- # Do a simple python for loop here over the runs since we create
- # temporary arrays with shape (michel.size,prompt_plus_muons.size)
- # which could be too big for the full dataset.
- #
- # There might be some clever way to do this without the loop in Pandas,
- # but I don't know how.
- if prompt_plus_muons.size and michel.size:
- return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
- (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
- else:
- return pd.DataFrame(columns=ev.columns)
-
-def atmospheric_events(ev):
- """
- Tags atmospheric events which have a neutron follower.
- """
- prompt = ev[ev.prompt]
-
- # Michel electrons and neutrons can be any event which is not a prompt
- # event
- follower = ev[~ev.prompt]
-
- ev['atm'] = np.zeros(len(ev),dtype=np.bool)
-
- if prompt.size and follower.size:
- # neutron followers have to obey stricter set of data cleaning cuts
- neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
- neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
- r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2)
- neutron = neutron[r < AV_RADIUS]
- neutron = neutron[neutron.rsp_energy > 4.0]
-
- # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
- ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
- (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
-
- return ev
-
-def gtid_sort(ev, first_gtid):
- """
- Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid
- in a run, which should be passed as a dictionary. This column can then be
- used to sort the events sequentially.
-
- This function should be passed to ev.groupby('run').apply(). We use this
- idiom instead of just looping over the groupby results since groupby()
- makes a copy of the dataframe, i.e.
-
- for run, ev_run in ev.groupby('run'):
- ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
-
- would produce a SettingWithCopyWarning, so instead we use:
-
- ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)
-
- which doesn't have this problem.
- """
- # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
- run = ev.name
-
- if run not in first_gtid:
- print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
- first_gtid[run] = ev.gtid.iloc[0]
-
- ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
-
- return ev
-
-def prompt_event(ev):
- ev['prompt'] = (ev.nhit >= 100)
- ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
- return ev
-
def radius_cut(ev):
ev['radius_cut'] = np.digitize((ev.r/PSUP_RADIUS)**3,(0.9,))
return ev
@@ -539,33 +67,6 @@ def z_cut(ev):
ev['z_cut'] = np.digitize(ev.z,(0.0,))
return ev
-class Constraint(object):
- def __init__(self, range):
- self.range = range
-
- def __call__(self, x, grad=None):
- return np.sum(x[self.range]) - 1 + EPSILON
-
- def renormalize(self, x, fix):
- if fix not in self.range:
- return x
- x = x.copy()
- while self(x) >= 0:
- for i in self.range:
- if i == fix:
- continue
- x[i] -= self(x)
- return x
-
- def renormalize_no_fix(self, x):
- x = x.copy()
- while self(x) >= 0:
- for i in self.range:
- if x[i] < 10*EPSILON:
- continue
- x[i] -= x[i]/2.0
- return x
-
# Constraint to enforce the fact that P(r,psi,z,udotr|muon) all add up to 1.0.
# In the likelihood function we set the last possibility for r and udotr equal
# to 1.0 minus the others. Therefore, we need to enforce the fact that the
@@ -596,7 +97,7 @@ flasher_r_udotr = Constraint(range(39,42))
# others must add up to less than 1.
breakdown_r_udotr = Constraint(range(44,47))
-def make_nll(data, sacrifice,constraints):
+def make_nll(data, sacrifice, constraints):
def nll(x, grad=None, fill_value=1e9):
if grad is not None and grad.size > 0:
raise Exception("nll got passed grad!")
@@ -762,7 +263,6 @@ def make_nll(data, sacrifice,constraints):
if __name__ == '__main__':
import argparse
- import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
@@ -770,9 +270,51 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
+ parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain")
parser.add_argument("--save", action="store_true", default=False, help="save plots")
args = parser.parse_args()
+ if args.save:
+ # default \textwidth for a fullpage article in Latex is 16.50764 cm.
+ # You can figure this out by compiling the following TeX document:
+ #
+ # \documentclass{article}
+ # \usepackage{fullpage}
+ # \usepackage{layouts}
+ # \begin{document}
+ # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
+ # \end{document}
+
+ width = 16.50764
+ width /= 2.54 # cm -> inches
+ # According to this page:
+ # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
+ # Tufte suggests an aspect ratio of 1.5 - 1.6.
+ height = width/1.5
+ FIGSIZE = (width,height)
+
+ import matplotlib.pyplot as plt
+
+ font = {'family':'serif', 'serif': ['computer modern roman']}
+ plt.rc('font',**font)
+
+ plt.rc('text', usetex=True)
+ else:
+ # on retina screens, the default plots are way too small
+ # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
+ # Qt5 will scale everything using the dpi in ~/.Xresources
+ import matplotlib
+ matplotlib.use("Qt5Agg")
+
+ import matplotlib.pyplot as plt
+
+ # Default figure size. Currently set to my monitor width and height so that
+ # things are properly formatted
+ FIGSIZE = (13.78,7.48)
+
+    # Make the default font bigger
+ plt.rc('font', size=22)
+
for filename in args.filenames:
ev = pd.read_hdf(filename,"ev")
@@ -1079,7 +621,7 @@ if __name__ == '__main__':
proposal = get_proposal_func(stepsizes*0.1,low,high)
sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x, grad, fill_value: -nll(x,grad,fill_value), moves=emcee.moves.MHMove(proposal),args=[None,np.inf])
with np.errstate(invalid='ignore'):
- sampler.run_mcmc(pos, 100000)
+ sampler.run_mcmc(pos, args.steps)
print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction)))
@@ -1088,47 +630,6 @@ if __name__ == '__main__':
except Exception as e:
print(e)
- if args.save:
- # default \textwidth for a fullpage article in Latex is 16.50764 cm.
- # You can figure this out by compiling the following TeX document:
- #
- # \documentclass{article}
- # \usepackage{fullpage}
- # \usepackage{layouts}
- # \begin{document}
- # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
- # \end{document}
-
- width = 16.50764
- width /= 2.54 # cm -> inches
- # According to this page:
- # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
- # Tufte suggests an aspect ratio of 1.5 - 1.6.
- height = width/1.5
- FIGSIZE = (width,height)
-
- import matplotlib.pyplot as plt
-
- font = {'family':'serif', 'serif': ['computer modern roman']}
- plt.rc('font',**font)
-
- plt.rc('text', usetex=True)
- else:
- # on retina screens, the default plots are way too small
- # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
- # Qt5 will scale everything using the dpi in ~/.Xresources
- import matplotlib
- matplotlib.use("Qt5Agg")
-
- import matplotlib.pyplot as plt
-
- # Default figure size. Currently set to my monitor width and height so that
- # things are properly formatted
- FIGSIZE = (13.78,7.48)
-
-    # Make the default font bigger
- plt.rc('font', size=22)
-
# Plot walker positions as a function of step number for the expected
# number of events
fig, axes = plt.subplots(6, figsize=FIGSIZE, num=1, sharex=True)
diff --git a/utils/plot b/utils/plot
index 548452b..72a7754 100755
--- a/utils/plot
+++ b/utils/plot
@@ -15,15 +15,6 @@
# this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function, division
-import yaml
-try:
- from yaml import CLoader as Loader
-except ImportError:
- from yaml.loader import SafeLoader as Loader
-
-import numpy as np
-from scipy.stats import iqr
-from matplotlib.lines import Line2D
# on retina screens, the default plots are way too small
# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
@@ -31,51 +22,14 @@ from matplotlib.lines import Line2D
import matplotlib
matplotlib.use("Qt5Agg")
-IDP_E_MINUS = 20
-IDP_MU_MINUS = 22
-
-SNOMAN_MASS = {
- 20: 0.511,
- 21: 0.511,
- 22: 105.658,
- 23: 105.658
-}
-
-def plot_hist(x, label=None):
- # determine the bin width using the Freedman Diaconis rule
- # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
- h = 2*iqr(x)/len(x)**(1/3)
- n = max(int((np.max(x)-np.min(x))/h),10)
- bins = np.linspace(np.min(x),np.max(x),n)
- plt.hist(x, bins=bins, histtype='step', label=label)
-
-def plot_legend(n):
- plt.figure(n)
- ax = plt.gca()
- handles, labels = ax.get_legend_handles_labels()
- new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles]
- plt.legend(handles=new_handles,labels=labels)
-
-def get_stats(x):
- """
- Returns a tuple (mean, error mean, std, error std) for the values in x.
-
- The formula for the standard error on the standard deviation comes from
- https://stats.stackexchange.com/questions/156518.
- """
- mean = np.mean(x)
- std = np.std(x)
- n = len(x)
- u4 = np.mean((x-mean)**4)
- error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
- return mean, std/np.sqrt(n), std, error
-
if __name__ == '__main__':
import argparse
import matplotlib.pyplot as plt
import numpy as np
import h5py
import pandas as pd
+ from sddm import IDP_E_MINUS, IDP_MU_MINUS, SNOMAN_MASS
+ from sddm.plot import plot_hist, plot_legend, get_stats
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
diff --git a/utils/plot-atmospheric-fluxes b/utils/plot-atmospheric-fluxes
index fb4861d..7dafbbb 100755
--- a/utils/plot-atmospheric-fluxes
+++ b/utils/plot-atmospheric-fluxes
@@ -6,132 +6,8 @@ http://www-pnp.physics.ox.ac.uk/~barr/fluxfiles/0403i/index.html.
from __future__ import print_function, division
import numpy as np
import os
-
-def splitext(path):
- """
- Like os.path.splitext() except it returns the full extension if the
- filename has multiple extensions, for example:
-
- splitext('foo.tar.gz') -> 'foo', '.tar.gz'
- """
- full_root, full_ext = os.path.splitext(path)
- while True:
- root, ext = os.path.splitext(full_root)
- if ext:
- full_ext = ext + full_ext
- full_root = root
- else:
- break
-
- return full_root, full_ext
-
-# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
-def despine(fig=None, ax=None, top=True, right=True, left=False,
- bottom=False, offset=None, trim=False):
- """Remove the top and right spines from plot(s).
-
- fig : matplotlib figure, optional
- Figure to despine all axes of, default uses current figure.
- ax : matplotlib axes, optional
- Specific axes object to despine.
- top, right, left, bottom : boolean, optional
- If True, remove that spine.
- offset : int or dict, optional
- Absolute distance, in points, spines should be moved away
- from the axes (negative values move spines inward). A single value
- applies to all spines; a dict can be used to set offset values per
- side.
- trim : bool, optional
- If True, limit spines to the smallest and largest major tick
- on each non-despined axis.
-
- Returns
- -------
- None
-
- """
- # Get references to the axes we want
- if fig is None and ax is None:
- axes = plt.gcf().axes
- elif fig is not None:
- axes = fig.axes
- elif ax is not None:
- axes = [ax]
-
- for ax_i in axes:
- for side in ["top", "right", "left", "bottom"]:
- # Toggle the spine objects
- is_visible = not locals()[side]
- ax_i.spines[side].set_visible(is_visible)
- if offset is not None and is_visible:
- try:
- val = offset.get(side, 0)
- except AttributeError:
- val = offset
- _set_spine_position(ax_i.spines[side], ('outward', val))
-
- # Potentially move the ticks
- if left and not right:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.minorTicks
- )
- ax_i.yaxis.set_ticks_position("right")
- for t in ax_i.yaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.yaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if bottom and not top:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.minorTicks
- )
- ax_i.xaxis.set_ticks_position("top")
- for t in ax_i.xaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.xaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if trim:
- # clip off the parts of the spines that extend past major ticks
- xticks = ax_i.get_xticks()
- if xticks.size:
- firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
- xticks)[0]
- lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
- xticks)[-1]
- ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
- ax_i.spines['top'].set_bounds(firsttick, lasttick)
- newticks = xticks.compress(xticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_xticks(newticks)
-
- xticks_minor = [t for t in ax_i.get_xticks(minor=True) if firsttick < t < lasttick]
- ax_i.set_xticks(xticks_minor,minor=True)
-
- yticks = ax_i.get_yticks()
- if yticks.size:
- firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
- yticks)[0]
- lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
- yticks)[-1]
- ax_i.spines['left'].set_bounds(firsttick, lasttick)
- ax_i.spines['right'].set_bounds(firsttick, lasttick)
- newticks = yticks.compress(yticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_yticks(newticks)
-
- yticks_minor = [t for t in ax_i.get_yticks(minor=True) if firsttick < t < lasttick]
- ax_i.set_yticks(yticks_minor,minor=True)
+from sddm.plot import despine
+from sddm import splitext
# Data is in the form: Fluxes in bins of neutrino energy (equally spaced bins
# in logE with 10 bins per decade with the low edge of the first bin at 100
diff --git a/utils/plot-energy b/utils/plot-energy
index a057302..5c33969 100755
--- a/utils/plot-energy
+++ b/utils/plot-energy
@@ -37,61 +37,8 @@ from scipy.stats import iqr, norm, beta
from scipy.special import spence
from itertools import izip_longest
-PSUP_RADIUS = 840.0
-
-# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python
-class bcolors:
- HEADER = '\033[95m'
- OKBLUE = '\033[94m'
- OKGREEN = '\033[92m'
- WARNING = '\033[93m'
- FAIL = '\033[91m'
- ENDC = '\033[0m'
- BOLD = '\033[1m'
- UNDERLINE = '\033[4m'
-
-# on retina screens, the default plots are way too small
-# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
-# Qt5 will scale everything using the dpi in ~/.Xresources
-import matplotlib
-matplotlib.use("Qt5Agg")
-
-font = {'family':'serif', 'serif': ['computer modern roman']}
-matplotlib.rc('font',**font)
-
-matplotlib.rc('text', usetex=True)
-
-SNOMAN_MASS = {
- 20: 0.511,
- 21: 0.511,
- 22: 105.658,
- 23: 105.658
-}
-
-AV_RADIUS = 600.0
-
-# Data cleaning bitmasks.
-DC_MUON = 0x1
-DC_JUNK = 0x2
-DC_CRATE_ISOTROPY = 0x4
-DC_QVNHIT = 0x8
-DC_NECK = 0x10
-DC_FLASHER = 0x20
-DC_ESUM = 0x40
-DC_OWL = 0x80
-DC_OWL_TRIGGER = 0x100
-DC_FTS = 0x200
-DC_ITC = 0x400
-DC_BREAKDOWN = 0x800
-
particle_id = {20: 'e', 22: r'\mu'}
-def grouper(iterable, n, fillvalue=None):
- "Collect data into fixed-length chunks or blocks"
- # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
- args = [iter(iterable)] * n
- return izip_longest(fillvalue=fillvalue, *args)
-
def plot_hist2(df, muons=False):
for id, df_id in sorted(df.groupby('id')):
if id == 20:
@@ -148,550 +95,14 @@ def plot_hist(df, muons=False):
if len(df):
plt.tight_layout()
-def chunks(l, n):
- """Yield successive n-sized chunks from l."""
- for i in range(0, len(l), n):
- yield l[i:i + n]
-
-def print_warning(msg):
- print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr)
-
-def unwrap(p, delta, axis=-1):
- """
- A modified version of np.unwrap() useful for unwrapping the 50 MHz clock.
- It unwraps discontinuities bigger than delta/2 by delta.
-
- Example:
-
- >>> a = np.arange(10) % 5
- >>> a
- array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
- >>> unwrap(a,5)
- array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
-
- In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0.
- """
- p = np.asarray(p)
- nd = p.ndim
- dd = np.diff(p, axis=axis)
- slice1 = [slice(None, None)]*nd # full slices
- slice1[axis] = slice(1, None)
- slice1 = tuple(slice1)
- ddmod = np.mod(dd + delta/2, delta) - delta/2
- np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0))
- ph_correct = ddmod - dd
- np.copyto(ph_correct, 0, where=abs(dd) < delta/2)
- up = np.array(p, copy=True, dtype='d')
- up[slice1] = p[slice1] + ph_correct.cumsum(axis)
- return up
-
-def unwrap_50_mhz_clock(gtr):
- """
- Unwrap an array with 50 MHz clock times. These times should all be in
- nanoseconds and come from the KEV_GTR variable in the EV bank.
-
- Note: We assume here that the events are already ordered contiguously by
- GTID, so you shouldn't pass an array with multiple runs!
- """
- return unwrap(gtr,0x7ffffffffff*20.0)
-
-def retrigger_cut(ev):
- """
- Cuts all retrigger events.
- """
- return ev[ev.dt > 500]
-
-def breakdown_follower_cut(ev):
- """
- Cuts all events within 1 second of breakdown events.
- """
- breakdowns = ev[ev.dc & DC_BREAKDOWN != 0]
- return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)]
-
-def flasher_follower_cut(ev):
- """
- Cuts all events within 200 microseconds of flasher events.
- """
- flashers = ev[ev.dc & DC_FLASHER != 0]
- return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)]
-
-def muon_follower_cut(ev):
- """
- Cuts all events 200 microseconds after a muon.
- """
- muons = ev[ev.dc & DC_MUON != 0]
- return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \
- (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)]
-
-def michel_cut(ev):
- """
- Looks for Michel electrons after muons.
- """
- prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]
-
- # Michel electrons and neutrons can be any event which is not a prompt
- # event
- follower = ev[~ev.prompt]
-
- # require Michel events to pass more of the SNO data cleaning cuts
- michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
-
- michel = michel[michel.nhit >= 100]
-
- # Accept events which had a muon more than 800 nanoseconds but less than 20
- # microseconds before them. The 800 nanoseconds cut comes from Richie's
- # thesis. He also mentions that the In Time Channel Spread Cut is very
- # effective at cutting electron events caused by muons, so I should
- # implement that.
- #
- # Note: We currently don't look across run boundaries. This should be a
- # *very* small effect, and the logic to do so would be very complicated
- # since I would have to deal with 50 MHz clock rollovers, etc.
- if prompt_plus_muons.size and michel.size:
- mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
- (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
- michel = michel.iloc[np.any(mask,axis=0)]
- michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
- index=michel.index.values,
- dtype=np.int32)
- return michel
- else:
- # Return an empty slice since we need it to have the same datatype as
- # the other dataframes
- michel = ev[:0]
- michel['muon_gtid'] = -1
- return michel
-
-def atmospheric_events(ev):
- """
- Tags atmospheric events which have a neutron follower.
- """
- prompt = ev[ev.prompt]
-
- # Michel electrons and neutrons can be any event which is not a prompt
- # event
- follower = ev[~ev.prompt]
-
- ev['atm'] = np.zeros(len(ev),dtype=np.bool)
-
- if prompt.size and follower.size:
- # neutron followers have to obey stricter set of data cleaning cuts
- neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
- neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
- # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9?
- neutron = neutron[neutron.ftp_r < AV_RADIUS]
- neutron = neutron[neutron.rsp_energy > 4.0]
-
- # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
- ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
- (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
-
- return ev
-
-def gtid_sort(ev, first_gtid):
- """
- Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid
- in a run, which should be passed as a dictionary. This column can then be
- used to sort the events sequentially.
-
- This function should be passed to ev.groupby('run').apply(). We use this
- idiom instead of just looping over the groupby results since groupby()
- makes a copy of the dataframe, i.e.
-
- for run, ev_run in ev.groupby('run'):
- ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
-
- would produce a SettingWithCopyWarning, so instead we use:
-
- ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)
-
- which doesn't have this problem.
- """
- # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
- run = ev.name
-
- if run not in first_gtid:
- print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
- first_gtid[run] = ev.gtid.iloc[0]
-
- ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
-
- return ev
-
-def prompt_event(ev):
- ev['prompt'] = (ev.nhit >= 100)
- ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
- return ev
-
-# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
-def despine(fig=None, ax=None, top=True, right=True, left=False,
- bottom=False, offset=None, trim=False):
- """Remove the top and right spines from plot(s).
-
- fig : matplotlib figure, optional
- Figure to despine all axes of, default uses current figure.
- ax : matplotlib axes, optional
- Specific axes object to despine.
- top, right, left, bottom : boolean, optional
- If True, remove that spine.
- offset : int or dict, optional
- Absolute distance, in points, spines should be moved away
- from the axes (negative values move spines inward). A single value
- applies to all spines; a dict can be used to set offset values per
- side.
- trim : bool, optional
- If True, limit spines to the smallest and largest major tick
- on each non-despined axis.
-
- Returns
- -------
- None
-
- """
- # Get references to the axes we want
- if fig is None and ax is None:
- axes = plt.gcf().axes
- elif fig is not None:
- axes = fig.axes
- elif ax is not None:
- axes = [ax]
-
- for ax_i in axes:
- for side in ["top", "right", "left", "bottom"]:
- # Toggle the spine objects
- is_visible = not locals()[side]
- ax_i.spines[side].set_visible(is_visible)
- if offset is not None and is_visible:
- try:
- val = offset.get(side, 0)
- except AttributeError:
- val = offset
- _set_spine_position(ax_i.spines[side], ('outward', val))
-
- # Potentially move the ticks
- if left and not right:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.minorTicks
- )
- ax_i.yaxis.set_ticks_position("right")
- for t in ax_i.yaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.yaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if bottom and not top:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.minorTicks
- )
- ax_i.xaxis.set_ticks_position("top")
- for t in ax_i.xaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.xaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if trim:
- # clip off the parts of the spines that extend past major ticks
- xticks = ax_i.get_xticks()
- if xticks.size:
- firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
- xticks)[0]
- lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
- xticks)[-1]
- ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
- ax_i.spines['top'].set_bounds(firsttick, lasttick)
- newticks = xticks.compress(xticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_xticks(newticks)
-
- yticks = ax_i.get_yticks()
- if yticks.size:
- firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
- yticks)[0]
- lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
- yticks)[-1]
- ax_i.spines['left'].set_bounds(firsttick, lasttick)
- ax_i.spines['right'].set_bounds(firsttick, lasttick)
- newticks = yticks.compress(yticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_yticks(newticks)
-
-def plot_corner_plot(ev, title, save=None):
- variables = ['r_psup','psi','z','udotr']
- labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$']
- limits = [(0,1),(0,10),(-840,840),(-1,1)]
- cuts = [0.9,6,0,-0.5]
-
- ev = ev.dropna(subset=variables)
-
- fig = plt.figure(figsize=(FIGSIZE[0],FIGSIZE[0]))
- despine(fig,trim=True)
- for i in range(len(variables)):
- for j in range(len(variables)):
- if j > i:
- continue
- ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1)
- if i == j:
- plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step')
- plt.gca().set_xlim(limits[i])
- else:
- plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5)
- plt.gca().set_xlim(limits[j])
- plt.gca().set_ylim(limits[i])
- n = len(ev)
- if n:
- p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/n
- p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/n
- p_lolo = p_i_lo*p_j_lo
- p_lohi = p_i_lo*(1-p_j_lo)
- p_hilo = (1-p_i_lo)*p_j_lo
- p_hihi = (1-p_i_lo)*(1-p_j_lo)
- n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j]))
- n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j]))
- n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] < cuts[j]))
- n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j]))
- observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi])
- expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi])
- psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum()
- psi /= np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum())
- plt.title(r"$\psi = %.1f$" % psi)
- if i == len(variables) - 1:
- plt.xlabel(labels[j])
- else:
- plt.setp(ax.get_xticklabels(),visible=False)
- if j == 0:
- plt.ylabel(labels[i])
- else:
- plt.setp(ax.get_yticklabels(),visible=False)
- plt.axvline(cuts[j],color='k',ls='--',alpha=0.5)
- if i != j:
- plt.axhline(cuts[i],color='k',ls='--',alpha=0.5)
-
- plt.tight_layout()
-
- if save:
- plt.savefig(save + ".pdf")
- plt.savefig(save + ".eps")
-
- plt.suptitle(title)
-
-def intersect_sphere(pos, dir, R):
- """
- Compute the first intersection of a ray starting at `pos` with direction
- `dir` and a sphere centered at the origin with radius `R`. The distance to
- the intersection is returned.
-
- Example:
-
- pos = np.array([0,0,0])
- dir = np.array([1,0,0])
-
- l = intersect_sphere(pos,dir,PSUP_RADIUS):
- if l is not None:
- hit = pos + l*dir
- print("ray intersects sphere at %.2f %.2f %.2f", hit[0], hit[1], hit[2])
- else:
- print("ray didn't intersect sphere")
- """
-
- b = 2*np.dot(dir,pos)
- c = np.dot(pos,pos) - R*R
-
- if b*b - 4*c <= 0:
- # Ray doesn't intersect the sphere.
- return None
-
- # First, check the shorter solution.
- l = (-b - np.sqrt(b*b - 4*c))/2
-
- # If the shorter solution is less than 0, check the second solution.
- if l < 0:
- l = (-b + np.sqrt(b*b - 4*c))/2
-
- # If the distance is still negative, we didn't intersect the sphere.
- if l < 0:
- return None
-
- return l
-
-def get_dx(row):
- pos = np.array([row.x,row.y,row.z])
- dir = np.array([np.sin(row.theta1)*np.cos(row.phi1),
- np.sin(row.theta1)*np.sin(row.phi1),
- np.cos(row.theta1)])
- l = intersect_sphere(pos,-dir,PSUP_RADIUS)
- if l is not None:
- pos -= dir*l
- michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel])
- return np.linalg.norm(michel_pos-pos)
- else:
- return 0
-
-def dx_to_energy(dx):
- lines = []
- with open("../src/muE_water_liquid.txt") as f:
- for i, line in enumerate(f):
- if i < 10:
- continue
- if 'Minimum ionization' in line:
- continue
- if 'Muon critical energy' in line:
- continue
- lines.append(line)
- data = np.genfromtxt(lines)
- return np.interp(dx,data[:,8],data[:,0])
-
-def iqr_std_err(x):
- """
- Returns the approximate standard deviation assuming the central part of the
- distribution is gaussian.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
- std = iqr(x.values)/1.3489795
- return 1.573*std/np.sqrt(n)
-
-def iqr_std(x):
- """
- Returns the approximate standard deviation assuming the central part of the
- distribution is gaussian.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return iqr(x.values)/1.3489795
-
-def quantile_error(x,q):
- """
- Returns the standard error for the qth quantile of `x`. The error is
- computed using the Maritz-Jarrett method described here:
- https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm.
- """
- x = np.sort(x)
- n = len(x)
- m = int(q*n+0.5)
- A = m - 1
- B = n - m
- i = np.arange(1,len(x)+1)
- w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B)
- return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2)
-
-def q90_err(x):
- """
- Returns the error on the 90th percentile for all the non NaN values in a
- Series `x`.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return quantile_error(x.values,0.9)
-
-def q90(x):
- """
- Returns the 90th percentile for all the non NaN values in a Series `x`.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return np.percentile(x.values,90.0)
-
-def median(x):
- """
- Returns the median for all the non NaN values in a Series `x`.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return np.median(x.values)
-
-def median_err(x):
- """
- Returns the approximate error on the median for all the non NaN values in a
- Series `x`. The error on the median is approximated assuming the central
- part of the distribution is gaussian.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- # First we estimate the standard deviation using the interquartile range.
- # Here we are essentially assuming the central part of the distribution is
- # gaussian.
- std = iqr(x.values)/1.3489795
- median = np.median(x.values)
- # Now we estimate the error on the median for a gaussian
- # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
- return 1/(2*np.sqrt(n)*norm.pdf(median,median,std))
-
-def std_err(x):
- x = x.dropna()
- mean = np.mean(x)
- std = np.std(x)
- n = len(x)
- if n == 0:
- return np.nan
- elif n == 1:
- return 0.0
- u4 = np.mean((x-mean)**4)
- error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
- return error
-
-# Fermi constant
-GF = 1.16637887e-5 # 1/MeV^2
-ELECTRON_MASS = 0.5109989461 # MeV
-MUON_MASS = 105.6583745 # MeV
-PROTON_MASS = 938.272081 # MeV
-FINE_STRUCTURE_CONSTANT = 7.297352566417e-3
-
-def f(x):
- y = (5/(3*x**2) + 16*x/3 + 4/x + (12-8*x)*np.log(1/x-1) - 8)*np.log(MUON_MASS/ELECTRON_MASS)
- y += (6-4*x)*(2*spence(x) - 2*np.log(x)**2 + np.log(x) + np.log(1-x)*(3*np.log(x)-1/x-1) - np.pi**2/3-2)
- y += (1-x)*(34*x**2+(5-34*x**2+17*x)*np.log(x) - 22*x)/(3*x**2)
- y += 6*(1-x)*np.log(x)
- return y
-
-def michel_spectrum(T):
- """
- Michel electron energy spectrum for a free muon. `T` should be the kinetic
- energy of the electron or positron in MeV.
-
- Note: The result is not normalized.
-
- From https://arxiv.org/abs/1406.3575.
- """
- E = T + ELECTRON_MASS
- x = 2*E/MUON_MASS
- mask = (x > 0) & (x < 1)
- y = np.zeros_like(x,dtype=np.double)
- y[mask] = GF**2*MUON_MASS**5*x[mask]**2*(6-4*x[mask]+FINE_STRUCTURE_CONSTANT*f(x[mask])/np.pi)/(192*np.pi**3)
- y *= 2*MUON_MASS
- return y
-
if __name__ == '__main__':
import argparse
- import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
import h5py
+ from sddm.plot_energy import *
+ from sddm.plot import despine
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
@@ -699,6 +110,47 @@ if __name__ == '__main__':
parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds")
args = parser.parse_args()
+ if args.save:
+ # default \textwidth for a fullpage article in Latex is 16.50764 cm.
+ # You can figure this out by compiling the following TeX document:
+ #
+ # \documentclass{article}
+ # \usepackage{fullpage}
+ # \usepackage{layouts}
+ # \begin{document}
+ # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
+ # \end{document}
+
+ width = 16.50764
+ width /= 2.54 # cm -> inches
+ # According to this page:
+ # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
+ # Tufte suggests an aspect ratio of 1.5 - 1.6.
+ height = width/1.5
+ FIGSIZE = (width,height)
+
+ import matplotlib.pyplot as plt
+
+ font = {'family':'serif', 'serif': ['computer modern roman']}
+ plt.rc('font',**font)
+
+ plt.rc('text', usetex=True)
+ else:
+ # on retina screens, the default plots are way too small
+ # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
+ # Qt5 will scale everything using the dpi in ~/.Xresources
+ import matplotlib
+ matplotlib.use("Qt5Agg")
+
+ import matplotlib.pyplot as plt
+
+ # Default figure size. Currently set to my monitor width and height so that
+ # things are properly formatted
+ FIGSIZE = (13.78,7.48)
+
+ # Make the default font bigger
+ plt.rc('font', size=22)
+
ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True)
rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True)
@@ -834,47 +286,6 @@ if __name__ == '__main__':
# retrigger cut
ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
- if args.save:
- # default \textwidth for a fullpage article in Latex is 16.50764 cm.
- # You can figure this out by compiling the following TeX document:
- #
- # \documentclass{article}
- # \usepackage{fullpage}
- # \usepackage{layouts}
- # \begin{document}
- # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
- # \end{document}
-
- width = 16.50764
- width /= 2.54 # cm -> inches
- # According to this page:
- # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
- # Tufte suggests an aspect ratio of 1.5 - 1.6.
- height = width/1.5
- FIGSIZE = (width,height)
-
- import matplotlib.pyplot as plt
-
- font = {'family':'serif', 'serif': ['computer modern roman']}
- plt.rc('font',**font)
-
- plt.rc('text', usetex=True)
- else:
- # on retina screens, the default plots are way too small
- # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
- # Qt5 will scale everything using the dpi in ~/.Xresources
- import matplotlib
- matplotlib.use("Qt5Agg")
-
- import matplotlib.pyplot as plt
-
- # Default figure size. Currently set to my monitor width and height so that
- # things are properly formatted
- FIGSIZE = (13.78,7.48)
-
- # Make the defalt font bigger
- plt.rc('font', size=22)
-
if args.dc:
ev = ev[ev.prompt]
diff --git a/utils/plot-fit-results b/utils/plot-fit-results
index 18139ff..e154b21 100755
--- a/utils/plot-fit-results
+++ b/utils/plot-fit-results
@@ -15,265 +15,62 @@
# this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function, division
-import numpy as np
-from scipy.stats import iqr, norm, beta
-from matplotlib.lines import Line2D
-import itertools
-
-IDP_E_MINUS = 20
-IDP_MU_MINUS = 22
-
-SNOMAN_MASS = {
- 20: 0.511,
- 21: 0.511,
- 22: 105.658,
- 23: 105.658
-}
-
-def plot_hist(x, label=None):
- # determine the bin width using the Freedman Diaconis rule
- # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
- h = 2*iqr(x)/len(x)**(1/3)
- n = max(int((np.max(x)-np.min(x))/h),10)
- bins = np.linspace(np.min(x),np.max(x),n)
- plt.hist(x, bins=bins, histtype='step', label=label)
-
-def plot_legend(n):
- plt.figure(n)
- ax = plt.gca()
- handles, labels = ax.get_legend_handles_labels()
- new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles]
- plt.legend(handles=new_handles,labels=labels)
-
-def get_stats(x):
- """
- Returns a tuple (mean, error mean, std, error std) for the values in x.
-
- The formula for the standard error on the standard deviation comes from
- https://stats.stackexchange.com/questions/156518.
- """
- mean = np.mean(x)
- std = np.std(x)
- n = len(x)
- u4 = np.mean((x-mean)**4)
- error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
- return mean, std/np.sqrt(n), std, error
-
-def iqr_std_err(x):
- """
- Returns the approximate standard deviation assuming the central part of the
- distribution is gaussian.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
- std = iqr(x.values)/1.3489795
- return 1.573*std/np.sqrt(n)
-
-def iqr_std(x):
- """
- Returns the approximate standard deviation assuming the central part of the
- distribution is gaussian.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return iqr(x.values)/1.3489795
-
-def quantile_error(x,q):
- """
- Returns the standard error for the qth quantile of `x`. The error is
- computed using the Maritz-Jarrett method described here:
- https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm.
- """
- x = np.sort(x)
- n = len(x)
- m = int(q*n+0.5)
- A = m - 1
- B = n - m
- i = np.arange(1,len(x)+1)
- w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B)
- return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2)
-
-def q90_err(x):
- """
- Returns the error on the 90th percentile for all the non NaN values in a
- Series `x`.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return quantile_error(x.values,0.9)
-
-def q90(x):
- """
- Returns the 90th percentile for all the non NaN values in a Series `x`.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return np.percentile(x.values,90.0)
-
-def median(x):
- """
- Returns the median for all the non NaN values in a Series `x`.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- return np.median(x.values)
-
-def median_err(x):
- """
- Returns the approximate error on the median for all the non NaN values in a
- Series `x`. The error on the median is approximated assuming the central
- part of the distribution is gaussian.
- """
- x = x.dropna()
- n = len(x)
- if n == 0:
- return np.nan
- # First we estimate the standard deviation using the interquartile range.
- # Here we are essentially assuming the central part of the distribution is
- # gaussian.
- std = iqr(x.values)/1.3489795
- median = np.median(x.values)
- # Now we estimate the error on the median for a gaussian
- # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
- return 1/(2*np.sqrt(n)*norm.pdf(median,median,std))
-
-def std_err(x):
- x = x.dropna()
- mean = np.mean(x)
- std = np.std(x)
- n = len(x)
- if n == 0:
- return np.nan
- elif n == 1:
- return 0.0
- u4 = np.mean((x-mean)**4)
- error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
- return error
-
-# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
-def despine(fig=None, ax=None, top=True, right=True, left=False,
- bottom=False, offset=None, trim=False):
- """Remove the top and right spines from plot(s).
-
- fig : matplotlib figure, optional
- Figure to despine all axes of, default uses current figure.
- ax : matplotlib axes, optional
- Specific axes object to despine.
- top, right, left, bottom : boolean, optional
- If True, remove that spine.
- offset : int or dict, optional
- Absolute distance, in points, spines should be moved away
- from the axes (negative values move spines inward). A single value
- applies to all spines; a dict can be used to set offset values per
- side.
- trim : bool, optional
- If True, limit spines to the smallest and largest major tick
- on each non-despined axis.
-
- Returns
- -------
- None
-
- """
- # Get references to the axes we want
- if fig is None and ax is None:
- axes = plt.gcf().axes
- elif fig is not None:
- axes = fig.axes
- elif ax is not None:
- axes = [ax]
-
- for ax_i in axes:
- for side in ["top", "right", "left", "bottom"]:
- # Toggle the spine objects
- is_visible = not locals()[side]
- ax_i.spines[side].set_visible(is_visible)
- if offset is not None and is_visible:
- try:
- val = offset.get(side, 0)
- except AttributeError:
- val = offset
- _set_spine_position(ax_i.spines[side], ('outward', val))
-
- # Potentially move the ticks
- if left and not right:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.minorTicks
- )
- ax_i.yaxis.set_ticks_position("right")
- for t in ax_i.yaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.yaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if bottom and not top:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.minorTicks
- )
- ax_i.xaxis.set_ticks_position("top")
- for t in ax_i.xaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.xaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if trim:
- # clip off the parts of the spines that extend past major ticks
- xticks = ax_i.get_xticks()
- if xticks.size:
- firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
- xticks)[0]
- lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
- xticks)[-1]
- ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
- ax_i.spines['top'].set_bounds(firsttick, lasttick)
- newticks = xticks.compress(xticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_xticks(newticks)
-
- yticks = ax_i.get_yticks()
- if yticks.size:
- firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
- yticks)[0]
- lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
- yticks)[-1]
- ax_i.spines['left'].set_bounds(firsttick, lasttick)
- ax_i.spines['right'].set_bounds(firsttick, lasttick)
- newticks = yticks.compress(yticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_yticks(newticks)
if __name__ == '__main__':
import argparse
import numpy as np
import h5py
import pandas as pd
+ import itertools
+ from sddm import IDP_E_MINUS, IDP_MU_MINUS, SNOMAN_MASS
+ from sddm.plot import plot_hist, plot_legend, get_stats, despine, iqr_std_err, iqr_std, quantile_error, q90_err, q90, median, median_err, std_err
parser = argparse.ArgumentParser("plot fit results")
parser.add_argument("filenames", nargs='+', help="input files")
parser.add_argument("--save", action="store_true", default=False, help="save plots")
args = parser.parse_args()
+ if args.save:
+ # default \textwidth for a fullpage article in Latex is 16.50764 cm.
+ # You can figure this out by compiling the following TeX document:
+ #
+ # \documentclass{article}
+ # \usepackage{fullpage}
+ # \usepackage{layouts}
+ # \begin{document}
+ # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
+ # \end{document}
+
+ width = 16.50764
+ width /= 2.54 # cm -> inches
+ # According to this page:
+ # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
+ # Tufte suggests an aspect ratio of 1.5 - 1.6.
+ height = width/1.5
+ FIGSIZE = (width,height)
+
+ import matplotlib.pyplot as plt
+
+ font = {'family':'serif', 'serif': ['computer modern roman']}
+ plt.rc('font',**font)
+
+ plt.rc('text', usetex=True)
+ else:
+ # on retina screens, the default plots are way too small
+ # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
+ # Qt5 will scale everything using the dpi in ~/.Xresources
+ import matplotlib
+ matplotlib.use("Qt5Agg")
+
+ import matplotlib.pyplot as plt
+
+ # Default figure size. Currently set to my monitor width and height so that
+ # things are properly formatted
+ FIGSIZE = (13.78,7.48)
+
+    # Make the default font bigger
+ plt.rc('font', size=22)
+
# Read in all the data.
#
# Note: We have to add the filename as a column to each dataset since this
@@ -351,47 +148,6 @@ if __name__ == '__main__':
markers = itertools.cycle(('o', 'v'))
- if args.save:
- # default \textwidth for a fullpage article in Latex is 16.50764 cm.
- # You can figure this out by compiling the following TeX document:
- #
- # \documentclass{article}
- # \usepackage{fullpage}
- # \usepackage{layouts}
- # \begin{document}
- # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
- # \end{document}
-
- width = 16.50764
- width /= 2.54 # cm -> inches
- # According to this page:
- # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
- # Tufte suggests an aspect ratio of 1.5 - 1.6.
- height = width/1.5
- FIGSIZE = (width,height)
-
- import matplotlib.pyplot as plt
-
- font = {'family':'serif', 'serif': ['computer modern roman']}
- plt.rc('font',**font)
-
- plt.rc('text', usetex=True)
- else:
- # on retina screens, the default plots are way too small
- # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
- # Qt5 will scale everything using the dpi in ~/.Xresources
- import matplotlib
- matplotlib.use("Qt5Agg")
-
- import matplotlib.pyplot as plt
-
- # Default figure size. Currently set to my monitor width and height so that
- # things are properly formatted
- FIGSIZE = (13.78,7.48)
-
- # Make the defalt font bigger
- plt.rc('font', size=22)
-
fig3, ax3 = plt.subplots(3,1,figsize=FIGSIZE,num=3,sharex=True)
fig4, ax4 = plt.subplots(3,1,figsize=FIGSIZE,num=4,sharex=True)
diff --git a/utils/plot-root-results b/utils/plot-root-results
index e8dcf19..7f115f1 100755
--- a/utils/plot-root-results
+++ b/utils/plot-root-results
@@ -17,113 +17,11 @@
from __future__ import print_function, division
import numpy as np
-# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
-def despine(fig=None, ax=None, top=True, right=True, left=False,
- bottom=False, offset=None, trim=False):
- """Remove the top and right spines from plot(s).
-
- fig : matplotlib figure, optional
- Figure to despine all axes of, default uses current figure.
- ax : matplotlib axes, optional
- Specific axes object to despine.
- top, right, left, bottom : boolean, optional
- If True, remove that spine.
- offset : int or dict, optional
- Absolute distance, in points, spines should be moved away
- from the axes (negative values move spines inward). A single value
- applies to all spines; a dict can be used to set offset values per
- side.
- trim : bool, optional
- If True, limit spines to the smallest and largest major tick
- on each non-despined axis.
-
- Returns
- -------
- None
-
- """
- # Get references to the axes we want
- if fig is None and ax is None:
- axes = plt.gcf().axes
- elif fig is not None:
- axes = fig.axes
- elif ax is not None:
- axes = [ax]
-
- for ax_i in axes:
- for side in ["top", "right", "left", "bottom"]:
- # Toggle the spine objects
- is_visible = not locals()[side]
- ax_i.spines[side].set_visible(is_visible)
- if offset is not None and is_visible:
- try:
- val = offset.get(side, 0)
- except AttributeError:
- val = offset
- _set_spine_position(ax_i.spines[side], ('outward', val))
-
- # Potentially move the ticks
- if left and not right:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.yaxis.minorTicks
- )
- ax_i.yaxis.set_ticks_position("right")
- for t in ax_i.yaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.yaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if bottom and not top:
- maj_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.majorTicks
- )
- min_on = any(
- t.tick1line.get_visible()
- for t in ax_i.xaxis.minorTicks
- )
- ax_i.xaxis.set_ticks_position("top")
- for t in ax_i.xaxis.majorTicks:
- t.tick2line.set_visible(maj_on)
- for t in ax_i.xaxis.minorTicks:
- t.tick2line.set_visible(min_on)
-
- if trim:
- # clip off the parts of the spines that extend past major ticks
- xticks = ax_i.get_xticks()
- if xticks.size:
- firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
- xticks)[0]
- lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
- xticks)[-1]
- ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
- ax_i.spines['top'].set_bounds(firsttick, lasttick)
- newticks = xticks.compress(xticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_xticks(newticks)
-
- yticks = ax_i.get_yticks()
- if yticks.size:
- firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
- yticks)[0]
- lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
- yticks)[-1]
- ax_i.spines['left'].set_bounds(firsttick, lasttick)
- ax_i.spines['right'].set_bounds(firsttick, lasttick)
- newticks = yticks.compress(yticks <= lasttick)
- newticks = newticks.compress(newticks >= firsttick)
- ax_i.set_yticks(newticks)
-
if __name__ == '__main__':
import ROOT
import argparse
from os.path import split
- from matplotlib.ticker import FuncFormatter
+ from sddm.plot import despine
parser = argparse.ArgumentParser("plot ROOT fit results")
parser.add_argument("filename", help="input file")
diff --git a/utils/sddm/.gitignore b/utils/sddm/.gitignore
new file mode 100644
index 0000000..0d20b64
--- /dev/null
+++ b/utils/sddm/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py
new file mode 100644
index 0000000..2525383
--- /dev/null
+++ b/utils/sddm/__init__.py
@@ -0,0 +1,139 @@
+from __future__ import print_function, division
+import sys
+from itertools import izip_longest
+import os
+
+IDP_E_MINUS = 20
+IDP_MU_MINUS = 22
+
+SNOMAN_MASS = {
+ 20: 0.511,
+ 21: 0.511,
+ 22: 105.658,
+ 23: 105.658
+}
+
+AV_RADIUS = 600.0
+PSUP_RADIUS = 840.0
+
+# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python
+class bcolors:
+ HEADER = '\033[95m'
+ OKBLUE = '\033[94m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+ UNDERLINE = '\033[4m'
+
+def print_warning(msg):
+ print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr)
+
+def chunks(l, n):
+ """Yield successive n-sized chunks from l."""
+ for i in range(0, len(l), n):
+ yield l[i:i + n]
+
+def grouper(iterable, n, fillvalue=None):
+ "Collect data into fixed-length chunks or blocks"
+ # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
+ args = [iter(iterable)] * n
+ return izip_longest(fillvalue=fillvalue, *args)
+
+def splitext(path):
+ """
+ Like os.path.splitext() except it returns the full extension if the
+ filename has multiple extensions, for example:
+
+ splitext('foo.tar.gz') -> 'foo', '.tar.gz'
+ """
+ full_root, full_ext = os.path.splitext(path)
+ while True:
+ root, ext = os.path.splitext(full_root)
+ if ext:
+ full_ext = ext + full_ext
+ full_root = root
+ else:
+ break
+
+ return full_root, full_ext
+
+# Next two functions are a backport of the shutil.which() function from Python
+# 3.3 from Lib/shutil.py in the CPython code See
+# https://github.com/python/cpython/blob/master/Lib/shutil.py.
+
+# Check that a given file can be accessed with the correct mode.
+# Additionally check that `file` is not a directory, as on Windows
+# directories pass the os.access check.
+def _access_check(fn, mode):
+ return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn))
+
+def which(cmd, mode=os.F_OK | os.X_OK, path=None):
+ """Given a command, mode, and a PATH string, return the path which
+ conforms to the given mode on the PATH, or None if there is no such
+ file.
+ `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
+ of os.environ.get("PATH"), or can be overridden with a custom search
+ path.
+ """
+ # If we're given a path with a directory part, look it up directly rather
+ # than referring to PATH directories. This includes checking relative to the
+ # current directory, e.g. ./script
+ if os.path.dirname(cmd):
+ if _access_check(cmd, mode):
+ return cmd
+ return None
+
+ if path is None:
+ path = os.environ.get("PATH", None)
+ if path is None:
+ try:
+ path = os.confstr("CS_PATH")
+ except (AttributeError, ValueError):
+ # os.confstr() or CS_PATH is not available
+ path = os.defpath
+ # bpo-35755: Don't use os.defpath if the PATH environment variable is
+ # set to an empty string
+
+ # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+ if not path:
+ return None
+
+ path = path.split(os.pathsep)
+
+ if sys.platform == "win32":
+ # The current directory takes precedence on Windows.
+ curdir = os.curdir
+ if curdir not in path:
+ path.insert(0, curdir)
+
+ # PATHEXT is necessary to check on Windows.
+ pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
+ # See if the given file matches any of the expected path extensions.
+ # This will allow us to short circuit when given "python.exe".
+ # If it does match, only test that one, otherwise we have to try
+ # others.
+ if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
+ files = [cmd]
+ else:
+ files = [cmd + ext for ext in pathext]
+ else:
+ # On other platforms you don't have things like PATHEXT to tell you
+ # what file suffixes are executable, so just pass on cmd as-is.
+ files = [cmd]
+
+ seen = set()
+ for dir in path:
+ normdir = os.path.normcase(dir)
+ if not normdir in seen:
+ seen.add(normdir)
+ for thefile in files:
+ name = os.path.join(dir, thefile)
+ if _access_check(name, mode):
+ return name
+ return None
+
+from . import plot
+from . import plot_energy
+from . import dc
diff --git a/utils/sddm/dc.py b/utils/sddm/dc.py
new file mode 100755
index 0000000..930d32f
--- /dev/null
+++ b/utils/sddm/dc.py
@@ -0,0 +1,366 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr
+import nlopt
+from scipy.stats import poisson
+import contextlib
+import sys
+from math import exp
+import emcee
+from scipy.optimize import brentq
+from scipy.stats import truncnorm
+from matplotlib.lines import Line2D
+
+# Data cleaning bitmasks.
+DC_MUON = 0x1
+DC_JUNK = 0x2
+DC_CRATE_ISOTROPY = 0x4
+DC_QVNHIT = 0x8
+DC_NECK = 0x10
+DC_FLASHER = 0x20
+DC_ESUM = 0x40
+DC_OWL = 0x80
+DC_OWL_TRIGGER = 0x100
+DC_FTS = 0x200
+DC_ITC = 0x400
+DC_BREAKDOWN = 0x800
+
+# Lower bound for probabilities in fit. We set this to be nonzero to prevent
+# nans in case an expected value is predicted to be zero.
+EPSILON = 1e-10
+
+def get_proposal_func(stepsizes, low, high):
+ """
+ Returns a function which produces proposal steps for a Metropolis Hastings
+ MCMC. This function can then be passed to emcee.moves.MHMove().
+
+ Steps are proposed according to a truncated normal distribution with mean
+ at the current position and with standard deviations given by stepsizes.
+ The bounds for each step are given by the two arrays low and high.
+ """
+ stepsizes = np.atleast_1d(stepsizes)
+ low = np.atleast_1d(low)
+ high = np.atleast_1d(high)
+ def proposal(x0, random):
+ """
+ Returns a tuple of proposed positions and the log-ratio of the proposal
+ probabilities.
+
+ x0 should be a (K,ndim) dimensional numpy array where K is the number
+ of walkers and ndim is the number of dimensions in the likelihood function.
+ """
+ # calculate a and b needed for truncnorm
+ # Note: These are not just low and high b/c
+ #
+ # The standard form of this distribution is a standard normal
+ # truncated to the range [a, b] --- notice that a and b are defined
+ # over the domain of the standard normal. To convert clip values
+ # for a specific mean and standard deviation, use::
+ #
+ # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std
+ a, b = (low - x0)/stepsizes, (high - x0)/stepsizes
+ x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random)
+ # Note: Should be able to do this:
+ #
+ # log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1)
+ #
+ # but I think there is a bug in truncnorm.logpdf() which barfs when
+ # passed 2D arrays, so instead we just loop over the first axis.
+ log_p_x_given_x0 = np.empty(x0.shape[0])
+ for i in range(x0.shape[0]):
+ log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum()
+ a, b = (low - x)/stepsizes, (high - x)/stepsizes
+ # Note: Should be able to do this:
+ #
+ # log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1)
+ #
+ # but I think there is a bug in truncnorm.logpdf() which barfs when
+ # passed 2D arrays, so instead we just loop over the first axis.
+ log_p_x0_given_x = np.empty(x0.shape[0])
+ for i in range(x0.shape[0]):
+ log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum()
+ return x, log_p_x0_given_x - log_p_x_given_x0
+ return proposal
+
+def estimate_errors(nll, xopt, constraints):
+ """
+ Return approximate 1 sigma errors for each parameter assuming all
+ parameters are uncorrelated.
+ """
+ nll_xopt = nll(xopt)
+
+ def root(x, xopt, i):
+ """
+        Root function which crosses zero where the negative log likelihood rises 0.5 above its minimum.
+ """
+ xtest = xopt.copy()
+ xtest[i] = x
+ for constraint in constraints:
+ if constraint(xtest) >= 0:
+ xtest = constraint.renormalize(xtest,i)
+ return nll(xtest) - (nll_xopt + 0.5)
+
+ errors = np.empty_like(xopt)
+
+ for i in range(len(xopt)):
+ if i < 6:
+ xlo = brentq(root,0,xopt[i],args=(xopt,i))
+ xhi = brentq(root,xopt[i],1e9,args=(xopt,i))
+ else:
+ xlo = brentq(root,0,xopt[i],args=(xopt,i))
+ xhi = brentq(root,xopt[i],1,args=(xopt,i))
+
+ errors[i] = max(xopt[i]-xlo,xhi-xopt[i])
+
+ return errors
+
+def metropolis_hastings(nll, x0, stepsizes=None, size=1000):
+ x = np.asarray(x0)
+
+ if stepsizes is None:
+ stepsizes = np.ones_like(x)
+ else:
+ stepsizes = np.asarray(stepsizes)
+
+ accept = 0
+
+ nll_old = nll(x)
+
+ samples = np.empty((size,len(x0)),dtype=np.double)
+
+ try:
+ for i in range(size):
+ if i % 100 == 0:
+ sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1)))
+ sys.stdout.flush()
+
+ xnew = x + np.random.randn(len(x))*stepsizes
+
+ nll_new = nll(xnew)
+
+ if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new):
+ x = xnew
+ nll_new = nll_old
+ accept += 1
+
+ samples[i] = x
+ except KeyboardInterrupt:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+ print("ctrl-c caught. quitting...")
+ samples.resize((i,len(x)))
+ else:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+ return samples
+
+def unwrap(p, delta, axis=-1):
+ """
+ A modified version of np.unwrap() useful for unwrapping the 50 MHz clock.
+ It unwraps discontinuities bigger than delta/2 by delta.
+
+ Example:
+
+ >>> a = np.arange(10) % 5
+ >>> a
+ array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
+ >>> unwrap(a,5)
+ array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
+
+ In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0.
+ """
+ p = np.asarray(p)
+ nd = p.ndim
+ dd = np.diff(p, axis=axis)
+ slice1 = [slice(None, None)]*nd # full slices
+ slice1[axis] = slice(1, None)
+ slice1 = tuple(slice1)
+ ddmod = np.mod(dd + delta/2, delta) - delta/2
+ np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0))
+ ph_correct = ddmod - dd
+ np.copyto(ph_correct, 0, where=abs(dd) < delta/2)
+ up = np.array(p, copy=True, dtype='d')
+ up[slice1] = p[slice1] + ph_correct.cumsum(axis)
+ return up
+
+def unwrap_50_mhz_clock(gtr):
+ """
+ Unwrap an array with 50 MHz clock times. These times should all be in
+ nanoseconds and come from the KEV_GTR variable in the EV bank.
+
+ Note: We assume here that the events are already ordered contiguously by
+ GTID, so you shouldn't pass an array with multiple runs!
+ """
+ return unwrap(gtr,0x7ffffffffff*20.0)
+
+def retrigger_cut(ev):
+ """
+ Cuts all retrigger events.
+ """
+ return ev[ev.dt > 500]
+
+def breakdown_follower_cut(ev):
+ """
+    Cuts all events within 1 second after breakdown events.
+ """
+ breakdowns = ev[ev.dc & DC_BREAKDOWN != 0]
+ return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \
+ (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)]
+
+def flasher_follower_cut(ev):
+ """
+    Cuts all events within 200 microseconds after flasher events.
+ """
+ flashers = ev[ev.dc & DC_FLASHER != 0]
+ return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \
+ (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)]
+
+def muon_follower_cut(ev):
+ """
+    Cuts all events within 200 microseconds after a muon.
+ """
+ muons = ev[ev.dc & DC_MUON != 0]
+ return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \
+ (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)]
+
+def michel_cut(ev):
+ """
+ Looks for Michel electrons after muons.
+ """
+ prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]
+
+ # Michel electrons and neutrons can be any event which is not a prompt
+ # event
+ follower = ev[~ev.prompt]
+
+ # require Michel events to pass more of the SNO data cleaning cuts
+ michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
+
+ michel = michel[michel.nhit >= 100]
+
+ # Accept events which had a muon more than 800 nanoseconds but less
+ # than 20 microseconds before them. The 800 nanoseconds cut comes from
+ # Richie's thesis. He also mentions that the In Time Channel Spread Cut
+ # is very effective at cutting electrons events caused by muons, so I
+ # should implement that.
+ #
+ # Note: We currently don't look across run boundaries. This should be a
+ # *very* small effect, and the logic to do so would be very complicated
+ # since I would have to deal with 50 MHz clock rollovers, etc.
+ #
+ # Do a simple python for loop here over the runs since we create
+ # temporary arrays with shape (michel.size,prompt_plus_muons.size)
+ # which could be too big for the full dataset.
+ #
+ # There might be some clever way to do this without the loop in Pandas,
+ # but I don't know how.
+ if prompt_plus_muons.size and michel.size:
+ return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
+ (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)]
+ else:
+ return pd.DataFrame(columns=ev.columns)
+
+def atmospheric_events(ev):
+ """
+ Tags atmospheric events which have a neutron follower.
+ """
+ prompt = ev[ev.prompt]
+
+ # Michel electrons and neutrons can be any event which is not a prompt
+ # event
+ follower = ev[~ev.prompt]
+
+ ev['atm'] = np.zeros(len(ev),dtype=np.bool)
+
+ if prompt.size and follower.size:
+ # neutron followers have to obey stricter set of data cleaning cuts
+ neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
+ neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
+ r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2)
+ neutron = neutron[r < AV_RADIUS]
+ neutron = neutron[neutron.rsp_energy > 4.0]
+
+ # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
+ ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
+ (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
+
+ return ev
+
+def gtid_sort(ev, first_gtid):
+ """
+    Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid
+    in a run (`first_gtid` should be a dictionary mapping run number to first
+    gtid). This column can then be used to sort the events sequentially.
+
+ This function should be passed to ev.groupby('run').apply(). We use this
+ idiom instead of just looping over the groupby results since groupby()
+ makes a copy of the dataframe, i.e.
+
+ for run, ev_run in ev.groupby('run'):
+ ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+ would produce a SettingWithCopyWarning, so instead we use:
+
+ ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)
+
+ which doesn't have this problem.
+ """
+ # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
+ run = ev.name
+
+ if run not in first_gtid:
+ print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
+ first_gtid[run] = ev.gtid.iloc[0]
+
+ ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+ return ev
+
+def prompt_event(ev):
+ ev['prompt'] = (ev.nhit >= 100)
+ ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
+ return ev
+
+class Constraint(object):
+ def __init__(self, range):
+ self.range = range
+
+ def __call__(self, x, grad=None):
+ return np.sum(x[self.range]) - 1 + EPSILON
+
+ def renormalize(self, x, fix):
+ if fix not in self.range:
+ return x
+ x = x.copy()
+ while self(x) >= 0:
+ for i in self.range:
+ if i == fix:
+ continue
+ x[i] -= self(x)
+ return x
+
+ def renormalize_no_fix(self, x):
+ x = x.copy()
+ while self(x) >= 0:
+ for i in self.range:
+ if x[i] < 10*EPSILON:
+ continue
+ x[i] -= x[i]/2.0
+ return x
diff --git a/utils/sddm/logger.py b/utils/sddm/logger.py
new file mode 100644
index 0000000..c641893
--- /dev/null
+++ b/utils/sddm/logger.py
@@ -0,0 +1,70 @@
+import os
+import sys
+from datetime import datetime
+
+DEBUG = 0
+VERBOSE = 1
+NOTICE = 2
+WARNING = 3
+
+class Logger(object):
+ """
+ Simple logger class that I wrote for the SNO+ DAQ. Very easy to use:
+
+ log = Logger()
+ log.set_logfile("test.log")
+ log.notice("blah")
+ log.warn("foo")
+
+ The log file format is taken from the Redis log file format which is really
+ nice since it shows the exact time and severity of each log message.
+ """
+ def __init__(self):
+ self.logfile = sys.stdout
+ # by default, we log everything
+ self.verbosity = DEBUG
+
+ def set_verbosity(self, level):
+ if isinstance(level, int):
+ self.verbosity = level
+ elif isinstance(level, basestring):
+ if level == 'debug':
+ self.verbosity = DEBUG
+ elif level == 'verbose':
+ self.verbosity = VERBOSE
+ elif level == 'notice':
+ self.verbosity = NOTICE
+ elif level == 'warning':
+ self.verbosity = WARNING
+ else:
+ raise ValueError("unknown loglevel '%s'" % level)
+ else:
+ raise TypeError("level must be a string or integer")
+
+ def set_logfile(self, filename):
+ self.logfile = open(filename, 'a')
+
+ def debug(self, msg):
+ self.log(DEBUG, msg)
+
+ def verbose(self, msg):
+ self.log(VERBOSE, msg)
+
+ def notice(self, msg):
+ self.log(NOTICE, msg)
+
+ def warn(self, msg):
+ self.log(WARNING, msg)
+
+ def log(self, level, msg):
+ if level < self.verbosity:
+ return
+
+ c = '.-*#'
+ pid = os.getpid()
+ now = datetime.now()
+ buf = now.strftime('%d %b %H:%M:%S.%f')[:-3]
+
+ self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg))
+ self.logfile.flush()
+
diff --git a/utils/sddm/plot.py b/utils/sddm/plot.py
new file mode 100755
index 0000000..b00adbd
--- /dev/null
+++ b/utils/sddm/plot.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr
+from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
+
+def plot_hist(x, label=None):
+ # determine the bin width using the Freedman Diaconis rule
+ # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
+ import matplotlib.pyplot as plt
+ h = 2*iqr(x)/len(x)**(1/3)
+ n = max(int((np.max(x)-np.min(x))/h),10)
+ bins = np.linspace(np.min(x),np.max(x),n)
+ plt.hist(x, bins=bins, histtype='step', label=label)
+
+def plot_legend(n):
+ import matplotlib.pyplot as plt
+ plt.figure(n)
+ ax = plt.gca()
+ handles, labels = ax.get_legend_handles_labels()
+ new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles]
+ plt.legend(handles=new_handles,labels=labels)
+
+def get_stats(x):
+ """
+ Returns a tuple (mean, error mean, std, error std) for the values in x.
+
+ The formula for the standard error on the standard deviation comes from
+ https://stats.stackexchange.com/questions/156518.
+ """
+ mean = np.mean(x)
+ std = np.std(x)
+ n = len(x)
+ u4 = np.mean((x-mean)**4)
+ error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
+ return mean, std/np.sqrt(n), std, error
+
+# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py
+def despine(fig=None, ax=None, top=True, right=True, left=False,
+ bottom=False, offset=None, trim=False):
+ """Remove the top and right spines from plot(s).
+
+ fig : matplotlib figure, optional
+ Figure to despine all axes of, default uses current figure.
+ ax : matplotlib axes, optional
+ Specific axes object to despine.
+ top, right, left, bottom : boolean, optional
+ If True, remove that spine.
+ offset : int or dict, optional
+ Absolute distance, in points, spines should be moved away
+ from the axes (negative values move spines inward). A single value
+ applies to all spines; a dict can be used to set offset values per
+ side.
+ trim : bool, optional
+ If True, limit spines to the smallest and largest major tick
+ on each non-despined axis.
+
+ Returns
+ -------
+ None
+
+ """
+ import matplotlib.pyplot as plt
+ # Get references to the axes we want
+ if fig is None and ax is None:
+ axes = plt.gcf().axes
+ elif fig is not None:
+ axes = fig.axes
+ elif ax is not None:
+ axes = [ax]
+
+ for ax_i in axes:
+ for side in ["top", "right", "left", "bottom"]:
+ # Toggle the spine objects
+ is_visible = not locals()[side]
+ ax_i.spines[side].set_visible(is_visible)
+ if offset is not None and is_visible:
+ try:
+ val = offset.get(side, 0)
+ except AttributeError:
+ val = offset
+ _set_spine_position(ax_i.spines[side], ('outward', val))
+
+ # Potentially move the ticks
+ if left and not right:
+ maj_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.yaxis.majorTicks
+ )
+ min_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.yaxis.minorTicks
+ )
+ ax_i.yaxis.set_ticks_position("right")
+ for t in ax_i.yaxis.majorTicks:
+ t.tick2line.set_visible(maj_on)
+ for t in ax_i.yaxis.minorTicks:
+ t.tick2line.set_visible(min_on)
+
+ if bottom and not top:
+ maj_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.xaxis.majorTicks
+ )
+ min_on = any(
+ t.tick1line.get_visible()
+ for t in ax_i.xaxis.minorTicks
+ )
+ ax_i.xaxis.set_ticks_position("top")
+ for t in ax_i.xaxis.majorTicks:
+ t.tick2line.set_visible(maj_on)
+ for t in ax_i.xaxis.minorTicks:
+ t.tick2line.set_visible(min_on)
+
+ if trim:
+ # clip off the parts of the spines that extend past major ticks
+ xticks = ax_i.get_xticks()
+ if xticks.size:
+ firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
+ xticks)[0]
+ lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
+ xticks)[-1]
+ ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
+ ax_i.spines['top'].set_bounds(firsttick, lasttick)
+ newticks = xticks.compress(xticks <= lasttick)
+ newticks = newticks.compress(newticks >= firsttick)
+ ax_i.set_xticks(newticks)
+
+ yticks = ax_i.get_yticks()
+ if yticks.size:
+ firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
+ yticks)[0]
+ lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
+ yticks)[-1]
+ ax_i.spines['left'].set_bounds(firsttick, lasttick)
+ ax_i.spines['right'].set_bounds(firsttick, lasttick)
+ newticks = yticks.compress(yticks <= lasttick)
+ newticks = newticks.compress(newticks >= firsttick)
+ ax_i.set_yticks(newticks)
+
+def iqr_std_err(x):
+ """
+ Returns the approximate standard deviation assuming the central part of the
+ distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range
+ std = iqr(x.values)/1.3489795
+ return 1.573*std/np.sqrt(n)
+
+def iqr_std(x):
+ """
+ Returns the approximate standard deviation assuming the central part of the
+ distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return iqr(x.values)/1.3489795
+
+def quantile_error(x,q):
+ """
+ Returns the standard error for the qth quantile of `x`. The error is
+ computed using the Maritz-Jarrett method described here:
+ https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm.
+ """
+ x = np.sort(x)
+ n = len(x)
+ m = int(q*n+0.5)
+ A = m - 1
+ B = n - m
+ i = np.arange(1,len(x)+1)
+ w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B)
+ return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2)
+
+def q90_err(x):
+ """
+ Returns the error on the 90th percentile for all the non NaN values in a
+ Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return quantile_error(x.values,0.9)
+
+def q90(x):
+ """
+ Returns the 90th percentile for all the non NaN values in a Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return np.percentile(x.values,90.0)
+
+def median(x):
+ """
+ Returns the median for all the non NaN values in a Series `x`.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ return np.median(x.values)
+
+def median_err(x):
+ """
+ Returns the approximate error on the median for all the non NaN values in a
+ Series `x`. The error on the median is approximated assuming the central
+ part of the distribution is gaussian.
+ """
+ x = x.dropna()
+ n = len(x)
+ if n == 0:
+ return np.nan
+ # First we estimate the standard deviation using the interquartile range.
+ # Here we are essentially assuming the central part of the distribution is
+ # gaussian.
+ std = iqr(x.values)/1.3489795
+ median = np.median(x.values)
+ # Now we estimate the error on the median for a gaussian
+ # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians.
+ return 1/(2*np.sqrt(n)*norm.pdf(median,median,std))
+
+def std_err(x):
+ x = x.dropna()
+ mean = np.mean(x)
+ std = np.std(x)
+ n = len(x)
+ if n == 0:
+ return np.nan
+ elif n == 1:
+ return 0.0
+ u4 = np.mean((x-mean)**4)
+ error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std)
+ return error
diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py
new file mode 100755
index 0000000..a24ea00
--- /dev/null
+++ b/utils/sddm/plot_energy.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr, poisson
+from matplotlib.lines import Line2D
+from scipy.stats import iqr, norm, beta
+from scipy.special import spence
+from itertools import izip_longest
+from .dc import DC_MUON, DC_JUNK, DC_CRATE_ISOTROPY, DC_QVNHIT, DC_NECK, DC_FLASHER, DC_ESUM, DC_OWL, DC_OWL_TRIGGER, DC_FTS, DC_ITC, DC_BREAKDOWN
+from . import grouper, print_warning, AV_RADIUS, PSUP_RADIUS
+import pandas as pd
+
# Fermi constant
GF = 1.16637887e-5 # 1/MeV^2
# Particle masses (PDG values)
ELECTRON_MASS = 0.5109989461 # MeV
MUON_MASS = 105.6583745 # MeV
PROTON_MASS = 938.272081 # MeV
# Fine structure constant (dimensionless)
FINE_STRUCTURE_CONSTANT = 7.297352566417e-3
+
def unwrap(p, delta, axis=-1):
    """
    A modified version of np.unwrap() useful for unwrapping the 50 MHz clock.
    It unwraps discontinuities bigger than delta/2 by delta.

    Example:

        >>> a = np.arange(10) % 5
        >>> a
        array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
        >>> unwrap(a,5)
        array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.])

    In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0.
    """
    p = np.asarray(p)
    diffs = np.diff(p, axis=axis)
    # Wrap the successive differences into the interval [-delta/2, delta/2).
    wrapped = np.mod(diffs + delta/2, delta) - delta/2
    # A positive jump that lands exactly on the -delta/2 boundary should map
    # to +delta/2 instead.
    np.copyto(wrapped, delta/2, where=(wrapped == -delta/2) & (diffs > 0))
    correction = wrapped - diffs
    # Differences smaller than delta/2 are not discontinuities; leave them be.
    np.copyto(correction, 0, where=abs(diffs) < delta/2)
    # Build a slice selecting everything but the first element along `axis`.
    tail = [slice(None, None)]*p.ndim
    tail[axis] = slice(1, None)
    tail = tuple(tail)
    unwrapped = np.array(p, copy=True, dtype='d')
    unwrapped[tail] = p[tail] + correction.cumsum(axis)
    return unwrapped
+
def unwrap_50_mhz_clock(gtr):
    """
    Unwrap an array with 50 MHz clock times. These times should all be in
    nanoseconds and come from the KEV_GTR variable in the EV bank.

    Note: We assume here that the events are already ordered contiguously by
    GTID, so you shouldn't pass an array with multiple runs!
    """
    # The 50 MHz clock rolls over every 0x7ffffffffff ticks of 20 ns each.
    rollover_period = 0x7ffffffffff*20.0
    return unwrap(gtr,rollover_period)
+
def retrigger_cut(ev):
    """
    Cuts all retrigger events, i.e. events whose `dt` (time since the
    previous event, presumably in ns) is 500 or less.
    """
    keep = ev.dt > 500
    return ev[keep]
+
def breakdown_follower_cut(ev):
    """
    Cuts all events within 1 second of breakdown events.
    """
    # Times of all events flagged as breakdowns by the data cleaning.
    bd_gtr = ev.gtr.values[((ev.dc & DC_BREAKDOWN) != 0).values]
    # For each breakdown, flag every event in the 1 second window after it
    # (breakdowns broadcast along rows, events along columns).
    in_window = (ev.gtr.values > bd_gtr[:,np.newaxis]) & \
                (ev.gtr.values < bd_gtr[:,np.newaxis] + 1e9)
    return ev[~in_window.any(axis=0)]
+
def flasher_follower_cut(ev):
    """
    Cuts all events within 200 microseconds of flasher events.
    """
    # Times of all events flagged as flashers by the data cleaning.
    flasher_gtr = ev.gtr.values[((ev.dc & DC_FLASHER) != 0).values]
    # Flag every event falling in the 200 us window after any flasher.
    in_window = (ev.gtr.values > flasher_gtr[:,np.newaxis]) & \
                (ev.gtr.values < flasher_gtr[:,np.newaxis] + 200e3)
    return ev[~in_window.any(axis=0)]
+
def muon_follower_cut(ev):
    """
    Cuts all events 200 microseconds after a muon.
    """
    # Times of all events flagged as muons by the data cleaning.
    muon_gtr = ev.gtr.values[((ev.dc & DC_MUON) != 0).values]
    # Flag every event falling in the 200 us window after any muon.
    after_muon = (ev.gtr.values > muon_gtr[:,np.newaxis]) & \
                 (ev.gtr.values < muon_gtr[:,np.newaxis] + 200e3)
    return ev[~after_muon.any(axis=0)]
+
def michel_cut(ev):
    """
    Looks for Michel electrons after muons.

    Returns the subset of `ev` identified as Michel candidates: non-prompt
    events passing a stricter set of data cleaning cuts, with nhit >= 100,
    occurring between 800 ns and 20 us after a prompt event or tagged muon.
    A 'muon_gtid' column is added holding the GTID of the first matching
    muon/prompt event (or -1 when there are no candidates).

    NOTE(review): `michel` is a slice of `ev`, so the `michel['muon_gtid'] = ...`
    assignments below may emit a SettingWithCopyWarning -- confirm the column
    actually lands on the returned frame under the pandas version in use.
    """
    prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)]

    # Michel electrons and neutrons can be any event which is not a prompt
    # event
    follower = ev[~ev.prompt]

    # require Michel events to pass more of the SNO data cleaning cuts
    michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]

    michel = michel[michel.nhit >= 100]

    # Accept events which had a muon more than 800 nanoseconds but less than 20
    # microseconds before them. The 800 nanoseconds cut comes from Richie's
    # thesis. He also mentions that the In Time Channel Spread Cut is very
    # effective at cutting electron events caused by muons, so I should
    # implement that.
    #
    # Note: We currently don't look across run boundaries. This should be a
    # *very* small effect, and the logic to do so would be very complicated
    # since I would have to deal with 50 MHz clock rollovers, etc.
    #
    # DataFrame.size is the total number of elements (rows*columns); it is
    # only used here as a truthiness test for "any rows at all".
    if prompt_plus_muons.size and michel.size:
        # mask[i,j] is True when michel event j falls in the (800 ns, 20 us)
        # window after prompt/muon event i.
        mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \
               (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3)
        michel = michel.iloc[np.any(mask,axis=0)]
        # For each surviving Michel candidate, record the GTID of the *first*
        # prompt/muon event whose window it fell in (argmax over the boolean
        # column returns the first True).
        michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values,
                                        index=michel.index.values,
                                        dtype=np.int32)
        return michel
    else:
        # Return an empty slice since we need it to have the same datatype as
        # the other dataframes
        michel = ev[:0]
        michel['muon_gtid'] = -1
        return michel
+
def atmospheric_events(ev):
    """
    Tags atmospheric events which have a neutron follower.

    Adds a boolean 'atm' column to `ev`: True for prompt events which are
    followed by a neutron candidate between 20 microseconds and 250 ms later,
    False for everything else. Returns `ev`.
    """
    prompt = ev[ev.prompt]

    # Michel electrons and neutrons can be any event which is not a prompt
    # event
    follower = ev[~ev.prompt]

    # Note: use the builtin bool here; np.bool was just a deprecated alias
    # for it and was removed in NumPy 1.20.
    ev['atm'] = np.zeros(len(ev),dtype=bool)

    if prompt.size and follower.size:
        # neutron followers have to obey stricter set of data cleaning cuts
        neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0]
        # require a valid fitter position and a valid RSP energy
        neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)]
        # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9?
        neutron = neutron[neutron.ftp_r < AV_RADIUS]
        neutron = neutron[neutron.rsp_energy > 4.0]

        # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
        ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
                                         (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)

    return ev
+
def gtid_sort(ev, first_gtid):
    """
    Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid
    in a run, which should be passed as a dictionary. This column can then be
    used to sort the events sequentially.

    This function should be passed to ev.groupby('run').apply(). We use this
    idiom instead of just looping over the groupby results since groupby()
    makes a copy of the dataframe, i.e.

        for run, ev_run in ev.groupby('run'):
            ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000

    would produce a SettingWithCopyWarning, so instead we use:

        ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)

    which doesn't have this problem.
    """
    # groupby().apply() exposes the group key (the run number) as the .name
    # attribute of the group dataframe. See
    # https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
    run = ev.name

    if run not in first_gtid:
        print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run)
        first_gtid[run] = ev.gtid.iloc[0]

    # GTIDs smaller than the run's first GTID must have wrapped around the 24
    # bit counter, so bump them past the unwrapped ones.
    wrapped = ev.gtid < first_gtid[run]
    ev.loc[wrapped,'gtid_sort'] += 0x1000000

    return ev
+
def prompt_event(ev):
    """
    Adds a boolean 'prompt' column to `ev`: True for events with nhit >= 100
    which are not within 250 ms of the previous such event. Returns `ev`.
    """
    ev['prompt'] = (ev.nhit >= 100)
    # Among the nhit >= 100 events, keep only those more than 250 ms after the
    # previous one (the first one is always kept).
    prompt_times = ev[ev.prompt].gtr.values
    well_separated = np.concatenate(([True],np.diff(prompt_times) > 250e6))
    ev.loc[ev.prompt,'prompt'] &= well_separated
    return ev
+
def plot_corner_plot(ev, title, save=None):
    """
    Draws a "corner plot" of the four high level cut variables (r_psup, psi,
    z, and udotr): 1D histograms on the diagonal, pairwise scatter plots below
    it, with the cut values drawn as dashed lines.

    Each off-diagonal panel is titled with a statistic (psi) comparing the
    observed counts in the four quadrants defined by the two cuts to the
    counts expected if the two variables were independent, normalized by the
    spread of that statistic under Poisson fluctuations of the observed
    counts.

    If `save` is given, the figure is saved to `save`.pdf and `save`.eps.

    NOTE(review): FIGSIZE and despine() are not defined in this module and are
    not among the names imported at the top of the file, so calling this
    function raises a NameError -- they presumably live in sddm.plot; confirm
    and import them.

    NOTE(review): the suptitle is set *after* savefig(), so the saved files do
    not include the title -- confirm this is intentional.
    """
    import matplotlib.pyplot as plt
    variables = ['r_psup','psi','z','udotr']
    labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$']
    limits = [(0,1),(0,10),(-840,840),(-1,1)]
    cuts = [0.9,6,0,-0.5]

    # Only keep events where all four variables are defined.
    ev = ev.dropna(subset=variables)

    fig = plt.figure(figsize=(FIGSIZE[0],FIGSIZE[0]))
    despine(fig,trim=True)
    for i in range(len(variables)):
        for j in range(len(variables)):
            # Only fill the lower triangle (including the diagonal).
            if j > i:
                continue
            ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1)
            if i == j:
                plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step')
                plt.gca().set_xlim(limits[i])
            else:
                plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5)
                plt.gca().set_xlim(limits[j])
                plt.gca().set_ylim(limits[i])
                n = len(ev)
                if n:
                    # Fractions of events below each cut, and the quadrant
                    # probabilities expected if the two variables were
                    # independent.
                    p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/n
                    p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/n
                    p_lolo = p_i_lo*p_j_lo
                    p_lohi = p_i_lo*(1-p_j_lo)
                    p_hilo = (1-p_i_lo)*p_j_lo
                    p_hihi = (1-p_i_lo)*(1-p_j_lo)
                    # Observed counts in the four quadrants.
                    n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j]))
                    n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j]))
                    n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] < cuts[j]))
                    n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j]))
                    observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi])
                    expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi])
                    # Poisson log likelihood ratio of observed vs expected...
                    psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum()
                    # ...normalized by the spread of the same statistic over
                    # 10000 Poisson toy samples of the observed counts.
                    psi /= np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum())
                    plt.title(r"$\psi = %.1f$" % psi)
            if i == len(variables) - 1:
                plt.xlabel(labels[j])
            else:
                plt.setp(ax.get_xticklabels(),visible=False)
            if j == 0:
                plt.ylabel(labels[i])
            else:
                plt.setp(ax.get_yticklabels(),visible=False)
            # Draw the cut values on every panel.
            plt.axvline(cuts[j],color='k',ls='--',alpha=0.5)
            if i != j:
                plt.axhline(cuts[i],color='k',ls='--',alpha=0.5)

    plt.tight_layout()

    if save:
        plt.savefig(save + ".pdf")
        plt.savefig(save + ".eps")

    plt.suptitle(title)
+
def intersect_sphere(pos, dir, R):
    """
    Compute the first intersection of a ray starting at `pos` with direction
    `dir` and a sphere centered at the origin with radius `R`. The distance to
    the intersection is returned, or None if the ray doesn't hit the sphere.

    Note: `dir` is assumed to be a unit vector (the quadratic below takes the
    |dir|^2 coefficient to be 1).

    Example:

        pos = np.array([0,0,0])
        dir = np.array([1,0,0])

        l = intersect_sphere(pos,dir,PSUP_RADIUS)
        if l is not None:
            hit = pos + l*dir
            print("ray intersects sphere at %.2f %.2f %.2f" % (hit[0], hit[1], hit[2]))
        else:
            print("ray didn't intersect sphere")
    """
    # Solve |pos + l*dir|^2 = R^2, i.e. l^2 + b*l + c = 0.
    b = 2*np.dot(dir,pos)
    c = np.dot(pos,pos) - R*R

    discriminant = b*b - 4*c
    if discriminant <= 0:
        # No real solution: the ray misses the sphere (tangent rays are
        # treated as misses).
        return None

    root = np.sqrt(discriminant)

    # Prefer the closer of the two intersections.
    near = (-b - root)/2
    if near >= 0:
        return near

    # The closer intersection is behind the origin; try the farther one.
    far = (-b + root)/2
    if far >= 0:
        return far

    # Both intersections are behind the ray origin.
    return None
+
def get_dx(row):
    """
    For a single dataframe row, project backwards from the fitted vertex
    along the fitted direction (theta1, phi1 -- presumably the muon track) to
    the PSUP, and return the distance from that entry point to the Michel
    electron position. Returns 0 if the backwards ray doesn't hit the PSUP.
    """
    vertex = np.array([row.x,row.y,row.z])
    direction = np.array([np.sin(row.theta1)*np.cos(row.phi1),
                          np.sin(row.theta1)*np.sin(row.phi1),
                          np.cos(row.theta1)])
    # Walk backwards along the direction until we hit the PSUP sphere.
    path_length = intersect_sphere(vertex,-direction,PSUP_RADIUS)
    if path_length is None:
        return 0
    entry_point = vertex - direction*path_length
    michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel])
    return np.linalg.norm(michel_pos-entry_point)
+
def dx_to_energy(dx, filename="../src/muE_water_liquid.txt"):
    """
    Converts a muon path length `dx` to an energy by interpolating a muon
    energy loss table.

    Args:
        - dx: path length (same units as the range column of the table)
        - filename: path to the energy loss table. The default is relative to
          the current working directory, so callers running from a different
          directory should pass an explicit path.

    Note: the table is re-read and re-parsed on every call.
    """
    lines = []
    with open(filename) as f:
        for i, line in enumerate(f):
            # Skip the 10 header lines and the in-table marker lines which
            # np.genfromtxt can't parse.
            if i < 10:
                continue
            if 'Minimum ionization' in line:
                continue
            if 'Muon critical energy' in line:
                continue
            lines.append(line)
    data = np.genfromtxt(lines)
    # Interpolate energy (column 0) as a function of range (column 8 --
    # presumably the CSDA range; confirm against the table header).
    return np.interp(dx,data[:,8],data[:,0])
+
def f(x):
    """
    O(alpha) radiative correction to the Michel spectrum as a function of the
    reduced energy x = 2*E/m_mu, used by michel_spectrum() (see
    https://arxiv.org/abs/1406.3575).

    NOTE(review): scipy.special.spence(x) computes the dilogarithm Li2(1-x),
    not Li2(x) -- confirm this matches the convention used in the paper.
    """
    y = (5/(3*x**2) + 16*x/3 + 4/x + (12-8*x)*np.log(1/x-1) - 8)*np.log(MUON_MASS/ELECTRON_MASS)
    y += (6-4*x)*(2*spence(x) - 2*np.log(x)**2 + np.log(x) + np.log(1-x)*(3*np.log(x)-1/x-1) - np.pi**2/3-2)
    y += (1-x)*(34*x**2+(5-34*x**2+17*x)*np.log(x) - 22*x)/(3*x**2)
    y += 6*(1-x)*np.log(x)
    return y
+
def michel_spectrum(T):
    """
    Michel electron energy spectrum for a free muon. `T` should be the kinetic
    energy of the electron or positron in MeV.

    Note: The result is not normalized.

    From https://arxiv.org/abs/1406.3575.
    """
    E = T + ELECTRON_MASS
    # Reduced energy: x runs from 0 to 1 over the kinematically allowed range.
    x = 2*E/MUON_MASS
    y = np.zeros_like(x,dtype=np.double)
    physical = (x > 0) & (x < 1)
    xp = x[physical]
    # Leading order spectrum with the O(alpha) radiative correction f(x).
    y[physical] = GF**2*MUON_MASS**5*xp**2*(6-4*xp+FINE_STRUCTURE_CONSTANT*f(xp)/np.pi)/(192*np.pi**3)
    y *= 2*MUON_MASS
    return y
diff --git a/utils/setup.py b/utils/setup.py
new file mode 100755
index 0000000..23a2ebe
--- /dev/null
+++ b/utils/setup.py
@@ -0,0 +1,10 @@
#!/usr/bin/env python
# Use setuptools rather than distutils here: distutils.core.setup() silently
# ignores the install_requires keyword (it is a setuptools extension), so the
# dependencies below would never be recorded or installed.
from setuptools import setup

setup(name='sddm',
      version='0.1',
      description="Self-destructing dark matter python scripts",
      author="Anthony LaTorre",
      packages=['sddm'],
      install_requires=['numpy', 'pandas', 'matplotlib']
)
diff --git a/utils/submit-grid-jobs b/utils/submit-grid-jobs
index 85cf59a..919f17a 100755
--- a/utils/submit-grid-jobs
+++ b/utils/submit-grid-jobs
@@ -50,150 +50,11 @@ import subprocess
import json
import h5py
import numpy as np
-
-DEBUG = 0
-VERBOSE = 1
-NOTICE = 2
-WARNING = 3
-
-class Logger(object):
- """
- Simple logger class that I wrote for the SNO+ DAQ. Very easy to use:
-
- log = Logger()
- log.set_logfile("test.log")
- log.notice("blah")
- log.warn("foo")
-
- The log file format is taken from the Redis log file format which is really
- nice since it shows the exact time and severity of each log message.
- """
- def __init__(self):
- self.logfile = sys.stdout
- # by default, we log everything
- self.verbosity = DEBUG
-
- def set_verbosity(self, level):
- if isinstance(level, int):
- self.verbosity = level
- elif isinstance(level, basestring):
- if level == 'debug':
- self.verbosity = DEBUG
- elif level == 'verbose':
- self.verbosity = VERBOSE
- elif level == 'notice':
- self.verbosity = NOTICE
- elif level == 'warning':
- self.verbosity = WARNING
- else:
- raise ValueError("unknown loglevel '%s'" % level)
- else:
- raise TypeError("level must be a string or integer")
-
- def set_logfile(self, filename):
- self.logfile = open(filename, 'a')
-
- def debug(self, msg):
- self.log(DEBUG, msg)
-
- def verbose(self, msg):
- self.log(VERBOSE, msg)
-
- def notice(self, msg):
- self.log(NOTICE, msg)
-
- def warn(self, msg):
- self.log(WARNING, msg)
-
- def log(self, level, msg):
- if level < self.verbosity:
- return
-
- c = '.-*#'
- pid = os.getpid()
- now = datetime.now()
- buf = now.strftime('%d %b %H:%M:%S.%f')[:-3]
-
- self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg))
- self.logfile.flush()
+from sddm.logger import Logger
+from sddm import which, splitext
log = Logger()
-# Next two functions are a backport of the shutil.which() function from Python
-# 3.3 from Lib/shutil.py in the CPython code See
-# https://github.com/python/cpython/blob/master/Lib/shutil.py.
-
-# Check that a given file can be accessed with the correct mode.
-# Additionally check that `file` is not a directory, as on Windows
-# directories pass the os.access check.
-def _access_check(fn, mode):
- return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn))
-
-def which(cmd, mode=os.F_OK | os.X_OK, path=None):
- """Given a command, mode, and a PATH string, return the path which
- conforms to the given mode on the PATH, or None if there is no such
- file.
- `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
- of os.environ.get("PATH"), or can be overridden with a custom search
- path.
- """
- # If we're given a path with a directory part, look it up directly rather
- # than referring to PATH directories. This includes checking relative to the
- # current directory, e.g. ./script
- if os.path.dirname(cmd):
- if _access_check(cmd, mode):
- return cmd
- return None
-
- if path is None:
- path = os.environ.get("PATH", None)
- if path is None:
- try:
- path = os.confstr("CS_PATH")
- except (AttributeError, ValueError):
- # os.confstr() or CS_PATH is not available
- path = os.defpath
- # bpo-35755: Don't use os.defpath if the PATH environment variable is
- # set to an empty string
-
- # PATH='' doesn't match, whereas PATH=':' looks in the current directory
- if not path:
- return None
-
- path = path.split(os.pathsep)
-
- if sys.platform == "win32":
- # The current directory takes precedence on Windows.
- curdir = os.curdir
- if curdir not in path:
- path.insert(0, curdir)
-
- # PATHEXT is necessary to check on Windows.
- pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
- # See if the given file matches any of the expected path extensions.
- # This will allow us to short circuit when given "python.exe".
- # If it does match, only test that one, otherwise we have to try
- # others.
- if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
- files = [cmd]
- else:
- files = [cmd + ext for ext in pathext]
- else:
- # On other platforms you don't have things like PATHEXT to tell you
- # what file suffixes are executable, so just pass on cmd as-is.
- files = [cmd]
-
- seen = set()
- for dir in path:
- normdir = os.path.normcase(dir)
- if not normdir in seen:
- seen.add(normdir)
- for thefile in files:
- name = os.path.join(dir, thefile)
- if _access_check(name, mode):
- return name
- return None
-
CONDOR_TEMPLATE = \
"""
# We need the job to run our executable script, with the
@@ -230,24 +91,6 @@ INPUT_FILES = ["muE_water_liquid.txt","pmt_response_qoca_d2o_20060216.dat","rsp_
class MyTemplate(string.Template):
delimiter = '@'
-def splitext(path):
- """
- Like os.path.splitext() except it returns the full extension if the
- filename has multiple extensions, for example:
-
- splitext('foo.tar.gz') -> 'foo', '.tar.gz'
- """
- full_root, full_ext = os.path.splitext(path)
- while True:
- root, ext = os.path.splitext(full_root)
- if ext:
- full_ext = ext + full_ext
- full_root = root
- else:
- break
-
- return full_root, full_ext
-
def create_submit_file(filename, uuid, run, gtid, dir, dqxx_dir, particle_combo, max_time, priority):
"""
Creates a submit file and submits a job to the grid.