author    | tlatorre <tlatorre@uchicago.edu> | 2020-05-11 10:30:39 -0500
committer | tlatorre <tlatorre@uchicago.edu> | 2020-05-11 10:30:39 -0500
commit    | 15fc972c89a4366a06755daeedaac52f91762ecd (patch)
tree      | 9a5dbea7787cef9946473787e9a3996f24cd2898
parent    | 651cbe5d261a6d29b4dec7c38b65c0eac5431363 (diff)
update utils/ folder to make a python package called sddm
This commit adds an sddm Python package to the utils/ folder, which lets me
consolidate code shared across the various scripts. The package is installed
by default to /home/tlatorre/local/lib/python2.7/site-packages, so you should
add the following to your .bashrc file:
export PYTHONPATH=$HOME/local/lib/python2.7/site-packages/:$PYTHONPATH
before using the scripts installed to ~/local/bin.
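
Once PYTHONPATH is set, the scripts can import the shared helpers from the
package instead of carrying their own copies. A minimal sketch of the new
usage (the imports below are the ones added in the diff; the Logger calls
follow the docstring moved to sddm/logger.py):

    from sddm import splitext, which
    from sddm.logger import Logger
    from sddm.plot import despine, plot_hist, plot_legend, get_stats
    from sddm.dc import get_proposal_func, estimate_errors, metropolis_hastings

    # Simple logger usage, as described in the Logger docstring
    log = Logger()
    log.set_logfile("test.log")
    log.notice("blah")
    log.warn("foo")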
-rw-r--r-- | utils/.gitignore              |   1
-rw-r--r-- | utils/Makefile                |   1
-rwxr-xr-x | utils/calculate_limits.py     | 103
-rwxr-xr-x | utils/cat-grid-jobs           | 157
-rwxr-xr-x | utils/dc                      | 595
-rwxr-xr-x | utils/plot                    |  50
-rwxr-xr-x | utils/plot-atmospheric-fluxes | 128
-rwxr-xr-x | utils/plot-energy             | 675
-rwxr-xr-x | utils/plot-fit-results        | 332
-rwxr-xr-x | utils/plot-root-results       | 104
-rw-r--r-- | utils/sddm/.gitignore         |   1
-rw-r--r-- | utils/sddm/__init__.py        | 139
-rwxr-xr-x | utils/sddm/dc.py              | 366
-rw-r--r-- | utils/sddm/logger.py          |  70
-rwxr-xr-x | utils/sddm/plot.py            | 257
-rwxr-xr-x | utils/sddm/plot_energy.py     | 352
-rwxr-xr-x | utils/setup.py                |  10
-rwxr-xr-x | utils/submit-grid-jobs        | 161
18 files changed, 1342 insertions, 2160 deletions
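
The new utils/setup.py itself does not appear in this excerpt; given the
Makefile hook (./setup.py install --prefix=$(PREFIX)) and its ~10-line
diffstat entry, it is presumably a minimal distutils script along these
lines (a hypothetical sketch, not the actual file):

    #!/usr/bin/env python
    # Hypothetical reconstruction -- the real utils/setup.py is not shown here.
    from distutils.core import setup

    setup(name='sddm',
          version='0.1',  # assumed; the actual version string is not visible
          description='shared utilities for the sddm analysis scripts',
          packages=['sddm'])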
diff --git a/utils/.gitignore b/utils/.gitignore index e307f16..cea117c 100644 --- a/utils/.gitignore +++ b/utils/.gitignore @@ -1,2 +1,3 @@ *.eps *.pdf +build/ diff --git a/utils/Makefile b/utils/Makefile index 6ad38c5..c8690d6 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -17,5 +17,6 @@ install: $(INSTALL) cat-grid-jobs $(INSTALL_BIN) $(INSTALL) plot-energy $(INSTALL_BIN) $(INSTALL) zdab-reprocess $(INSTALL_BIN) + ./setup.py install --prefix=$(PREFIX) .PHONY: install diff --git a/utils/calculate_limits.py b/utils/calculate_limits.py index 3737850..c8698c1 100755 --- a/utils/calculate_limits.py +++ b/utils/calculate_limits.py @@ -23,6 +23,7 @@ from numpy import pi from collections import namedtuple from functools import update_wrapper from threading import RLock +from sddm.plot import despine # Backport of lru_cache from http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/ _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) @@ -487,108 +488,6 @@ def get_event_rate_xenon(m, cs, A, threshold): return rate -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. 
- - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) - if __name__ == '__main__': import argparse diff --git a/utils/cat-grid-jobs b/utils/cat-grid-jobs index 62dcd42..0755774 100755 --- a/utils/cat-grid-jobs +++ b/utils/cat-grid-jobs @@ -35,164 +35,11 @@ from datetime import datetime import h5py from os.path import join, split from subprocess import check_call - -DEBUG = 0 -VERBOSE = 1 -NOTICE = 2 -WARNING = 3 - -class Logger(object): - """ - Simple logger class that I wrote for the SNO+ DAQ. Very easy to use: - - log = Logger() - log.set_logfile("test.log") - log.notice("blah") - log.warn("foo") - - The log file format is taken from the Redis log file format which is really - nice since it shows the exact time and severity of each log message. 
- """ - def __init__(self): - self.logfile = sys.stdout - # by default, we log everything - self.verbosity = DEBUG - - def set_verbosity(self, level): - if isinstance(level, int): - self.verbosity = level - elif isinstance(level, basestring): - if level == 'debug': - self.verbosity = DEBUG - elif level == 'verbose': - self.verbosity = VERBOSE - elif level == 'notice': - self.verbosity = NOTICE - elif level == 'warning': - self.verbosity = WARNING - else: - raise ValueError("unknown loglevel '%s'" % level) - else: - raise TypeError("level must be a string or integer") - - def set_logfile(self, filename): - self.logfile = open(filename, 'a') - - def debug(self, msg): - self.log(DEBUG, msg) - - def verbose(self, msg): - self.log(VERBOSE, msg) - - def notice(self, msg): - self.log(NOTICE, msg) - - def warn(self, msg): - self.log(WARNING, msg) - - def log(self, level, msg): - if level < self.verbosity: - return - - c = '.-*#' - pid = os.getpid() - now = datetime.now() - buf = now.strftime('%d %b %H:%M:%S.%f')[:-3] - - self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg)) - self.logfile.flush() +from sddm import splitext, which +from sddm.logger import Logger log = Logger() -# Check that a given file can be accessed with the correct mode. -# Additionally check that `file` is not a directory, as on Windows -# directories pass the os.access check. -def _access_check(fn, mode): - return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)) - -def which(cmd, mode=os.F_OK | os.X_OK, path=None): - """Given a command, mode, and a PATH string, return the path which - conforms to the given mode on the PATH, or None if there is no such - file. - `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result - of os.environ.get("PATH"), or can be overridden with a custom search - path. - """ - # If we're given a path with a directory part, look it up directly rather - # than referring to PATH directories. This includes checking relative to the - # current directory, e.g. ./script - if os.path.dirname(cmd): - if _access_check(cmd, mode): - return cmd - return None - - if path is None: - path = os.environ.get("PATH", None) - if path is None: - try: - path = os.confstr("CS_PATH") - except (AttributeError, ValueError): - # os.confstr() or CS_PATH is not available - path = os.defpath - # bpo-35755: Don't use os.defpath if the PATH environment variable is - # set to an empty string - - # PATH='' doesn't match, whereas PATH=':' looks in the current directory - if not path: - return None - - path = path.split(os.pathsep) - - if sys.platform == "win32": - # The current directory takes precedence on Windows. - curdir = os.curdir - if curdir not in path: - path.insert(0, curdir) - - # PATHEXT is necessary to check on Windows. - pathext = os.environ.get("PATHEXT", "").split(os.pathsep) - # See if the given file matches any of the expected path extensions. - # This will allow us to short circuit when given "python.exe". - # If it does match, only test that one, otherwise we have to try - # others. - if any(cmd.lower().endswith(ext.lower()) for ext in pathext): - files = [cmd] - else: - files = [cmd + ext for ext in pathext] - else: - # On other platforms you don't have things like PATHEXT to tell you - # what file suffixes are executable, so just pass on cmd as-is. 
- files = [cmd] - - seen = set() - for dir in path: - normdir = os.path.normcase(dir) - if not normdir in seen: - seen.add(normdir) - for thefile in files: - name = os.path.join(dir, thefile) - if _access_check(name, mode): - return name - return None - -def splitext(path): - """ - Like os.path.splitext() except it returns the full extension if the - filename has multiple extensions, for example: - - splitext('foo.tar.gz') -> 'foo', '.tar.gz' - """ - full_root, full_ext = os.path.splitext(path) - while True: - root, ext = os.path.splitext(full_root) - if ext: - full_ext = ext + full_ext - full_root = root - else: - break - - return full_root, full_ext - def cat_grid_jobs(conn, output_dir): zdab_cat = which("zdab-cat") @@ -26,6 +26,10 @@ import emcee from scipy.optimize import brentq from scipy.stats import truncnorm from matplotlib.lines import Line2D +from sddm.dc import get_proposal_func, estimate_errors, metropolis_hastings, EPSILON +from sddm.plot import despine +from sddm.dc import * +from sddm.plot_energy import * try: from emcee import moves @@ -33,238 +37,6 @@ except ImportError: print("emcee version 2.2.1 is required",file=sys.stderr) sys.exit(1) -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. 
- - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) - -# Lower bound for probabilities in fit. We set this to be nonzero to prevent -# nans in case an expected value is predicted to be zero. -EPSILON = 1e-10 - -def get_proposal_func(stepsizes, low, high): - """ - Returns a function which produces proposal steps for a Metropolis Hastings - MCMC. This function can then be passed to emcee.moves.MHMove(). - - Steps are proposed according to a truncated normal distribution with mean - at the current position and with standard deviations given by stepsizes. - The bounds for each step are given by the two arrays low and high. - """ - stepsizes = np.atleast_1d(stepsizes) - low = np.atleast_1d(low) - high = np.atleast_1d(high) - def proposal(x0, random): - """ - Returns a tuple of proposed positions and the log-ratio of the proposal - probabilities. - - x0 should be a (K,ndim) dimensional numpy array where K is the number - of walkers and ndim is the number of dimensions in the likelihood function. - """ - # calculate a and b needed for truncnorm - # Note: These are not just low and high b/c - # - # The standard form of this distribution is a standard normal - # truncated to the range [a, b] --- notice that a and b are defined - # over the domain of the standard normal. 
To convert clip values - # for a specific mean and standard deviation, use:: - # - # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std - a, b = (low - x0)/stepsizes, (high - x0)/stepsizes - x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random) - # Note: Should be able to do this: - # - # log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1) - # - # but I think there is a bug in truncnorm.logpdf() which barfs when - # passed 2D arrays, so instead we just loop over the first axis. - log_p_x_given_x0 = np.empty(x0.shape[0]) - for i in range(x0.shape[0]): - log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum() - a, b = (low - x)/stepsizes, (high - x)/stepsizes - # Note: Should be able to do this: - # - # log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1) - # - # but I think there is a bug in truncnorm.logpdf() which barfs when - # passed 2D arrays, so instead we just loop over the first axis. - log_p_x0_given_x = np.empty(x0.shape[0]) - for i in range(x0.shape[0]): - log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum() - return x, log_p_x0_given_x - log_p_x_given_x0 - return proposal - -def estimate_errors(nll, xopt, constraints): - """ - Return approximate 1 sigma errors for each parameter assuming all - parameters are uncorrelated. - """ - nll_xopt = nll(xopt) - - def root(x, xopt, i): - """ - Function to bisect where the negative log likelihood reaches 0.5 from the minimum. - """ - xtest = xopt.copy() - xtest[i] = x - for constraint in constraints: - if constraint(xtest) >= 0: - xtest = constraint.renormalize(xtest,i) - return nll(xtest) - (nll_xopt + 0.5) - - errors = np.empty_like(xopt) - - for i in range(len(xopt)): - if i < 6: - xlo = brentq(root,0,xopt[i],args=(xopt,i)) - xhi = brentq(root,xopt[i],1e9,args=(xopt,i)) - else: - xlo = brentq(root,0,xopt[i],args=(xopt,i)) - xhi = brentq(root,xopt[i],1,args=(xopt,i)) - - errors[i] = max(xopt[i]-xlo,xhi-xopt[i]) - - return errors - -def metropolis_hastings(nll, x0, stepsizes=None, size=1000): - x = np.asarray(x0) - - if stepsizes is None: - stepsizes = np.ones_like(x) - else: - stepsizes = np.asarray(stepsizes) - - accept = 0 - - nll_old = nll(x) - - samples = np.empty((size,len(x0)),dtype=np.double) - - try: - for i in range(size): - if i % 100 == 0: - sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1))) - sys.stdout.flush() - - xnew = x + np.random.randn(len(x))*stepsizes - - nll_new = nll(xnew) - - if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new): - x = xnew - nll_new = nll_old - accept += 1 - - samples[i] = x - except KeyboardInterrupt: - sys.stdout.write("\n") - sys.stdout.flush() - print("ctrl-c caught. 
quitting...") - samples.resize((i,len(x))) - else: - sys.stdout.write("\n") - sys.stdout.flush() - - return samples - # from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre @contextlib.contextmanager def printoptions(*args, **kwargs): @@ -275,250 +47,6 @@ def printoptions(*args, **kwargs): finally: np.set_printoptions(**original) -# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python -class bcolors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - -SNOMAN_MASS = { - 20: 0.511, - 21: 0.511, - 22: 105.658, - 23: 105.658 -} - -AV_RADIUS = 600.0 -PSUP_RADIUS = 840.0 - -# Data cleaning bitmasks. -DC_MUON = 0x1 -DC_JUNK = 0x2 -DC_CRATE_ISOTROPY = 0x4 -DC_QVNHIT = 0x8 -DC_NECK = 0x10 -DC_FLASHER = 0x20 -DC_ESUM = 0x40 -DC_OWL = 0x80 -DC_OWL_TRIGGER = 0x100 -DC_FTS = 0x200 -DC_ITC = 0x400 -DC_BREAKDOWN = 0x800 - -def plot_hist(df, title=None, muons=False): - for id, df_id in sorted(df.groupby('id')): - if id == 20: - plt.subplot(3,4,1) - elif id == 22: - plt.subplot(3,4,2) - elif id == 2020: - plt.subplot(3,4,5) - elif id == 2022: - plt.subplot(3,4,6) - elif id == 2222: - plt.subplot(3,4,7) - elif id == 202020: - plt.subplot(3,4,9) - elif id == 202022: - plt.subplot(3,4,10) - elif id == 202222: - plt.subplot(3,4,11) - elif id == 222222: - plt.subplot(3,4,12) - - if muons: - plt.hist(df_id.ke.values, bins=np.linspace(20,1000e3,100), histtype='step') - else: - plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') - plt.xlabel("Energy (MeV)") - plt.title(str(id)) - - if title: - plt.suptitle(title) - - if len(df): - plt.tight_layout() - -def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] - -def print_warning(msg): - print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) - -def unwrap(p, delta, axis=-1): - """ - A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. - It unwraps discontinuities bigger than delta/2 by delta. - - Example: - - >>> a = np.arange(10) % 5 - >>> a - array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) - >>> unwrap(a,5) - array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) - - In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. - """ - p = np.asarray(p) - nd = p.ndim - dd = np.diff(p, axis=axis) - slice1 = [slice(None, None)]*nd # full slices - slice1[axis] = slice(1, None) - slice1 = tuple(slice1) - ddmod = np.mod(dd + delta/2, delta) - delta/2 - np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) - ph_correct = ddmod - dd - np.copyto(ph_correct, 0, where=abs(dd) < delta/2) - up = np.array(p, copy=True, dtype='d') - up[slice1] = p[slice1] + ph_correct.cumsum(axis) - return up - -def unwrap_50_mhz_clock(gtr): - """ - Unwrap an array with 50 MHz clock times. These times should all be in - nanoseconds and come from the KEV_GTR variable in the EV bank. - - Note: We assume here that the events are already ordered contiguously by - GTID, so you shouldn't pass an array with multiple runs! - """ - return unwrap(gtr,0x7ffffffffff*20.0) - -def retrigger_cut(ev): - """ - Cuts all retrigger events. - """ - return ev[ev.dt > 500] - -def breakdown_follower_cut(ev): - """ - Cuts all events within 1 second of breakdown events. 
- """ - breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] - return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] - -def flasher_follower_cut(ev): - """ - Cuts all events within 200 microseconds of flasher events. - """ - flashers = ev[ev.dc & DC_FLASHER != 0] - return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] - -def muon_follower_cut(ev): - """ - Cuts all events 200 microseconds after a muon. - """ - muons = ev[ev.dc & DC_MUON != 0] - return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] - -def michel_cut(ev): - """ - Looks for Michel electrons after muons. - """ - prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] - - # Michel electrons and neutrons can be any event which is not a prompt - # event - follower = ev[~ev.prompt] - - # require Michel events to pass more of the SNO data cleaning cuts - michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] - - michel = michel[michel.nhit >= 100] - - # Accept events which had a muon more than 800 nanoseconds but less - # than 20 microseconds before them. The 800 nanoseconds cut comes from - # Richie's thesis. He also mentions that the In Time Channel Spread Cut - # is very effective at cutting electrons events caused by muons, so I - # should implement that. - # - # Note: We currently don't look across run boundaries. This should be a - # *very* small effect, and the logic to do so would be very complicated - # since I would have to deal with 50 MHz clock rollovers, etc. - # - # Do a simple python for loop here over the runs since we create - # temporary arrays with shape (michel.size,prompt_plus_muons.size) - # which could be too big for the full dataset. - # - # There might be some clever way to do this without the loop in Pandas, - # but I don't know how. - if prompt_plus_muons.size and michel.size: - return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ - (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] - else: - return pd.DataFrame(columns=ev.columns) - -def atmospheric_events(ev): - """ - Tags atmospheric events which have a neutron follower. 
- """ - prompt = ev[ev.prompt] - - # Michel electrons and neutrons can be any event which is not a prompt - # event - follower = ev[~ev.prompt] - - ev['atm'] = np.zeros(len(ev),dtype=np.bool) - - if prompt.size and follower.size: - # neutron followers have to obey stricter set of data cleaning cuts - neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] - neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] - r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2) - neutron = neutron[r < AV_RADIUS] - neutron = neutron[neutron.rsp_energy > 4.0] - - # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) - ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ - (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) - - return ev - -def gtid_sort(ev, first_gtid): - """ - Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid - in a run, which should be passed as a dictionary. This column can then be - used to sort the events sequentially. - - This function should be passed to ev.groupby('run').apply(). We use this - idiom instead of just looping over the groupby results since groupby() - makes a copy of the dataframe, i.e. - - for run, ev_run in ev.groupby('run'): - ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 - - would produce a SettingWithCopyWarning, so instead we use: - - ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) - - which doesn't have this problem. - """ - # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python - run = ev.name - - if run not in first_gtid: - print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) - first_gtid[run] = ev.gtid.iloc[0] - - ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 - - return ev - -def prompt_event(ev): - ev['prompt'] = (ev.nhit >= 100) - ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) - return ev - def radius_cut(ev): ev['radius_cut'] = np.digitize((ev.r/PSUP_RADIUS)**3,(0.9,)) return ev @@ -539,33 +67,6 @@ def z_cut(ev): ev['z_cut'] = np.digitize(ev.z,(0.0,)) return ev -class Constraint(object): - def __init__(self, range): - self.range = range - - def __call__(self, x, grad=None): - return np.sum(x[self.range]) - 1 + EPSILON - - def renormalize(self, x, fix): - if fix not in self.range: - return x - x = x.copy() - while self(x) >= 0: - for i in self.range: - if i == fix: - continue - x[i] -= self(x) - return x - - def renormalize_no_fix(self, x): - x = x.copy() - while self(x) >= 0: - for i in self.range: - if x[i] < 10*EPSILON: - continue - x[i] -= x[i]/2.0 - return x - # Constraint to enforce the fact that P(r,psi,z,udotr|muon) all add up to 1.0. # In the likelihood function we set the last possibility for r and udotr equal # to 1.0 minus the others. Therefore, we need to enforce the fact that the @@ -596,7 +97,7 @@ flasher_r_udotr = Constraint(range(39,42)) # others must add up to less than 1. 
breakdown_r_udotr = Constraint(range(44,47)) -def make_nll(data, sacrifice,constraints): +def make_nll(data, sacrifice, constraints): def nll(x, grad=None, fill_value=1e9): if grad is not None and grad.size > 0: raise Exception("nll got passed grad!") @@ -762,7 +263,6 @@ def make_nll(data, sacrifice,constraints): if __name__ == '__main__': import argparse - import matplotlib.pyplot as plt import numpy as np import pandas as pd import sys @@ -770,9 +270,51 @@ if __name__ == '__main__': parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") + parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain") parser.add_argument("--save", action="store_true", default=False, help="save plots") args = parser.parse_args() + if args.save: + # default \textwidth for a fullpage article in Latex is 16.50764 cm. + # You can figure this out by compiling the following TeX document: + # + # \documentclass{article} + # \usepackage{fullpage} + # \usepackage{layouts} + # \begin{document} + # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth} + # \end{document} + + width = 16.50764 + width /= 2.54 # cm -> inches + # According to this page: + # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf, + # Tufte suggests an aspect ratio of 1.5 - 1.6. + height = width/1.5 + FIGSIZE = (width,height) + + import matplotlib.pyplot as plt + + font = {'family':'serif', 'serif': ['computer modern roman']} + plt.rc('font',**font) + + plt.rc('text', usetex=True) + else: + # on retina screens, the default plots are way too small + # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 + # Qt5 will scale everything using the dpi in ~/.Xresources + import matplotlib + matplotlib.use("Qt5Agg") + + import matplotlib.pyplot as plt + + # Default figure size. Currently set to my monitor width and height so that + # things are properly formatted + FIGSIZE = (13.78,7.48) + + # Make the defalt font bigger + plt.rc('font', size=22) + for filename in args.filenames: ev = pd.read_hdf(filename,"ev") @@ -1079,7 +621,7 @@ if __name__ == '__main__': proposal = get_proposal_func(stepsizes*0.1,low,high) sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x, grad, fill_value: -nll(x,grad,fill_value), moves=emcee.moves.MHMove(proposal),args=[None,np.inf]) with np.errstate(invalid='ignore'): - sampler.run_mcmc(pos, 100000) + sampler.run_mcmc(pos, args.steps) print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) @@ -1088,47 +630,6 @@ if __name__ == '__main__': except Exception as e: print(e) - if args.save: - # default \textwidth for a fullpage article in Latex is 16.50764 cm. - # You can figure this out by compiling the following TeX document: - # - # \documentclass{article} - # \usepackage{fullpage} - # \usepackage{layouts} - # \begin{document} - # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth} - # \end{document} - - width = 16.50764 - width /= 2.54 # cm -> inches - # According to this page: - # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf, - # Tufte suggests an aspect ratio of 1.5 - 1.6. 
- height = width/1.5 - FIGSIZE = (width,height) - - import matplotlib.pyplot as plt - - font = {'family':'serif', 'serif': ['computer modern roman']} - plt.rc('font',**font) - - plt.rc('text', usetex=True) - else: - # on retina screens, the default plots are way too small - # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 - # Qt5 will scale everything using the dpi in ~/.Xresources - import matplotlib - matplotlib.use("Qt5Agg") - - import matplotlib.pyplot as plt - - # Default figure size. Currently set to my monitor width and height so that - # things are properly formatted - FIGSIZE = (13.78,7.48) - - # Make the defalt font bigger - plt.rc('font', size=22) - # Plot walker positions as a function of step number for the expected # number of events fig, axes = plt.subplots(6, figsize=FIGSIZE, num=1, sharex=True) @@ -15,15 +15,6 @@ # this program. If not, see <https://www.gnu.org/licenses/>. from __future__ import print_function, division -import yaml -try: - from yaml import CLoader as Loader -except ImportError: - from yaml.loader import SafeLoader as Loader - -import numpy as np -from scipy.stats import iqr -from matplotlib.lines import Line2D # on retina screens, the default plots are way too small # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 @@ -31,51 +22,14 @@ from matplotlib.lines import Line2D import matplotlib matplotlib.use("Qt5Agg") -IDP_E_MINUS = 20 -IDP_MU_MINUS = 22 - -SNOMAN_MASS = { - 20: 0.511, - 21: 0.511, - 22: 105.658, - 23: 105.658 -} - -def plot_hist(x, label=None): - # determine the bin width using the Freedman Diaconis rule - # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule - h = 2*iqr(x)/len(x)**(1/3) - n = max(int((np.max(x)-np.min(x))/h),10) - bins = np.linspace(np.min(x),np.max(x),n) - plt.hist(x, bins=bins, histtype='step', label=label) - -def plot_legend(n): - plt.figure(n) - ax = plt.gca() - handles, labels = ax.get_legend_handles_labels() - new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles] - plt.legend(handles=new_handles,labels=labels) - -def get_stats(x): - """ - Returns a tuple (mean, error mean, std, error std) for the values in x. - - The formula for the standard error on the standard deviation comes from - https://stats.stackexchange.com/questions/156518. - """ - mean = np.mean(x) - std = np.std(x) - n = len(x) - u4 = np.mean((x-mean)**4) - error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) - return mean, std/np.sqrt(n), std, error - if __name__ == '__main__': import argparse import matplotlib.pyplot as plt import numpy as np import h5py import pandas as pd + from sddm import IDP_E_MINUS, IDP_MU_MINUS, SNOMAN_MASS + from sddm.plot import plot_hist, plot_legend, get_stats parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") diff --git a/utils/plot-atmospheric-fluxes b/utils/plot-atmospheric-fluxes index fb4861d..7dafbbb 100755 --- a/utils/plot-atmospheric-fluxes +++ b/utils/plot-atmospheric-fluxes @@ -6,132 +6,8 @@ http://www-pnp.physics.ox.ac.uk/~barr/fluxfiles/0403i/index.html. 
from __future__ import print_function, division import numpy as np import os - -def splitext(path): - """ - Like os.path.splitext() except it returns the full extension if the - filename has multiple extensions, for example: - - splitext('foo.tar.gz') -> 'foo', '.tar.gz' - """ - full_root, full_ext = os.path.splitext(path) - while True: - root, ext = os.path.splitext(full_root) - if ext: - full_ext = ext + full_ext - full_root = root - else: - break - - return full_root, full_ext - -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. - - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - xticks_minor = [t for t in ax_i.get_xticks(minor=True) if firsttick < t < lasttick] - ax_i.set_xticks(xticks_minor,minor=True) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - 
ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) - - yticks_minor = [t for t in ax_i.get_yticks(minor=True) if firsttick < t < lasttick] - ax_i.set_yticks(yticks_minor,minor=True) +from sddm.plot import despine +from sddm import splitext # Data is in the form: Fluxes in bins of neutrino energy (equally spaced bins # in logE with 10 bins per decade with the low edge of the first bin at 100 diff --git a/utils/plot-energy b/utils/plot-energy index a057302..5c33969 100755 --- a/utils/plot-energy +++ b/utils/plot-energy @@ -37,61 +37,8 @@ from scipy.stats import iqr, norm, beta from scipy.special import spence from itertools import izip_longest -PSUP_RADIUS = 840.0 - -# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python -class bcolors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - -# on retina screens, the default plots are way too small -# by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 -# Qt5 will scale everything using the dpi in ~/.Xresources -import matplotlib -matplotlib.use("Qt5Agg") - -font = {'family':'serif', 'serif': ['computer modern roman']} -matplotlib.rc('font',**font) - -matplotlib.rc('text', usetex=True) - -SNOMAN_MASS = { - 20: 0.511, - 21: 0.511, - 22: 105.658, - 23: 105.658 -} - -AV_RADIUS = 600.0 - -# Data cleaning bitmasks. -DC_MUON = 0x1 -DC_JUNK = 0x2 -DC_CRATE_ISOTROPY = 0x4 -DC_QVNHIT = 0x8 -DC_NECK = 0x10 -DC_FLASHER = 0x20 -DC_ESUM = 0x40 -DC_OWL = 0x80 -DC_OWL_TRIGGER = 0x100 -DC_FTS = 0x200 -DC_ITC = 0x400 -DC_BREAKDOWN = 0x800 - particle_id = {20: 'e', 22: r'\mu'} -def grouper(iterable, n, fillvalue=None): - "Collect data into fixed-length chunks or blocks" - # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx - args = [iter(iterable)] * n - return izip_longest(fillvalue=fillvalue, *args) - def plot_hist2(df, muons=False): for id, df_id in sorted(df.groupby('id')): if id == 20: @@ -148,550 +95,14 @@ def plot_hist(df, muons=False): if len(df): plt.tight_layout() -def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] - -def print_warning(msg): - print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) - -def unwrap(p, delta, axis=-1): - """ - A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. - It unwraps discontinuities bigger than delta/2 by delta. - - Example: - - >>> a = np.arange(10) % 5 - >>> a - array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) - >>> unwrap(a,5) - array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) - - In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. - """ - p = np.asarray(p) - nd = p.ndim - dd = np.diff(p, axis=axis) - slice1 = [slice(None, None)]*nd # full slices - slice1[axis] = slice(1, None) - slice1 = tuple(slice1) - ddmod = np.mod(dd + delta/2, delta) - delta/2 - np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) - ph_correct = ddmod - dd - np.copyto(ph_correct, 0, where=abs(dd) < delta/2) - up = np.array(p, copy=True, dtype='d') - up[slice1] = p[slice1] + ph_correct.cumsum(axis) - return up - -def unwrap_50_mhz_clock(gtr): - """ - Unwrap an array with 50 MHz clock times. These times should all be in - nanoseconds and come from the KEV_GTR variable in the EV bank. 
- - Note: We assume here that the events are already ordered contiguously by - GTID, so you shouldn't pass an array with multiple runs! - """ - return unwrap(gtr,0x7ffffffffff*20.0) - -def retrigger_cut(ev): - """ - Cuts all retrigger events. - """ - return ev[ev.dt > 500] - -def breakdown_follower_cut(ev): - """ - Cuts all events within 1 second of breakdown events. - """ - breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] - return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] - -def flasher_follower_cut(ev): - """ - Cuts all events within 200 microseconds of flasher events. - """ - flashers = ev[ev.dc & DC_FLASHER != 0] - return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] - -def muon_follower_cut(ev): - """ - Cuts all events 200 microseconds after a muon. - """ - muons = ev[ev.dc & DC_MUON != 0] - return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] - -def michel_cut(ev): - """ - Looks for Michel electrons after muons. - """ - prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] - - # Michel electrons and neutrons can be any event which is not a prompt - # event - follower = ev[~ev.prompt] - - # require Michel events to pass more of the SNO data cleaning cuts - michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] - - michel = michel[michel.nhit >= 100] - - # Accept events which had a muon more than 800 nanoseconds but less than 20 - # microseconds before them. The 800 nanoseconds cut comes from Richie's - # thesis. He also mentions that the In Time Channel Spread Cut is very - # effective at cutting electron events caused by muons, so I should - # implement that. - # - # Note: We currently don't look across run boundaries. This should be a - # *very* small effect, and the logic to do so would be very complicated - # since I would have to deal with 50 MHz clock rollovers, etc. - if prompt_plus_muons.size and michel.size: - mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ - (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3) - michel = michel.iloc[np.any(mask,axis=0)] - michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values, - index=michel.index.values, - dtype=np.int32) - return michel - else: - # Return an empty slice since we need it to have the same datatype as - # the other dataframes - michel = ev[:0] - michel['muon_gtid'] = -1 - return michel - -def atmospheric_events(ev): - """ - Tags atmospheric events which have a neutron follower. - """ - prompt = ev[ev.prompt] - - # Michel electrons and neutrons can be any event which is not a prompt - # event - follower = ev[~ev.prompt] - - ev['atm'] = np.zeros(len(ev),dtype=np.bool) - - if prompt.size and follower.size: - # neutron followers have to obey stricter set of data cleaning cuts - neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] - neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] - # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9? 
- neutron = neutron[neutron.ftp_r < AV_RADIUS] - neutron = neutron[neutron.rsp_energy > 4.0] - - # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) - ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ - (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) - - return ev - -def gtid_sort(ev, first_gtid): - """ - Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid - in a run, which should be passed as a dictionary. This column can then be - used to sort the events sequentially. - - This function should be passed to ev.groupby('run').apply(). We use this - idiom instead of just looping over the groupby results since groupby() - makes a copy of the dataframe, i.e. - - for run, ev_run in ev.groupby('run'): - ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 - - would produce a SettingWithCopyWarning, so instead we use: - - ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) - - which doesn't have this problem. - """ - # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python - run = ev.name - - if run not in first_gtid: - print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) - first_gtid[run] = ev.gtid.iloc[0] - - ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 - - return ev - -def prompt_event(ev): - ev['prompt'] = (ev.nhit >= 100) - ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) - return ev - -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. 
- - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) - -def plot_corner_plot(ev, title, save=None): - variables = ['r_psup','psi','z','udotr'] - labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$'] - limits = [(0,1),(0,10),(-840,840),(-1,1)] - cuts = [0.9,6,0,-0.5] - - ev = ev.dropna(subset=variables) - - fig = plt.figure(figsize=(FIGSIZE[0],FIGSIZE[0])) - despine(fig,trim=True) - for i in range(len(variables)): - for j in range(len(variables)): - if j > i: - continue - ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1) - if i == j: - plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step') - plt.gca().set_xlim(limits[i]) - else: - plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5) - plt.gca().set_xlim(limits[j]) - plt.gca().set_ylim(limits[i]) - n = len(ev) - if n: - p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/n - p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/n - p_lolo = p_i_lo*p_j_lo - p_lohi = p_i_lo*(1-p_j_lo) - p_hilo = (1-p_i_lo)*p_j_lo - p_hihi = (1-p_i_lo)*(1-p_j_lo) - n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j])) - n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j])) - n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & 
(ev[variables[j]] < cuts[j])) - n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j])) - observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi]) - expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi]) - psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum() - psi /= np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum()) - plt.title(r"$\psi = %.1f$" % psi) - if i == len(variables) - 1: - plt.xlabel(labels[j]) - else: - plt.setp(ax.get_xticklabels(),visible=False) - if j == 0: - plt.ylabel(labels[i]) - else: - plt.setp(ax.get_yticklabels(),visible=False) - plt.axvline(cuts[j],color='k',ls='--',alpha=0.5) - if i != j: - plt.axhline(cuts[i],color='k',ls='--',alpha=0.5) - - plt.tight_layout() - - if save: - plt.savefig(save + ".pdf") - plt.savefig(save + ".eps") - - plt.suptitle(title) - -def intersect_sphere(pos, dir, R): - """ - Compute the first intersection of a ray starting at `pos` with direction - `dir` and a sphere centered at the origin with radius `R`. The distance to - the intersection is returned. - - Example: - - pos = np.array([0,0,0]) - dir = np.array([1,0,0]) - - l = intersect_sphere(pos,dir,PSUP_RADIUS): - if l is not None: - hit = pos + l*dir - print("ray intersects sphere at %.2f %.2f %.2f", hit[0], hit[1], hit[2]) - else: - print("ray didn't intersect sphere") - """ - - b = 2*np.dot(dir,pos) - c = np.dot(pos,pos) - R*R - - if b*b - 4*c <= 0: - # Ray doesn't intersect the sphere. - return None - - # First, check the shorter solution. - l = (-b - np.sqrt(b*b - 4*c))/2 - - # If the shorter solution is less than 0, check the second solution. - if l < 0: - l = (-b + np.sqrt(b*b - 4*c))/2 - - # If the distance is still negative, we didn't intersect the sphere. - if l < 0: - return None - - return l - -def get_dx(row): - pos = np.array([row.x,row.y,row.z]) - dir = np.array([np.sin(row.theta1)*np.cos(row.phi1), - np.sin(row.theta1)*np.sin(row.phi1), - np.cos(row.theta1)]) - l = intersect_sphere(pos,-dir,PSUP_RADIUS) - if l is not None: - pos -= dir*l - michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel]) - return np.linalg.norm(michel_pos-pos) - else: - return 0 - -def dx_to_energy(dx): - lines = [] - with open("../src/muE_water_liquid.txt") as f: - for i, line in enumerate(f): - if i < 10: - continue - if 'Minimum ionization' in line: - continue - if 'Muon critical energy' in line: - continue - lines.append(line) - data = np.genfromtxt(lines) - return np.interp(dx,data[:,8],data[:,0]) - -def iqr_std_err(x): - """ - Returns the approximate standard deviation assuming the central part of the - distribution is gaussian. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range - std = iqr(x.values)/1.3489795 - return 1.573*std/np.sqrt(n) - -def iqr_std(x): - """ - Returns the approximate standard deviation assuming the central part of the - distribution is gaussian. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return iqr(x.values)/1.3489795 - -def quantile_error(x,q): - """ - Returns the standard error for the qth quantile of `x`. The error is - computed using the Maritz-Jarrett method described here: - https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. 
- """ - x = np.sort(x) - n = len(x) - m = int(q*n+0.5) - A = m - 1 - B = n - m - i = np.arange(1,len(x)+1) - w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) - return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) - -def q90_err(x): - """ - Returns the error on the 90th percentile for all the non NaN values in a - Series `x`. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return quantile_error(x.values,0.9) - -def q90(x): - """ - Returns the 90th percentile for all the non NaN values in a Series `x`. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return np.percentile(x.values,90.0) - -def median(x): - """ - Returns the median for all the non NaN values in a Series `x`. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return np.median(x.values) - -def median_err(x): - """ - Returns the approximate error on the median for all the non NaN values in a - Series `x`. The error on the median is approximated assuming the central - part of the distribution is gaussian. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - # First we estimate the standard deviation using the interquartile range. - # Here we are essentially assuming the central part of the distribution is - # gaussian. - std = iqr(x.values)/1.3489795 - median = np.median(x.values) - # Now we estimate the error on the median for a gaussian - # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. - return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) - -def std_err(x): - x = x.dropna() - mean = np.mean(x) - std = np.std(x) - n = len(x) - if n == 0: - return np.nan - elif n == 1: - return 0.0 - u4 = np.mean((x-mean)**4) - error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) - return error - -# Fermi constant -GF = 1.16637887e-5 # 1/MeV^2 -ELECTRON_MASS = 0.5109989461 # MeV -MUON_MASS = 105.6583745 # MeV -PROTON_MASS = 938.272081 # MeV -FINE_STRUCTURE_CONSTANT = 7.297352566417e-3 - -def f(x): - y = (5/(3*x**2) + 16*x/3 + 4/x + (12-8*x)*np.log(1/x-1) - 8)*np.log(MUON_MASS/ELECTRON_MASS) - y += (6-4*x)*(2*spence(x) - 2*np.log(x)**2 + np.log(x) + np.log(1-x)*(3*np.log(x)-1/x-1) - np.pi**2/3-2) - y += (1-x)*(34*x**2+(5-34*x**2+17*x)*np.log(x) - 22*x)/(3*x**2) - y += 6*(1-x)*np.log(x) - return y - -def michel_spectrum(T): - """ - Michel electron energy spectrum for a free muon. `T` should be the kinetic - energy of the electron or positron in MeV. - - Note: The result is not normalized. - - From https://arxiv.org/abs/1406.3575. - """ - E = T + ELECTRON_MASS - x = 2*E/MUON_MASS - mask = (x > 0) & (x < 1) - y = np.zeros_like(x,dtype=np.double) - y[mask] = GF**2*MUON_MASS**5*x[mask]**2*(6-4*x[mask]+FINE_STRUCTURE_CONSTANT*f(x[mask])/np.pi)/(192*np.pi**3) - y *= 2*MUON_MASS - return y - if __name__ == '__main__': import argparse - import matplotlib.pyplot as plt import numpy as np import pandas as pd import sys import h5py + from sddm.plot_energy import * + from sddm.plot import despine parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") @@ -699,6 +110,47 @@ if __name__ == '__main__': parser.add_argument("--save", action='store_true', default=False, help="save corner plots for backgrounds") args = parser.parse_args() + if args.save: + # default \textwidth for a fullpage article in Latex is 16.50764 cm. 
+        # You can figure this out by compiling the following TeX document:
+        #
+        # \documentclass{article}
+        # \usepackage{fullpage}
+        # \usepackage{layouts}
+        # \begin{document}
+        # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
+        # \end{document}
+
+        width = 16.50764
+        width /= 2.54 # cm -> inches
+        # According to this page:
+        # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
+        # Tufte suggests an aspect ratio of 1.5 - 1.6.
+        height = width/1.5
+        FIGSIZE = (width,height)
+
+        import matplotlib.pyplot as plt
+
+        font = {'family':'serif', 'serif': ['computer modern roman']}
+        plt.rc('font',**font)
+
+        plt.rc('text', usetex=True)
+    else:
+        # on retina screens, the default plots are way too small
+        # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
+        # Qt5 will scale everything using the dpi in ~/.Xresources
+        import matplotlib
+        matplotlib.use("Qt5Agg")
+
+        import matplotlib.pyplot as plt
+
+        # Default figure size. Currently set to my monitor width and height so that
+        # things are properly formatted
+        FIGSIZE = (13.78,7.48)
+
+        # Make the default font bigger
+        plt.rc('font', size=22)
+
     ev = pd.concat([pd.read_hdf(filename, "ev") for filename in args.filenames],ignore_index=True)
     fits = pd.concat([pd.read_hdf(filename, "fits") for filename in args.filenames],ignore_index=True)
     rhdr = pd.concat([pd.read_hdf(filename, "rhdr") for filename in args.filenames],ignore_index=True)
@@ -834,47 +286,6 @@ if __name__ == '__main__':
     # retrigger cut
     ev = ev.groupby('run',group_keys=False).apply(retrigger_cut)
 
-    if args.save:
-        # default \textwidth for a fullpage article in Latex is 16.50764 cm.
-        # You can figure this out by compiling the following TeX document:
-        #
-        # \documentclass{article}
-        # \usepackage{fullpage}
-        # \usepackage{layouts}
-        # \begin{document}
-        # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
-        # \end{document}
-
-        width = 16.50764
-        width /= 2.54 # cm -> inches
-        # According to this page:
-        # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
-        # Tufte suggests an aspect ratio of 1.5 - 1.6.
-        height = width/1.5
-        FIGSIZE = (width,height)
-
-        import matplotlib.pyplot as plt
-
-        font = {'family':'serif', 'serif': ['computer modern roman']}
-        plt.rc('font',**font)
-
-        plt.rc('text', usetex=True)
-    else:
-        # on retina screens, the default plots are way too small
-        # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
-        # Qt5 will scale everything using the dpi in ~/.Xresources
-        import matplotlib
-        matplotlib.use("Qt5Agg")
-
-        import matplotlib.pyplot as plt
-
-        # Default figure size. Currently set to my monitor width and height so that
-        # things are properly formatted
-        FIGSIZE = (13.78,7.48)
-
-        # Make the defalt font bigger
-        plt.rc('font', size=22)
-
     if args.dc:
         ev = ev[ev.prompt]
diff --git a/utils/plot-fit-results b/utils/plot-fit-results
index 18139ff..e154b21 100755
--- a/utils/plot-fit-results
+++ b/utils/plot-fit-results
@@ -15,265 +15,62 @@
 # this program. If not, see <https://www.gnu.org/licenses/>.
from __future__ import print_function, division -import numpy as np -from scipy.stats import iqr, norm, beta -from matplotlib.lines import Line2D -import itertools - -IDP_E_MINUS = 20 -IDP_MU_MINUS = 22 - -SNOMAN_MASS = { - 20: 0.511, - 21: 0.511, - 22: 105.658, - 23: 105.658 -} - -def plot_hist(x, label=None): - # determine the bin width using the Freedman Diaconis rule - # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule - h = 2*iqr(x)/len(x)**(1/3) - n = max(int((np.max(x)-np.min(x))/h),10) - bins = np.linspace(np.min(x),np.max(x),n) - plt.hist(x, bins=bins, histtype='step', label=label) - -def plot_legend(n): - plt.figure(n) - ax = plt.gca() - handles, labels = ax.get_legend_handles_labels() - new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles] - plt.legend(handles=new_handles,labels=labels) - -def get_stats(x): - """ - Returns a tuple (mean, error mean, std, error std) for the values in x. - - The formula for the standard error on the standard deviation comes from - https://stats.stackexchange.com/questions/156518. - """ - mean = np.mean(x) - std = np.std(x) - n = len(x) - u4 = np.mean((x-mean)**4) - error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) - return mean, std/np.sqrt(n), std, error - -def iqr_std_err(x): - """ - Returns the approximate standard deviation assuming the central part of the - distribution is gaussian. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range - std = iqr(x.values)/1.3489795 - return 1.573*std/np.sqrt(n) - -def iqr_std(x): - """ - Returns the approximate standard deviation assuming the central part of the - distribution is gaussian. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return iqr(x.values)/1.3489795 - -def quantile_error(x,q): - """ - Returns the standard error for the qth quantile of `x`. The error is - computed using the Maritz-Jarrett method described here: - https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. - """ - x = np.sort(x) - n = len(x) - m = int(q*n+0.5) - A = m - 1 - B = n - m - i = np.arange(1,len(x)+1) - w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) - return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) - -def q90_err(x): - """ - Returns the error on the 90th percentile for all the non NaN values in a - Series `x`. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return quantile_error(x.values,0.9) - -def q90(x): - """ - Returns the 90th percentile for all the non NaN values in a Series `x`. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return np.percentile(x.values,90.0) - -def median(x): - """ - Returns the median for all the non NaN values in a Series `x`. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - return np.median(x.values) - -def median_err(x): - """ - Returns the approximate error on the median for all the non NaN values in a - Series `x`. The error on the median is approximated assuming the central - part of the distribution is gaussian. - """ - x = x.dropna() - n = len(x) - if n == 0: - return np.nan - # First we estimate the standard deviation using the interquartile range. - # Here we are essentially assuming the central part of the distribution is - # gaussian. - std = iqr(x.values)/1.3489795 - median = np.median(x.values) - # Now we estimate the error on the median for a gaussian - # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. 
- return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) - -def std_err(x): - x = x.dropna() - mean = np.mean(x) - std = np.std(x) - n = len(x) - if n == 0: - return np.nan - elif n == 1: - return 0.0 - u4 = np.mean((x-mean)**4) - error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) - return error - -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. - - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) if __name__ == '__main__': import argparse import numpy as np import h5py import pandas as pd + import itertools 
+    from sddm import IDP_E_MINUS, IDP_MU_MINUS, SNOMAN_MASS
+    from sddm.plot import plot_hist, plot_legend, get_stats, despine, iqr_std_err, iqr_std, quantile_error, q90_err, q90, median, median_err, std_err
 
     parser = argparse.ArgumentParser("plot fit results")
     parser.add_argument("filenames", nargs='+', help="input files")
     parser.add_argument("--save", action="store_true", default=False, help="save plots")
     args = parser.parse_args()
 
+    if args.save:
+        # default \textwidth for a fullpage article in Latex is 16.50764 cm.
+        # You can figure this out by compiling the following TeX document:
+        #
+        # \documentclass{article}
+        # \usepackage{fullpage}
+        # \usepackage{layouts}
+        # \begin{document}
+        # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
+        # \end{document}
+
+        width = 16.50764
+        width /= 2.54 # cm -> inches
+        # According to this page:
+        # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
+        # Tufte suggests an aspect ratio of 1.5 - 1.6.
+        height = width/1.5
+        FIGSIZE = (width,height)
+
+        import matplotlib.pyplot as plt
+
+        font = {'family':'serif', 'serif': ['computer modern roman']}
+        plt.rc('font',**font)
+
+        plt.rc('text', usetex=True)
+    else:
+        # on retina screens, the default plots are way too small
+        # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
+        # Qt5 will scale everything using the dpi in ~/.Xresources
+        import matplotlib
+        matplotlib.use("Qt5Agg")
+
+        import matplotlib.pyplot as plt
+
+        # Default figure size. Currently set to my monitor width and height so that
+        # things are properly formatted
+        FIGSIZE = (13.78,7.48)
+
+        # Make the default font bigger
+        plt.rc('font', size=22)
+
     # Read in all the data.
     #
     # Note: We have to add the filename as a column to each dataset since this
@@ -351,47 +148,6 @@ if __name__ == '__main__':
     markers = itertools.cycle(('o', 'v'))
 
-    if args.save:
-        # default \textwidth for a fullpage article in Latex is 16.50764 cm.
-        # You can figure this out by compiling the following TeX document:
-        #
-        # \documentclass{article}
-        # \usepackage{fullpage}
-        # \usepackage{layouts}
-        # \begin{document}
-        # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth}
-        # \end{document}
-
-        width = 16.50764
-        width /= 2.54 # cm -> inches
-        # According to this page:
-        # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf,
-        # Tufte suggests an aspect ratio of 1.5 - 1.6.
-        height = width/1.5
-        FIGSIZE = (width,height)
-
-        import matplotlib.pyplot as plt
-
-        font = {'family':'serif', 'serif': ['computer modern roman']}
-        plt.rc('font',**font)
-
-        plt.rc('text', usetex=True)
-    else:
-        # on retina screens, the default plots are way too small
-        # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1
-        # Qt5 will scale everything using the dpi in ~/.Xresources
-        import matplotlib
-        matplotlib.use("Qt5Agg")
-
-        import matplotlib.pyplot as plt
-
-        # Default figure size.
Currently set to my monitor width and height so that - # things are properly formatted - FIGSIZE = (13.78,7.48) - - # Make the defalt font bigger - plt.rc('font', size=22) - fig3, ax3 = plt.subplots(3,1,figsize=FIGSIZE,num=3,sharex=True) fig4, ax4 = plt.subplots(3,1,figsize=FIGSIZE,num=4,sharex=True) diff --git a/utils/plot-root-results b/utils/plot-root-results index e8dcf19..7f115f1 100755 --- a/utils/plot-root-results +++ b/utils/plot-root-results @@ -17,113 +17,11 @@ from __future__ import print_function, division import numpy as np -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. - - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, 
lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) - if __name__ == '__main__': import ROOT import argparse from os.path import split - from matplotlib.ticker import FuncFormatter + from sddm.plot import despine parser = argparse.ArgumentParser("plot ROOT fit results") parser.add_argument("filename", help="input file") diff --git a/utils/sddm/.gitignore b/utils/sddm/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/utils/sddm/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py new file mode 100644 index 0000000..2525383 --- /dev/null +++ b/utils/sddm/__init__.py @@ -0,0 +1,139 @@ +from __future__ import print_function, division +import sys +from itertools import izip_longest +import os + +IDP_E_MINUS = 20 +IDP_MU_MINUS = 22 + +SNOMAN_MASS = { + 20: 0.511, + 21: 0.511, + 22: 105.658, + 23: 105.658 +} + +AV_RADIUS = 600.0 +PSUP_RADIUS = 840.0 + +# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +def print_warning(msg): + print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def grouper(iterable, n, fillvalue=None): + "Collect data into fixed-length chunks or blocks" + # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx + args = [iter(iterable)] * n + return izip_longest(fillvalue=fillvalue, *args) + +def splitext(path): + """ + Like os.path.splitext() except it returns the full extension if the + filename has multiple extensions, for example: + + splitext('foo.tar.gz') -> 'foo', '.tar.gz' + """ + full_root, full_ext = os.path.splitext(path) + while True: + root, ext = os.path.splitext(full_root) + if ext: + full_ext = ext + full_ext + full_root = root + else: + break + + return full_root, full_ext + +# Next two functions are a backport of the shutil.which() function from Python +# 3.3 from Lib/shutil.py in the CPython code See +# https://github.com/python/cpython/blob/master/Lib/shutil.py. + +# Check that a given file can be accessed with the correct mode. +# Additionally check that `file` is not a directory, as on Windows +# directories pass the os.access check. +def _access_check(fn, mode): + return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)) + +def which(cmd, mode=os.F_OK | os.X_OK, path=None): + """Given a command, mode, and a PATH string, return the path which + conforms to the given mode on the PATH, or None if there is no such + file. + `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result + of os.environ.get("PATH"), or can be overridden with a custom search + path. + """ + # If we're given a path with a directory part, look it up directly rather + # than referring to PATH directories. This includes checking relative to the + # current directory, e.g. 
./script
+    if os.path.dirname(cmd):
+        if _access_check(cmd, mode):
+            return cmd
+        return None
+
+    if path is None:
+        path = os.environ.get("PATH", None)
+        if path is None:
+            try:
+                path = os.confstr("CS_PATH")
+            except (AttributeError, ValueError):
+                # os.confstr() or CS_PATH is not available
+                path = os.defpath
+        # bpo-35755: Don't use os.defpath if the PATH environment variable is
+        # set to an empty string
+
+    # PATH='' doesn't match, whereas PATH=':' looks in the current directory
+    if not path:
+        return None
+
+    path = path.split(os.pathsep)
+
+    if sys.platform == "win32":
+        # The current directory takes precedence on Windows.
+        curdir = os.curdir
+        if curdir not in path:
+            path.insert(0, curdir)
+
+        # PATHEXT is necessary to check on Windows.
+        pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
+        # See if the given file matches any of the expected path extensions.
+        # This will allow us to short circuit when given "python.exe".
+        # If it does match, only test that one, otherwise we have to try
+        # others.
+        if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
+            files = [cmd]
+        else:
+            files = [cmd + ext for ext in pathext]
+    else:
+        # On other platforms you don't have things like PATHEXT to tell you
+        # what file suffixes are executable, so just pass on cmd as-is.
+        files = [cmd]
+
+    seen = set()
+    for dir in path:
+        normdir = os.path.normcase(dir)
+        if not normdir in seen:
+            seen.add(normdir)
+            for thefile in files:
+                name = os.path.join(dir, thefile)
+                if _access_check(name, mode):
+                    return name
+    return None
+
+from . import plot
+from . import plot_energy
+from . import dc
diff --git a/utils/sddm/dc.py b/utils/sddm/dc.py
new file mode 100755
index 0000000..930d32f
--- /dev/null
+++ b/utils/sddm/dc.py
@@ -0,0 +1,366 @@
+#!/usr/bin/env python
+# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <https://www.gnu.org/licenses/>.
+
+from __future__ import print_function, division
+import numpy as np
+from scipy.stats import iqr
+import nlopt
+from scipy.stats import poisson
+import contextlib
+import sys
+from math import exp
+import emcee
+from scipy.optimize import brentq
+from scipy.stats import truncnorm
+from matplotlib.lines import Line2D
+# pandas, print_warning() and AV_RADIUS are used by michel_cut(),
+# atmospheric_events() and gtid_sort() below
+import pandas as pd
+from . import print_warning, AV_RADIUS
+
+# Data cleaning bitmasks.
+DC_MUON = 0x1
+DC_JUNK = 0x2
+DC_CRATE_ISOTROPY = 0x4
+DC_QVNHIT = 0x8
+DC_NECK = 0x10
+DC_FLASHER = 0x20
+DC_ESUM = 0x40
+DC_OWL = 0x80
+DC_OWL_TRIGGER = 0x100
+DC_FTS = 0x200
+DC_ITC = 0x400
+DC_BREAKDOWN = 0x800
+
+# Lower bound for probabilities in fit. We set this to be nonzero to prevent
+# nans in case an expected value is predicted to be zero.
+EPSILON = 1e-10
+
+def get_proposal_func(stepsizes, low, high):
+    """
+    Returns a function which produces proposal steps for a Metropolis Hastings
+    MCMC. This function can then be passed to emcee.moves.MHMove().
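+
+    A minimal usage sketch (assuming emcee 3's moves API; `nll`, `nwalkers`,
+    `ndim`, `x0`, `low`, `high` and `stepsizes` are placeholders, not names
+    defined in this module):
+
+        proposal = get_proposal_func(stepsizes,low,high)
+        sampler = emcee.EnsembleSampler(nwalkers,ndim,lambda x: -nll(x),
+                                        moves=emcee.moves.MHMove(proposal))
+        sampler.run_mcmc(x0,1000)   # x0 has shape (nwalkers,ndim)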
+ + Steps are proposed according to a truncated normal distribution with mean + at the current position and with standard deviations given by stepsizes. + The bounds for each step are given by the two arrays low and high. + """ + stepsizes = np.atleast_1d(stepsizes) + low = np.atleast_1d(low) + high = np.atleast_1d(high) + def proposal(x0, random): + """ + Returns a tuple of proposed positions and the log-ratio of the proposal + probabilities. + + x0 should be a (K,ndim) dimensional numpy array where K is the number + of walkers and ndim is the number of dimensions in the likelihood function. + """ + # calculate a and b needed for truncnorm + # Note: These are not just low and high b/c + # + # The standard form of this distribution is a standard normal + # truncated to the range [a, b] --- notice that a and b are defined + # over the domain of the standard normal. To convert clip values + # for a specific mean and standard deviation, use:: + # + # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std + a, b = (low - x0)/stepsizes, (high - x0)/stepsizes + x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random) + # Note: Should be able to do this: + # + # log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1) + # + # but I think there is a bug in truncnorm.logpdf() which barfs when + # passed 2D arrays, so instead we just loop over the first axis. + log_p_x_given_x0 = np.empty(x0.shape[0]) + for i in range(x0.shape[0]): + log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum() + a, b = (low - x)/stepsizes, (high - x)/stepsizes + # Note: Should be able to do this: + # + # log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1) + # + # but I think there is a bug in truncnorm.logpdf() which barfs when + # passed 2D arrays, so instead we just loop over the first axis. + log_p_x0_given_x = np.empty(x0.shape[0]) + for i in range(x0.shape[0]): + log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum() + return x, log_p_x0_given_x - log_p_x_given_x0 + return proposal + +def estimate_errors(nll, xopt, constraints): + """ + Return approximate 1 sigma errors for each parameter assuming all + parameters are uncorrelated. + """ + nll_xopt = nll(xopt) + + def root(x, xopt, i): + """ + Function to bisect where the negative log likelihood reaches 0.5 from the minimum. 
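+
+        (For a gaussian likelihood the negative log likelihood rises by
+        exactly 0.5 at one standard deviation from the minimum, which is why
+        0.5 is the target here.)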
+ """ + xtest = xopt.copy() + xtest[i] = x + for constraint in constraints: + if constraint(xtest) >= 0: + xtest = constraint.renormalize(xtest,i) + return nll(xtest) - (nll_xopt + 0.5) + + errors = np.empty_like(xopt) + + for i in range(len(xopt)): + if i < 6: + xlo = brentq(root,0,xopt[i],args=(xopt,i)) + xhi = brentq(root,xopt[i],1e9,args=(xopt,i)) + else: + xlo = brentq(root,0,xopt[i],args=(xopt,i)) + xhi = brentq(root,xopt[i],1,args=(xopt,i)) + + errors[i] = max(xopt[i]-xlo,xhi-xopt[i]) + + return errors + +def metropolis_hastings(nll, x0, stepsizes=None, size=1000): + x = np.asarray(x0) + + if stepsizes is None: + stepsizes = np.ones_like(x) + else: + stepsizes = np.asarray(stepsizes) + + accept = 0 + + nll_old = nll(x) + + samples = np.empty((size,len(x0)),dtype=np.double) + + try: + for i in range(size): + if i % 100 == 0: + sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1))) + sys.stdout.flush() + + xnew = x + np.random.randn(len(x))*stepsizes + + nll_new = nll(xnew) + + if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new): + x = xnew + nll_new = nll_old + accept += 1 + + samples[i] = x + except KeyboardInterrupt: + sys.stdout.write("\n") + sys.stdout.flush() + print("ctrl-c caught. quitting...") + samples.resize((i,len(x))) + else: + sys.stdout.write("\n") + sys.stdout.flush() + + return samples + +def unwrap(p, delta, axis=-1): + """ + A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. + It unwraps discontinuities bigger than delta/2 by delta. + + Example: + + >>> a = np.arange(10) % 5 + >>> a + array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) + >>> unwrap(a,5) + array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + + In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. + """ + p = np.asarray(p) + nd = p.ndim + dd = np.diff(p, axis=axis) + slice1 = [slice(None, None)]*nd # full slices + slice1[axis] = slice(1, None) + slice1 = tuple(slice1) + ddmod = np.mod(dd + delta/2, delta) - delta/2 + np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) + ph_correct = ddmod - dd + np.copyto(ph_correct, 0, where=abs(dd) < delta/2) + up = np.array(p, copy=True, dtype='d') + up[slice1] = p[slice1] + ph_correct.cumsum(axis) + return up + +def unwrap_50_mhz_clock(gtr): + """ + Unwrap an array with 50 MHz clock times. These times should all be in + nanoseconds and come from the KEV_GTR variable in the EV bank. + + Note: We assume here that the events are already ordered contiguously by + GTID, so you shouldn't pass an array with multiple runs! + """ + return unwrap(gtr,0x7ffffffffff*20.0) + +def retrigger_cut(ev): + """ + Cuts all retrigger events. + """ + return ev[ev.dt > 500] + +def breakdown_follower_cut(ev): + """ + Cuts all events within 1 second of breakdown events. + """ + breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] + return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] + +def flasher_follower_cut(ev): + """ + Cuts all events within 200 microseconds of flasher events. + """ + flashers = ev[ev.dc & DC_FLASHER != 0] + return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def muon_follower_cut(ev): + """ + Cuts all events 200 microseconds after a muon. 
+ """ + muons = ev[ev.dc & DC_MUON != 0] + return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def michel_cut(ev): + """ + Looks for Michel electrons after muons. + """ + prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + # require Michel events to pass more of the SNO data cleaning cuts + michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + + michel = michel[michel.nhit >= 100] + + # Accept events which had a muon more than 800 nanoseconds but less + # than 20 microseconds before them. The 800 nanoseconds cut comes from + # Richie's thesis. He also mentions that the In Time Channel Spread Cut + # is very effective at cutting electrons events caused by muons, so I + # should implement that. + # + # Note: We currently don't look across run boundaries. This should be a + # *very* small effect, and the logic to do so would be very complicated + # since I would have to deal with 50 MHz clock rollovers, etc. + # + # Do a simple python for loop here over the runs since we create + # temporary arrays with shape (michel.size,prompt_plus_muons.size) + # which could be too big for the full dataset. + # + # There might be some clever way to do this without the loop in Pandas, + # but I don't know how. + if prompt_plus_muons.size and michel.size: + return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ + (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] + else: + return pd.DataFrame(columns=ev.columns) + +def atmospheric_events(ev): + """ + Tags atmospheric events which have a neutron follower. + """ + prompt = ev[ev.prompt] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + ev['atm'] = np.zeros(len(ev),dtype=np.bool) + + if prompt.size and follower.size: + # neutron followers have to obey stricter set of data cleaning cuts + neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] + r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2) + neutron = neutron[r < AV_RADIUS] + neutron = neutron[neutron.rsp_energy > 4.0] + + # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) + ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ + (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) + + return ev + +def gtid_sort(ev, first_gtid): + """ + Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid + in a run, which should be passed as a dictionary. This column can then be + used to sort the events sequentially. + + This function should be passed to ev.groupby('run').apply(). We use this + idiom instead of just looping over the groupby results since groupby() + makes a copy of the dataframe, i.e. 
+ + for run, ev_run in ev.groupby('run'): + ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + would produce a SettingWithCopyWarning, so instead we use: + + ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) + + which doesn't have this problem. + """ + # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python + run = ev.name + + if run not in first_gtid: + print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) + first_gtid[run] = ev.gtid.iloc[0] + + ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + return ev + +def prompt_event(ev): + ev['prompt'] = (ev.nhit >= 100) + ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) + return ev + +class Constraint(object): + def __init__(self, range): + self.range = range + + def __call__(self, x, grad=None): + return np.sum(x[self.range]) - 1 + EPSILON + + def renormalize(self, x, fix): + if fix not in self.range: + return x + x = x.copy() + while self(x) >= 0: + for i in self.range: + if i == fix: + continue + x[i] -= self(x) + return x + + def renormalize_no_fix(self, x): + x = x.copy() + while self(x) >= 0: + for i in self.range: + if x[i] < 10*EPSILON: + continue + x[i] -= x[i]/2.0 + return x diff --git a/utils/sddm/logger.py b/utils/sddm/logger.py new file mode 100644 index 0000000..c641893 --- /dev/null +++ b/utils/sddm/logger.py @@ -0,0 +1,70 @@ +import os +import sys +from datetime import datetime + +DEBUG = 0 +VERBOSE = 1 +NOTICE = 2 +WARNING = 3 + +class Logger(object): + """ + Simple logger class that I wrote for the SNO+ DAQ. Very easy to use: + + log = Logger() + log.set_logfile("test.log") + log.notice("blah") + log.warn("foo") + + The log file format is taken from the Redis log file format which is really + nice since it shows the exact time and severity of each log message. 
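+
+    For example, a notice logged by PID 12345 would look something like:
+
+        12345:11 May 10:30:39.123 * blah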
+ """ + def __init__(self): + self.logfile = sys.stdout + # by default, we log everything + self.verbosity = DEBUG + + def set_verbosity(self, level): + if isinstance(level, int): + self.verbosity = level + elif isinstance(level, basestring): + if level == 'debug': + self.verbosity = DEBUG + elif level == 'verbose': + self.verbosity = VERBOSE + elif level == 'notice': + self.verbosity = NOTICE + elif level == 'warning': + self.verbosity = WARNING + else: + raise ValueError("unknown loglevel '%s'" % level) + else: + raise TypeError("level must be a string or integer") + + def set_logfile(self, filename): + self.logfile = open(filename, 'a') + + def debug(self, msg): + self.log(DEBUG, msg) + + def verbose(self, msg): + self.log(VERBOSE, msg) + + def notice(self, msg): + self.log(NOTICE, msg) + + def warn(self, msg): + self.log(WARNING, msg) + + def log(self, level, msg): + if level < self.verbosity: + return + + c = '.-*#' + pid = os.getpid() + now = datetime.now() + buf = now.strftime('%d %b %H:%M:%S.%f')[:-3] + + self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg)) + self.logfile.flush() + diff --git a/utils/sddm/plot.py b/utils/sddm/plot.py new file mode 100755 index 0000000..b00adbd --- /dev/null +++ b/utils/sddm/plot.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr +from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta + +def plot_hist(x, label=None): + # determine the bin width using the Freedman Diaconis rule + # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule + import matplotlib.pyplot as plt + h = 2*iqr(x)/len(x)**(1/3) + n = max(int((np.max(x)-np.min(x))/h),10) + bins = np.linspace(np.min(x),np.max(x),n) + plt.hist(x, bins=bins, histtype='step', label=label) + +def plot_legend(n): + import matplotlib.pyplot as plt + plt.figure(n) + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles] + plt.legend(handles=new_handles,labels=labels) + +def get_stats(x): + """ + Returns a tuple (mean, error mean, std, error std) for the values in x. + + The formula for the standard error on the standard deviation comes from + https://stats.stackexchange.com/questions/156518. + """ + mean = np.mean(x) + std = np.std(x) + n = len(x) + u4 = np.mean((x-mean)**4) + error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) + return mean, std/np.sqrt(n), std, error + +# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py +def despine(fig=None, ax=None, top=True, right=True, left=False, + bottom=False, offset=None, trim=False): + """Remove the top and right spines from plot(s). 
+ + fig : matplotlib figure, optional + Figure to despine all axes of, default uses current figure. + ax : matplotlib axes, optional + Specific axes object to despine. + top, right, left, bottom : boolean, optional + If True, remove that spine. + offset : int or dict, optional + Absolute distance, in points, spines should be moved away + from the axes (negative values move spines inward). A single value + applies to all spines; a dict can be used to set offset values per + side. + trim : bool, optional + If True, limit spines to the smallest and largest major tick + on each non-despined axis. + + Returns + ------- + None + + """ + import matplotlib.pyplot as plt + # Get references to the axes we want + if fig is None and ax is None: + axes = plt.gcf().axes + elif fig is not None: + axes = fig.axes + elif ax is not None: + axes = [ax] + + for ax_i in axes: + for side in ["top", "right", "left", "bottom"]: + # Toggle the spine objects + is_visible = not locals()[side] + ax_i.spines[side].set_visible(is_visible) + if offset is not None and is_visible: + try: + val = offset.get(side, 0) + except AttributeError: + val = offset + _set_spine_position(ax_i.spines[side], ('outward', val)) + + # Potentially move the ticks + if left and not right: + maj_on = any( + t.tick1line.get_visible() + for t in ax_i.yaxis.majorTicks + ) + min_on = any( + t.tick1line.get_visible() + for t in ax_i.yaxis.minorTicks + ) + ax_i.yaxis.set_ticks_position("right") + for t in ax_i.yaxis.majorTicks: + t.tick2line.set_visible(maj_on) + for t in ax_i.yaxis.minorTicks: + t.tick2line.set_visible(min_on) + + if bottom and not top: + maj_on = any( + t.tick1line.get_visible() + for t in ax_i.xaxis.majorTicks + ) + min_on = any( + t.tick1line.get_visible() + for t in ax_i.xaxis.minorTicks + ) + ax_i.xaxis.set_ticks_position("top") + for t in ax_i.xaxis.majorTicks: + t.tick2line.set_visible(maj_on) + for t in ax_i.xaxis.minorTicks: + t.tick2line.set_visible(min_on) + + if trim: + # clip off the parts of the spines that extend past major ticks + xticks = ax_i.get_xticks() + if xticks.size: + firsttick = np.compress(xticks >= min(ax_i.get_xlim()), + xticks)[0] + lasttick = np.compress(xticks <= max(ax_i.get_xlim()), + xticks)[-1] + ax_i.spines['bottom'].set_bounds(firsttick, lasttick) + ax_i.spines['top'].set_bounds(firsttick, lasttick) + newticks = xticks.compress(xticks <= lasttick) + newticks = newticks.compress(newticks >= firsttick) + ax_i.set_xticks(newticks) + + yticks = ax_i.get_yticks() + if yticks.size: + firsttick = np.compress(yticks >= min(ax_i.get_ylim()), + yticks)[0] + lasttick = np.compress(yticks <= max(ax_i.get_ylim()), + yticks)[-1] + ax_i.spines['left'].set_bounds(firsttick, lasttick) + ax_i.spines['right'].set_bounds(firsttick, lasttick) + newticks = yticks.compress(yticks <= lasttick) + newticks = newticks.compress(newticks >= firsttick) + ax_i.set_yticks(newticks) + +def iqr_std_err(x): + """ + Returns the approximate standard deviation assuming the central part of the + distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range + std = iqr(x.values)/1.3489795 + return 1.573*std/np.sqrt(n) + +def iqr_std(x): + """ + Returns the approximate standard deviation assuming the central part of the + distribution is gaussian. 
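+
+    (The constant 1.3489795 used here is the interquartile range of a unit
+    gaussian, i.e. 2*norm.ppf(0.75), so iqr(x)/1.3489795 estimates the
+    standard deviation.)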
+ """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return iqr(x.values)/1.3489795 + +def quantile_error(x,q): + """ + Returns the standard error for the qth quantile of `x`. The error is + computed using the Maritz-Jarrett method described here: + https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. + """ + x = np.sort(x) + n = len(x) + m = int(q*n+0.5) + A = m - 1 + B = n - m + i = np.arange(1,len(x)+1) + w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) + return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) + +def q90_err(x): + """ + Returns the error on the 90th percentile for all the non NaN values in a + Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return quantile_error(x.values,0.9) + +def q90(x): + """ + Returns the 90th percentile for all the non NaN values in a Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return np.percentile(x.values,90.0) + +def median(x): + """ + Returns the median for all the non NaN values in a Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return np.median(x.values) + +def median_err(x): + """ + Returns the approximate error on the median for all the non NaN values in a + Series `x`. The error on the median is approximated assuming the central + part of the distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + # First we estimate the standard deviation using the interquartile range. + # Here we are essentially assuming the central part of the distribution is + # gaussian. + std = iqr(x.values)/1.3489795 + median = np.median(x.values) + # Now we estimate the error on the median for a gaussian + # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. + return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) + +def std_err(x): + x = x.dropna() + mean = np.mean(x) + std = np.std(x) + n = len(x) + if n == 0: + return np.nan + elif n == 1: + return 0.0 + u4 = np.mean((x-mean)**4) + error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) + return error diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py new file mode 100755 index 0000000..a24ea00 --- /dev/null +++ b/utils/sddm/plot_energy.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr, poisson +from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta +from scipy.special import spence +from itertools import izip_longest +from .dc import DC_MUON, DC_JUNK, DC_CRATE_ISOTROPY, DC_QVNHIT, DC_NECK, DC_FLASHER, DC_ESUM, DC_OWL, DC_OWL_TRIGGER, DC_FTS, DC_ITC, DC_BREAKDOWN +from . 
import grouper, print_warning, AV_RADIUS, PSUP_RADIUS +import pandas as pd + +# Fermi constant +GF = 1.16637887e-5 # 1/MeV^2 +ELECTRON_MASS = 0.5109989461 # MeV +MUON_MASS = 105.6583745 # MeV +PROTON_MASS = 938.272081 # MeV +FINE_STRUCTURE_CONSTANT = 7.297352566417e-3 + +def unwrap(p, delta, axis=-1): + """ + A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. + It unwraps discontinuities bigger than delta/2 by delta. + + Example: + + >>> a = np.arange(10) % 5 + >>> a + array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) + >>> unwrap(a,5) + array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + + In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. + """ + p = np.asarray(p) + nd = p.ndim + dd = np.diff(p, axis=axis) + slice1 = [slice(None, None)]*nd # full slices + slice1[axis] = slice(1, None) + slice1 = tuple(slice1) + ddmod = np.mod(dd + delta/2, delta) - delta/2 + np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) + ph_correct = ddmod - dd + np.copyto(ph_correct, 0, where=abs(dd) < delta/2) + up = np.array(p, copy=True, dtype='d') + up[slice1] = p[slice1] + ph_correct.cumsum(axis) + return up + +def unwrap_50_mhz_clock(gtr): + """ + Unwrap an array with 50 MHz clock times. These times should all be in + nanoseconds and come from the KEV_GTR variable in the EV bank. + + Note: We assume here that the events are already ordered contiguously by + GTID, so you shouldn't pass an array with multiple runs! + """ + return unwrap(gtr,0x7ffffffffff*20.0) + +def retrigger_cut(ev): + """ + Cuts all retrigger events. + """ + return ev[ev.dt > 500] + +def breakdown_follower_cut(ev): + """ + Cuts all events within 1 second of breakdown events. + """ + breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] + return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] + +def flasher_follower_cut(ev): + """ + Cuts all events within 200 microseconds of flasher events. + """ + flashers = ev[ev.dc & DC_FLASHER != 0] + return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def muon_follower_cut(ev): + """ + Cuts all events 200 microseconds after a muon. + """ + muons = ev[ev.dc & DC_MUON != 0] + return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def michel_cut(ev): + """ + Looks for Michel electrons after muons. + """ + prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + # require Michel events to pass more of the SNO data cleaning cuts + michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + + michel = michel[michel.nhit >= 100] + + # Accept events which had a muon more than 800 nanoseconds but less than 20 + # microseconds before them. The 800 nanoseconds cut comes from Richie's + # thesis. He also mentions that the In Time Channel Spread Cut is very + # effective at cutting electron events caused by muons, so I should + # implement that. + # + # Note: We currently don't look across run boundaries. This should be a + # *very* small effect, and the logic to do so would be very complicated + # since I would have to deal with 50 MHz clock rollovers, etc. 
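+    #
+    # The comparisons below broadcast each muon time against each Michel
+    # candidate time, producing a (n_muons, n_michels) boolean mask; np.any()
+    # over axis 0 keeps candidates which follow *any* muon, and np.argmax()
+    # over axis 0 picks out the first matching muon so that its GTID can be
+    # recorded in the muon_gtid column.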
+ if prompt_plus_muons.size and michel.size: + mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ + (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3) + michel = michel.iloc[np.any(mask,axis=0)] + michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values, + index=michel.index.values, + dtype=np.int32) + return michel + else: + # Return an empty slice since we need it to have the same datatype as + # the other dataframes + michel = ev[:0] + michel['muon_gtid'] = -1 + return michel + +def atmospheric_events(ev): + """ + Tags atmospheric events which have a neutron follower. + """ + prompt = ev[ev.prompt] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + ev['atm'] = np.zeros(len(ev),dtype=np.bool) + + if prompt.size and follower.size: + # neutron followers have to obey stricter set of data cleaning cuts + neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] + # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9? + neutron = neutron[neutron.ftp_r < AV_RADIUS] + neutron = neutron[neutron.rsp_energy > 4.0] + + # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) + ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ + (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) + + return ev + +def gtid_sort(ev, first_gtid): + """ + Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid + in a run, which should be passed as a dictionary. This column can then be + used to sort the events sequentially. + + This function should be passed to ev.groupby('run').apply(). We use this + idiom instead of just looping over the groupby results since groupby() + makes a copy of the dataframe, i.e. + + for run, ev_run in ev.groupby('run'): + ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + would produce a SettingWithCopyWarning, so instead we use: + + ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) + + which doesn't have this problem. + """ + # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python + run = ev.name + + if run not in first_gtid: + print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." 
% run)
+        first_gtid[run] = ev.gtid.iloc[0]
+
+    ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+    return ev
+
+def prompt_event(ev):
+    ev['prompt'] = (ev.nhit >= 100)
+    ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
+    return ev
+
+def plot_corner_plot(ev, title, save=None, figsize=None):
+    import matplotlib.pyplot as plt
+    from .plot import despine
+    # Use a square figure; fall back to matplotlib's default size when the
+    # caller doesn't specify one.
+    if figsize is None:
+        figsize = plt.rcParams['figure.figsize']
+    variables = ['r_psup','psi','z','udotr']
+    labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$']
+    limits = [(0,1),(0,10),(-840,840),(-1,1)]
+    cuts = [0.9,6,0,-0.5]
+
+    ev = ev.dropna(subset=variables)
+
+    fig = plt.figure(figsize=(figsize[0],figsize[0]))
+    despine(fig,trim=True)
+    for i in range(len(variables)):
+        for j in range(len(variables)):
+            if j > i:
+                continue
+            ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1)
+            if i == j:
+                plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step')
+                plt.gca().set_xlim(limits[i])
+            else:
+                plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5)
+                plt.gca().set_xlim(limits[j])
+                plt.gca().set_ylim(limits[i])
+                n = len(ev)
+                if n:
+                    p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/n
+                    p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/n
+                    p_lolo = p_i_lo*p_j_lo
+                    p_lohi = p_i_lo*(1-p_j_lo)
+                    p_hilo = (1-p_i_lo)*p_j_lo
+                    p_hihi = (1-p_i_lo)*(1-p_j_lo)
+                    n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j]))
+                    n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j]))
+                    n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] < cuts[j]))
+                    n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j]))
+                    observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi])
+                    expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi])
+                    psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum()
+                    psi /= np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum())
+                    plt.title(r"$\psi = %.1f$" % psi)
+            if i == len(variables) - 1:
+                plt.xlabel(labels[j])
+            else:
+                plt.setp(ax.get_xticklabels(),visible=False)
+            if j == 0:
+                plt.ylabel(labels[i])
+            else:
+                plt.setp(ax.get_yticklabels(),visible=False)
+            plt.axvline(cuts[j],color='k',ls='--',alpha=0.5)
+            if i != j:
+                plt.axhline(cuts[i],color='k',ls='--',alpha=0.5)
+
+    plt.tight_layout()
+
+    if save:
+        plt.savefig(save + ".pdf")
+        plt.savefig(save + ".eps")
+
+    plt.suptitle(title)
+
+def intersect_sphere(pos, dir, R):
+    """
+    Compute the first intersection of a ray starting at `pos` with direction
+    `dir` and a sphere centered at the origin with radius `R`. The distance to
+    the intersection is returned.
+
+    Example:
+
+        pos = np.array([0,0,0])
+        dir = np.array([1,0,0])
+
+        l = intersect_sphere(pos,dir,PSUP_RADIUS)
+        if l is not None:
+            hit = pos + l*dir
+            print("ray intersects sphere at %.2f %.2f %.2f" % (hit[0], hit[1], hit[2]))
+        else:
+            print("ray didn't intersect sphere")
+    """
+
+    b = 2*np.dot(dir,pos)
+    c = np.dot(pos,pos) - R*R
+
+    if b*b - 4*c <= 0:
+        # Ray doesn't intersect the sphere.
+        return None
+
+    # First, check the shorter solution.
+    l = (-b - np.sqrt(b*b - 4*c))/2
+
+    # If the shorter solution is less than 0, check the second solution.
+    if l < 0:
+        l = (-b + np.sqrt(b*b - 4*c))/2
+
+    # If the distance is still negative, we didn't intersect the sphere.
+ if l < 0: + return None + + return l + +def get_dx(row): + pos = np.array([row.x,row.y,row.z]) + dir = np.array([np.sin(row.theta1)*np.cos(row.phi1), + np.sin(row.theta1)*np.sin(row.phi1), + np.cos(row.theta1)]) + l = intersect_sphere(pos,-dir,PSUP_RADIUS) + if l is not None: + pos -= dir*l + michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel]) + return np.linalg.norm(michel_pos-pos) + else: + return 0 + +def dx_to_energy(dx): + lines = [] + with open("../src/muE_water_liquid.txt") as f: + for i, line in enumerate(f): + if i < 10: + continue + if 'Minimum ionization' in line: + continue + if 'Muon critical energy' in line: + continue + lines.append(line) + data = np.genfromtxt(lines) + return np.interp(dx,data[:,8],data[:,0]) + +def f(x): + y = (5/(3*x**2) + 16*x/3 + 4/x + (12-8*x)*np.log(1/x-1) - 8)*np.log(MUON_MASS/ELECTRON_MASS) + y += (6-4*x)*(2*spence(x) - 2*np.log(x)**2 + np.log(x) + np.log(1-x)*(3*np.log(x)-1/x-1) - np.pi**2/3-2) + y += (1-x)*(34*x**2+(5-34*x**2+17*x)*np.log(x) - 22*x)/(3*x**2) + y += 6*(1-x)*np.log(x) + return y + +def michel_spectrum(T): + """ + Michel electron energy spectrum for a free muon. `T` should be the kinetic + energy of the electron or positron in MeV. + + Note: The result is not normalized. + + From https://arxiv.org/abs/1406.3575. + """ + E = T + ELECTRON_MASS + x = 2*E/MUON_MASS + mask = (x > 0) & (x < 1) + y = np.zeros_like(x,dtype=np.double) + y[mask] = GF**2*MUON_MASS**5*x[mask]**2*(6-4*x[mask]+FINE_STRUCTURE_CONSTANT*f(x[mask])/np.pi)/(192*np.pi**3) + y *= 2*MUON_MASS + return y diff --git a/utils/setup.py b/utils/setup.py new file mode 100755 index 0000000..23a2ebe --- /dev/null +++ b/utils/setup.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python +from distutils.core import setup + +setup(name='sddm', + version='0.1', + description="Self-destructing dark matter python scripts", + author="Anthony LaTorre", + packages=['sddm'], + install_requires=['numpy', 'pandas', 'matplotlib'] +) diff --git a/utils/submit-grid-jobs b/utils/submit-grid-jobs index 85cf59a..919f17a 100755 --- a/utils/submit-grid-jobs +++ b/utils/submit-grid-jobs @@ -50,150 +50,11 @@ import subprocess import json import h5py import numpy as np - -DEBUG = 0 -VERBOSE = 1 -NOTICE = 2 -WARNING = 3 - -class Logger(object): - """ - Simple logger class that I wrote for the SNO+ DAQ. Very easy to use: - - log = Logger() - log.set_logfile("test.log") - log.notice("blah") - log.warn("foo") - - The log file format is taken from the Redis log file format which is really - nice since it shows the exact time and severity of each log message. 
- """ - def __init__(self): - self.logfile = sys.stdout - # by default, we log everything - self.verbosity = DEBUG - - def set_verbosity(self, level): - if isinstance(level, int): - self.verbosity = level - elif isinstance(level, basestring): - if level == 'debug': - self.verbosity = DEBUG - elif level == 'verbose': - self.verbosity = VERBOSE - elif level == 'notice': - self.verbosity = NOTICE - elif level == 'warning': - self.verbosity = WARNING - else: - raise ValueError("unknown loglevel '%s'" % level) - else: - raise TypeError("level must be a string or integer") - - def set_logfile(self, filename): - self.logfile = open(filename, 'a') - - def debug(self, msg): - self.log(DEBUG, msg) - - def verbose(self, msg): - self.log(VERBOSE, msg) - - def notice(self, msg): - self.log(NOTICE, msg) - - def warn(self, msg): - self.log(WARNING, msg) - - def log(self, level, msg): - if level < self.verbosity: - return - - c = '.-*#' - pid = os.getpid() - now = datetime.now() - buf = now.strftime('%d %b %H:%M:%S.%f')[:-3] - - self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg)) - self.logfile.flush() +from sddm.logger import Logger +from sddm import which, splitext log = Logger() -# Next two functions are a backport of the shutil.which() function from Python -# 3.3 from Lib/shutil.py in the CPython code See -# https://github.com/python/cpython/blob/master/Lib/shutil.py. - -# Check that a given file can be accessed with the correct mode. -# Additionally check that `file` is not a directory, as on Windows -# directories pass the os.access check. -def _access_check(fn, mode): - return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)) - -def which(cmd, mode=os.F_OK | os.X_OK, path=None): - """Given a command, mode, and a PATH string, return the path which - conforms to the given mode on the PATH, or None if there is no such - file. - `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result - of os.environ.get("PATH"), or can be overridden with a custom search - path. - """ - # If we're given a path with a directory part, look it up directly rather - # than referring to PATH directories. This includes checking relative to the - # current directory, e.g. ./script - if os.path.dirname(cmd): - if _access_check(cmd, mode): - return cmd - return None - - if path is None: - path = os.environ.get("PATH", None) - if path is None: - try: - path = os.confstr("CS_PATH") - except (AttributeError, ValueError): - # os.confstr() or CS_PATH is not available - path = os.defpath - # bpo-35755: Don't use os.defpath if the PATH environment variable is - # set to an empty string - - # PATH='' doesn't match, whereas PATH=':' looks in the current directory - if not path: - return None - - path = path.split(os.pathsep) - - if sys.platform == "win32": - # The current directory takes precedence on Windows. - curdir = os.curdir - if curdir not in path: - path.insert(0, curdir) - - # PATHEXT is necessary to check on Windows. - pathext = os.environ.get("PATHEXT", "").split(os.pathsep) - # See if the given file matches any of the expected path extensions. - # This will allow us to short circuit when given "python.exe". - # If it does match, only test that one, otherwise we have to try - # others. - if any(cmd.lower().endswith(ext.lower()) for ext in pathext): - files = [cmd] - else: - files = [cmd + ext for ext in pathext] - else: - # On other platforms you don't have things like PATHEXT to tell you - # what file suffixes are executable, so just pass on cmd as-is. 
- files = [cmd] - - seen = set() - for dir in path: - normdir = os.path.normcase(dir) - if not normdir in seen: - seen.add(normdir) - for thefile in files: - name = os.path.join(dir, thefile) - if _access_check(name, mode): - return name - return None - CONDOR_TEMPLATE = \ """ # We need the job to run our executable script, with the @@ -230,24 +91,6 @@ INPUT_FILES = ["muE_water_liquid.txt","pmt_response_qoca_d2o_20060216.dat","rsp_ class MyTemplate(string.Template): delimiter = '@' -def splitext(path): - """ - Like os.path.splitext() except it returns the full extension if the - filename has multiple extensions, for example: - - splitext('foo.tar.gz') -> 'foo', '.tar.gz' - """ - full_root, full_ext = os.path.splitext(path) - while True: - root, ext = os.path.splitext(full_root) - if ext: - full_ext = ext + full_ext - full_root = root - else: - break - - return full_root, full_ext - def create_submit_file(filename, uuid, run, gtid, dir, dqxx_dir, particle_combo, max_time, priority): """ Creates a submit file and submits a job to the grid. |
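With the package installed, scripts can pull these helpers in from one place instead of carrying their own copies. A minimal sketch of what that looks like (the filename and command below are made up for illustration):

    #!/usr/bin/env python
    from sddm import which, splitext
    from sddm.logger import Logger

    log = Logger()
    log.set_verbosity('notice')

    # splitext() keeps the full compound extension, unlike os.path.splitext()
    root, ext = splitext('run_00001.zdab.gz')   # -> ('run_00001', '.zdab.gz')

    # which() is the shutil.which() backport from sddm/__init__.py
    log.notice("zdab-reprocess found at %s" % which('zdab-reprocess'))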