diff options
Diffstat (limited to 'utils/sddm')
| -rw-r--r-- | utils/sddm/.gitignore | 1 | ||||
| -rw-r--r-- | utils/sddm/__init__.py | 139 | ||||
| -rwxr-xr-x | utils/sddm/dc.py | 366 | ||||
| -rw-r--r-- | utils/sddm/logger.py | 70 | ||||
| -rwxr-xr-x | utils/sddm/plot.py | 257 | ||||
| -rwxr-xr-x | utils/sddm/plot_energy.py | 352 |
6 files changed, 1185 insertions, 0 deletions
diff --git a/utils/sddm/.gitignore b/utils/sddm/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/utils/sddm/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/utils/sddm/__init__.py b/utils/sddm/__init__.py new file mode 100644 index 0000000..2525383 --- /dev/null +++ b/utils/sddm/__init__.py @@ -0,0 +1,139 @@ +from __future__ import print_function, division +import sys +from itertools import izip_longest +import os + +IDP_E_MINUS = 20 +IDP_MU_MINUS = 22 + +SNOMAN_MASS = { + 20: 0.511, + 21: 0.511, + 22: 105.658, + 23: 105.658 +} + +AV_RADIUS = 600.0 +PSUP_RADIUS = 840.0 + +# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +def print_warning(msg): + print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def grouper(iterable, n, fillvalue=None): + "Collect data into fixed-length chunks or blocks" + # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx + args = [iter(iterable)] * n + return izip_longest(fillvalue=fillvalue, *args) + +def splitext(path): + """ + Like os.path.splitext() except it returns the full extension if the + filename has multiple extensions, for example: + + splitext('foo.tar.gz') -> 'foo', '.tar.gz' + """ + full_root, full_ext = os.path.splitext(path) + while True: + root, ext = os.path.splitext(full_root) + if ext: + full_ext = ext + full_ext + full_root = root + else: + break + + return full_root, full_ext + +# Next two functions are a backport of the shutil.which() function from Python +# 3.3 from Lib/shutil.py in the CPython code See +# https://github.com/python/cpython/blob/master/Lib/shutil.py. + +# Check that a given file can be accessed with the correct mode. 
+# Additionally check that `file` is not a directory, as on Windows +# directories pass the os.access check. +def _access_check(fn, mode): + return (os.path.exists(fn) and os.access(fn, mode) and not os.path.isdir(fn)) + +def which(cmd, mode=os.F_OK | os.X_OK, path=None): + """Given a command, mode, and a PATH string, return the path which + conforms to the given mode on the PATH, or None if there is no such + file. + `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result + of os.environ.get("PATH"), or can be overridden with a custom search + path. + """ + # If we're given a path with a directory part, look it up directly rather + # than referring to PATH directories. This includes checking relative to the + # current directory, e.g. ./script + if os.path.dirname(cmd): + if _access_check(cmd, mode): + return cmd + return None + + if path is None: + path = os.environ.get("PATH", None) + if path is None: + try: + path = os.confstr("CS_PATH") + except (AttributeError, ValueError): + # os.confstr() or CS_PATH is not available + path = os.defpath + # bpo-35755: Don't use os.defpath if the PATH environment variable is + # set to an empty string + + # PATH='' doesn't match, whereas PATH=':' looks in the current directory + if not path: + return None + + path = path.split(os.pathsep) + + if sys.platform == "win32": + # The current directory takes precedence on Windows. + curdir = os.curdir + if curdir not in path: + path.insert(0, curdir) + + # PATHEXT is necessary to check on Windows. + pathext = os.environ.get("PATHEXT", "").split(os.pathsep) + # See if the given file matches any of the expected path extensions. + # This will allow us to short circuit when given "python.exe". + # If it does match, only test that one, otherwise we have to try + # others. 
+ if any(cmd.lower().endswith(ext.lower()) for ext in pathext): + files = [cmd] + else: + files = [cmd + ext for ext in pathext] + else: + # On other platforms you don't have things like PATHEXT to tell you + # what file suffixes are executable, so just pass on cmd as-is. + files = [cmd] + + seen = set() + for dir in path: + normdir = os.path.normcase(dir) + if not normdir in seen: + seen.add(normdir) + for thefile in files: + name = os.path.join(dir, thefile) + if _access_check(name, mode): + return name + return None + +from . import plot +from . import plot_energy +from . import dc diff --git a/utils/sddm/dc.py b/utils/sddm/dc.py new file mode 100755 index 0000000..930d32f --- /dev/null +++ b/utils/sddm/dc.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr +import nlopt +from scipy.stats import poisson +import contextlib +import sys +from math import exp +import emcee +from scipy.optimize import brentq +from scipy.stats import truncnorm +from matplotlib.lines import Line2D + +# Data cleaning bitmasks. 
+DC_MUON = 0x1 +DC_JUNK = 0x2 +DC_CRATE_ISOTROPY = 0x4 +DC_QVNHIT = 0x8 +DC_NECK = 0x10 +DC_FLASHER = 0x20 +DC_ESUM = 0x40 +DC_OWL = 0x80 +DC_OWL_TRIGGER = 0x100 +DC_FTS = 0x200 +DC_ITC = 0x400 +DC_BREAKDOWN = 0x800 + +# Lower bound for probabilities in fit. We set this to be nonzero to prevent +# nans in case an expected value is predicted to be zero. +EPSILON = 1e-10 + +def get_proposal_func(stepsizes, low, high): + """ + Returns a function which produces proposal steps for a Metropolis Hastings + MCMC. This function can then be passed to emcee.moves.MHMove(). + + Steps are proposed according to a truncated normal distribution with mean + at the current position and with standard deviations given by stepsizes. + The bounds for each step are given by the two arrays low and high. + """ + stepsizes = np.atleast_1d(stepsizes) + low = np.atleast_1d(low) + high = np.atleast_1d(high) + def proposal(x0, random): + """ + Returns a tuple of proposed positions and the log-ratio of the proposal + probabilities. + + x0 should be a (K,ndim) dimensional numpy array where K is the number + of walkers and ndim is the number of dimensions in the likelihood function. + """ + # calculate a and b needed for truncnorm + # Note: These are not just low and high b/c + # + # The standard form of this distribution is a standard normal + # truncated to the range [a, b] --- notice that a and b are defined + # over the domain of the standard normal. To convert clip values + # for a specific mean and standard deviation, use:: + # + # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std + a, b = (low - x0)/stepsizes, (high - x0)/stepsizes + x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random) + # Note: Should be able to do this: + # + # log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1) + # + # but I think there is a bug in truncnorm.logpdf() which barfs when + # passed 2D arrays, so instead we just loop over the first axis. 
+ log_p_x_given_x0 = np.empty(x0.shape[0]) + for i in range(x0.shape[0]): + log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum() + a, b = (low - x)/stepsizes, (high - x)/stepsizes + # Note: Should be able to do this: + # + # log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1) + # + # but I think there is a bug in truncnorm.logpdf() which barfs when + # passed 2D arrays, so instead we just loop over the first axis. + log_p_x0_given_x = np.empty(x0.shape[0]) + for i in range(x0.shape[0]): + log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum() + return x, log_p_x0_given_x - log_p_x_given_x0 + return proposal + +def estimate_errors(nll, xopt, constraints): + """ + Return approximate 1 sigma errors for each parameter assuming all + parameters are uncorrelated. + """ + nll_xopt = nll(xopt) + + def root(x, xopt, i): + """ + Function to bisect where the negative log likelihood reaches 0.5 from the minimum. + """ + xtest = xopt.copy() + xtest[i] = x + for constraint in constraints: + if constraint(xtest) >= 0: + xtest = constraint.renormalize(xtest,i) + return nll(xtest) - (nll_xopt + 0.5) + + errors = np.empty_like(xopt) + + for i in range(len(xopt)): + if i < 6: + xlo = brentq(root,0,xopt[i],args=(xopt,i)) + xhi = brentq(root,xopt[i],1e9,args=(xopt,i)) + else: + xlo = brentq(root,0,xopt[i],args=(xopt,i)) + xhi = brentq(root,xopt[i],1,args=(xopt,i)) + + errors[i] = max(xopt[i]-xlo,xhi-xopt[i]) + + return errors + +def metropolis_hastings(nll, x0, stepsizes=None, size=1000): + x = np.asarray(x0) + + if stepsizes is None: + stepsizes = np.ones_like(x) + else: + stepsizes = np.asarray(stepsizes) + + accept = 0 + + nll_old = nll(x) + + samples = np.empty((size,len(x0)),dtype=np.double) + + try: + for i in range(size): + if i % 100 == 0: + sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1))) + sys.stdout.flush() + + xnew = x + np.random.randn(len(x))*stepsizes + 
+ nll_new = nll(xnew) + + if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new): + x = xnew + nll_new = nll_old + accept += 1 + + samples[i] = x + except KeyboardInterrupt: + sys.stdout.write("\n") + sys.stdout.flush() + print("ctrl-c caught. quitting...") + samples.resize((i,len(x))) + else: + sys.stdout.write("\n") + sys.stdout.flush() + + return samples + +def unwrap(p, delta, axis=-1): + """ + A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. + It unwraps discontinuities bigger than delta/2 by delta. + + Example: + + >>> a = np.arange(10) % 5 + >>> a + array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) + >>> unwrap(a,5) + array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + + In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. + """ + p = np.asarray(p) + nd = p.ndim + dd = np.diff(p, axis=axis) + slice1 = [slice(None, None)]*nd # full slices + slice1[axis] = slice(1, None) + slice1 = tuple(slice1) + ddmod = np.mod(dd + delta/2, delta) - delta/2 + np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) + ph_correct = ddmod - dd + np.copyto(ph_correct, 0, where=abs(dd) < delta/2) + up = np.array(p, copy=True, dtype='d') + up[slice1] = p[slice1] + ph_correct.cumsum(axis) + return up + +def unwrap_50_mhz_clock(gtr): + """ + Unwrap an array with 50 MHz clock times. These times should all be in + nanoseconds and come from the KEV_GTR variable in the EV bank. + + Note: We assume here that the events are already ordered contiguously by + GTID, so you shouldn't pass an array with multiple runs! + """ + return unwrap(gtr,0x7ffffffffff*20.0) + +def retrigger_cut(ev): + """ + Cuts all retrigger events. + """ + return ev[ev.dt > 500] + +def breakdown_follower_cut(ev): + """ + Cuts all events within 1 second of breakdown events. 
+ """ + breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] + return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] + +def flasher_follower_cut(ev): + """ + Cuts all events within 200 microseconds of flasher events. + """ + flashers = ev[ev.dc & DC_FLASHER != 0] + return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def muon_follower_cut(ev): + """ + Cuts all events 200 microseconds after a muon. + """ + muons = ev[ev.dc & DC_MUON != 0] + return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def michel_cut(ev): + """ + Looks for Michel electrons after muons. + """ + prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + # require Michel events to pass more of the SNO data cleaning cuts + michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + + michel = michel[michel.nhit >= 100] + + # Accept events which had a muon more than 800 nanoseconds but less + # than 20 microseconds before them. The 800 nanoseconds cut comes from + # Richie's thesis. He also mentions that the In Time Channel Spread Cut + # is very effective at cutting electrons events caused by muons, so I + # should implement that. + # + # Note: We currently don't look across run boundaries. This should be a + # *very* small effect, and the logic to do so would be very complicated + # since I would have to deal with 50 MHz clock rollovers, etc. + # + # Do a simple python for loop here over the runs since we create + # temporary arrays with shape (michel.size,prompt_plus_muons.size) + # which could be too big for the full dataset. 
+ # + # There might be some clever way to do this without the loop in Pandas, + # but I don't know how. + if prompt_plus_muons.size and michel.size: + return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ + (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] + else: + return pd.DataFrame(columns=ev.columns) + +def atmospheric_events(ev): + """ + Tags atmospheric events which have a neutron follower. + """ + prompt = ev[ev.prompt] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + ev['atm'] = np.zeros(len(ev),dtype=np.bool) + + if prompt.size and follower.size: + # neutron followers have to obey stricter set of data cleaning cuts + neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] + r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2) + neutron = neutron[r < AV_RADIUS] + neutron = neutron[neutron.rsp_energy > 4.0] + + # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) + ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ + (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) + + return ev + +def gtid_sort(ev, first_gtid): + """ + Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid + in a run, which should be passed as a dictionary. This column can then be + used to sort the events sequentially. + + This function should be passed to ev.groupby('run').apply(). We use this + idiom instead of just looping over the groupby results since groupby() + makes a copy of the dataframe, i.e. 
+ + for run, ev_run in ev.groupby('run'): + ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + would produce a SettingWithCopyWarning, so instead we use: + + ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) + + which doesn't have this problem. + """ + # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python + run = ev.name + + if run not in first_gtid: + print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) + first_gtid[run] = ev.gtid.iloc[0] + + ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 + + return ev + +def prompt_event(ev): + ev['prompt'] = (ev.nhit >= 100) + ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) + return ev + +class Constraint(object): + def __init__(self, range): + self.range = range + + def __call__(self, x, grad=None): + return np.sum(x[self.range]) - 1 + EPSILON + + def renormalize(self, x, fix): + if fix not in self.range: + return x + x = x.copy() + while self(x) >= 0: + for i in self.range: + if i == fix: + continue + x[i] -= self(x) + return x + + def renormalize_no_fix(self, x): + x = x.copy() + while self(x) >= 0: + for i in self.range: + if x[i] < 10*EPSILON: + continue + x[i] -= x[i]/2.0 + return x diff --git a/utils/sddm/logger.py b/utils/sddm/logger.py new file mode 100644 index 0000000..c641893 --- /dev/null +++ b/utils/sddm/logger.py @@ -0,0 +1,70 @@ +import os +import sys +from datetime import datetime + +DEBUG = 0 +VERBOSE = 1 +NOTICE = 2 +WARNING = 3 + +class Logger(object): + """ + Simple logger class that I wrote for the SNO+ DAQ. Very easy to use: + + log = Logger() + log.set_logfile("test.log") + log.notice("blah") + log.warn("foo") + + The log file format is taken from the Redis log file format which is really + nice since it shows the exact time and severity of each log message. 
+ """ + def __init__(self): + self.logfile = sys.stdout + # by default, we log everything + self.verbosity = DEBUG + + def set_verbosity(self, level): + if isinstance(level, int): + self.verbosity = level + elif isinstance(level, basestring): + if level == 'debug': + self.verbosity = DEBUG + elif level == 'verbose': + self.verbosity = VERBOSE + elif level == 'notice': + self.verbosity = NOTICE + elif level == 'warning': + self.verbosity = WARNING + else: + raise ValueError("unknown loglevel '%s'" % level) + else: + raise TypeError("level must be a string or integer") + + def set_logfile(self, filename): + self.logfile = open(filename, 'a') + + def debug(self, msg): + self.log(DEBUG, msg) + + def verbose(self, msg): + self.log(VERBOSE, msg) + + def notice(self, msg): + self.log(NOTICE, msg) + + def warn(self, msg): + self.log(WARNING, msg) + + def log(self, level, msg): + if level < self.verbosity: + return + + c = '.-*#' + pid = os.getpid() + now = datetime.now() + buf = now.strftime('%d %b %H:%M:%S.%f')[:-3] + + self.logfile.write('%d:%s %c %s\n' % (pid, buf, c[level], msg)) + self.logfile.flush() + diff --git a/utils/sddm/plot.py b/utils/sddm/plot.py new file mode 100755 index 0000000..b00adbd --- /dev/null +++ b/utils/sddm/plot.py @@ -0,0 +1,257 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. 
If not, see <https://www.gnu.org/licenses/>. + +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr +from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta + +def plot_hist(x, label=None): + # determine the bin width using the Freedman Diaconis rule + # see https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule + import matplotlib.pyplot as plt + h = 2*iqr(x)/len(x)**(1/3) + n = max(int((np.max(x)-np.min(x))/h),10) + bins = np.linspace(np.min(x),np.max(x),n) + plt.hist(x, bins=bins, histtype='step', label=label) + +def plot_legend(n): + import matplotlib.pyplot as plt + plt.figure(n) + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + new_handles = [Line2D([],[],c=h.get_edgecolor()) for h in handles] + plt.legend(handles=new_handles,labels=labels) + +def get_stats(x): + """ + Returns a tuple (mean, error mean, std, error std) for the values in x. + + The formula for the standard error on the standard deviation comes from + https://stats.stackexchange.com/questions/156518. + """ + mean = np.mean(x) + std = np.std(x) + n = len(x) + u4 = np.mean((x-mean)**4) + error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) + return mean, std/np.sqrt(n), std, error + +# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py +def despine(fig=None, ax=None, top=True, right=True, left=False, + bottom=False, offset=None, trim=False): + """Remove the top and right spines from plot(s). + + fig : matplotlib figure, optional + Figure to despine all axes of, default uses current figure. + ax : matplotlib axes, optional + Specific axes object to despine. + top, right, left, bottom : boolean, optional + If True, remove that spine. + offset : int or dict, optional + Absolute distance, in points, spines should be moved away + from the axes (negative values move spines inward). 
A single value + applies to all spines; a dict can be used to set offset values per + side. + trim : bool, optional + If True, limit spines to the smallest and largest major tick + on each non-despined axis. + + Returns + ------- + None + + """ + import matplotlib.pyplot as plt + # Get references to the axes we want + if fig is None and ax is None: + axes = plt.gcf().axes + elif fig is not None: + axes = fig.axes + elif ax is not None: + axes = [ax] + + for ax_i in axes: + for side in ["top", "right", "left", "bottom"]: + # Toggle the spine objects + is_visible = not locals()[side] + ax_i.spines[side].set_visible(is_visible) + if offset is not None and is_visible: + try: + val = offset.get(side, 0) + except AttributeError: + val = offset + _set_spine_position(ax_i.spines[side], ('outward', val)) + + # Potentially move the ticks + if left and not right: + maj_on = any( + t.tick1line.get_visible() + for t in ax_i.yaxis.majorTicks + ) + min_on = any( + t.tick1line.get_visible() + for t in ax_i.yaxis.minorTicks + ) + ax_i.yaxis.set_ticks_position("right") + for t in ax_i.yaxis.majorTicks: + t.tick2line.set_visible(maj_on) + for t in ax_i.yaxis.minorTicks: + t.tick2line.set_visible(min_on) + + if bottom and not top: + maj_on = any( + t.tick1line.get_visible() + for t in ax_i.xaxis.majorTicks + ) + min_on = any( + t.tick1line.get_visible() + for t in ax_i.xaxis.minorTicks + ) + ax_i.xaxis.set_ticks_position("top") + for t in ax_i.xaxis.majorTicks: + t.tick2line.set_visible(maj_on) + for t in ax_i.xaxis.minorTicks: + t.tick2line.set_visible(min_on) + + if trim: + # clip off the parts of the spines that extend past major ticks + xticks = ax_i.get_xticks() + if xticks.size: + firsttick = np.compress(xticks >= min(ax_i.get_xlim()), + xticks)[0] + lasttick = np.compress(xticks <= max(ax_i.get_xlim()), + xticks)[-1] + ax_i.spines['bottom'].set_bounds(firsttick, lasttick) + ax_i.spines['top'].set_bounds(firsttick, lasttick) + newticks = xticks.compress(xticks <= lasttick) + 
newticks = newticks.compress(newticks >= firsttick) + ax_i.set_xticks(newticks) + + yticks = ax_i.get_yticks() + if yticks.size: + firsttick = np.compress(yticks >= min(ax_i.get_ylim()), + yticks)[0] + lasttick = np.compress(yticks <= max(ax_i.get_ylim()), + yticks)[-1] + ax_i.spines['left'].set_bounds(firsttick, lasttick) + ax_i.spines['right'].set_bounds(firsttick, lasttick) + newticks = yticks.compress(yticks <= lasttick) + newticks = newticks.compress(newticks >= firsttick) + ax_i.set_yticks(newticks) + +def iqr_std_err(x): + """ + Returns the approximate standard deviation assuming the central part of the + distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + # see https://stats.stackexchange.com/questions/110902/error-on-interquartile-range + std = iqr(x.values)/1.3489795 + return 1.573*std/np.sqrt(n) + +def iqr_std(x): + """ + Returns the approximate standard deviation assuming the central part of the + distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return iqr(x.values)/1.3489795 + +def quantile_error(x,q): + """ + Returns the standard error for the qth quantile of `x`. The error is + computed using the Maritz-Jarrett method described here: + https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/quantse.htm. + """ + x = np.sort(x) + n = len(x) + m = int(q*n+0.5) + A = m - 1 + B = n - m + i = np.arange(1,len(x)+1) + w = beta.cdf(i/n,A,B) - beta.cdf((i-1)/n,A,B) + return np.sqrt(np.sum(w*x**2)-np.sum(w*x)**2) + +def q90_err(x): + """ + Returns the error on the 90th percentile for all the non NaN values in a + Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return quantile_error(x.values,0.9) + +def q90(x): + """ + Returns the 90th percentile for all the non NaN values in a Series `x`. 
+ """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return np.percentile(x.values,90.0) + +def median(x): + """ + Returns the median for all the non NaN values in a Series `x`. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + return np.median(x.values) + +def median_err(x): + """ + Returns the approximate error on the median for all the non NaN values in a + Series `x`. The error on the median is approximated assuming the central + part of the distribution is gaussian. + """ + x = x.dropna() + n = len(x) + if n == 0: + return np.nan + # First we estimate the standard deviation using the interquartile range. + # Here we are essentially assuming the central part of the distribution is + # gaussian. + std = iqr(x.values)/1.3489795 + median = np.median(x.values) + # Now we estimate the error on the median for a gaussian + # See https://stats.stackexchange.com/questions/45124/central-limit-theorem-for-sample-medians. + return 1/(2*np.sqrt(n)*norm.pdf(median,median,std)) + +def std_err(x): + x = x.dropna() + mean = np.mean(x) + std = np.std(x) + n = len(x) + if n == 0: + return np.nan + elif n == 1: + return 0.0 + u4 = np.mean((x-mean)**4) + error = np.sqrt((u4-(n-3)*std**4/(n-1))/n)/(2*std) + return error diff --git a/utils/sddm/plot_energy.py b/utils/sddm/plot_energy.py new file mode 100755 index 0000000..a24ea00 --- /dev/null +++ b/utils/sddm/plot_energy.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python +# Copyright (c) 2019, Anthony Latorre <tlatorre at uchicago> +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, see <https://www.gnu.org/licenses/>. + +from __future__ import print_function, division +import numpy as np +from scipy.stats import iqr, poisson +from matplotlib.lines import Line2D +from scipy.stats import iqr, norm, beta +from scipy.special import spence +from itertools import izip_longest +from .dc import DC_MUON, DC_JUNK, DC_CRATE_ISOTROPY, DC_QVNHIT, DC_NECK, DC_FLASHER, DC_ESUM, DC_OWL, DC_OWL_TRIGGER, DC_FTS, DC_ITC, DC_BREAKDOWN +from . import grouper, print_warning, AV_RADIUS, PSUP_RADIUS +import pandas as pd + +# Fermi constant +GF = 1.16637887e-5 # 1/MeV^2 +ELECTRON_MASS = 0.5109989461 # MeV +MUON_MASS = 105.6583745 # MeV +PROTON_MASS = 938.272081 # MeV +FINE_STRUCTURE_CONSTANT = 7.297352566417e-3 + +def unwrap(p, delta, axis=-1): + """ + A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. + It unwraps discontinuities bigger than delta/2 by delta. + + Example: + + >>> a = np.arange(10) % 5 + >>> a + array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) + >>> unwrap(a,5) + array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + + In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. + """ + p = np.asarray(p) + nd = p.ndim + dd = np.diff(p, axis=axis) + slice1 = [slice(None, None)]*nd # full slices + slice1[axis] = slice(1, None) + slice1 = tuple(slice1) + ddmod = np.mod(dd + delta/2, delta) - delta/2 + np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) + ph_correct = ddmod - dd + np.copyto(ph_correct, 0, where=abs(dd) < delta/2) + up = np.array(p, copy=True, dtype='d') + up[slice1] = p[slice1] + ph_correct.cumsum(axis) + return up + +def unwrap_50_mhz_clock(gtr): + """ + Unwrap an array with 50 MHz clock times. These times should all be in + nanoseconds and come from the KEV_GTR variable in the EV bank. 
+ + Note: We assume here that the events are already ordered contiguously by + GTID, so you shouldn't pass an array with multiple runs! + """ + return unwrap(gtr,0x7ffffffffff*20.0) + +def retrigger_cut(ev): + """ + Cuts all retrigger events. + """ + return ev[ev.dt > 500] + +def breakdown_follower_cut(ev): + """ + Cuts all events within 1 second of breakdown events. + """ + breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] + return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] + +def flasher_follower_cut(ev): + """ + Cuts all events within 200 microseconds of flasher events. + """ + flashers = ev[ev.dc & DC_FLASHER != 0] + return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def muon_follower_cut(ev): + """ + Cuts all events 200 microseconds after a muon. + """ + muons = ev[ev.dc & DC_MUON != 0] + return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ + (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] + +def michel_cut(ev): + """ + Looks for Michel electrons after muons. + """ + prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + # require Michel events to pass more of the SNO data cleaning cuts + michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + + michel = michel[michel.nhit >= 100] + + # Accept events which had a muon more than 800 nanoseconds but less than 20 + # microseconds before them. The 800 nanoseconds cut comes from Richie's + # thesis. He also mentions that the In Time Channel Spread Cut is very + # effective at cutting electron events caused by muons, so I should + # implement that. 
+ # + # Note: We currently don't look across run boundaries. This should be a + # *very* small effect, and the logic to do so would be very complicated + # since I would have to deal with 50 MHz clock rollovers, etc. + if prompt_plus_muons.size and michel.size: + mask = (michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ + (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3) + michel = michel.iloc[np.any(mask,axis=0)] + michel['muon_gtid'] = pd.Series(prompt_plus_muons['gtid'].iloc[np.argmax(mask[:,np.any(mask,axis=0)],axis=0)].values, + index=michel.index.values, + dtype=np.int32) + return michel + else: + # Return an empty slice since we need it to have the same datatype as + # the other dataframes + michel = ev[:0] + michel['muon_gtid'] = -1 + return michel + +def atmospheric_events(ev): + """ + Tags atmospheric events which have a neutron follower. + """ + prompt = ev[ev.prompt] + + # Michel electrons and neutrons can be any event which is not a prompt + # event + follower = ev[~ev.prompt] + + ev['atm'] = np.zeros(len(ev),dtype=np.bool) + + if prompt.size and follower.size: + # neutron followers have to obey stricter set of data cleaning cuts + neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] + neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] + # FIXME: What should the radius cut be here? AV? (r/r_psup)^3 < 0.9? 
+ neutron = neutron[neutron.ftp_r < AV_RADIUS]
+ neutron = neutron[neutron.rsp_energy > 4.0]
+
+ # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt)
+ # (n_prompt, n_neutron) broadcast; any() over axis=1 tags each prompt
+ # event that has at least one qualifying neutron follower.
+ ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \
+ (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1)
+
+ return ev
+
+def gtid_sort(ev, first_gtid):
+ """
+ Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid
+ in a run, which should be passed as a dictionary. This column can then be
+ used to sort the events sequentially.
+
+ This function should be passed to ev.groupby('run').apply(). We use this
+ idiom instead of just looping over the groupby results since groupby()
+ makes a copy of the dataframe, i.e.
+
+ for run, ev_run in ev.groupby('run'):
+ ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+ would produce a SettingWithCopyWarning, so instead we use:
+
+ ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid)
+
+ which doesn't have this problem.
+ """
+ # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python
+ run = ev.name
+
+ if run not in first_gtid:
+ print_warning("No RHDR bank for run %i! Assuming first event is the first GTID."
% run)
+ first_gtid[run] = ev.gtid.iloc[0]
+
+ # 0x1000000 = 2**24, presumably the width of the GTID counter, so
+ # events whose GTID wrapped around sort after the pre-rollover ones --
+ # confirm the counter width.
+ ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000
+
+ return ev
+
+def prompt_event(ev):
+ """
+ Flag prompt events: nhit >= 100 and at least 250e6 ns (250 ms) after
+ the previous nhit >= 100 event. The first such event is always kept.
+ """
+ ev['prompt'] = (ev.nhit >= 100)
+ ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6))
+ return ev
+
+def plot_corner_plot(ev, title, save=None):
+ """
+ Draw a lower-triangle corner plot of r_psup, psi, z and udotr: 1D
+ histograms on the diagonal, scatter plots below it. Each off-diagonal
+ panel is annotated with a psi statistic comparing the observed 2x2
+ cut-quadrant counts to the counts expected if the two variables were
+ independent. If `save` is given the figure is written to save + ".pdf"
+ and save + ".eps".
+ """
+ import matplotlib.pyplot as plt
+ variables = ['r_psup','psi','z','udotr']
+ labels = [r'$(r/r_\mathrm{PSUP})^3$',r'$\psi$','z',r'$\vec{u}\cdot\vec{r}$']
+ limits = [(0,1),(0,10),(-840,840),(-1,1)]
+ cuts = [0.9,6,0,-0.5]
+
+ ev = ev.dropna(subset=variables)
+
+ fig = plt.figure(figsize=(FIGSIZE[0],FIGSIZE[0]))
+ despine(fig,trim=True)
+ for i in range(len(variables)):
+ for j in range(len(variables)):
+ if j > i:
+ continue
+ ax = plt.subplot(len(variables),len(variables),i*len(variables)+j+1)
+ if i == j:
+ plt.hist(ev[variables[i]],bins=np.linspace(limits[i][0],limits[i][1],100),histtype='step')
+ plt.gca().set_xlim(limits[i])
+ else:
+ plt.scatter(ev[variables[j]],ev[variables[i]],s=0.5)
+ plt.gca().set_xlim(limits[j])
+ plt.gca().set_ylim(limits[i])
+ n = len(ev)
+ if n:
+ p_i_lo = np.count_nonzero(ev[variables[i]] < cuts[i])/n
+ p_j_lo = np.count_nonzero(ev[variables[j]] < cuts[j])/n
+ p_lolo = p_i_lo*p_j_lo
+ p_lohi = p_i_lo*(1-p_j_lo)
+ p_hilo = (1-p_i_lo)*p_j_lo
+ p_hihi = (1-p_i_lo)*(1-p_j_lo)
+ n_lolo = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] < cuts[j]))
+ n_lohi = np.count_nonzero((ev[variables[i]] < cuts[i]) & (ev[variables[j]] >= cuts[j]))
+ n_hilo = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] < cuts[j]))
+ n_hihi = np.count_nonzero((ev[variables[i]] >= cuts[i]) & (ev[variables[j]] >= cuts[j]))
+ observed = np.array([n_lolo,n_lohi,n_hilo,n_hihi])
+ expected = n*np.array([p_lolo,p_lohi,p_hilo,p_hihi])
+ # Poisson log-likelihood-ratio statistic for observed vs.
+ # independence-expected counts, normalized below by its spread
+ # under Poisson fluctuations of the observed counts.
+ psi = -poisson.logpmf(observed,expected).sum() + poisson.logpmf(observed,observed).sum()
+ psi /= 
np.std(-poisson.logpmf(np.random.poisson(observed,size=(10000,4)),observed).sum(axis=1) + poisson.logpmf(observed,observed).sum())
+ plt.title(r"$\psi = %.1f$" % psi)
+ if i == len(variables) - 1:
+ plt.xlabel(labels[j])
+ else:
+ plt.setp(ax.get_xticklabels(),visible=False)
+ if j == 0:
+ plt.ylabel(labels[i])
+ else:
+ plt.setp(ax.get_yticklabels(),visible=False)
+ plt.axvline(cuts[j],color='k',ls='--',alpha=0.5)
+ if i != j:
+ plt.axhline(cuts[i],color='k',ls='--',alpha=0.5)
+
+ plt.tight_layout()
+
+ if save:
+ plt.savefig(save + ".pdf")
+ plt.savefig(save + ".eps")
+
+ # NOTE(review): the suptitle is added after savefig, so the saved
+ # .pdf/.eps will not contain the title -- confirm this is intentional
+ # (e.g. for publication figures).
+ plt.suptitle(title)
+
+def intersect_sphere(pos, dir, R):
+ """
+ Compute the first intersection of a ray starting at `pos` with direction
+ `dir` and a sphere centered at the origin with radius `R`. The distance to
+ the intersection is returned, or None if the ray misses the sphere.
+
+ Example:
+
+ pos = np.array([0,0,0])
+ dir = np.array([1,0,0])
+
+ l = intersect_sphere(pos,dir,PSUP_RADIUS)
+ if l is not None:
+ hit = pos + l*dir
+ print("ray intersects sphere at %.2f %.2f %.2f" % (hit[0], hit[1], hit[2]))
+ else:
+ print("ray didn't intersect sphere")
+ """
+
+ # Solve l**2 + b*l + c = 0 for the ray parameter. The leading
+ # coefficient is taken as 1, i.e. `dir` is assumed to be a unit vector
+ # -- TODO confirm callers always pass normalized directions.
+ b = 2*np.dot(dir,pos)
+ c = np.dot(pos,pos) - R*R
+
+ if b*b - 4*c <= 0:
+ # Ray doesn't intersect the sphere.
+ # (A tangent ray, discriminant == 0, is also treated as a miss.)
+ return None
+
+ # First, check the shorter solution.
+ l = (-b - np.sqrt(b*b - 4*c))/2
+
+ # If the shorter solution is less than 0, check the second solution.
+ if l < 0:
+ l = (-b + np.sqrt(b*b - 4*c))/2
+
+ # If the distance is still negative, we didn't intersect the sphere.
+ if l < 0:
+ return None
+
+ return l
+
+def get_dx(row):
+ """
+ Distance from the track's backwards projection onto the PSUP sphere to
+ the reconstructed Michel position, or 0 if the reversed direction never
+ reaches the PSUP. (theta1/phi1 are presumably the fitted track
+ direction and x_michel/y_michel/z_michel the Michel fit position --
+ confirm against the producer of these columns.)
+ """
+ pos = np.array([row.x,row.y,row.z])
+ dir = np.array([np.sin(row.theta1)*np.cos(row.phi1),
+ np.sin(row.theta1)*np.sin(row.phi1),
+ np.cos(row.theta1)])
+ # Step backwards along the direction to the sphere of radius
+ # PSUP_RADIUS to get the presumed entry point.
+ l = intersect_sphere(pos,-dir,PSUP_RADIUS)
+ if l is not None:
+ pos -= dir*l
+ michel_pos = np.array([row.x_michel,row.y_michel,row.z_michel])
+ return np.linalg.norm(michel_pos-pos)
+ else:
+ return 0
+
+def dx_to_energy(dx):
+ """
+ Convert a path length `dx` to a muon energy by interpolating the table
+ in ../src/muE_water_liquid.txt. The first 10 lines and the two
+ annotation rows are skipped; column 8 is interpolated against column 0.
+ Presumably a standard muon energy-loss (range vs. energy) table --
+ confirm the column meanings and units against the file itself.
+ """
+ lines = []
+ with open("../src/muE_water_liquid.txt") as f:
+ for i, line in enumerate(f):
+ if i < 10:
+ continue
+ if 'Minimum ionization' in line:
+ continue
+ if 'Muon critical energy' in line:
+ continue
+ lines.append(line)
+ data = np.genfromtxt(lines)
+ return np.interp(dx,data[:,8],data[:,0])
+
+def f(x):
+ """
+ Radiative correction term f(x) for the Michel spectrum; applied in
+ michel_spectrum below with a factor FINE_STRUCTURE_CONSTANT/pi, where
+ x = 2*E/MUON_MASS. NOTE(review): `spence` is presumably
+ scipy.special.spence -- confirm its dilogarithm argument convention
+ matches the cited paper (arXiv:1406.3575).
+ """
+ y = (5/(3*x**2) + 16*x/3 + 4/x + (12-8*x)*np.log(1/x-1) - 8)*np.log(MUON_MASS/ELECTRON_MASS)
+ y += (6-4*x)*(2*spence(x) - 2*np.log(x)**2 + np.log(x) + np.log(1-x)*(3*np.log(x)-1/x-1) - np.pi**2/3-2)
+ y += (1-x)*(34*x**2+(5-34*x**2+17*x)*np.log(x) - 22*x)/(3*x**2)
+ y += 6*(1-x)*np.log(x)
+ return y
+
+def michel_spectrum(T):
+ """
+ Michel electron energy spectrum for a free muon. `T` should be the kinetic
+ energy of the electron or positron in MeV.
+
+ Note: The result is not normalized.
+
+ From https://arxiv.org/abs/1406.3575.
+ """
+ E = T + ELECTRON_MASS
+ x = 2*E/MUON_MASS
+ # Spectrum is zero outside the physical region 0 < x < 1; assumes T is
+ # a NumPy array (boolean-mask indexing below) -- TODO confirm callers.
+ mask = (x > 0) & (x < 1)
+ y = np.zeros_like(x,dtype=np.double)
+ y[mask] = GF**2*MUON_MASS**5*x[mask]**2*(6-4*x[mask]+FINE_STRUCTURE_CONSTANT*f(x[mask])/np.pi)/(192*np.pi**3)
+ y *= 2*MUON_MASS
+ return y |
