diff options
Diffstat (limited to 'utils/dc')
-rwxr-xr-x | utils/dc | 595 |
1 files changed, 48 insertions, 547 deletions
@@ -26,6 +26,10 @@ import emcee from scipy.optimize import brentq from scipy.stats import truncnorm from matplotlib.lines import Line2D +from sddm.dc import get_proposal_func, estimate_errors, metropolis_hastings, EPSILON +from sddm.plot import despine +from sddm.dc import * +from sddm.plot_energy import * try: from emcee import moves @@ -33,238 +37,6 @@ except ImportError: print("emcee version 2.2.1 is required",file=sys.stderr) sys.exit(1) -# Taken from https://raw.githubusercontent.com/mwaskom/seaborn/c73055b2a9d9830c6fbbace07127c370389d04dd/seaborn/utils.py -def despine(fig=None, ax=None, top=True, right=True, left=False, - bottom=False, offset=None, trim=False): - """Remove the top and right spines from plot(s). - - fig : matplotlib figure, optional - Figure to despine all axes of, default uses current figure. - ax : matplotlib axes, optional - Specific axes object to despine. - top, right, left, bottom : boolean, optional - If True, remove that spine. - offset : int or dict, optional - Absolute distance, in points, spines should be moved away - from the axes (negative values move spines inward). A single value - applies to all spines; a dict can be used to set offset values per - side. - trim : bool, optional - If True, limit spines to the smallest and largest major tick - on each non-despined axis. - - Returns - ------- - None - - """ - # Get references to the axes we want - if fig is None and ax is None: - axes = plt.gcf().axes - elif fig is not None: - axes = fig.axes - elif ax is not None: - axes = [ax] - - for ax_i in axes: - for side in ["top", "right", "left", "bottom"]: - # Toggle the spine objects - is_visible = not locals()[side] - ax_i.spines[side].set_visible(is_visible) - if offset is not None and is_visible: - try: - val = offset.get(side, 0) - except AttributeError: - val = offset - _set_spine_position(ax_i.spines[side], ('outward', val)) - - # Potentially move the ticks - if left and not right: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.yaxis.minorTicks - ) - ax_i.yaxis.set_ticks_position("right") - for t in ax_i.yaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.yaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if bottom and not top: - maj_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.majorTicks - ) - min_on = any( - t.tick1line.get_visible() - for t in ax_i.xaxis.minorTicks - ) - ax_i.xaxis.set_ticks_position("top") - for t in ax_i.xaxis.majorTicks: - t.tick2line.set_visible(maj_on) - for t in ax_i.xaxis.minorTicks: - t.tick2line.set_visible(min_on) - - if trim: - # clip off the parts of the spines that extend past major ticks - xticks = ax_i.get_xticks() - if xticks.size: - firsttick = np.compress(xticks >= min(ax_i.get_xlim()), - xticks)[0] - lasttick = np.compress(xticks <= max(ax_i.get_xlim()), - xticks)[-1] - ax_i.spines['bottom'].set_bounds(firsttick, lasttick) - ax_i.spines['top'].set_bounds(firsttick, lasttick) - newticks = xticks.compress(xticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_xticks(newticks) - - yticks = ax_i.get_yticks() - if yticks.size: - firsttick = np.compress(yticks >= min(ax_i.get_ylim()), - yticks)[0] - lasttick = np.compress(yticks <= max(ax_i.get_ylim()), - yticks)[-1] - ax_i.spines['left'].set_bounds(firsttick, lasttick) - ax_i.spines['right'].set_bounds(firsttick, lasttick) - newticks = yticks.compress(yticks <= lasttick) - newticks = newticks.compress(newticks >= firsttick) - ax_i.set_yticks(newticks) - -# Lower bound for probabilities in fit. We set this to be nonzero to prevent -# nans in case an expected value is predicted to be zero. -EPSILON = 1e-10 - -def get_proposal_func(stepsizes, low, high): - """ - Returns a function which produces proposal steps for a Metropolis Hastings - MCMC. This function can then be passed to emcee.moves.MHMove(). - - Steps are proposed according to a truncated normal distribution with mean - at the current position and with standard deviations given by stepsizes. - The bounds for each step are given by the two arrays low and high. - """ - stepsizes = np.atleast_1d(stepsizes) - low = np.atleast_1d(low) - high = np.atleast_1d(high) - def proposal(x0, random): - """ - Returns a tuple of proposed positions and the log-ratio of the proposal - probabilities. - - x0 should be a (K,ndim) dimensional numpy array where K is the number - of walkers and ndim is the number of dimensions in the likelihood function. - """ - # calculate a and b needed for truncnorm - # Note: These are not just low and high b/c - # - # The standard form of this distribution is a standard normal - # truncated to the range [a, b] --- notice that a and b are defined - # over the domain of the standard normal. To convert clip values - # for a specific mean and standard deviation, use:: - # - # a, b = (myclip_a - my_mean) / my_std, (myclip_b - my_mean) / my_std - a, b = (low - x0)/stepsizes, (high - x0)/stepsizes - x = truncnorm.rvs(a,b,x0,stepsizes,random_state=random) - # Note: Should be able to do this: - # - # log_p_x_given_x0 = truncnorm.logpdf(x,a,b,x0,stepsizes).sum(axis=1) - # - # but I think there is a bug in truncnorm.logpdf() which barfs when - # passed 2D arrays, so instead we just loop over the first axis. - log_p_x_given_x0 = np.empty(x0.shape[0]) - for i in range(x0.shape[0]): - log_p_x_given_x0[i] = truncnorm.logpdf(x[i],a[i],b[i],x0[i],stepsizes).sum() - a, b = (low - x)/stepsizes, (high - x)/stepsizes - # Note: Should be able to do this: - # - # log_p_x0_given_x = truncnorm.logpdf(x0,a,b,x,stepsizes).sum(axis=1) - # - # but I think there is a bug in truncnorm.logpdf() which barfs when - # passed 2D arrays, so instead we just loop over the first axis. - log_p_x0_given_x = np.empty(x0.shape[0]) - for i in range(x0.shape[0]): - log_p_x0_given_x[i] = truncnorm.logpdf(x0[i],a[i],b[i],x[i],stepsizes).sum() - return x, log_p_x0_given_x - log_p_x_given_x0 - return proposal - -def estimate_errors(nll, xopt, constraints): - """ - Return approximate 1 sigma errors for each parameter assuming all - parameters are uncorrelated. - """ - nll_xopt = nll(xopt) - - def root(x, xopt, i): - """ - Function to bisect where the negative log likelihood reaches 0.5 from the minimum. - """ - xtest = xopt.copy() - xtest[i] = x - for constraint in constraints: - if constraint(xtest) >= 0: - xtest = constraint.renormalize(xtest,i) - return nll(xtest) - (nll_xopt + 0.5) - - errors = np.empty_like(xopt) - - for i in range(len(xopt)): - if i < 6: - xlo = brentq(root,0,xopt[i],args=(xopt,i)) - xhi = brentq(root,xopt[i],1e9,args=(xopt,i)) - else: - xlo = brentq(root,0,xopt[i],args=(xopt,i)) - xhi = brentq(root,xopt[i],1,args=(xopt,i)) - - errors[i] = max(xopt[i]-xlo,xhi-xopt[i]) - - return errors - -def metropolis_hastings(nll, x0, stepsizes=None, size=1000): - x = np.asarray(x0) - - if stepsizes is None: - stepsizes = np.ones_like(x) - else: - stepsizes = np.asarray(stepsizes) - - accept = 0 - - nll_old = nll(x) - - samples = np.empty((size,len(x0)),dtype=np.double) - - try: - for i in range(size): - if i % 100 == 0: - sys.stdout.write("\r%i/%i acceptance rate = %i/%i = %.2g%%" % (i+1,size,accept,i+1,accept*100/float(i+1))) - sys.stdout.flush() - - xnew = x + np.random.randn(len(x))*stepsizes - - nll_new = nll(xnew) - - if nll_new < nll_old or np.random.rand() < exp(nll_old-nll_new): - x = xnew - nll_new = nll_old - accept += 1 - - samples[i] = x - except KeyboardInterrupt: - sys.stdout.write("\n") - sys.stdout.flush() - print("ctrl-c caught. quitting...") - samples.resize((i,len(x))) - else: - sys.stdout.write("\n") - sys.stdout.flush() - - return samples - # from https://stackoverflow.com/questions/2891790/how-to-pretty-print-a-numpy-array-without-scientific-notation-and-with-given-pre @contextlib.contextmanager def printoptions(*args, **kwargs): @@ -275,250 +47,6 @@ def printoptions(*args, **kwargs): finally: np.set_printoptions(**original) -# from https://stackoverflow.com/questions/287871/how-to-print-colored-text-in-terminal-in-python -class bcolors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - -SNOMAN_MASS = { - 20: 0.511, - 21: 0.511, - 22: 105.658, - 23: 105.658 -} - -AV_RADIUS = 600.0 -PSUP_RADIUS = 840.0 - -# Data cleaning bitmasks. -DC_MUON = 0x1 -DC_JUNK = 0x2 -DC_CRATE_ISOTROPY = 0x4 -DC_QVNHIT = 0x8 -DC_NECK = 0x10 -DC_FLASHER = 0x20 -DC_ESUM = 0x40 -DC_OWL = 0x80 -DC_OWL_TRIGGER = 0x100 -DC_FTS = 0x200 -DC_ITC = 0x400 -DC_BREAKDOWN = 0x800 - -def plot_hist(df, title=None, muons=False): - for id, df_id in sorted(df.groupby('id')): - if id == 20: - plt.subplot(3,4,1) - elif id == 22: - plt.subplot(3,4,2) - elif id == 2020: - plt.subplot(3,4,5) - elif id == 2022: - plt.subplot(3,4,6) - elif id == 2222: - plt.subplot(3,4,7) - elif id == 202020: - plt.subplot(3,4,9) - elif id == 202022: - plt.subplot(3,4,10) - elif id == 202222: - plt.subplot(3,4,11) - elif id == 222222: - plt.subplot(3,4,12) - - if muons: - plt.hist(df_id.ke.values, bins=np.linspace(20,1000e3,100), histtype='step') - else: - plt.hist(df_id.ke.values, bins=np.linspace(20,10e3,100), histtype='step') - plt.xlabel("Energy (MeV)") - plt.title(str(id)) - - if title: - plt.suptitle(title) - - if len(df): - plt.tight_layout() - -def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] - -def print_warning(msg): - print(bcolors.FAIL + msg + bcolors.ENDC,file=sys.stderr) - -def unwrap(p, delta, axis=-1): - """ - A modified version of np.unwrap() useful for unwrapping the 50 MHz clock. - It unwraps discontinuities bigger than delta/2 by delta. - - Example: - - >>> a = np.arange(10) % 5 - >>> a - array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]) - >>> unwrap(a,5) - array([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) - - In the case of the 50 MHz clock delta should be 0x7ffffffffff*20.0. - """ - p = np.asarray(p) - nd = p.ndim - dd = np.diff(p, axis=axis) - slice1 = [slice(None, None)]*nd # full slices - slice1[axis] = slice(1, None) - slice1 = tuple(slice1) - ddmod = np.mod(dd + delta/2, delta) - delta/2 - np.copyto(ddmod, delta/2, where=(ddmod == -delta/2) & (dd > 0)) - ph_correct = ddmod - dd - np.copyto(ph_correct, 0, where=abs(dd) < delta/2) - up = np.array(p, copy=True, dtype='d') - up[slice1] = p[slice1] + ph_correct.cumsum(axis) - return up - -def unwrap_50_mhz_clock(gtr): - """ - Unwrap an array with 50 MHz clock times. These times should all be in - nanoseconds and come from the KEV_GTR variable in the EV bank. - - Note: We assume here that the events are already ordered contiguously by - GTID, so you shouldn't pass an array with multiple runs! - """ - return unwrap(gtr,0x7ffffffffff*20.0) - -def retrigger_cut(ev): - """ - Cuts all retrigger events. - """ - return ev[ev.dt > 500] - -def breakdown_follower_cut(ev): - """ - Cuts all events within 1 second of breakdown events. - """ - breakdowns = ev[ev.dc & DC_BREAKDOWN != 0] - return ev[~np.any((ev.gtr.values > breakdowns.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < breakdowns.gtr.values[:,np.newaxis] + 1e9),axis=0)] - -def flasher_follower_cut(ev): - """ - Cuts all events within 200 microseconds of flasher events. - """ - flashers = ev[ev.dc & DC_FLASHER != 0] - return ev[~np.any((ev.gtr.values > flashers.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < flashers.gtr.values[:,np.newaxis] + 200e3),axis=0)] - -def muon_follower_cut(ev): - """ - Cuts all events 200 microseconds after a muon. - """ - muons = ev[ev.dc & DC_MUON != 0] - return ev[~np.any((ev.gtr.values > muons.gtr.values[:,np.newaxis]) & \ - (ev.gtr.values < muons.gtr.values[:,np.newaxis] + 200e3),axis=0)] - -def michel_cut(ev): - """ - Looks for Michel electrons after muons. - """ - prompt_plus_muons = ev[ev.prompt | ((ev.dc & DC_MUON) != 0)] - - # Michel electrons and neutrons can be any event which is not a prompt - # event - follower = ev[~ev.prompt] - - # require Michel events to pass more of the SNO data cleaning cuts - michel = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] - - michel = michel[michel.nhit >= 100] - - # Accept events which had a muon more than 800 nanoseconds but less - # than 20 microseconds before them. The 800 nanoseconds cut comes from - # Richie's thesis. He also mentions that the In Time Channel Spread Cut - # is very effective at cutting electrons events caused by muons, so I - # should implement that. - # - # Note: We currently don't look across run boundaries. This should be a - # *very* small effect, and the logic to do so would be very complicated - # since I would have to deal with 50 MHz clock rollovers, etc. - # - # Do a simple python for loop here over the runs since we create - # temporary arrays with shape (michel.size,prompt_plus_muons.size) - # which could be too big for the full dataset. - # - # There might be some clever way to do this without the loop in Pandas, - # but I don't know how. - if prompt_plus_muons.size and michel.size: - return michel[np.any((michel.gtr.values > prompt_plus_muons.gtr.values[:,np.newaxis] + 800) & \ - (michel.gtr.values < prompt_plus_muons.gtr.values[:,np.newaxis] + 20e3),axis=0)] - else: - return pd.DataFrame(columns=ev.columns) - -def atmospheric_events(ev): - """ - Tags atmospheric events which have a neutron follower. - """ - prompt = ev[ev.prompt] - - # Michel electrons and neutrons can be any event which is not a prompt - # event - follower = ev[~ev.prompt] - - ev['atm'] = np.zeros(len(ev),dtype=np.bool) - - if prompt.size and follower.size: - # neutron followers have to obey stricter set of data cleaning cuts - neutron = follower[follower.dc & (DC_JUNK | DC_CRATE_ISOTROPY | DC_QVNHIT | DC_FLASHER | DC_NECK | DC_ESUM | DC_OWL | DC_OWL_TRIGGER | DC_FTS) == 0] - neutron = neutron[~np.isnan(neutron.ftp_x) & ~np.isnan(neutron.rsp_energy)] - r = np.sqrt(neutron.ftp_x**2 + neutron.ftp_y**2 + neutron.ftp_z**2) - neutron = neutron[r < AV_RADIUS] - neutron = neutron[neutron.rsp_energy > 4.0] - - # neutron events accepted after 20 microseconds and before 250 ms (50 ms during salt) - ev.loc[ev.prompt,'atm'] = np.any((neutron.gtr.values > prompt.gtr.values[:,np.newaxis] + 20e3) & \ - (neutron.gtr.values < prompt.gtr.values[:,np.newaxis] + 250e6),axis=1) - - return ev - -def gtid_sort(ev, first_gtid): - """ - Adds 0x1000000 to the gtid_sort column for all gtids before the first gtid - in a run, which should be passed as a dictionary. This column can then be - used to sort the events sequentially. - - This function should be passed to ev.groupby('run').apply(). We use this - idiom instead of just looping over the groupby results since groupby() - makes a copy of the dataframe, i.e. - - for run, ev_run in ev.groupby('run'): - ev_run.loc[ev_run.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 - - would produce a SettingWithCopyWarning, so instead we use: - - ev = ev.groupby('run',as_index=False).apply(gtid_sort,first_gtid=first_gtid) - - which doesn't have this problem. - """ - # see https://stackoverflow.com/questions/32460593/including-the-group-name-in-the-apply-function-pandas-python - run = ev.name - - if run not in first_gtid: - print_warning("No RHDR bank for run %i! Assuming first event is the first GTID." % run) - first_gtid[run] = ev.gtid.iloc[0] - - ev.loc[ev.gtid < first_gtid[run],'gtid_sort'] += 0x1000000 - - return ev - -def prompt_event(ev): - ev['prompt'] = (ev.nhit >= 100) - ev.loc[ev.prompt,'prompt'] &= np.concatenate(([True],np.diff(ev[ev.prompt].gtr.values) > 250e6)) - return ev - def radius_cut(ev): ev['radius_cut'] = np.digitize((ev.r/PSUP_RADIUS)**3,(0.9,)) return ev @@ -539,33 +67,6 @@ def z_cut(ev): ev['z_cut'] = np.digitize(ev.z,(0.0,)) return ev -class Constraint(object): - def __init__(self, range): - self.range = range - - def __call__(self, x, grad=None): - return np.sum(x[self.range]) - 1 + EPSILON - - def renormalize(self, x, fix): - if fix not in self.range: - return x - x = x.copy() - while self(x) >= 0: - for i in self.range: - if i == fix: - continue - x[i] -= self(x) - return x - - def renormalize_no_fix(self, x): - x = x.copy() - while self(x) >= 0: - for i in self.range: - if x[i] < 10*EPSILON: - continue - x[i] -= x[i]/2.0 - return x - # Constraint to enforce the fact that P(r,psi,z,udotr|muon) all add up to 1.0. # In the likelihood function we set the last possibility for r and udotr equal # to 1.0 minus the others. Therefore, we need to enforce the fact that the @@ -596,7 +97,7 @@ flasher_r_udotr = Constraint(range(39,42)) # others must add up to less than 1. breakdown_r_udotr = Constraint(range(44,47)) -def make_nll(data, sacrifice,constraints): +def make_nll(data, sacrifice, constraints): def nll(x, grad=None, fill_value=1e9): if grad is not None and grad.size > 0: raise Exception("nll got passed grad!") @@ -762,7 +263,6 @@ def make_nll(data, sacrifice,constraints): if __name__ == '__main__': import argparse - import matplotlib.pyplot as plt import numpy as np import pandas as pd import sys @@ -770,9 +270,51 @@ if __name__ == '__main__': parser = argparse.ArgumentParser("plot fit results") parser.add_argument("filenames", nargs='+', help="input files") + parser.add_argument("--steps", type=int, default=100000, help="number of steps in the MCMC chain") parser.add_argument("--save", action="store_true", default=False, help="save plots") args = parser.parse_args() + if args.save: + # default \textwidth for a fullpage article in Latex is 16.50764 cm. + # You can figure this out by compiling the following TeX document: + # + # \documentclass{article} + # \usepackage{fullpage} + # \usepackage{layouts} + # \begin{document} + # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth} + # \end{document} + + width = 16.50764 + width /= 2.54 # cm -> inches + # According to this page: + # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf, + # Tufte suggests an aspect ratio of 1.5 - 1.6. + height = width/1.5 + FIGSIZE = (width,height) + + import matplotlib.pyplot as plt + + font = {'family':'serif', 'serif': ['computer modern roman']} + plt.rc('font',**font) + + plt.rc('text', usetex=True) + else: + # on retina screens, the default plots are way too small + # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 + # Qt5 will scale everything using the dpi in ~/.Xresources + import matplotlib + matplotlib.use("Qt5Agg") + + import matplotlib.pyplot as plt + + # Default figure size. Currently set to my monitor width and height so that + # things are properly formatted + FIGSIZE = (13.78,7.48) + + # Make the defalt font bigger + plt.rc('font', size=22) + for filename in args.filenames: ev = pd.read_hdf(filename,"ev") @@ -1079,7 +621,7 @@ if __name__ == '__main__': proposal = get_proposal_func(stepsizes*0.1,low,high) sampler = emcee.EnsembleSampler(nwalkers, ndim, lambda x, grad, fill_value: -nll(x,grad,fill_value), moves=emcee.moves.MHMove(proposal),args=[None,np.inf]) with np.errstate(invalid='ignore'): - sampler.run_mcmc(pos, 100000) + sampler.run_mcmc(pos, args.steps) print("Mean acceptance fraction: {0:.3f}".format(np.mean(sampler.acceptance_fraction))) @@ -1088,47 +630,6 @@ if __name__ == '__main__': except Exception as e: print(e) - if args.save: - # default \textwidth for a fullpage article in Latex is 16.50764 cm. - # You can figure this out by compiling the following TeX document: - # - # \documentclass{article} - # \usepackage{fullpage} - # \usepackage{layouts} - # \begin{document} - # textwidth in cm: \printinunitsof{cm}\prntlen{\textwidth} - # \end{document} - - width = 16.50764 - width /= 2.54 # cm -> inches - # According to this page: - # http://www-personal.umich.edu/~jpboyd/eng403_chap2_tuftegospel.pdf, - # Tufte suggests an aspect ratio of 1.5 - 1.6. - height = width/1.5 - FIGSIZE = (width,height) - - import matplotlib.pyplot as plt - - font = {'family':'serif', 'serif': ['computer modern roman']} - plt.rc('font',**font) - - plt.rc('text', usetex=True) - else: - # on retina screens, the default plots are way too small - # by using Qt5 and setting QT_AUTO_SCREEN_SCALE_FACTOR=1 - # Qt5 will scale everything using the dpi in ~/.Xresources - import matplotlib - matplotlib.use("Qt5Agg") - - import matplotlib.pyplot as plt - - # Default figure size. Currently set to my monitor width and height so that - # things are properly formatted - FIGSIZE = (13.78,7.48) - - # Make the defalt font bigger - plt.rc('font', size=22) - # Plot walker positions as a function of step number for the expected # number of events fig, axes = plt.subplots(6, figsize=FIGSIZE, num=1, sharex=True) |