From da368858314c1aba32a7d6a08b963351afc49925 Mon Sep 17 00:00:00 2001 From: Stan Seibert Date: Sat, 8 Oct 2011 16:42:52 -0400 Subject: Mercurial subrepositories don't seem to work enough like SVN externals to automatically pull in Tony's histogram classes when someone clones the repository. Now the histogram code has been copied and committed as part of chroma. Maybe someday we can drop this when histogram is an installable python package. --- chroma/.hgsub | 1 - chroma/histogram/.hgignore | 3 + chroma/histogram/__init__.py | 4 + chroma/histogram/draw.py | 26 ++++ chroma/histogram/graph.py | 29 +++++ chroma/histogram/histogram.py | 258 ++++++++++++++++++++++++++++++++++++++++ chroma/histogram/histogramdd.py | 187 +++++++++++++++++++++++++++++ chroma/histogram/root.py | 64 ++++++++++ 8 files changed, 571 insertions(+), 1 deletion(-) delete mode 100644 chroma/.hgsub create mode 100644 chroma/histogram/.hgignore create mode 100644 chroma/histogram/__init__.py create mode 100644 chroma/histogram/draw.py create mode 100644 chroma/histogram/graph.py create mode 100644 chroma/histogram/histogram.py create mode 100644 chroma/histogram/histogramdd.py create mode 100644 chroma/histogram/root.py diff --git a/chroma/.hgsub b/chroma/.hgsub deleted file mode 100644 index d3f70fc..0000000 --- a/chroma/.hgsub +++ /dev/null @@ -1 +0,0 @@ -histogram = http://bitbucket.org/tlatorre/histogram diff --git a/chroma/histogram/.hgignore b/chroma/histogram/.hgignore new file mode 100644 index 0000000..ea5e656 --- /dev/null +++ b/chroma/histogram/.hgignore @@ -0,0 +1,3 @@ +syntax:glob +*.pyc +*~ \ No newline at end of file diff --git a/chroma/histogram/__init__.py b/chroma/histogram/__init__.py new file mode 100644 index 0000000..9929631 --- /dev/null +++ b/chroma/histogram/__init__.py @@ -0,0 +1,4 @@ +from histogram import Histogram +from histogramdd import HistogramDD +from graph import Graph +from root import rootify, getcanvas diff --git a/chroma/histogram/draw.py b/chroma/histogram/draw.py new file mode 100644 index 0000000..391ed31 --- /dev/null +++ b/chroma/histogram/draw.py @@ -0,0 +1,26 @@ +import numpy as np +import matplotlib.pyplot as plt +from histogram import Histogram +from histogramdd import HistogramDD + +def draw(obj, title='', xlabel='', ylabel='', **kwargs): + if isinstance(obj, Histogram): + plt.figure() + plt.bar(obj.bins[:-1], obj.hist, np.diff(obj.bins), yerr=obj.errs, + color='white', edgecolor='black', ecolor='black') + plt.title(title) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.draw() + + if isinstance(obj, HistogramDD): + if len(obj.hist.shape) != 2: + raise TypeError("drawing only supported for 2-d histograms.") + + plt.figure() + cs = plt.contour(obj.bincenters[0], obj.bincenters[1], obj.hist) + plt.clabel(cs, fontsize=9, inline=1) + plt.title(title) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.draw() diff --git a/chroma/histogram/graph.py b/chroma/histogram/graph.py new file mode 100644 index 0000000..5e27dee --- /dev/null +++ b/chroma/histogram/graph.py @@ -0,0 +1,29 @@ +import numpy as np + +class Graph(object): + """ + Graph object. + + Args: + - x: array, *optional* + - y: array, *optional* + - xerr: array, *optional* + - yerr: array, *optional* + """ + def __init__(self, x=[], y=[], xerr=None, yerr=None): + self.x = np.asarray(x) + self.y = np.asarray(y) + if xerr is None: + self.xerr = np.zeros(self.x.size) + else: + self.xerr = np.array(xerr) + if yerr is None: + self.yerr = np.zeros(self.x.size) + else: + self.yerr = np.array(yerr) + + if self.y.size != self.x.size or self.xerr.size != self.x.size \ + or self.yerr.size != self.x.size: + raise ValueError('array size mismatch') + + self.size = self.x.size diff --git a/chroma/histogram/histogram.py b/chroma/histogram/histogram.py new file mode 100644 index 0000000..9ae1280 --- /dev/null +++ b/chroma/histogram/histogram.py @@ -0,0 +1,258 @@ +from __future__ import division +import numpy as np +import numpy.ma as ma +from copy import copy, deepcopy + +try: + from uncertainties import ufloat, umath + from uncertainties.unumpy import uarray +except ImportError: + from warnings import warn + warn('unable to import uncertainties package') + +class Histogram(object): + """ + Histogram object. + + Args + - bins: int or sequence of scalars + If `bins` is an int, it defines the number of equal-width bins + in the range given by `interval`. If 'bins is a sequence, it + defines the bin edges, including the rightmost edge. + - range: (float, float), *optional* + The lower and upper range of the `bins` when `bins` is an int. + + .. note:: + (from numpy.histogram, which is used to fill bins) + + All but the last (righthand-most) bin is half-open. In other words, + if `bins` is ``[1,2,3,4]`` then the first bin is ``[1,2)`` + (including 1, but excluding 2) and the second ``[2, 3)``. The last + bin, however, is ``[3, 4]``, which *includes* 4. + + Example + >>> h = Histogram(100, (0, 100)) + >>> h.fill(np.random.exponential(3, size=10000) + """ + def __init__(self, bins=10, range=(-0.5,9.5)): + if np.isscalar(bins): + self.bins = np.linspace(range[0], range[1], bins+1) + else: + self.bins = np.asarray(bins, float) + + if (np.diff(self.bins) < 0).any(): + raise AttributeError('bins must increase monotonically.') + + self.bincenters = (self.bins[:-1] + self.bins[1:])/2 + + self.errs = np.zeros(self.bins.size - 1) + self.hist = np.zeros(self.bins.size - 1) + + self.nentries = 0 + + def fill(self, x): + """Fill histogram with values from the array `x`.""" + add = np.histogram(np.asarray(x), self.bins)[0] + self.hist += add + self.errs = np.sqrt(self.errs**2 + add) + + self.nentries += np.sum(add) + + def findbin(self, x): + """Find the bin index corresponding to the value `x`.""" + # the new numpy histogram has a closed right interval + # on the last bin, but this function will not give you that bin + # if called with the last bin edge + return np.searchsorted(self.bins, x, side='right') - 1 + + def eval(self, x, fill_value=0): + """Return the histogram value at `x`.""" + mbins = ma.masked_outside(self.findbin(x), 0, self.hist.size-1) + value = ma.masked_where(mbins.mask, self.hist[mbins.filled(0)]) + + if np.iterable(x): + return value.filled(fill_value) + else: + return value.filled(fill_value).item() + + def ueval(self, x, fill_value=0, fill_err=0): + """Return the histogram value and uncertainty at `x`.""" + mbins = ma.masked_outside(self.findbin(x), 0, self.hist.size-1) + value, err = ma.masked_where(mbins.mask, self.hist[mbins.filled(0)]), \ + ma.masked_where(mbins.mask, self.errs[mbins.filled(0)]) + + if np.iterable(x): + return uarray((value.filled(fill_value), err.filled(fill_err))) + else: + return ufloat((value.filled(fill_value).item(), \ + err.filled(fill_err).item())) + + def interp(self, x): + """ + Interpolate the histogram value at `x`. + + .. warning:: + ``interp()`` will return 0.0 for bincenter[0] < `x` < bincenter[-1] + """ + return np.interp(x, self.bincenters, self.hist, left=0.0, right=0.0) + + def mean(self): + """Return the mean of the histogram along the bin axis.""" + return np.dot(self.bincenters, self.hist)/np.sum(self.hist) + + def reset(self): + """Reset all bin contents/errors to zero.""" + self.hist[:] = 0.0 + self.errs[:] = 0.0 + + self.nentries = 0 + + def sum(self, width=False): + """ + Return the sum of the bin contents. + + Args + - width: boolean, *optional* + If `width` is True, multiply bin contents by bin widths. + """ + if width: + return np.dot(np.diff(self.bins), self.hist) + else: + return np.sum(self.hist) + + def usum(self, width=False): + """ + Return the sum of the bin contents and uncertainty. + + See sum(). + """ + if width: + return np.dot(np.diff(self.bins), uarray((self.hist, self.errs))) + else: + return np.sum(uarray((self.hist, self.errs))) + + def integrate(self, x1, x2, width=False): + """ + Return the integral of the bin contents from `x1` to `x2`. + + Args + - width: boolean, *optional* + If `width` is True, multiply bin contents by bin widths. + """ + i1, i2 = self.findbin([x1,x2]) + + if width: + return np.dot(np.diff(self.bins[i1:i2+2]), self.hist[i1:i2+1]) + else: + return np.sum(self.hist[i1:i2+1]) + + def uintegrate(self, x1, x2, width=False): + """ + Return the integral of the bin contents from `x1` to `x2` and + uncertainty. + + See integrate(). + """ + i1, i2 = self.findbin([x1,x2]) + + if width: + return np.dot(np.diff(self.bins[i1:i2+2]), + uarray((self.hist[i1:i2+1], self.errs[i1:i2+1]))) + else: + return np.sum(uarray((self.hist[i1:i2+1], self.errs[i1:i2+1]))) + + def scale(self, c): + """Scale bin contents and errors by `c`.""" + self.hist *= c + self.errs *= c + + def normalize(self, width=False): + """ + Normalize the histogram such that the sum of the bin contents is 1. + + Args + - width: boolean, *optional* + if `width` is True, multiply bin values by bin width + """ + self.scale(1/self.sum(width)) + + def fit(self, func, pars=(), xmin=None, xmax=None, fmin=None, **kwargs): + """ + Fit the histogram to the function `func` and return the optimum + parameters. + + Args + - func: callable + Function to fit histogram to; should be callable as + ``func(x, *pars)``. + - pars: tuple, *optional* + Set of parameters passed to `func`, i.e. ``f(x, *pars)``. + - xmin: float, *optional* + Minimum value along the bin axis to fit. + - xmax: float, *optional* + Maximum value along the bin axis to fit. + - fmin: callable, *optional* + Minimization function to use. Defaults to + ``scipy.optimize.fmin_powell``. See scipy.optimize + documenation for calling syntax. + + Returns + - xopt: float + Optimized parameters. + - chi2: float + Minimum chi^2 value + + .. warning:: + ``func(x, *pars)`` **must** accept `x` as an array and return + an array of mapped values. + """ + import scipy.optimize + + if fmin is None: + fmin = scipy.optimize.fmin + + return fmin(lambda x: self.chi2(func, x, xmin, xmax), pars, **kwargs) + + def chi2(self, func, pars=(), xmin=None, xmax=None): + """ + Return the chi2 test value of the histogram with respect to the + function `func`. + + Args: + - func: callable + Function which chi2 of histogram is computed against. + Should be callable as ``func(x, *pars)``. + - pars: tuple, *optional* + Set of parameters passed to func, i.e. ``f(x, *pars)``. + - xmin: float, *optional* + Minimum value along the bin axis to compute chi2 for. + - xmax: float, *optional* + Maximum value along the bin axis to compute chi2 for. + + Returns: + - chi2 : float + chi^2 value + + .. warning:: + ``func(x, *pars)`` **must** accept `x` as an array and return + an array of mapped values. + + Example + >>> from scipy.stats import norm + >>> h = Histogram(100) + >>> h.fill(np.random.normal(5,1,1000)) + >>> h.normalize(width=True) + >>> h.chi2(norm.pdf, (5,1)) + 39.358879146386258 + """ + + if xmin is None: + xmin = self.bins[0] + if xmax is None: + xmax = self.bins[-1] + + afunc = func(self.bincenters, *pars) + amask = (xmin < self.bincenters)*(xmax > self.bincenters)*\ + (self.errs != 0.0) + + return np.sum(((afunc[amask] - self.hist[amask])/self.errs[amask])**2) diff --git a/chroma/histogram/histogramdd.py b/chroma/histogram/histogramdd.py new file mode 100644 index 0000000..5030885 --- /dev/null +++ b/chroma/histogram/histogramdd.py @@ -0,0 +1,187 @@ +from __future__ import division +import numpy as np +import numpy.ma as ma +from copy import copy, deepcopy + +try: + from uncertainties import ufloat, umath + from uncertainties.unumpy import uarray +except ImportError: + from warnings import warn + warn('unable to import uncertainties package') + +class HistogramDD(object): + """ + Multi-dimensional histogram. + + Args + - bins: sequence, *optional* + - A sequence of arrays describing the bin edges along each + dimension. + - The number of bins for each dimension (nx,ny,...=bins) + - range: sequence, *optional* + A sequence of lower and upper bin edges to be used if the edges + are not given explicitly bin `bins`. + + .. note:: + (from numpy.histogram, which is used to fill bins) + + All but the last (righthand-most) bin is half-open. In other words, + if `bins` is ``[1,2,3,4]`` along some axis then the first bin along + that axis is ``[1,2)`` (including 1, but excluding 2) and the second + ``[2,3)``. The last bin, however, is ``[3,4]``, which *includes* 4. + + Example + >>> h = Histogram((10,10), [(-0.5, 9,5), (-0.5, 9.5)]) + >>> h.fill(np.random.normal(5,2,size=(1000,2))) + """ + def __init__(self, bins=(10,10), range=[(-0.5, 9.5), (-0.5, 9.5)]): + try: + D = len(bins) + except TypeError: + raise TypeError("bins must be a sequence.") + + self.bins = D*[None] + self.bincenters = D*[None] + + nbins = D*[None] + + for i in np.arange(D): + if np.isscalar(bins[i]): + self.bins[i] = np.linspace(range[i][0], range[i][1], bins[i]+1) + else: + self.bins[i] = np.asarray(bins[i], float) + + if (np.diff(bins[i]) < 0).any(): + raise AttributeError("bins must increase monotonically.") + + self.bincenters[i] = (self.bins[i][:-1] + self.bins[i][1:])/2 + + nbins[i] = self.bins[i].size-1 + + self.hist = np.zeros(nbins, float) + self.errs = np.zeros(nbins, float) + + self.nentries = 0 + + def fill(self, x): + """Fill histogram with values from the array `x`.""" + x = np.asarray(x) + + if len(x.shape) == 1: + x = np.array([x,]) + + add = np.histogramdd(np.asarray(x), self.bins)[0] + self.hist += add + self.errs = np.sqrt(self.errs**2 + add) + + self.nentries += x.shape[0] + + def findbin(self, x): + """ + Find the bin index corresponding to the value `x`. + + Args + - x: sequence + A list of arrays for the values along each axis, i.e. the + first element of `x` should be the values along the first + bin axis, the second element is the values along the second + bin axis, etc. + + .. note:: + This syntax might seem unintuitive, but it allows numpy functions + to be called directly which will allow for fast access to the + histogram data. + + Returns + - out: sequence + A list of arrays for the bin index along each axis. + + Example + >>> h = HistogramDD() + >>> h.fill(np.random.normal(5,2,size=(1000,2))) + + Now, we get the bin indices along the diagonal of the histogram + at (0,0), (1,1), ..., (9,9). + + >>> bins = h.findbin([range(10), range(10)]) + + At this point, bins is a list with two entries. The first entry + is the bin indices along the first axis, the second is the bin + indices along the second axis. Now, we get the value at (0,0). + + >> h.hist[bins[0][0]][bins[1][0]] + + or, + + >> h.hist[bins[0][0], bins[1,0]] + + Or, we can get all the values at once. + + >> h.hist[bins] + """ + # the new numpy histogram has a closed right interval on the last + # bin, but this function will not give you that bin if called with + # the last bin edge + return [np.searchsorted(self.bins[i], x[i], side='right') - 1 for \ + i in range(len(self.bins))] + + def eval(self, x, fill_value=0): + """ + Return the histogram value at `x`. + + See findbin(). + """ + bins = self.findbin(x) + mbins = [ma.masked_outside(bins[i], 0, self.hist[i].size-1, False) \ + for i in range(len(bins))] + value = ma.array(\ + self.hist[[mbins[i].filled(0) for i in range(len(mbins))]], + mask=np.logical_or.reduce([ma.getmaskarray(mbins[i]) \ + for i in range(len(mbins))])) + + return value.filled(fill_value) + + def ueval(self, x, fill_value=0, fill_err=0): + """ + Return the histogram value and uncertainty at `x`. + + See findbin(). + """ + bins = self.findbin(x) + mbins = [ma.masked_outside(bins[i], 0, self.hist[i].size-1, False) \ + for i in range(len(bins))] + valuemask = np.logical_or.reduce([ma.getmaskarray(mbins[i]) \ + for i in range(len(mbins))]) + + filledbins = [mbins[i].filled(0) for i in range(len(mbins))] + value, err = ma.array(self.hist[filledbins], mask=valuemask), \ + ma.array(self.errs[filledbins], mask=valuemask) + + return uarray((value.filled(fill_value), err.filled(fill_err))) + + def reset(self): + """Reset all bin contents/errors to zero.""" + self.hist[:] = 0.0 + self.errs[:] = 0.0 + + self.nentries = 0 + + def sum(self): + """Return the sum of the bin contents.""" + return np.sum(self.hist) + + def usum(self): + """Return the sum of the bin contents and uncertainty.""" + return np.sum(uarray((self.hist, self.errs))) + + def scale(self, c): + """Scale bin values and errors by `c`.""" + self.hist *= c + self.errs *= c + + def normalize(self): + """ + Normalize the histogram such that the sum of the bin contents is 1. + """ + self.scale(1/self.sum()) diff --git a/chroma/histogram/root.py b/chroma/histogram/root.py new file mode 100644 index 0000000..a6374e0 --- /dev/null +++ b/chroma/histogram/root.py @@ -0,0 +1,64 @@ +import numpy as np +import time +import ROOT +ROOT.gROOT.SetStyle('Plain') +from histogram import Histogram +from graph import Graph + +def rootify(obj, *pars, **kwargs): + if type(obj) is Histogram: + return rootify_histogram(obj, *pars, **kwargs) + if type(obj) is Graph: + return rootify_graph(obj, *pars, **kwargs) + if callable(obj): + return rootify_function(obj, *pars, **kwargs) + raise Exception("i don't know how to rootify %s" % type(obj)) + +def rootify_function(f, pars=(), name='', xmin=-1, xmax=50): + def func(xbuf, pars=()): + x = [x for x in xbuf] + return f(x[0], *pars) + + if name == '': + name = 'f%s' % len(ROOT.gROOT.GetListOfFunctions()) + + froot = ROOT.TF1(name, func, xmin, xmax, len(pars)) + + for i, par in enumerate(pars): + froot.SetParameter(i, par) + + return froot + +def rootify_graph(g, name='', title='', **kwargs): + groot = ROOT.TGraphErrors(g.size, + np.asarray(g.x, dtype=np.double), + np.asarray(g.y, dtype=np.double), + np.asarray(g.xerr, dtype=np.double), + np.asarray(g.yerr, dtype=np.double)) + groot.SetTitle(title) + return groot + +def rootify_histogram(h, name='', title='', **kwargs): + if name == '': + name = time.asctime() + + hroot = ROOT.TH1D(name, title, h.hist.size, h.bins) + for i in range(h.hist.size): + hroot[i+1] = h.hist[i] + hroot.SetBinError(i+1, h.errs[i]) + + if 'linecolor' in kwargs: + hroot.SetLineColor(kwargs['linecolor']) + + return hroot + +def update_histogram(h, hroot): + for i in range(h.hist.size): + hroot[i+1] = h.hist[i] + hroot.SetBinError(i+1, h.errs[i]) + +def getcanvas(log=False): + c = ROOT.TCanvas('c%s' % len(ROOT.gROOT.GetListOfCanvases()), '', 800, 600) + if log: + c.SetLogy() + return c -- cgit