[120071] trunk/dports/python/py-scikit-learn

stromnov at macports.org
Wed May 14 15:28:52 PDT 2014


Revision: 120071
          https://trac.macports.org/changeset/120071
Author:   stromnov at macports.org
Date:     2014-05-14 15:28:52 -0700 (Wed, 14 May 2014)
Log Message:
-----------
py-scikit-learn: add py34 subport

Modified Paths:
--------------
    trunk/dports/python/py-scikit-learn/Portfile

Added Paths:
-----------
    trunk/dports/python/py-scikit-learn/files/
    trunk/dports/python/py-scikit-learn/files/patch-sklearn_neighbors_binary_tree.pxi.diff

Modified: trunk/dports/python/py-scikit-learn/Portfile
===================================================================
--- trunk/dports/python/py-scikit-learn/Portfile	2014-05-14 22:27:37 UTC (rev 120070)
+++ trunk/dports/python/py-scikit-learn/Portfile	2014-05-14 22:28:52 UTC (rev 120071)
@@ -14,7 +14,7 @@
 # Stealth update for 0.14.1
 dist_subdir         ${name}/${version}_1
 
-python.versions     26 27 32 33
+python.versions     26 27 32 33 34
 
 maintainers         stromnov openmaintainer
 
@@ -35,10 +35,26 @@
                     sha256  34821b8f460bdb7aba8eb882353bacbd01671bfb8057649ffcdce7f7ffa4a21d
 
 if {${name} ne ${subport}} {
-    depends_lib-append  \
-                        port:py${python.version}-numpy \
+    depends_lib-append  port:py${python.version}-numpy \
                         port:py${python.version}-scipy
 
+    # Cythonized files in scikit-learn 0.14.1 are incompatible with Python 3.4
+    # See https://github.com/scikit-learn/scikit-learn/issues/3114
+    if {${subport} eq "py34-scikit-learn"} {
+        depends_build-append    \
+                            port:py34-cython
+
+        patchfiles-append   patch-sklearn_neighbors_binary_tree.pxi.diff
+
+        post-configure {
+            fs-traverse item ${worksrcpath} {
+                if {[file isfile ${item}] && [file extension ${item}] eq ".pyx"} {
+                    system "echo Cythoning ${item}...; \"${prefix}/bin/cython-3.4\" \"${item}\""
+                }
+            }
+        }
+    }
+
     livecheck.type      none
 } else {
     livecheck.type      regex
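
For reference, the post-configure block above walks the extracted source
tree and re-runs Cython on every .pyx file. A rough Python equivalent of
that loop (worksrcpath and cython_bin stand in for the Portfile's
${worksrcpath} and ${prefix}/bin/cython-3.4; this is a sketch, not part
of the port) is:

    import os
    import subprocess

    def cythonize_tree(worksrcpath, cython_bin):
        # mirror the fs-traverse loop: find every .pyx file and cythonize it
        for root, dirs, files in os.walk(worksrcpath):
            for name in files:
                if name.endswith(".pyx"):
                    path = os.path.join(root, name)
                    print("Cythoning %s..." % path)
                    subprocess.check_call([cython_bin, path])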

Added: trunk/dports/python/py-scikit-learn/files/patch-sklearn_neighbors_binary_tree.pxi.diff
===================================================================
--- trunk/dports/python/py-scikit-learn/files/patch-sklearn_neighbors_binary_tree.pxi.diff	                        (rev 0)
+++ trunk/dports/python/py-scikit-learn/files/patch-sklearn_neighbors_binary_tree.pxi.diff	2014-05-14 22:28:52 UTC (rev 120071)
@@ -0,0 +1,2532 @@
+--- /dev/null	2014-05-15 02:07:00.000000000 +0400
++++ sklearn/neighbors/binary_tree.pxi	2014-05-15 02:06:36.000000000 +0400
+@@ -0,0 +1,2529 @@
++#!python
++
++# KD Tree and Ball Tree
++# =====================
++#
++#    Author: Jake Vanderplas <jakevdp at cs.washington.edu>, 2012-2013
++#    License: BSD
++#
++# This file is meant to be a literal include in a pyx file.
++# See ball_tree.pyx and kd_tree.pyx
++#
++# The routines here are the core algorithms of the KDTree and BallTree
++# structures.  If Cython supported polymorphism, we would be able to
++# create a subclass and derive KDTree and BallTree from it.  Because
++# polymorphism is not an option, we use this single BinaryTree class
++# as a literal include to avoid duplicating the entire file.
++#
++# A series of functions are implemented in kd_tree.pyx and ball_tree.pyx
++# which use the information here to calculate the lower and upper bounds
++# between a node and a point, and between two nodes.  These functions are
++# used here, and are all that are needed to differentiate between the two
++# tree types.
++#
++# Description of Binary Tree Algorithms
++# -------------------------------------
++# A binary tree can be thought of as a collection of nodes.  The top node
++# contains all the points.  The next level consists of two nodes with half
++# the points in each, and this continues recursively.  Each node contains
++# metadata which allow fast computation of distance bounds: in the case of
++# a ball tree, the metadata is a center and a radius.  In the case of a
++# KD tree, the metadata is the minimum and maximum bound along each dimension.
++#
++# In a typical KD Tree or Ball Tree implementation, the nodes are implemented
++# as dynamically allocated structures with pointers linking them.  Here we
++# take a different approach, storing all relevant data in a set of arrays
++# so that the entire tree object can be saved in a pickle file. For efficiency,
++# the data can be stored in such a way that explicit pointers are not
++# necessary: for node data stored at index i, the two child nodes are at
++# index (2 * i + 1) and (2 * i + 2); the parent node is (i - 1) // 2
++# (where // indicates integer division).
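++#
++# A quick arithmetic check of this layout (illustration only): the node at
++# index i = 3 has children at 2 * 3 + 1 = 7 and 2 * 3 + 2 = 8, and both
++# children recover their parent via (7 - 1) // 2 == (8 - 1) // 2 == 3.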
++#
++# The data arrays used here are as follows:
++#   data : the [n_samples x n_features] array of data from which the tree
++#          is built
++#   idx_array : the length n_samples array used to keep track of the indices
++#          of data within each node.  Each node has values idx_start and
++#          idx_end: the points within the node are given by (using numpy
++#          syntax) data[idx_array[idx_start:idx_end]].
++#   node_data : the length n_nodes array of structures which store the node
++#          indices, node radii, and leaf information for each node.
++#   node_bounds : the [* x n_nodes x n_features] array containing the node
++#          bound information.  For ball tree, the first dimension is 1, and
++#          each row contains the centroid of the node.  For kd tree, the first
++#          dimension is 2 and the rows for each point contain the arrays of
++#          lower bounds and upper bounds in each direction.
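++#
++# For illustration (following the layout above): for a ball tree node i,
++# node_bounds[0, i, :] holds the node centroid; for a kd tree node i,
++# node_bounds[0, i, :] and node_bounds[1, i, :] hold the lower and upper
++# bounds along each feature.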
++#
++# The lack of dynamic allocation means the number of nodes must be computed
++# before the building of the tree. This can be done assuming the points are
++# divided equally between child nodes at each step; although this removes
++# some flexibility in tree creation, it ensures a balanced tree and ensures
++# that the number of nodes required can be computed beforehand.  Given a
++# specified leaf_size (the minimum number of points in any node), it is
++# possible to show that a balanced tree will have
++#
++#     n_levels = 1 + max(0, floor(log2((n_samples - 1) / leaf_size)))
++#
++# in order to satisfy
++#
++#     leaf_size <= min(n_points) <= 2 * leaf_size
++#
++# with the exception of the special case where n_samples < leaf_size.
++# For a given number of levels, the number of nodes in the tree is given by
++#
++#     n_nodes = 2 ** n_levels - 1
++#
++# Both of these results can be shown straightforwardly by induction.  The
++# following code uses these values in the construction of the tree.
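++#
++# Worked example (illustrative numbers): with n_samples = 1000 and
++# leaf_size = 40,
++#     n_levels = 1 + floor(log2(999 / 40)) = 1 + 4 = 5
++#     n_nodes  = 2 ** 5 - 1 = 31
++# so the 2 ** 4 = 16 leaves each hold 62 or 63 points, which satisfies
++# leaf_size <= n_points <= 2 * leaf_size.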
++#
++# Distance Metrics
++# ----------------
++# For flexibility, the trees can be built using a variety of distance metrics.
++# The metrics are described in the DistanceMetric class: the standard
++# Euclidean distance is the default, and is inlined to be faster than other
++# metrics.  In addition, each metric defines both a distance and a
++# "reduced distance", which is often faster to compute, and is therefore
++# used in the query architecture whenever possible. (For example, in the
++# case of the standard Euclidean distance, the reduced distance is the
++# squared-distance).
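++#
++# For example (a sketch): with the Euclidean metric,
++#     rdist(x, y) = sum_j (x[j] - y[j]) ** 2
++#     dist(x, y)  = sqrt(rdist(x, y))
++# and since sqrt is monotonic, comparing rdist values yields the same
++# neighbor ordering while skipping the square root.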
++#
++# Implementation Notes
++# --------------------
++# This implementation uses the common object-oriented approach of having an
++# abstract base class which is extended by the KDTree and BallTree
++# specializations.  Unfortunately, Cython does not currently support
++# polymorphism, so implementing dual tree queries would require rewriting
++# identical implementations of the base algorithm for each subclass.
++#
++# Because of this deficiency, we use a bit of a hack here: the BinaryTree
++# "base class" is defined here and then explicitly included in the BallTree
++# and KDTree pyx files.  These files include implementations of the
++# "abstract" methods.  The KDTree and BallTree classes are then explicit
++# copies of each separate BinaryTree class definition.
++#
++# Hackish?  Yes.  But it leads to fast execution without the need to duplicate
++# the code in two places.
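++#
++# Concretely, each concrete tree module pulls this file in with a Cython
++# include statement, e.g. in kd_tree.pyx:
++#
++#     include "binary_tree.pxi"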
++#
++# Necessary Helper Functions
++# --------------------------
++# These are the names and descriptions of the "abstract" functions which are
++# defined in kd_tree.pyx and ball_tree.pyx:
++
++# cdef int allocate_data(BinaryTree tree, ITYPE_t n_nodes, ITYPE_t n_features):
++#     """Allocate arrays needed for the KD Tree"""
++
++# cdef int init_node(BinaryTree tree, ITYPE_t i_node,
++#                    ITYPE_t idx_start, ITYPE_t idx_end):
++#    """Initialize the node for the dataset stored in tree.data"""
++
++# cdef DTYPE_t min_rdist(BinaryTree tree, ITYPE_t i_node, DTYPE_t* pt):
++#     """Compute the minimum reduced-distance between a point and a node"""
++
++# cdef DTYPE_t min_dist(BinaryTree tree, ITYPE_t i_node, DTYPE_t* pt):
++#     """Compute the minimum distance between a point and a node"""
++
++# cdef DTYPE_t max_rdist(BinaryTree tree, ITYPE_t i_node, DTYPE_t* pt):
++#     """Compute the maximum reduced-distance between a point and a node"""
++
++# cdef DTYPE_t max_dist(BinaryTree tree, ITYPE_t i_node, DTYPE_t* pt):
++#     """Compute the maximum distance between a point and a node"""
++
++# cdef inline int min_max_dist(BinaryTree tree, ITYPE_t i_node, DTYPE_t* pt,
++#                              DTYPE_t* min_dist, DTYPE_t* max_dist):
++#     """Compute the minimum and maximum distance between a point and a node"""
++
++# cdef inline DTYPE_t min_rdist_dual(BinaryTree tree1, ITYPE_t i_node1,
++#                                    BinaryTree tree2, ITYPE_t i_node2):
++#     """Compute the minimum reduced distance between two nodes"""
++
++# cdef inline DTYPE_t min_dist_dual(BinaryTree tree1, ITYPE_t i_node1,
++#                                   BinaryTree tree2, ITYPE_t i_node2):
++#     """Compute the minimum distance between two nodes"""
++
++# cdef inline DTYPE_t max_rdist_dual(BinaryTree tree1, ITYPE_t i_node1,
++#                                    BinaryTree tree2, ITYPE_t i_node2):
++#     """Compute the maximum reduced distance between two nodes"""
++
++# cdef inline DTYPE_t max_dist_dual(BinaryTree tree1, ITYPE_t i_node1,
++#                                   BinaryTree tree2, ITYPE_t i_node2):
++#     """Compute the maximum distance between two nodes"""
++
++cimport cython
++cimport numpy as np
++from libc.math cimport fabs, sqrt, exp, cos, pow, log
++from sklearn.utils.lgamma cimport lgamma
++
++import numpy as np
++import warnings
++from ..utils import array2d
++
++from typedefs cimport DTYPE_t, ITYPE_t, DITYPE_t
++from typedefs import DTYPE, ITYPE
++
++from dist_metrics cimport (DistanceMetric, euclidean_dist, euclidean_rdist,
++                           euclidean_dist_to_rdist, euclidean_rdist_to_dist)
++
++# some handy constants
++cdef DTYPE_t INF = np.inf
++cdef DTYPE_t NEG_INF = -np.inf
++cdef DTYPE_t PI = np.pi
++cdef DTYPE_t ROOT_2PI = sqrt(2 * PI)
++cdef DTYPE_t LOG_PI = log(PI)
++cdef DTYPE_t LOG_2PI = log(2 * PI)
++
++
++# Some compound datatypes used below:
++cdef struct NodeHeapData_t:
++    DTYPE_t val
++    ITYPE_t i1
++    ITYPE_t i2
++
++# build the corresponding numpy dtype for NodeHeapData
++# There is no offsetof() function in cython, so we hack it.
++# If we can ensure numpy 1.5 or greater, a cleaner way is to do
++#     cdef NodeHeapData_t nhd_tmp
++#     NodeHeapData = np.asarray(<NodeHeapData_t[:1]>(&nhd_tmp)).dtype
++cdef NodeHeapData_t nhd_tmp
++offsets = [<np.intp_t>&(nhd_tmp.val) - <np.intp_t>&nhd_tmp,
++           <np.intp_t>&(nhd_tmp.i1) - <np.intp_t>&nhd_tmp,
++           <np.intp_t>&(nhd_tmp.i2) - <np.intp_t>&nhd_tmp]
++NodeHeapData = np.dtype({'names': ['val', 'i1', 'i2'],
++                         'formats': [DTYPE, ITYPE, ITYPE],
++                         'offsets': offsets,
++                         'itemsize': sizeof(NodeHeapData_t)})
++
++cdef struct NodeData_t:
++    ITYPE_t idx_start
++    ITYPE_t idx_end
++    ITYPE_t is_leaf
++    DTYPE_t radius
++
++# build the corresponding numpy dtype for NodeData
++# There is no offsetof() function in cython, so we hack it.
++# If we can ensure numpy 1.5 or greater, a cleaner way is to do
++#     cdef NodeData_t nd_tmp
++#     NodeData = np.asarray(<NodeData_t[:1]>(&nd_tmp)).dtype
++cdef NodeData_t nd_tmp
++offsets = [<np.intp_t>&(nd_tmp.idx_start) - <np.intp_t>&nd_tmp,
++           <np.intp_t>&(nd_tmp.idx_end) - <np.intp_t>&nd_tmp,
++           <np.intp_t>&(nd_tmp.is_leaf) - <np.intp_t>&nd_tmp,
++           <np.intp_t>&(nd_tmp.radius) - <np.intp_t>&nd_tmp]
++NodeData = np.dtype({'names': ['idx_start', 'idx_end', 'is_leaf', 'radius'],
++                     'formats': [ITYPE, ITYPE, ITYPE, DTYPE],
++                     'offsets': offsets,
++                     'itemsize': sizeof(NodeData_t)})
++
++
++######################################################################
++# Numpy 1.3-1.4 compatibility utilities
++cdef DTYPE_t[::1] get_memview_DTYPE_1D(
++                               np.ndarray[DTYPE_t, ndim=1, mode='c'] X):
++    return <DTYPE_t[:X.shape[0]:1]> (<DTYPE_t*> X.data)
++
++
++cdef DTYPE_t[:, ::1] get_memview_DTYPE_2D(
++                               np.ndarray[DTYPE_t, ndim=2, mode='c'] X):
++    return <DTYPE_t[:X.shape[0], :X.shape[1]:1]> (<DTYPE_t*> X.data)
++
++
++cdef DTYPE_t[:, :, ::1] get_memview_DTYPE_3D(
++                               np.ndarray[DTYPE_t, ndim=3, mode='c'] X):
++    return <DTYPE_t[:X.shape[0], :X.shape[1], :X.shape[2]:1]>\
++                                                       (<DTYPE_t*> X.data)
++
++
++cdef ITYPE_t[::1] get_memview_ITYPE_1D(
++                               np.ndarray[ITYPE_t, ndim=1, mode='c'] X):
++    return <ITYPE_t[:X.shape[0]:1]> (<ITYPE_t*> X.data)
++
++
++cdef ITYPE_t[:, ::1] get_memview_ITYPE_2D(
++                               np.ndarray[ITYPE_t, ndim=2, mode='c'] X):
++    return <ITYPE_t[:X.shape[0], :X.shape[1]:1]> (<ITYPE_t*> X.data)
++
++
++cdef NodeHeapData_t[::1] get_memview_NodeHeapData_1D(
++                    np.ndarray[NodeHeapData_t, ndim=1, mode='c'] X):
++    return <NodeHeapData_t[:X.shape[0]:1]> (<NodeHeapData_t*> X.data)
++
++
++cdef NodeData_t[::1] get_memview_NodeData_1D(
++                    np.ndarray[NodeData_t, ndim=1, mode='c'] X):
++    return <NodeData_t[:X.shape[0]:1]> (<NodeData_t*> X.data)
++
++######################################################################
++
++
++
++######################################################################
++# Define doc strings, substituting the appropriate class name using
++# the DOC_DICT variable defined in the pyx files.
++CLASS_DOC = \
++"""{BinaryTree} for fast generalized N-point problems
++
++{BinaryTree}(X, leaf_size=40, metric='minkowski', **kwargs)
++
++Parameters
++----------
++X : array-like, shape = [n_samples, n_features]
++    n_samples is the number of points in the data set, and
++    n_features is the dimension of the parameter space.
++    Note: if X is a C-contiguous array of doubles then data will
++    not be copied. Otherwise, an internal copy will be made.
++
++leaf_size : positive integer (default = 40)
++    Number of points at which to switch to brute-force. Changing
++    leaf_size will not affect the results of a query, but can
++    significantly impact the speed of a query and the memory required
++    to store the constructed tree.  The amount of memory needed to
++    store the tree scales as approximately n_samples / leaf_size.
++    For a specified ``leaf_size``, a leaf node is guaranteed to
++    satisfy ``leaf_size <= n_points <= 2 * leaf_size``, except in
++    the case that ``n_samples < leaf_size``.
++
++metric : string or DistanceMetric object
++    the distance metric to use for the tree.  Default='minkowski'
++    with p=2 (that is, a euclidean metric). See the documentation
++    of the DistanceMetric class for a list of available metrics.
++    {binary_tree}.valid_metrics gives a list of the metrics which
++    are valid for {BinaryTree}.
++
++Additional keywords are passed to the distance metric class.
++
++Attributes
++----------
++data : np.ndarray
++    The training data
++
++Examples
++--------
++Query for k-nearest neighbors
++
++    >>> import numpy as np
++
++    >>> np.random.seed(0)
++    >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
++    >>> tree = {BinaryTree}(X, leaf_size=2)              # doctest: +SKIP
++    >>> dist, ind = tree.query(X[0], k=3)                # doctest: +SKIP
++    >>> print ind  # indices of 3 closest neighbors
++    [0 3 1]
++    >>> print dist  # distances to 3 closest neighbors
++    [ 0.          0.19662693  0.29473397]
++
++Pickle and Unpickle a tree.  Note that the state of the tree is saved in the
++pickle operation: the tree need not be rebuilt upon unpickling.
++
++    >>> import numpy as np
++    >>> import pickle
++    >>> np.random.seed(0)
++    >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
++    >>> tree = {BinaryTree}(X, leaf_size=2)        # doctest: +SKIP
++    >>> s = pickle.dumps(tree)                     # doctest: +SKIP
++    >>> tree_copy = pickle.loads(s)                # doctest: +SKIP
++    >>> dist, ind = tree_copy.query(X[0], k=3)     # doctest: +SKIP
++    >>> print ind  # indices of 3 closest neighbors
++    [0 3 1]
++    >>> print dist  # distances to 3 closest neighbors
++    [ 0.          0.19662693  0.29473397]
++
++Query for neighbors within a given radius
++
++    >>> import numpy as np
++    >>> np.random.seed(0)
++    >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
++    >>> tree = {BinaryTree}(X, leaf_size=2)     # doctest: +SKIP
++    >>> print tree.query_radius(X[0], r=0.3, count_only=True)
++    3
++    >>> ind = tree.query_radius(X[0], r=0.3)  # doctest: +SKIP
++    >>> print ind  # indices of neighbors within distance 0.3
++    [3 0 1]
++
++
++Compute a gaussian kernel density estimate:
++
++    >>> import numpy as np
++    >>> np.random.seed(1)
++    >>> X = np.random.random((100, 3))
++    >>> tree = {BinaryTree}(X)                # doctest: +SKIP
++    >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')
++    array([ 6.94114649,  7.83281226,  7.2071716 ])
++
++Compute a two-point auto-correlation function
++
++    >>> import numpy as np
++    >>> np.random.seed(0)
++    >>> X = np.random.random((30, 3))
++    >>> r = np.linspace(0, 1, 5)
++    >>> tree = {BinaryTree}(X)                # doctest: +SKIP
++    >>> tree.two_point_correlation(X, r)
++    array([ 30,  62, 278, 580, 820])
++
++""".format(**DOC_DICT)
++
++
++######################################################################
++# Utility functions
++cdef DTYPE_t logaddexp(DTYPE_t x1, DTYPE_t x2):
++    """logaddexp(x1, x2) -> log(exp(x1) + exp(x2))"""
++    cdef DTYPE_t a = fmax(x1, x2)
++    if a == NEG_INF:
++        return NEG_INF
++    else:
++        return a + log(exp(x1 - a) + exp(x2 - a))
++
++cdef DTYPE_t logsubexp(DTYPE_t x1, DTYPE_t x2):
++    """logsubexp(x1, x2) -> log(exp(x1) - exp(x2))"""
++    if x1 <= x2:
++        return NEG_INF
++    else:
++        return x1 + log(1 - exp(x2 - x1))
++
++
++######################################################################
++# Kernel functions
++#
++# Note: Kernels assume dist is non-negative and h is positive.
++#       All kernel functions are normalized such that K(0, h) = 1.
++#       The fully normalized kernel is:
++#         K = exp[kernel_norm(h, d, kernel) + compute_kernel(dist, h, kernel)]
++#       The code only works with non-negative kernels: i.e. K(d, h) >= 0
++#       for all valid d and h.  Note that for precision, the log of both
++#       the kernel and kernel norm is returned.
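++#
++#       For instance (illustrative only), the fully normalized gaussian
++#       kernel evaluated at dist = 0 is
++#           exp(_log_kernel_norm(h, d, GAUSSIAN_KERNEL)
++#               + log_gaussian_kernel(0, h)) = (2 * pi * h ** 2) ** (-d / 2.)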
++cdef enum KernelType:
++    GAUSSIAN_KERNEL = 1
++    TOPHAT_KERNEL = 2
++    EPANECHNIKOV_KERNEL = 3
++    EXPONENTIAL_KERNEL = 4
++    LINEAR_KERNEL = 5
++    COSINE_KERNEL = 6
++
++
++cdef inline DTYPE_t log_gaussian_kernel(DTYPE_t dist, DTYPE_t h):
++    """log of the gaussian kernel for bandwidth h (unnormalized)"""
++    return -0.5 * (dist * dist) / (h * h)
++
++
++cdef inline DTYPE_t log_tophat_kernel(DTYPE_t dist, DTYPE_t h):
++    """log of the tophat kernel for bandwidth h (unnormalized)"""
++    if dist < h:
++        return 0.0
++    else:
++        return NEG_INF
++
++
++cdef inline DTYPE_t log_epanechnikov_kernel(DTYPE_t dist, DTYPE_t h):
++    """log of the epanechnikov kernel for bandwidth h (unnormalized)"""
++    if dist < h:
++        return log(1.0 - (dist * dist) / (h * h))
++    else:
++        return NEG_INF
++
++
++cdef inline DTYPE_t log_exponential_kernel(DTYPE_t dist, DTYPE_t h):
++    """log of the exponential kernel for bandwidth h (unnormalized)"""
++    return -dist / h
++
++
++cdef inline DTYPE_t log_linear_kernel(DTYPE_t dist, DTYPE_t h):
++    """log of the linear kernel for bandwidth h (unnormalized)"""
++    if dist < h:
++        return log(1 - dist / h)
++    else:
++        return NEG_INF
++
++
++cdef inline DTYPE_t log_cosine_kernel(DTYPE_t dist, DTYPE_t h):
++    """log of the cosine kernel for bandwidth h (unnormalized)"""
++    if dist < h:
++        return log(cos(0.5 * PI * dist / h))
++    else:
++        return NEG_INF
++
++
++cdef inline DTYPE_t compute_log_kernel(DTYPE_t dist, DTYPE_t h,
++                                       KernelType kernel):
++    """Given a KernelType enumeration, compute the appropriate log-kernel"""
++    if kernel == GAUSSIAN_KERNEL:
++        return log_gaussian_kernel(dist, h)
++    elif kernel == TOPHAT_KERNEL:
++        return log_tophat_kernel(dist, h)
++    elif kernel == EPANECHNIKOV_KERNEL:
++        return log_epanechnikov_kernel(dist, h)
++    elif kernel == EXPONENTIAL_KERNEL:
++        return log_exponential_kernel(dist, h)
++    elif kernel == LINEAR_KERNEL:
++        return log_linear_kernel(dist, h)
++    elif kernel == COSINE_KERNEL:
++        return log_cosine_kernel(dist, h)
++
++
++#------------------------------------------------------------
++# Kernel norms are defined via the volume element V_n
++# and surface element S_(n-1) of an n-sphere.
++cdef DTYPE_t logVn(ITYPE_t n):
++    """V_n = pi^(n/2) / gamma(n/2 - 1)"""
++    return 0.5 * n * LOG_PI - lgamma(0.5 * n + 1)
++
++
++cdef DTYPE_t logSn(ITYPE_t n):
++    """V_(n+1) = int_0^1 S_n r^n dr"""
++    return LOG_2PI + logVn(n - 1)
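++
++# (Sanity checks, illustrative only: exp(logVn(2)) == pi, the area of the
++# unit disk, and exp(logVn(3)) == 4 * pi / 3, the volume of the unit ball.)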
++
++
++cdef DTYPE_t _log_kernel_norm(DTYPE_t h, ITYPE_t d,
++                              KernelType kernel) except -1:
++    """Given a KernelType enumeration, compute the kernel normalization.
++
++    h is the bandwidth, d is the dimension.
++    """
++    cdef DTYPE_t tmp, factor = 0
++    cdef ITYPE_t k
++    if kernel == GAUSSIAN_KERNEL:
++        factor = 0.5 * d * LOG_2PI
++    elif kernel == TOPHAT_KERNEL:
++        factor = logVn(d)
++    elif kernel == EPANECHNIKOV_KERNEL:
++        factor = logVn(d) + log(2. / (d + 2.))
++    elif kernel == EXPONENTIAL_KERNEL:
++        factor = logSn(d - 1) + lgamma(d)
++    elif kernel == LINEAR_KERNEL:
++        factor = logVn(d) - log(d + 1.)
++    elif kernel == COSINE_KERNEL:
++        # this is derived from a chain rule integration
++        factor = 0
++        tmp = 2. / PI
++        for k in range(1, d + 1, 2):
++            factor += tmp
++            tmp *= -(d - k) * (d - k - 1) * (2. / PI) ** 2
++        factor = log(factor) + logSn(d - 1)
++    else:
++        raise ValueError("Kernel code not recognized")
++    return -factor - d * log(h)
++
++
++def kernel_norm(h, d, kernel, return_log=False):
++    """Given a string specification of a kernel, compute the normalization.
++
++    Parameters
++    ----------
++    h : float
++        the bandwidth of the kernel
++    d : int
++        the dimension of the space in which the kernel norm is computed
++    kernel : string
++        The kernel identifier.  Must be one of
++        ['gaussian'|'tophat'|'epanechnikov'|
++         'exponential'|'linear'|'cosine']
++    return_log : boolean
++        if True, return the log of the kernel norm.  Otherwise, return the
++        kernel norm.
++    Returns
++    -------
++    knorm or log_knorm : float
++        the kernel norm or logarithm of the kernel norm.
++    """
++    if kernel == 'gaussian':
++        result = _log_kernel_norm(h, d, GAUSSIAN_KERNEL)
++    elif kernel == 'tophat':
++        result = _log_kernel_norm(h, d, TOPHAT_KERNEL)
++    elif kernel == 'epanechnikov':
++        result = _log_kernel_norm(h, d, EPANECHNIKOV_KERNEL)
++    elif kernel == 'exponential':
++        result = _log_kernel_norm(h, d, EXPONENTIAL_KERNEL)
++    elif kernel == 'linear':
++        result = _log_kernel_norm(h, d, LINEAR_KERNEL)
++    elif kernel == 'cosine':
++        result = _log_kernel_norm(h, d, COSINE_KERNEL)
++    else:
++        raise ValueError('kernel not recognized')
++
++    if return_log:
++        return result
++    else:
++        return np.exp(result)
++
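++# Example (illustrative values): kernel_norm(1.0, 2, 'gaussian') returns
++# 1 / (2 * pi), about 0.1592, the normalization of a 2-d unit-bandwidth
++# gaussian kernel; with return_log=True it returns the log of that value.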
++
++######################################################################
++# Tree Utility Routines
++cdef inline void swap(DITYPE_t* arr, ITYPE_t i1, ITYPE_t i2):
++    """swap the values at index i1 and i2 of arr"""
++    cdef DITYPE_t tmp = arr[i1]
++    arr[i1] = arr[i2]
++    arr[i2] = tmp
++
++
++cdef inline void dual_swap(DTYPE_t* darr, ITYPE_t* iarr,
++                           ITYPE_t i1, ITYPE_t i2):
++    """swap the values at inex i1 and i2 of both darr and iarr"""
++    cdef DTYPE_t dtmp = darr[i1]
++    darr[i1] = darr[i2]
++    darr[i2] = dtmp
++
++    cdef ITYPE_t itmp = iarr[i1]
++    iarr[i1] = iarr[i2]
++    iarr[i2] = itmp
++
++
++cdef class NeighborsHeap:
++    """A max-heap structure to keep track of distances/indices of neighbors
++
++    This implements an efficient pre-allocated set of fixed-size heaps
++    for tracking neighbors, holding both an index and a distance.
++    When any row of the heap is full, adding an additional point will push
++    the furthest point off the heap.
++
++    Parameters
++    ----------
++    n_pts : int
++        the number of heaps to use
++    n_nbrs : int
++        the size of each heap.
++    """
++    cdef np.ndarray distances_arr
++    cdef np.ndarray indices_arr
++
++    cdef DTYPE_t[:, ::1] distances
++    cdef ITYPE_t[:, ::1] indices
++
++    def __cinit__(self):
++        self.distances_arr = np.zeros((1, 1), dtype=DTYPE, order='C')
++        self.indices_arr = np.zeros((1, 1), dtype=ITYPE, order='C')
++        self.distances = get_memview_DTYPE_2D(self.distances_arr)
++        self.indices = get_memview_ITYPE_2D(self.indices_arr)
++
++    def __init__(self, n_pts, n_nbrs):
++        self.distances_arr = np.inf + np.zeros((n_pts, n_nbrs), dtype=DTYPE,
++                                               order='C')
++        self.indices_arr = np.zeros((n_pts, n_nbrs), dtype=ITYPE, order='C')
++        self.distances = get_memview_DTYPE_2D(self.distances_arr)
++        self.indices = get_memview_ITYPE_2D(self.indices_arr)
++
++    def get_arrays(self, sort=True):
++        """Get the arrays of distances and indices within the heap.
++
++        If sort=True, then simultaneously sort the indices and distances,
++        so the closer points are listed first.
++        """
++        if sort:
++            self._sort()
++        return self.distances_arr, self.indices_arr
++
++    cdef inline DTYPE_t largest(self, ITYPE_t row) except -1:
++        """Return the largest distance in the given row"""
++        return self.distances[row, 0]
++
++    cpdef int push(self, ITYPE_t row, DTYPE_t val, ITYPE_t i_val) except -1:
++        """push (val, i_val) into the given row"""
++        cdef ITYPE_t i, ic1, ic2, i_swap
++        cdef ITYPE_t size = self.distances.shape[1]
++        cdef DTYPE_t* dist_arr = &self.distances[row, 0]
++        cdef ITYPE_t* ind_arr = &self.indices[row, 0]
++
++        # check if val should be in heap
++        if val > dist_arr[0]:
++            return 0
++
++        # insert val at position zero
++        dist_arr[0] = val
++        ind_arr[0] = i_val
++
++        # descend the heap, swapping values until the max heap criterion is met
++        i = 0
++        while True:
++            ic1 = 2 * i + 1
++            ic2 = ic1 + 1
++
++            if ic1 >= size:
++                break
++            elif ic2 >= size:
++                if dist_arr[ic1] > val:
++                    i_swap = ic1
++                else:
++                    break
++            elif dist_arr[ic1] >= dist_arr[ic2]:
++                if val < dist_arr[ic1]:
++                    i_swap = ic1
++                else:
++                    break
++            else:
++                if val < dist_arr[ic2]:
++                    i_swap = ic2
++                else:
++                    break
++
++            dist_arr[i] = dist_arr[i_swap]
++            ind_arr[i] = ind_arr[i_swap]
++
++            i = i_swap
++
++        dist_arr[i] = val
++        ind_arr[i] = i_val
++
++        return 0
++
++    cdef int _sort(self) except -1:
++        """simultaneously sort the distances and indices"""
++        cdef DTYPE_t[:, ::1] distances = self.distances
++        cdef ITYPE_t[:, ::1] indices = self.indices
++        cdef ITYPE_t row
++        for row in range(distances.shape[0]):
++            _simultaneous_sort(&distances[row, 0],
++                               &indices[row, 0],
++                               distances.shape[1])
++        return 0
++
++
++cdef int _simultaneous_sort(DTYPE_t* dist, ITYPE_t* idx,
++                            ITYPE_t size) except -1:
++    """
++    Perform a recursive quicksort on the dist array, simultaneously
++    performing the same swaps on the idx array.  The equivalent in
++    numpy (though quite a bit slower) is
++
++    def simultaneous_sort(dist, idx):
++        i = np.argsort(dist)
++        return dist[i], idx[i]
++    """
++    cdef ITYPE_t pivot_idx, i, store_idx
++    cdef DTYPE_t pivot_val
++
++    # in the small-array case, do things efficiently
++    if size <= 1:
++        pass
++    elif size == 2:
++        if dist[0] > dist[1]:
++            dual_swap(dist, idx, 0, 1)
++    elif size == 3:
++        if dist[0] > dist[1]:
++            dual_swap(dist, idx, 0, 1)
++        if dist[1] > dist[2]:
++            dual_swap(dist, idx, 1, 2)
++            if dist[0] > dist[1]:
++                dual_swap(dist, idx, 0, 1)
++    else:
++        # Determine the pivot using the median-of-three rule.
++        # The smallest of the three is moved to the beginning of the array,
++        # the middle (the pivot value) is moved to the end, and the largest
++        # is moved to the pivot index.
++        pivot_idx = size / 2
++        if dist[0] > dist[size - 1]:
++            dual_swap(dist, idx, 0, size - 1)
++        if dist[size - 1] > dist[pivot_idx]:
++            dual_swap(dist, idx, size - 1, pivot_idx)
++            if dist[0] > dist[size - 1]:
++                dual_swap(dist, idx, 0, size - 1)
++        pivot_val = dist[size - 1]
++
++        # partition indices about pivot.  At the end of this operation,
++        # pivot_idx will contain the pivot value, everything to the left
++        # will be smaller, and everything to the right will be larger.
++        store_idx = 0
++        for i in range(size - 1):
++            if dist[i] < pivot_val:
++                dual_swap(dist, idx, i, store_idx)
++                store_idx += 1
++        dual_swap(dist, idx, store_idx, size - 1)
++        pivot_idx = store_idx
++
++        # recursively sort each side of the pivot
++        if pivot_idx > 1:
++            _simultaneous_sort(dist, idx, pivot_idx)
++        if pivot_idx + 2 < size:
++            _simultaneous_sort(dist + pivot_idx + 1,
++                               idx + pivot_idx + 1,
++                               size - pivot_idx - 1)
++    return 0
++
++#------------------------------------------------------------
++# find_node_split_dim:
++#  this computes the equivalent of
++#  j_max = np.argmax(np.max(data, 0) - np.min(data, 0))
++cdef ITYPE_t find_node_split_dim(DTYPE_t* data,
++                                 ITYPE_t* node_indices,
++                                 ITYPE_t n_features,
++                                 ITYPE_t n_points) except -1:
++    """Find the dimension with the largest spread.
++
++    Parameters
++    ----------
++    data : double pointer
++        Pointer to a 2D array of the training data, of shape [N, n_features].
++        N must be greater than any of the values in node_indices.
++    node_indices : int pointer
++        Pointer to a 1D array of length n_points.  This lists the indices of
++        each of the points within the current node.
++
++    Returns
++    -------
++    i_max : int
++        The index of the feature (dimension) within the node that has the
++        largest spread.
++
++    Notes
++    -----
++    In numpy, this operation is equivalent to
++
++    def find_node_split_dim(data, node_indices):
++        return np.argmax(data[node_indices].max(0) - data[node_indices].min(0))
++
++    The cython version is much more efficient in both computation and memory.
++    """
++    cdef DTYPE_t min_val, max_val, val, spread, max_spread
++    cdef ITYPE_t i, j, j_max
++
++    j_max = 0
++    max_spread = 0
++
++    for j in range(n_features):
++        max_val = data[node_indices[0] * n_features + j]
++        min_val = max_val
++        for i in range(1, n_points):
++            val = data[node_indices[i] * n_features + j]
++            max_val = fmax(max_val, val)
++            min_val = fmin(min_val, val)
++        spread = max_val - min_val
++        if spread > max_spread:
++            max_spread = spread
++            j_max = j
++    return j_max
++
++
++cdef int partition_node_indices(DTYPE_t* data,
++                                ITYPE_t* node_indices,
++                                ITYPE_t split_dim,
++                                ITYPE_t split_index,
++                                ITYPE_t n_features,
++                                ITYPE_t n_points) except -1:
++    """Partition points in the node into two equal-sized groups.
++
++    Upon return, the values in node_indices will be rearranged such that
++    (assuming numpy-style indexing):
++
++        data[node_indices[0:split_index], split_dim]
++          <= data[node_indices[split_index], split_dim]
++
++    and
++
++        data[node_indices[split_index], split_dim]
++          <= data[node_indices[split_index:n_points], split_dim]
++
++    The algorithm is essentially a partial in-place quicksort around a
++    set pivot.
++
++    Parameters
++    ----------
++    data : double pointer
++        Pointer to a 2D array of the training data, of shape [N, n_features].
++        N must be greater than any of the values in node_indices.
++    node_indices : int pointer
++        Pointer to a 1D array of length n_points.  This lists the indices of
++        each of the points within the current node.  This will be modified
++        in-place.
++    split_dim : int
++        the dimension on which to split.  This will usually be computed via
++        the routine ``find_node_split_dim``
++    split_index : int
++        the index within node_indices around which to split the points.
++
++    Returns
++    -------
++    status : int
++        integer exit status.  On return, the contents of node_indices are
++        modified as noted above.
++    """
++    cdef ITYPE_t left, right, midindex, i
++    cdef DTYPE_t d1, d2
++    left = 0
++    right = n_points - 1
++
++    while True:
++        midindex = left
++        for i in range(left, right):
++            d1 = data[node_indices[i] * n_features + split_dim]
++            d2 = data[node_indices[right] * n_features + split_dim]
++            if d1 < d2:
++                swap(node_indices, i, midindex)
++                midindex += 1
++        swap(node_indices, midindex, right)
++        if midindex == split_index:
++            break
++        elif midindex < split_index:
++            left = midindex + 1
++        else:
++            right = midindex - 1
++
++    return 0
++
++
++######################################################################
++# NodeHeap : min-heap used to keep track of nodes during
++#            breadth-first query
++cdef inline void swap_nodes(NodeHeapData_t* arr, ITYPE_t i1, ITYPE_t i2):
++    cdef NodeHeapData_t tmp = arr[i1]
++    arr[i1] = arr[i2]
++    arr[i2] = tmp
++
++
++cdef class NodeHeap:
++    """NodeHeap
++
++    This is a min-heap implementation for keeping track of nodes
++    during a breadth-first search.  Unlike the NeighborsHeap above,
++    the NodeHeap does not have a fixed size and must be able to grow
++    as elements are added.
++
++    Internally, the data is stored in a simple binary heap which meets
++    the min heap condition:
++
++        heap[i].val < min(heap[2 * i + 1].val, heap[2 * i + 2].val)
++    """
++    cdef np.ndarray data_arr
++    cdef NodeHeapData_t[::1] data
++    cdef ITYPE_t n
++
++    def __cinit__(self):
++        self.data_arr = np.zeros(1, dtype=NodeHeapData, order='C')
++        self.data = get_memview_NodeHeapData_1D(self.data_arr)
++
++    def __init__(self, size_guess=100):
++        size_guess = max(size_guess, 1)  # need space for at least one item
++        self.data_arr = np.zeros(size_guess, dtype=NodeHeapData, order='C')
++        self.data = get_memview_NodeHeapData_1D(self.data_arr)
++        self.n = size_guess
++        self.clear()
++
++    cdef int resize(self, ITYPE_t new_size) except -1:
++        """Resize the heap to be either larger or smaller"""
++        cdef NodeHeapData_t *data_ptr, *new_data_ptr
++        cdef ITYPE_t i
++        cdef ITYPE_t size = self.data.shape[0]
++        cdef np.ndarray new_data_arr = np.zeros(new_size,
++                                                dtype=NodeHeapData)
++        cdef NodeHeapData_t[::1] new_data =\
++                                    get_memview_NodeHeapData_1D(new_data_arr)
++
++        if size > 0 and new_size > 0:
++            data_ptr = &self.data[0]
++            new_data_ptr = &new_data[0]
++            for i in range(min(size, new_size)):
++                new_data_ptr[i] = data_ptr[i]
++
++        if new_size < size:
++            self.n = new_size
++
++        self.data = new_data
++        self.data_arr = new_data_arr
++        return 0
++
++    cdef int push(self, NodeHeapData_t data) except -1:
++        """Push a new item onto the heap"""
++        cdef ITYPE_t i, i_parent
++        cdef NodeHeapData_t* data_arr
++        self.n += 1
++        if self.n > self.data.shape[0]:
++            self.resize(2 * self.n)
++
++        # put the new element at the end,
++        # and then perform swaps until the heap is in order
++        data_arr = &self.data[0]
++        i = self.n - 1
++        data_arr[i] = data
++
++        while i > 0:
++            i_parent = (i - 1) // 2
++            if data_arr[i_parent].val <= data_arr[i].val:
++                break
++            else:
++                swap_nodes(data_arr, i, i_parent)
++                i = i_parent
++        return 0
++
++    cdef NodeHeapData_t peek(self):
++        """Peek at the root of the heap, without removing it"""
++        return self.data[0]
++
++    cdef NodeHeapData_t pop(self):
++        """Remove the root of the heap, and update the remaining nodes"""
++        if self.n == 0:
++            raise ValueError('cannot pop on empty heap')
++
++        cdef ITYPE_t i, i_child1, i_child2, i_swap
++        cdef NodeHeapData_t* data_arr = &self.data[0]
++        cdef NodeHeapData_t popped_element = data_arr[0]
++
++        # pop off the first element, move the last element to the front,
++        # and then perform swaps until the heap is back in order
++        data_arr[0] = data_arr[self.n - 1]
++        self.n -= 1
++
++        i = 0
++
++        while (i < self.n):
++            i_child1 = 2 * i + 1
++            i_child2 = 2 * i + 2
++            i_swap = 0
++
++            if i_child2 < self.n:
++                if data_arr[i_child1].val <= data_arr[i_child2].val:
++                    i_swap = i_child1
++                else:
++                    i_swap = i_child2
++            elif i_child1 < self.n:
++                i_swap = i_child1
++            else:
++                break
++
++            if (i_swap > 0) and (data_arr[i_swap].val <= data_arr[i].val):
++                swap_nodes(data_arr, i, i_swap)
++                i = i_swap
++            else:
++                break
++
++        return popped_element
++
++    cdef void clear(self):
++        """Clear the heap"""
++        self.n = 0
++
++
++######################################################################
++# newObj function
++#  this is a helper function for pickling
++def newObj(obj):
++    return obj.__new__(obj)
++
++
++######################################################################
++# define the reverse mapping of VALID_METRICS
++from dist_metrics import get_valid_metric_ids
++VALID_METRIC_IDS = get_valid_metric_ids(VALID_METRICS)
++
++
++######################################################################
++# Binary Tree class
++cdef class BinaryTree:
++    __doc__ = CLASS_DOC
++
++    cdef np.ndarray data_arr
++    cdef np.ndarray idx_array_arr
++    cdef np.ndarray node_data_arr
++    cdef np.ndarray node_bounds_arr
++
++    cdef readonly DTYPE_t[:, ::1] data
++    cdef public ITYPE_t[::1] idx_array
++    cdef public NodeData_t[::1] node_data
++    cdef public DTYPE_t[:, :, ::1] node_bounds
++
++    cdef ITYPE_t leaf_size
++    cdef ITYPE_t n_levels
++    cdef ITYPE_t n_nodes
++
++    cdef DistanceMetric dist_metric
++    cdef int euclidean
++
++    # variables to keep track of building & querying stats
++    cdef int n_trims
++    cdef int n_leaves
++    cdef int n_splits
++    cdef int n_calls
++
++    valid_metrics = VALID_METRIC_IDS
++
++    # Use cinit to initialize all arrays to empty: this will prevent memory
++    # errors and seg-faults in rare cases where __init__ is not called
++    def __cinit__(self):
++        self.data_arr = np.empty((1, 1), dtype=DTYPE, order='C')
++        self.idx_array_arr = np.empty(1, dtype=ITYPE, order='C')
++        self.node_data_arr = np.empty(1, dtype=NodeData, order='C')
++        self.node_bounds_arr = np.empty((1, 1, 1), dtype=DTYPE)
++
++        self.data = get_memview_DTYPE_2D(self.data_arr)
++        self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
++        self.node_data = get_memview_NodeData_1D(self.node_data_arr)
++        self.node_bounds = get_memview_DTYPE_3D(self.node_bounds_arr)
++
++        self.leaf_size = 0
++        self.n_levels = 0
++        self.n_nodes = 0
++
++        self.euclidean = False
++
++        self.n_trims = 0
++        self.n_leaves = 0
++        self.n_splits = 0
++        self.n_calls = 0
++
++    def __init__(self, data,
++                 leaf_size=40, metric='minkowski', **kwargs):
++        self.data_arr = np.asarray(data, dtype=DTYPE, order='C')
++        self.data = get_memview_DTYPE_2D(self.data_arr)
++
++        self.leaf_size = leaf_size
++        self.dist_metric = DistanceMetric.get_metric(metric, **kwargs)
++        self.euclidean = (self.dist_metric.__class__.__name__
++                          == 'EuclideanDistance')
++
++        metric = self.dist_metric.__class__.__name__
++        if metric not in VALID_METRICS:
++            raise ValueError('metric {metric} is not valid for '
++                             '{BinaryTree}'.format(metric=metric,
++                                                   **DOC_DICT))
++
++        # validate data
++        if self.data.size == 0:
++            raise ValueError("X is an empty array")
++
++        if leaf_size < 1:
++            raise ValueError("leaf_size must be greater than or equal to 1")
++
++        n_samples = self.data.shape[0]
++        n_features = self.data.shape[1]
++
++        # determine number of levels in the tree, and from this
++        # the number of nodes in the tree.  This results in leaf nodes
++        # with numbers of points between leaf_size and 2 * leaf_size
++        self.n_levels = np.log2(fmax(1, (n_samples - 1) / self.leaf_size)) + 1
++        self.n_nodes = (2 ** self.n_levels) - 1
++
++        # allocate arrays for storage
++        self.idx_array_arr = np.arange(n_samples, dtype=ITYPE)
++        self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
++
++        self.node_data_arr = np.zeros(self.n_nodes, dtype=NodeData)
++        self.node_data = get_memview_NodeData_1D(self.node_data_arr)
++
++        # Allocate tree-specific data
++        allocate_data(self, self.n_nodes, n_features)
++        self._recursive_build(0, 0, n_samples)
++
++    def __reduce__(self):
++        """
++        reduce method used for pickling
++        """
++        return (newObj, (BinaryTree,), self.__getstate__())
++
++    def __getstate__(self):
++        """
++        get state for pickling
++        """
++        return (self.data_arr,
++                self.idx_array_arr,
++                self.node_data_arr,
++                self.node_bounds_arr,
++                int(self.leaf_size),
++                int(self.n_levels),
++                int(self.n_nodes),
++                int(self.n_trims),
++                int(self.n_leaves),
++                int(self.n_splits),
++                int(self.n_calls),
++                self.dist_metric)
++
++    def __setstate__(self, state):
++        """
++        set state for pickling
++        """
++        self.data_arr = state[0]
++        self.idx_array_arr = state[1]
++        self.node_data_arr = state[2]
++        self.node_bounds_arr = state[3]
++
++        self.data = get_memview_DTYPE_2D(self.data_arr)
++        self.idx_array = get_memview_ITYPE_1D(self.idx_array_arr)
++        self.node_data = get_memview_NodeData_1D(self.node_data_arr)
++        self.node_bounds = get_memview_DTYPE_3D(self.node_bounds_arr)
++
++        self.leaf_size = state[4]
++        self.n_levels = state[5]
++        self.n_nodes = state[6]
++        self.n_trims = state[7]
++        self.n_leaves = state[8]
++        self.n_splits = state[9]
++        self.n_calls = state[10]
++        self.dist_metric = state[11]
++        self.euclidean = (self.dist_metric.__class__.__name__
++                          == 'EuclideanDistance')
++
++    def get_tree_stats(self):
++        return (self.n_trims, self.n_leaves, self.n_splits)
++
++    def reset_n_calls(self):
++        self.n_calls = 0
++
++    def get_n_calls(self):
++        return self.n_calls
++
++    def get_arrays(self):
++        return (self.data_arr, self.idx_array_arr,
++                self.node_data_arr, self.node_bounds_arr)
++
++    cdef inline DTYPE_t dist(self, DTYPE_t* x1, DTYPE_t* x2,
++                             ITYPE_t size) except -1:
++        """Compute the distance between arrays x1 and x2"""
++        self.n_calls += 1
++        if self.euclidean:
++            return euclidean_dist(x1, x2, size)
++        else:
++            return self.dist_metric.dist(x1, x2, size)
++
++    cdef inline DTYPE_t rdist(self, DTYPE_t* x1, DTYPE_t* x2,
++                              ITYPE_t size) except -1:
++        """Compute the reduced distance between arrays x1 and x2.
++
++        The reduced distance, defined for some metrics, is a quantity which
++        is more efficient to compute than the distance, but preserves the
++        relative rankings of the true distance.  For example, the reduced
++        distance for the Euclidean metric is the squared-euclidean distance.
++        """
++        self.n_calls += 1
++        if self.euclidean:
++            return euclidean_rdist(x1, x2, size)
++        else:
++            return self.dist_metric.rdist(x1, x2, size)
++
++    cdef int _recursive_build(self, ITYPE_t i_node, ITYPE_t idx_start,
++                              ITYPE_t idx_end) except -1:
++        """Recursively build the tree.
++
++        Parameters
++        ----------
++        i_node : int
++            the node for the current step
++        idx_start, idx_end : int
++            the bounding indices in the idx_array which define the points that
++            belong to this node.
++        """
++        cdef ITYPE_t i_max
++        cdef ITYPE_t n_features = self.data.shape[1]
++        cdef ITYPE_t n_points = idx_end - idx_start
++        cdef ITYPE_t n_mid = n_points / 2
++        cdef ITYPE_t* idx_array = &self.idx_array[idx_start]
++        cdef DTYPE_t* data = &self.data[0, 0]
++
++        # initialize node data
++        init_node(self, i_node, idx_start, idx_end)
++
++        if 2 * i_node + 1 >= self.n_nodes:
++            self.node_data[i_node].is_leaf = True
++            if idx_end - idx_start > 2 * self.leaf_size:
++                # this shouldn't happen if our memory allocation is correct
++                # we'll proactively prevent memory errors, but raise a
++                # warning saying we're doing so.
++                import warnings
++                warnings.warn("Internal: memory layout is flawed: "
++                              "not enough nodes allocated")
++
++        elif idx_end - idx_start < 2:
++            # again, this shouldn't happen if our memory allocation
++            # is correct.  Raise a warning.
++            import warnings
++            warnings.warn("Internal: memory layout is flawed: "
++                          "too many nodes allocated")
++            self.node_data[i_node].is_leaf = True
++
++        else:
++            # split node and recursively construct child nodes.
++            self.node_data[i_node].is_leaf = False
++            i_max = find_node_split_dim(data, idx_array,
++                                        n_features, n_points)
++            partition_node_indices(data, idx_array, i_max, n_mid,
++                                   n_features, n_points)
++            self._recursive_build(2 * i_node + 1,
++                                  idx_start, idx_start + n_mid)
++            self._recursive_build(2 * i_node + 2,
++                                  idx_start + n_mid, idx_end)
++
++    def query(self, X, k=1, return_distance=True,
++              dualtree=False, breadth_first=False,
++              sort_results=True):
++        """
++        query(X, k=1, return_distance=True,
++              dualtree=False, breadth_first=False)
++
++        query the tree for the k nearest neighbors
++
++        Parameters
++        ----------
++        X : array-like, last dimension self.dim
++            An array of points to query
++        k : integer  (default = 1)
++            The number of nearest neighbors to return
++        return_distance : boolean (default = True)
++            if True, return a tuple (d, i) of distances and indices
++            if False, return array i
++        dualtree : boolean (default = False)
++            if True, use the dual tree formalism for the query: a tree is
++            built for the query points, and the pair of trees is used to
++            efficiently search this space.  This can lead to better
++            performance as the number of points grows large.
++        breadth_first : boolean (default = False)
++            if True, then query the nodes in a breadth-first manner.
++            Otherwise, query the nodes in a depth-first manner.
++        sort_results : boolean (default = True)
++            if True, then distances and indices of each point are sorted
++            on return, so that the first column contains the closest points.
++            Otherwise, neighbors are returned in an arbitrary order.
++
++        Returns
++        -------
++        i    : if return_distance == False
++        (d,i) : if return_distance == True
++
++        d : array of doubles - shape: x.shape[:-1] + (k,)
++            each entry gives the list of distances to the
++            neighbors of the corresponding point
++
++        i : array of integers - shape: x.shape[:-1] + (k,)
++            each entry gives the list of indices of
++            neighbors of the corresponding point
++
++        Examples
++        --------
++        Query for k-nearest neighbors
++
++            >>> import numpy as np
++            >>> np.random.seed(0)
++            >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
++            >>> tree = BinaryTree(X, leaf_size=2)    # doctest: +SKIP
++            >>> dist, ind = tree.query(X[0], k=3)    # doctest: +SKIP
++            >>> print ind  # indices of 3 closest neighbors
++            [0 3 1]
++            >>> print dist  # distances to 3 closest neighbors
++            [ 0.          0.19662693  0.29473397]
++        """
++        # XXX: we should allow X to be a pre-built tree.
++        X = array2d(X, dtype=DTYPE, order='C')
++
++        if X.shape[X.ndim - 1] != self.data.shape[1]:
++            raise ValueError("query data dimension must "
++                             "match training data dimension")
++
++        if self.data.shape[0] < k:
++            raise ValueError("k must be less than or equal "
++                             "to the number of training points")
++
++        # flatten X, and save original shape information
++        np_Xarr = X.reshape((-1, self.data.shape[1]))
++        cdef DTYPE_t[:, ::1] Xarr = get_memview_DTYPE_2D(np_Xarr)
++        cdef DTYPE_t reduced_dist_LB
++        cdef ITYPE_t i
++        cdef DTYPE_t* pt
++
++        # initialize heap for neighbors
++        cdef NeighborsHeap heap = NeighborsHeap(Xarr.shape[0], k)
++
++        # node heap for breadth-first queries
++        cdef NodeHeap nodeheap
++        if breadth_first:
++            nodeheap = NodeHeap(self.data.shape[0] // self.leaf_size)
++
++        # bounds is needed for the dual tree algorithm
++        cdef DTYPE_t[::1] bounds
++
++        self.n_trims = 0
++        self.n_leaves = 0
++        self.n_splits = 0
++
++        if dualtree:
++            other = self.__class__(np_Xarr, metric=self.dist_metric,
++                                   leaf_size=self.leaf_size)
++            if breadth_first:
++                self._query_dual_breadthfirst(other, heap, nodeheap)
++            else:
++                reduced_dist_LB = min_rdist_dual(self, 0, other, 0)
++                bounds = np.inf + np.zeros(other.node_data.shape[0])
++                self._query_dual_depthfirst(0, other, 0, bounds,
++                                            heap, reduced_dist_LB)
++
++        else:
++            pt = &Xarr[0, 0]
++            if breadth_first:
++                for i in range(Xarr.shape[0]):
++                    self._query_single_breadthfirst(pt, i, heap, nodeheap)
++                    pt += Xarr.shape[1]
++            else:
++                for i in range(Xarr.shape[0]):
++                    reduced_dist_LB = min_rdist(self, 0, pt)
++                    self._query_single_depthfirst(0, pt, i, heap,
++                                                  reduced_dist_LB)
++                    pt += Xarr.shape[1]
++
++        distances, indices = heap.get_arrays(sort=sort_results)
++        distances = self.dist_metric.rdist_to_dist(distances)
++
++        # deflatten results
++        if return_distance:
++            return (distances.reshape(X.shape[:X.ndim - 1] + (k,)),
++                    indices.reshape(X.shape[:X.ndim - 1] + (k,)))
++        else:
++            return indices.reshape(X.shape[:X.ndim - 1] + (k,))
++
++    def query_radius(self, X, r, return_distance=False,
++                     int count_only=False, int sort_results=False):
++        """
++        query_radius(self, X, r, count_only = False):
++
++        query the tree for neighbors within a radius r
++
++        Parameters
++        ----------
++        X : array-like, last dimension self.dim
++            An array of points to query
++        r : distance within which neighbors are returned
++            r can be a single value, or an array of values of shape
++            x.shape[:-1] if different radii are desired for each point.
++        return_distance : boolean (default = False)
++            if True,  return distances to neighbors of each point
++            if False, return only neighbors
++            Note that unlike the query() method, setting return_distance=True
++            here adds to the computation time.  Not all distances need to be
++            calculated explicitly for return_distance=False.  Results are
++            not sorted by default: see ``sort_results`` keyword.
++        count_only : boolean (default = False)
++            if True,  return only the count of points within distance r
++            if False, return the indices of all points within distance r
++            If return_distance==True, setting count_only=True will
++            result in an error.
++        sort_results : boolean (default = False)
++            if True, the distances and indices will be sorted before being
++            returned.  If False, the results will not be sorted.  If
++            return_distance == False, setting sort_results = True will
++            result in an error.
++
++        Returns
++        -------
++        count       : if count_only == True
++        ind         : if count_only == False and return_distance == False
++        (ind, dist) : if count_only == False and return_distance == True
++
++        count : array of integers, shape = X.shape[:-1]
++            each entry gives the number of neighbors within
++            a distance r of the corresponding point.
++
++        ind : array of objects, shape = X.shape[:-1]
++            each element is a numpy integer array listing the indices of
++            neighbors of the corresponding point.  Note that unlike
++            the results of a k-neighbors query, the returned neighbors
++            are not sorted by distance by default.
++
++        dist : array of objects, shape = X.shape[:-1]
++            each element is a numpy double array
++            listing the distances corresponding to the indices in ``ind``.
++
++        Examples
++        --------
++        Query for neighbors in a given radius
++
++        >>> import numpy as np
++        >>> np.random.seed(0)
++        >>> X = np.random.random((10, 3))  # 10 points in 3 dimensions
++        >>> tree = BinaryTree(X, leaf_size=2)     # doctest: +SKIP
++        >>> print(tree.query_radius(X[0], r=0.3, count_only=True))
++        3
++        >>> ind = tree.query_radius(X[0], r=0.3)  # doctest: +SKIP
++        >>> print(ind)  # indices of neighbors within distance 0.3
++        [3 0 1]
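++
++        To also retrieve distances, sorted per point (an illustrative
++        sketch; output omitted since it depends on the data):
++
++        >>> ind, dist = tree.query_radius(X[0], r=0.3,
++        ...                               return_distance=True,
++        ...                               sort_results=True)  # doctest: +SKIP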
++        """
++        if count_only and return_distance:
++            raise ValueError("count_only and return_distance "
++                             "cannot both be true")
++
++        if sort_results and not return_distance:
++            raise ValueError("return_distance must be True "
++                             "if sort_results is True")
++
++        cdef ITYPE_t i, count_i = 0
++        cdef ITYPE_t n_features = self.data.shape[1]
++        cdef DTYPE_t[::1] dist_arr_i
++        cdef ITYPE_t[::1] idx_arr_i, counts
++        cdef DTYPE_t* pt
++
++        # validate X and prepare for query
++        X = array2d(X, dtype=DTYPE, order='C')
++
++        if X.shape[X.ndim - 1] != self.data.shape[1]:
++            raise ValueError("query data dimension must "
++                             "match training data dimension")
++
++        cdef DTYPE_t[:, ::1] Xarr =\
++                get_memview_DTYPE_2D(X.reshape((-1, self.data.shape[1])))
++
++        # prepare r for query
++        r = np.asarray(r, dtype=DTYPE, order='C')
++        r = np.atleast_1d(r)
++        if r.shape == (1,):
++            r = r[0] + np.zeros(X.shape[:X.ndim - 1], dtype=DTYPE)
++        else:
++            if r.shape != X.shape[:X.ndim - 1]:
++                raise ValueError("r must be broadcastable to X.shape")
++
++        rarr_np = r.reshape(-1)  # store explicitly to keep in scope
++        cdef DTYPE_t[::1] rarr = get_memview_DTYPE_1D(rarr_np)
++
++        # prepare variables for iteration
++        if not count_only:
++            indices = np.zeros(Xarr.shape[0], dtype='object')
++            if return_distance:
++                distances = np.zeros(Xarr.shape[0], dtype='object')
++
++        np_idx_arr = np.zeros(self.data.shape[0], dtype=ITYPE)
++        idx_arr_i = get_memview_ITYPE_1D(np_idx_arr)
++
++        np_dist_arr = np.zeros(self.data.shape[0], dtype=DTYPE)
++        dist_arr_i = get_memview_DTYPE_1D(np_dist_arr)
++
++        counts_arr = np.zeros(Xarr.shape[0], dtype=ITYPE)
++        counts = get_memview_ITYPE_1D(counts_arr)
++
++        pt = &Xarr[0, 0]
++        for i in range(Xarr.shape[0]):
++            counts[i] = self._query_radius_single(0, pt, rarr[i],
++                                                  &idx_arr_i[0],
++                                                  &dist_arr_i[0],
++                                                  0, count_only,
++                                                  return_distance)
++            pt += n_features
++
++            if count_only:
++                pass
++            else:
++                if sort_results:
++                    _simultaneous_sort(&dist_arr_i[0], &idx_arr_i[0],
++                                       counts[i])
++
++                indices[i] = np_idx_arr[:counts[i]].copy()
++                if return_distance:
++                    distances[i] = np_dist_arr[:counts[i]].copy()
++
++        # deflatten results
++        if count_only:
++            return counts_arr.reshape(X.shape[:X.ndim - 1])
++        elif return_distance:
++            return (indices.reshape(X.shape[:X.ndim - 1]),
++                    distances.reshape(X.shape[:X.ndim - 1]))
++        else:
++            return indices.reshape(X.shape[:X.ndim - 1])
++
++    def kernel_density(BinaryTree self, X, h, kernel='gaussian',
++                       atol=0, rtol=1E-8,
++                       breadth_first=True, return_log=False):
++        """
++        kernel_density(self, X, h, kernel='gaussian', atol=0, rtol=1E-8,
++                       breadth_first=True, return_log=False)
++
++        Compute the kernel density estimate at points X with the given kernel,
++        using the distance metric specified at tree creation.
++
++        Parameters
++        ----------
++        X : array_like
++            An array of points to query.  Last dimension should match dimension
++            of training data.
++        h : float
++            the bandwidth of the kernel
++        kernel : string
++            specify the kernel to use.  Options are
++            - 'gaussian'
++            - 'tophat'
++            - 'epanechnikov'
++            - 'exponential'
++            - 'linear'
++            - 'cosine'
++            Default is kernel = 'gaussian'
++        atol, rtol : float (default: atol = 0, rtol = 1E-8)
++            Specify the desired absolute and relative tolerance of the
++            result.  If the true result is K_true, then the returned
++            result K_ret satisfies
++            ``abs(K_true - K_ret) < atol + rtol * K_ret``.
++        breadth_first : boolean (default = True)
++            if True (the default), use a breadth-first search.  If False,
++            use a depth-first search.  Breadth-first is generally faster
++            for compact kernels and/or high tolerances.
++        return_log : boolean (default = False)
++            return the logarithm of the result.  This can be more accurate
++            than returning the result itself for narrow kernels.
++
++        Returns
++        -------
++        density : ndarray
++            The array of (log)-density evaluations, shape = X.shape[:-1]
++
++        Examples
++        --------
++        Compute a gaussian kernel density estimate:
++
++        >>> import numpy as np
++        >>> np.random.seed(1)
++        >>> X = np.random.random((100, 3))
++        >>> tree = BinaryTree(X)           # doctest: +SKIP
++        >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')
++        array([ 6.94114649,  7.83281226,  7.2071716 ])
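++
++        The same estimate on a log scale (an illustrative sketch; the
++        values are just the logarithms of the densities above):
++
++        >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian',
++        ...                     return_log=True)  # doctest: +SKIP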
++        """
++        cdef DTYPE_t h_c = h
++        cdef DTYPE_t log_atol = log(atol)
++        cdef DTYPE_t log_rtol = log(rtol)
++        cdef DTYPE_t log_min_bound, log_max_bound, log_bound_spread
++        cdef DTYPE_t dist_LB = 0, dist_UB = 0
++
++        cdef ITYPE_t n_samples = self.data.shape[0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++        cdef ITYPE_t i
++        cdef KernelType kernel_c
++
++        # validate kernel
++        if kernel == 'gaussian':
++            kernel_c = GAUSSIAN_KERNEL
++        elif kernel == 'tophat':
++            kernel_c = TOPHAT_KERNEL
++        elif kernel == 'epanechnikov':
++            kernel_c = EPANECHNIKOV_KERNEL
++        elif kernel == 'exponential':
++            kernel_c = EXPONENTIAL_KERNEL
++        elif kernel == 'linear':
++            kernel_c = LINEAR_KERNEL
++        elif kernel == 'cosine':
++            kernel_c = COSINE_KERNEL
++        else:
++            raise ValueError("kernel = '%s' not recognized" % kernel)
++
++        cdef DTYPE_t log_knorm = _log_kernel_norm(h_c, n_features, kernel_c)
++
++        # validate X and prepare for query
++        X = array2d(X, dtype=DTYPE, order='C')
++
++        if X.shape[X.ndim - 1] != n_features:
++            raise ValueError("query data dimension must "
++                             "match training data dimension")
++        Xarr_np = X.reshape((-1, n_features))
++        cdef DTYPE_t[:, ::1] Xarr = get_memview_DTYPE_2D(Xarr_np)
++
++        log_density_arr = np.zeros(Xarr.shape[0], dtype=DTYPE)
++        cdef DTYPE_t[::1] log_density = get_memview_DTYPE_1D(log_density_arr)
++
++        cdef DTYPE_t* pt = &Xarr[0, 0]
++
++        cdef NodeHeap nodeheap
++        if breadth_first:
++            nodeheap = NodeHeap(self.data.shape[0] // self.leaf_size)
++        cdef DTYPE_t[::1] node_log_min_bounds
++        cdef DTYPE_t[::1] node_bound_widths
++        # TODO: implement dual tree approach.
++        #       this is difficult because of the need to cache values
++        #       computed between node pairs.
++        if breadth_first:
++            node_log_min_bounds_arr = -np.inf + np.zeros(self.n_nodes)
++            node_log_min_bounds = get_memview_DTYPE_1D(node_log_min_bounds_arr)
++            node_bound_widths_arr = np.zeros(self.n_nodes)
++            node_bound_widths = get_memview_DTYPE_1D(node_bound_widths_arr)
++            for i in range(Xarr.shape[0]):
++                log_density[i] = self._kde_single_breadthfirst(
++                                            pt, kernel_c, h_c,
++                                            log_knorm, log_atol, log_rtol,
++                                            nodeheap,
++                                            &node_log_min_bounds[0],
++                                            &node_bound_widths[0])
++                pt += n_features
++        else:
++            for i in range(Xarr.shape[0]):
++                min_max_dist(self, 0, pt, &dist_LB, &dist_UB)
++                # compute max & min bounds on density within top node
++                log_min_bound = (log(n_samples) +
++                                 compute_log_kernel(dist_UB,
++                                                    h_c, kernel_c))
++                log_max_bound = (log(n_samples) +
++                                 compute_log_kernel(dist_LB,
++                                                    h_c, kernel_c))
++                log_bound_spread = logsubexp(log_max_bound, log_min_bound)
++                self._kde_single_depthfirst(0, pt, kernel_c, h_c,
++                                            log_knorm, log_atol, log_rtol,
++                                            log_min_bound,
++                                            log_bound_spread,
++                                            &log_min_bound,
++                                            &log_bound_spread)
++                log_density[i] = logaddexp(log_min_bound,
++                                           log_bound_spread - log(2))
++                pt += n_features
++
++        # normalize the results
++        for i in range(log_density.shape[0]):
++            log_density[i] += log_knorm
++
++        log_density_arr = log_density_arr.reshape(X.shape[:X.ndim - 1])
++
++        if return_log:
++            return log_density_arr
++        else:
++            return np.exp(log_density_arr)
++
++    def two_point_correlation(self, X, r, dualtree=False):
++        """Compute the two-point correlation function
++
++        Parameters
++        ----------
++        X : array_like
++            An array of points to query.  Last dimension should match dimension
++            of training data.
++        r : array_like
++            A one-dimensional array of distances
++        dualtree : boolean (default = False)
++            If True, use a dual-tree algorithm.  Otherwise, use a
++            single-tree algorithm.  Dual-tree algorithms can have better
++            scaling for large N.
++
++        Returns
++        -------
++        counts : ndarray
++            counts[i] contains the number of pairs of points with distance
++            less than or equal to r[i]
++
++        Examples
++        --------
++        Compute the two-point autocorrelation function of X:
++
++        >>> import numpy as np
++        >>> np.random.seed(0)
++        >>> X = np.random.random((30, 3))
++        >>> r = np.linspace(0, 1, 5)
++        >>> tree = BinaryTree(X)     # doctest: +SKIP
++        >>> tree.two_point_correlation(X, r)
++        array([ 30,  62, 278, 580, 820])
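++
++        The dual-tree variant computes the same counts, and can scale
++        better for large N (an illustrative sketch):
++
++        >>> tree.two_point_correlation(X, r, dualtree=True)  # doctest: +SKIP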
++        """
++        cdef ITYPE_t n_features = self.data.shape[1]
++        cdef ITYPE_t i
++
++        # validate X and prepare for query
++        X = array2d(X, dtype=DTYPE, order='C')
++
++        if X.shape[X.ndim - 1] != self.data.shape[1]:
++            raise ValueError("query data dimension must "
++                             "match training data dimension")
++
++        np_Xarr = X.reshape((-1, self.data.shape[1]))
++        cdef DTYPE_t[:, ::1] Xarr = get_memview_DTYPE_2D(np_Xarr)
++
++        # prepare r for query
++        r = np.asarray(r, dtype=DTYPE, order='C')
++        r = np.atleast_1d(r).astype(DTYPE)
++        if r.ndim != 1:
++            raise ValueError("r must be a 1-dimensional array")
++        i_rsort = np.argsort(r)
++        rarr_np = r[i_rsort]  # needed to keep memory in scope
++        cdef DTYPE_t[::1] rarr = get_memview_DTYPE_1D(rarr_np)
++
++        # create array to hold counts
++        count = np.zeros(r.shape[0], dtype=ITYPE)
++        cdef ITYPE_t[::1] carr = get_memview_ITYPE_1D(count)
++
++        cdef DTYPE_t* pt = &Xarr[0, 0]
++
++        if dualtree:
++            other = self.__class__(Xarr, metric=self.dist_metric,
++                                   leaf_size=self.leaf_size)
++            self._two_point_dual(0, other, 0, &rarr[0], &carr[0],
++                                 0, rarr.shape[0])
++        else:
++            for i in range(Xarr.shape[0]):
++                self._two_point_single(0, pt, &rarr[0], &carr[0],
++                                       0, rarr.shape[0])
++                pt += n_features
++
++        return count
++
++    cdef int _query_single_depthfirst(BinaryTree self, ITYPE_t i_node,
++                                      DTYPE_t* pt, ITYPE_t i_pt,
++                                      NeighborsHeap heap,
++                                      DTYPE_t reduced_dist_LB) except -1:
++        """Recursive Single-tree k-neighbors query, depth-first approach"""
++        cdef NodeData_t node_info = self.node_data[i_node]
++
++        cdef DTYPE_t dist_pt, reduced_dist_LB_1, reduced_dist_LB_2
++        cdef ITYPE_t i, i1, i2
++
++        cdef DTYPE_t* data = &self.data[0, 0]
++
++        #------------------------------------------------------------
++        # Case 1: query point is outside node radius:
++        #         trim it from the query
++        if reduced_dist_LB > heap.largest(i_pt):
++            self.n_trims += 1
++
++        #------------------------------------------------------------
++        # Case 2: this is a leaf node.  Update set of nearby points
++        elif node_info.is_leaf:
++            self.n_leaves += 1
++            for i in range(node_info.idx_start, node_info.idx_end):
++                dist_pt = self.rdist(pt,
++                                     &self.data[self.idx_array[i], 0],
++                                     self.data.shape[1])
++                if dist_pt < heap.largest(i_pt):
++                    heap.push(i_pt, dist_pt, self.idx_array[i])
++
++        #------------------------------------------------------------
++        # Case 3: Node is not a leaf.  Recursively query subnodes
++        #         starting with the closest
++        else:
++            self.n_splits += 1
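++            # the nodes form an implicit complete binary tree, so the
++            # children of node i_node live at 2*i_node+1 and 2*i_node+2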
++            i1 = 2 * i_node + 1
++            i2 = i1 + 1
++            reduced_dist_LB_1 = min_rdist(self, i1, pt)
++            reduced_dist_LB_2 = min_rdist(self, i2, pt)
++
++            # recursively query subnodes
++            if reduced_dist_LB_1 <= reduced_dist_LB_2:
++                self._query_single_depthfirst(i1, pt, i_pt, heap,
++                                              reduced_dist_LB_1)
++                self._query_single_depthfirst(i2, pt, i_pt, heap,
++                                              reduced_dist_LB_2)
++            else:
++                self._query_single_depthfirst(i2, pt, i_pt, heap,
++                                              reduced_dist_LB_2)
++                self._query_single_depthfirst(i1, pt, i_pt, heap,
++                                              reduced_dist_LB_1)
++        return 0
++
++    cdef int _query_single_breadthfirst(BinaryTree self, DTYPE_t* pt,
++                                        ITYPE_t i_pt,
++                                        NeighborsHeap heap,
++                                        NodeHeap nodeheap) except -1:
++        """Non-recursive single-tree k-neighbors query, breadth-first search"""
++        cdef ITYPE_t i, i_node
++        cdef DTYPE_t dist_pt, reduced_dist_LB
++        cdef NodeData_t* node_data = &self.node_data[0]
++        cdef DTYPE_t* data = &self.data[0, 0]
++
++        # Set up the node heap and push the head node onto it
++        cdef NodeHeapData_t nodeheap_item
++        nodeheap_item.val = min_rdist(self, 0, pt)
++        nodeheap_item.i1 = 0
++        nodeheap.push(nodeheap_item)
++
++        while nodeheap.n > 0:
++            nodeheap_item = nodeheap.pop()
++            reduced_dist_LB = nodeheap_item.val
++            i_node = nodeheap_item.i1
++            node_info = node_data[i_node]
++
++            #------------------------------------------------------------
++            # Case 1: query point is outside node radius:
++            #         trim it from the query
++            if reduced_dist_LB > heap.largest(i_pt):
++                self.n_trims += 1
++
++            #------------------------------------------------------------
++            # Case 2: this is a leaf node.  Update set of nearby points
++            elif node_data[i_node].is_leaf:
++                self.n_leaves += 1
++                for i in range(node_data[i_node].idx_start,
++                               node_data[i_node].idx_end):
++                    dist_pt = self.rdist(pt,
++                                         &self.data[self.idx_array[i], 0],
++                                         self.data.shape[1])
++                    if dist_pt < heap.largest(i_pt):
++                        heap.push(i_pt, dist_pt, self.idx_array[i])
++
++            #------------------------------------------------------------
++            # Case 3: Node is not a leaf.  Add subnodes to the node heap
++            else:
++                self.n_splits += 1
++                for i in range(2 * i_node + 1, 2 * i_node + 3):
++                    nodeheap_item.i1 = i
++                    nodeheap_item.val = min_rdist(self, i, pt)
++                    nodeheap.push(nodeheap_item)
++        return 0
++
++    cdef int _query_dual_depthfirst(BinaryTree self, ITYPE_t i_node1,
++                                    BinaryTree other, ITYPE_t i_node2,
++                                    DTYPE_t[::1] bounds,
++                                    NeighborsHeap heap,
++                                    DTYPE_t reduced_dist_LB) except -1:
++        """Recursive dual-tree k-neighbors query, depth-first"""
++        # note that the array `bounds` is maintained such that
++        # bounds[i] is the largest distance among any of the
++        # current neighbors in node i of the other tree.
++        cdef NodeData_t node_info1 = self.node_data[i_node1]
++        cdef NodeData_t node_info2 = other.node_data[i_node2]
++
++        cdef DTYPE_t* data1 = &self.data[0, 0]
++        cdef DTYPE_t* data2 = &other.data[0, 0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++
++        cdef DTYPE_t bound_max, dist_pt, reduced_dist_LB1, reduced_dist_LB2
++        cdef ITYPE_t i1, i2, i_pt, i_parent
++
++        #------------------------------------------------------------
++        # Case 1: nodes are further apart than the current bound:
++        #         trim both from the query
++        if reduced_dist_LB > bounds[i_node2]:
++            pass
++
++        #------------------------------------------------------------
++        # Case 2: both nodes are leaves:
++        #         do a brute-force search comparing all pairs
++        elif node_info1.is_leaf and node_info2.is_leaf:
++            bounds[i_node2] = 0
++
++            for i2 in range(node_info2.idx_start, node_info2.idx_end):
++                i_pt = other.idx_array[i2]
++
++                if heap.largest(i_pt) <= reduced_dist_LB:
++                    continue
++
++                for i1 in range(node_info1.idx_start, node_info1.idx_end):
++                    dist_pt = self.rdist(
++                        data1 + n_features * self.idx_array[i1],
++                        data2 + n_features * i_pt,
++                        n_features)
++                    if dist_pt < heap.largest(i_pt):
++                        heap.push(i_pt, dist_pt, self.idx_array[i1])
++
++                # keep track of node bound
++                bounds[i_node2] = fmax(bounds[i_node2],
++                                       heap.largest(i_pt))
++
++            # update bounds up the tree
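++            # (the parent of node i is (i - 1) // 2; a parent's bound can
++            # shrink to the larger of its two children's bounds)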
++            while i_node2 > 0:
++                i_parent = (i_node2 - 1) // 2
++                bound_max = fmax(bounds[2 * i_parent + 1],
++                                 bounds[2 * i_parent + 2])
++                if bound_max < bounds[i_parent]:
++                    bounds[i_parent] = bound_max
++                    i_node2 = i_parent
++                else:
++                    break
++
++        #------------------------------------------------------------
++        # Case 3a: node 1 is a leaf or is smaller: split node 2 and
++        #          recursively query, starting with the nearest subnode
++        elif node_info1.is_leaf or (not node_info2.is_leaf
++                                    and node_info2.radius > node_info1.radius):
++            reduced_dist_LB1 = min_rdist_dual(self, i_node1,
++                                              other, 2 * i_node2 + 1)
++            reduced_dist_LB2 = min_rdist_dual(self, i_node1,
++                                              other, 2 * i_node2 + 2)
++
++            if reduced_dist_LB1 < reduced_dist_LB2:
++                self._query_dual_depthfirst(i_node1, other, 2 * i_node2 + 1,
++                                            bounds, heap, reduced_dist_LB1)
++                self._query_dual_depthfirst(i_node1, other, 2 * i_node2 + 2,
++                                            bounds, heap, reduced_dist_LB2)
++            else:
++                self._query_dual_depthfirst(i_node1, other, 2 * i_node2 + 2,
++                                            bounds, heap, reduced_dist_LB2)
++                self._query_dual_depthfirst(i_node1, other, 2 * i_node2 + 1,
++                                            bounds, heap, reduced_dist_LB1)
++
++        #------------------------------------------------------------
++        # Case 3b: node 2 is a leaf or is smaller: split node 1 and
++        #          recursively query, starting with the nearest subnode
++        else:
++            reduced_dist_LB1 = min_rdist_dual(self, 2 * i_node1 + 1,
++                                              other, i_node2)
++            reduced_dist_LB2 = min_rdist_dual(self, 2 * i_node1 + 2,
++                                              other, i_node2)
++
++            if reduced_dist_LB1 < reduced_dist_LB2:
++                self._query_dual_depthfirst(2 * i_node1 + 1, other, i_node2,
++                                            bounds, heap, reduced_dist_LB1)
++                self._query_dual_depthfirst(2 * i_node1 + 2, other, i_node2,
++                                            bounds, heap, reduced_dist_LB2)
++            else:
++                self._query_dual_depthfirst(2 * i_node1 + 2, other, i_node2,
++                                            bounds, heap, reduced_dist_LB2)
++                self._query_dual_depthfirst(2 * i_node1 + 1, other, i_node2,
++                                            bounds, heap, reduced_dist_LB1)
++        return 0
++
++    cdef int _query_dual_breadthfirst(BinaryTree self, BinaryTree other,
++                                      NeighborsHeap heap,
++                                      NodeHeap nodeheap) except -1:
++        """Non-recursive dual-tree k-neighbors query, breadth-first"""
++        cdef ITYPE_t i, i1, i2, i_node1, i_node2, i_pt
++        cdef DTYPE_t dist_pt, reduced_dist_LB
++        cdef DTYPE_t[::1] bounds = np.inf + np.zeros(other.node_data.shape[0])
++        cdef NodeData_t* node_data1 = &self.node_data[0]
++        cdef NodeData_t* node_data2 = &other.node_data[0]
++        cdef NodeData_t node_info1, node_info2
++        cdef DTYPE_t* data1 = &self.data[0, 0]
++        cdef DTYPE_t* data2 = &other.data[0, 0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++
++        # Set up the node heap and push the head nodes onto it
++        cdef NodeHeapData_t nodeheap_item
++        nodeheap_item.val = min_rdist_dual(self, 0, other, 0)
++        nodeheap_item.i1 = 0
++        nodeheap_item.i2 = 0
++        nodeheap.push(nodeheap_item)
++
++        while nodeheap.n > 0:
++            nodeheap_item = nodeheap.pop()
++            reduced_dist_LB = nodeheap_item.val
++            i_node1 = nodeheap_item.i1
++            i_node2 = nodeheap_item.i2
++
++            node_info1 = node_data1[i_node1]
++            node_info2 = node_data2[i_node2]
++
++            #------------------------------------------------------------
++            # Case 1: nodes are further apart than the current bound:
++            #         trim both from the query
++            if reduced_dist_LB > bounds[i_node2]:
++                pass
++
++            #------------------------------------------------------------
++            # Case 2: both nodes are leaves:
++            #         do a brute-force search comparing all pairs
++            elif node_info1.is_leaf and node_info2.is_leaf:
++                bounds[i_node2] = -1
++
++                for i2 in range(node_info2.idx_start, node_info2.idx_end):
++                    i_pt = other.idx_array[i2]
++
++                    if heap.largest(i_pt) <= reduced_dist_LB:
++                        continue
++
++                    for i1 in range(node_info1.idx_start, node_info1.idx_end):
++                        dist_pt = self.rdist(
++                            data1 + n_features * self.idx_array[i1],
++                            data2 + n_features * i_pt,
++                            n_features)
++                        if dist_pt < heap.largest(i_pt):
++                            heap.push(i_pt, dist_pt, self.idx_array[i1])
++
++                    # keep track of node bound
++                    bounds[i_node2] = fmax(bounds[i_node2],
++                                           heap.largest(i_pt))
++
++            #------------------------------------------------------------
++            # Case 3a: node 1 is a leaf or is smaller: split node 2 and
++            #          recursively query, starting with the nearest subnode
++            elif node_info1.is_leaf or (not node_info2.is_leaf
++                                        and (node_info2.radius
++                                             > node_info1.radius)):
++                nodeheap_item.i1 = i_node1
++                for i2 in range(2 * i_node2 + 1, 2 * i_node2 + 3):
++                    nodeheap_item.i2 = i2
++                    nodeheap_item.val = min_rdist_dual(self, i_node1,
++                                                       other, i2)
++                    nodeheap.push(nodeheap_item)
++
++            #------------------------------------------------------------
++            # Case 3b: node 2 is a leaf or is smaller: split node 1 and
++            #          recursively query, starting with the nearest subnode
++            else:
++                nodeheap_item.i2 = i_node2
++                for i1 in range(2 * i_node1 + 1, 2 * i_node1 + 3):
++                    nodeheap_item.i1 = i1
++                    nodeheap_item.val = min_rdist_dual(self, i1,
++                                                       other, i_node2)
++                    nodeheap.push(nodeheap_item)
++        return 0
++
++    cdef ITYPE_t _query_radius_single(BinaryTree self,
++                                      ITYPE_t i_node,
++                                      DTYPE_t* pt, DTYPE_t r,
++                                      ITYPE_t* indices,
++                                      DTYPE_t* distances,
++                                      ITYPE_t count,
++                                      int count_only,
++                                      int return_distance) except -1:
++        """recursive single-tree radius query, depth-first"""
++        cdef DTYPE_t* data = &self.data[0, 0]
++        cdef ITYPE_t* idx_array = &self.idx_array[0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++        cdef NodeData_t node_info = self.node_data[i_node]
++
++        cdef ITYPE_t i
++        cdef DTYPE_t reduced_r
++
++        cdef DTYPE_t dist_pt, dist_LB = 0, dist_UB = 0
++        min_max_dist(self, i_node, pt, &dist_LB, &dist_UB)
++
++        #------------------------------------------------------------
++        # Case 1: all node points are outside distance r.
++        #         prune this branch.
++        if dist_LB > r:
++            pass
++
++        #------------------------------------------------------------
++        # Case 2: all node points are within distance r
++        #         add all points to neighbors
++        elif dist_UB <= r:
++            if count_only:
++                count += (node_info.idx_end - node_info.idx_start)
++            else:
++                for i in range(node_info.idx_start, node_info.idx_end):
++                    if (count < 0) or (count >= self.data.shape[0]):
++                        raise ValueError("Fatal: count too big: "
++                                         "this should never happen")
++                    indices[count] = idx_array[i]
++                    if return_distance:
++                        distances[count] = self.dist(pt, (data + n_features
++                                                          * idx_array[i]),
++                                                     n_features)
++                    count += 1
++
++        #------------------------------------------------------------
++        # Case 3: this is a leaf node.  Go through all points to
++        #         determine if they fall within radius
++        elif node_info.is_leaf:
++            reduced_r = self.dist_metric._dist_to_rdist(r)
++
++            for i in range(node_info.idx_start, node_info.idx_end):
++                dist_pt = self.rdist(pt, (data + n_features * idx_array[i]),
++                                     n_features)
++                if dist_pt <= reduced_r:
++                    if (count < 0) or (count >= self.data.shape[0]):
++                        raise ValueError("Fatal: count out of range. "
++                                         "This should never happen.")
++                    if count_only:
++                        pass
++                    else:
++                        indices[count] = idx_array[i]
++                        if return_distance:
++                            distances[count] =\
++                                self.dist_metric._rdist_to_dist(dist_pt)
++                    count += 1
++
++        #------------------------------------------------------------
++        # Case 4: Node is not a leaf.  Recursively query subnodes
++        else:
++            count = self._query_radius_single(2 * i_node + 1, pt, r,
++                                              indices, distances, count,
++                                              count_only, return_distance)
++            count = self._query_radius_single(2 * i_node + 2, pt, r,
++                                              indices, distances, count,
++                                              count_only, return_distance)
++
++        return count
++
++    cdef DTYPE_t _kde_single_breadthfirst(self, DTYPE_t* pt,
++                                          KernelType kernel, DTYPE_t h,
++                                          DTYPE_t log_knorm,
++                                          DTYPE_t log_atol, DTYPE_t log_rtol,
++                                          NodeHeap nodeheap,
++                                          DTYPE_t* node_log_min_bounds,
++                                          DTYPE_t* node_log_bound_spreads):
++        """non-recursive single-tree kernel density estimation"""
++        # For the given point, node_log_min_bounds and node_log_bound_spreads
++        # will encode the current bounds on the density between the point
++        # and the associated node.
++        # The variables global_log_min_bound and global_log_bound_spread
++        # keep track of the global bounds on density.  The procedure here is
++        # to split nodes, updating these bounds, until the bounds are within
++        # atol & rtol.
++        cdef ITYPE_t i, i1, i2, N1, N2, i_node
++        cdef DTYPE_t global_log_min_bound, global_log_bound_spread
++        cdef DTYPE_t global_log_max_bound
++
++        cdef DTYPE_t* data = &self.data[0, 0]
++        cdef ITYPE_t* idx_array = &self.idx_array[0]
++        cdef NodeData_t* node_data = &self.node_data[0]
++        cdef ITYPE_t N = self.data.shape[0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++
++        cdef NodeData_t node_info
++        cdef DTYPE_t dist_pt, log_density
++        cdef DTYPE_t dist_LB_1 = 0, dist_LB_2 = 0
++        cdef DTYPE_t dist_UB_1 = 0, dist_UB_2 = 0
++
++        cdef DTYPE_t dist_UB, dist_LB
++
++        # push the top node to the heap
++        cdef NodeHeapData_t nodeheap_item
++        nodeheap_item.val = min_dist(self, 0, pt)
++        nodeheap_item.i1 = 0
++        nodeheap.push(nodeheap_item)
++
++        global_log_min_bound = log(N) + compute_log_kernel(max_dist(self,
++                                                                    0, pt),
++                                                           h, kernel)
++        global_log_max_bound = log(N) + compute_log_kernel(nodeheap_item.val,
++                                                           h, kernel)
++        global_log_bound_spread = logsubexp(global_log_max_bound,
++                                            global_log_min_bound)
++
++        node_log_min_bounds[0] = global_log_min_bound
++        node_log_bound_spreads[0] = global_log_bound_spread
++
++        while nodeheap.n > 0:
++            nodeheap_item = nodeheap.pop()
++            i_node = nodeheap_item.i1
++
++            node_info = node_data[i_node]
++            N1 = node_info.idx_end - node_info.idx_start
++
++            #------------------------------------------------------------
++            # Case 1: local bounds are equal to within per-point tolerance.
++            if (log_knorm + node_log_bound_spreads[i_node] - log(N1) + log(N)
++                <= logaddexp(log_atol, (log_rtol + log_knorm
++                                        + node_log_min_bounds[i_node]))):
++                pass
++
++            #------------------------------------------------------------
++            # Case 2: global bounds are within rtol & atol.
++            elif (log_knorm + global_log_bound_spread
++                  <= logaddexp(log_atol,
++                               log_rtol + log_knorm + global_log_min_bound)):
++                break
++
++            #------------------------------------------------------------
++            # Case 3: node is a leaf. Count contributions from all points
++            elif node_info.is_leaf:
++                global_log_min_bound =\
++                    logsubexp(global_log_min_bound,
++                              node_log_min_bounds[i_node])
++                global_log_bound_spread =\
++                    logsubexp(global_log_bound_spread,
++                              node_log_bound_spreads[i_node])
++                for i in range(node_info.idx_start, node_info.idx_end):
++                    dist_pt = self.dist(pt, data + n_features * idx_array[i],
++                                        n_features)
++                    log_density = compute_log_kernel(dist_pt, h, kernel)
++                    global_log_min_bound = logaddexp(global_log_min_bound,
++                                                     log_density)
++
++            #------------------------------------------------------------
++            # Case 4: split node and query subnodes
++            else:
++                i1 = 2 * i_node + 1
++                i2 = 2 * i_node + 2
++
++                N1 = node_data[i1].idx_end - node_data[i1].idx_start
++                N2 = node_data[i2].idx_end - node_data[i2].idx_start
++
++                min_max_dist(self, i1, pt, &dist_LB_1, &dist_UB_1)
++                min_max_dist(self, i2, pt, &dist_LB_2, &dist_UB_2)
++
++                node_log_min_bounds[i1] = (log(N1) +
++                                           compute_log_kernel(dist_UB_1,
++                                                              h, kernel))
++                node_log_bound_spreads[i1] = (log(N1) +
++                                              compute_log_kernel(dist_LB_1,
++                                                                 h, kernel))
++
++                node_log_min_bounds[i2] = (log(N2) +
++                                           compute_log_kernel(dist_UB_2,
++                                                              h, kernel))
++                node_log_bound_spreads[i2] = (log(N2) +
++                                              compute_log_kernel(dist_LB_2,
++                                                                 h, kernel))
++
++                global_log_min_bound = logsubexp(global_log_min_bound,
++                                                 node_log_min_bounds[i_node])
++                global_log_min_bound = logaddexp(global_log_min_bound,
++                                                 node_log_min_bounds[i1])
++                global_log_min_bound = logaddexp(global_log_min_bound,
++                                                 node_log_min_bounds[i2])
++
++                global_log_bound_spread =\
++                    logsubexp(global_log_bound_spread,
++                              node_log_bound_spreads[i_node])
++                global_log_bound_spread = logaddexp(global_log_bound_spread,
++                                                    node_log_bound_spreads[i1])
++                global_log_bound_spread = logaddexp(global_log_bound_spread,
++                                                    node_log_bound_spreads[i2])
++
++                # TODO: rank by the spread rather than the distance?
++                nodeheap_item.val = dist_LB_1
++                nodeheap_item.i1 = i1
++                nodeheap.push(nodeheap_item)
++
++                nodeheap_item.val = dist_LB_2
++                nodeheap_item.i1 = i2
++                nodeheap.push(nodeheap_item)
++
++        nodeheap.clear()
++        return logaddexp(global_log_min_bound,
++                         global_log_bound_spread - log(2))
++
++    cdef int _kde_single_depthfirst(
++                   self, ITYPE_t i_node, DTYPE_t* pt,
++                   KernelType kernel, DTYPE_t h,
++                   DTYPE_t log_knorm,
++                   DTYPE_t log_atol, DTYPE_t log_rtol,
++                   DTYPE_t local_log_min_bound,
++                   DTYPE_t local_log_bound_spread,
++                   DTYPE_t* global_log_min_bound,
++                   DTYPE_t* global_log_bound_spread) except -1:
++        """recursive single-tree kernel density estimate, depth-first"""
++        # For the given point, local_min_bound and local_max_bound give the
++        # minimum and maximum density for the current node, while
++        # global_min_bound and global_max_bound give the minimum and maximum
++        # density over the entire tree.  We recurse down until global_min_bound
++        # and global_max_bound are within rtol and atol.
++        cdef ITYPE_t i, i1, i2, N1, N2
++
++        cdef DTYPE_t* data = &self.data[0, 0]
++        cdef ITYPE_t* idx_array = &self.idx_array[0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++
++        cdef NodeData_t node_info = self.node_data[i_node]
++        cdef DTYPE_t dist_pt, log_dens_contribution
++
++        cdef DTYPE_t child1_log_min_bound, child2_log_min_bound
++        cdef DTYPE_t child1_log_bound_spread, child2_log_bound_spread
++        cdef DTYPE_t dist_UB = 0, dist_LB = 0
++
++        N1 = node_info.idx_end - node_info.idx_start
++        N2 = self.data.shape[0]
++
++        #------------------------------------------------------------
++        # Case 1: local bounds are equal to within errors.  Return
++        if (log_knorm + local_log_bound_spread - log(N1) + log(N2)
++            <= logaddexp(log_atol, (log_rtol + log_knorm
++                                    + local_log_min_bound))):
++            pass
++
++        #------------------------------------------------------------
++        # Case 2: global bounds are within rtol & atol. Return
++        elif (log_knorm + global_log_bound_spread[0]
++            <= logaddexp(log_atol, (log_rtol + log_knorm
++                                    + global_log_min_bound[0]))):
++            pass
++
++        #------------------------------------------------------------
++        # Case 3: node is a leaf. Count contributions from all points
++        elif node_info.is_leaf:
++            global_log_min_bound[0] = logsubexp(global_log_min_bound[0],
++                                                local_log_min_bound)
++            global_log_bound_spread[0] = logsubexp(global_log_bound_spread[0],
++                                                   local_log_bound_spread)
++            for i in range(node_info.idx_start, node_info.idx_end):
++                dist_pt = self.dist(pt, (data + n_features * idx_array[i]),
++                                    n_features)
++                log_dens_contribution = compute_log_kernel(dist_pt, h, kernel)
++                global_log_min_bound[0] = logaddexp(global_log_min_bound[0],
++                                                    log_dens_contribution)
++
++        #------------------------------------------------------------
++        # Case 4: split node and query subnodes
++        else:
++            i1 = 2 * i_node + 1
++            i2 = 2 * i_node + 2
++
++            N1 = self.node_data[i1].idx_end - self.node_data[i1].idx_start
++            N2 = self.node_data[i2].idx_end - self.node_data[i2].idx_start
++
++            min_max_dist(self, i1, pt, &dist_LB, &dist_UB)
++            child1_log_min_bound = log(N1) + compute_log_kernel(dist_UB, h,
++                                                                kernel)
++            child1_log_bound_spread = logsubexp(log(N1) +
++                                                compute_log_kernel(dist_LB, h,
++                                                                   kernel),
++                                                child1_log_min_bound)
++
++            min_max_dist(self, i2, pt, &dist_LB, &dist_UB)
++            child2_log_min_bound = log(N2) + compute_log_kernel(dist_UB, h,
++                                                                kernel)
++            child2_log_bound_spread = logsubexp(log(N2) +
++                                                compute_log_kernel(dist_LB, h,
++                                                                   kernel),
++                                                child2_log_min_bound)
++
++            global_log_min_bound[0] = logsubexp(global_log_min_bound[0],
++                                                local_log_min_bound)
++            global_log_min_bound[0] = logaddexp(global_log_min_bound[0],
++                                                child1_log_min_bound)
++            global_log_min_bound[0] = logaddexp(global_log_min_bound[0],
++                                                child2_log_min_bound)
++
++            global_log_bound_spread[0] = logsubexp(global_log_bound_spread[0],
++                                                   local_log_bound_spread)
++            global_log_bound_spread[0] = logaddexp(global_log_bound_spread[0],
++                                                   child1_log_bound_spread)
++            global_log_bound_spread[0] = logaddexp(global_log_bound_spread[0],
++                                                   child2_log_bound_spread)
++
++            self._kde_single_depthfirst(i1, pt, kernel, h, log_knorm,
++                                        log_atol, log_rtol,
++                                        child1_log_min_bound,
++                                        child1_log_bound_spread,
++                                        global_log_min_bound,
++                                        global_log_bound_spread)
++            self._kde_single_depthfirst(i2, pt, kernel, h, log_knorm,
++                                        log_atol, log_rtol,
++                                        child2_log_min_bound,
++                                        child2_log_bound_spread,
++                                        global_log_min_bound,
++                                        global_log_bound_spread)
++        return 0
++
++    cdef int _two_point_single(self, ITYPE_t i_node, DTYPE_t* pt, DTYPE_t* r,
++                               ITYPE_t* count, ITYPE_t i_min,
++                               ITYPE_t i_max) except -1:
++        """recursive single-tree two-point correlation function query"""
++        cdef DTYPE_t* data = &self.data[0, 0]
++        cdef ITYPE_t* idx_array = &self.idx_array[0]
++        cdef ITYPE_t n_features = self.data.shape[1]
++        cdef NodeData_t node_info = self.node_data[i_node]
++
++        cdef ITYPE_t i, j, Npts
++        cdef DTYPE_t reduced_r
++
++        cdef DTYPE_t dist_pt, dist_LB = 0, dist_UB = 0
++        min_max_dist(self, i_node, pt, &dist_LB, &dist_UB)
++
++        #------------------------------------------------------------
++        # Go through bounds and check for cuts
++        while i_min < i_max:
++            if dist_LB > r[i_min]:
++                i_min += 1
++            else:
++                break
++
++        while i_max > i_min:
++            Npts = (node_info.idx_end - node_info.idx_start)
++            if dist_UB <= r[i_max - 1]:
++                count[i_max - 1] += Npts
++                i_max -= 1
++            else:
++                break
++
++        if i_min < i_max:
++            # If node is a leaf, go through all points
++            if node_info.is_leaf:
++                for i in range(node_info.idx_start, node_info.idx_end):
++                    dist_pt = self.dist(pt, (data + n_features * idx_array[i]),
++                                        n_features)
++                    j = i_max - 1
++                    while (j >= i_min) and (dist_pt <= r[j]):
++                        count[j] += 1
++                        j -= 1
++
++            else:
++                self._two_point_single(2 * i_node + 1, pt, r,
++                                       count, i_min, i_max)
++                self._two_point_single(2 * i_node + 2, pt, r,
++                                       count, i_min, i_max)
++        return 0
++
++    cdef int _two_point_dual(self, ITYPE_t i_node1,
++                             BinaryTree other, ITYPE_t i_node2,
++                             DTYPE_t* r, ITYPE_t* count,
++                             ITYPE_t i_min, ITYPE_t i_max) except -1:
++        """recursive dual-tree two-point correlation function query"""
++        cdef DTYPE_t* data1 = &self.data[0, 0]
++        cdef DTYPE_t* data2 = &other.data[0, 0]
++        cdef ITYPE_t* idx_array1 = &self.idx_array[0]
++        cdef ITYPE_t* idx_array2 = &other.idx_array[0]
++        cdef NodeData_t node_info1 = self.node_data[i_node1]
++        cdef NodeData_t node_info2 = other.node_data[i_node2]
++
++        cdef ITYPE_t n_features = self.data.shape[1]
++
++        cdef ITYPE_t i1, i2, j, Npts
++        cdef DTYPE_t reduced_r
++
++        cdef DTYPE_t dist_pt, dist_LB = 0, dist_UB = 0
++        dist_LB = min_dist_dual(self, i_node1, other, i_node2)
++        dist_UB = max_dist_dual(self, i_node1, other, i_node2)
++
++        #------------------------------------------------------------
++        # Go through bounds and check for cuts
++        while i_min < i_max:
++            if dist_LB > r[i_min]:
++                i_min += 1
++            else:
++                break
++
++        while i_max > i_min:
++            Npts = ((node_info1.idx_end - node_info1.idx_start)
++                    * (node_info2.idx_end - node_info2.idx_start))
++            if dist_UB <= r[i_max - 1]:
++                count[i_max - 1] += Npts
++                i_max -= 1
++            else:
++                break
++
++        if i_min < i_max:
++            if node_info1.is_leaf and node_info2.is_leaf:
++                # If both nodes are leaves, go through all points
++                for i1 in range(node_info1.idx_start, node_info1.idx_end):
++                    for i2 in range(node_info2.idx_start, node_info2.idx_end):
++                        dist_pt = self.dist((data1 + n_features
++                                             * idx_array1[i1]),
++                                            (data2 + n_features
++                                             * idx_array2[i2]),
++                                            n_features)
++                        j = i_max - 1
++                        while (j >= i_min) and (dist_pt <= r[j]):
++                            count[j] += 1
++                            j -= 1
++
++            elif node_info1.is_leaf:
++                # If only one is a leaf, split the other
++                for i2 in range(2 * i_node2 + 1, 2 * i_node2 + 3):
++                    self._two_point_dual(i_node1, other, i2,
++                                         r, count, i_min, i_max)
++
++            elif node_info2.is_leaf:
++                for i1 in range(2 * i_node1 + 1, 2 * i_node1 + 3):
++                    self._two_point_dual(i1, other, i_node2,
++                                         r, count, i_min, i_max)
++
++            else:
++                # neither is a leaf: split & query both
++                for i1 in range(2 * i_node1 + 1, 2 * i_node1 + 3):
++                    for i2 in range(2 * i_node2 + 1, 2 * i_node2 + 3):
++                        self._two_point_dual(i1, other, i2,
++                                             r, count, i_min, i_max)
++        return 0
++
++
++######################################################################
++# Python functions for benchmarking and testing C implementations
++
++def load_heap(DTYPE_t[:, ::1] X, ITYPE_t k):
++    """test fully loading the heap"""
++    assert k <= X.shape[1]
++    cdef NeighborsHeap heap = NeighborsHeap(X.shape[0], k)
++    cdef ITYPE_t i, j
++    for i in range(X.shape[0]):
++        for j in range(X.shape[1]):
++            heap.push(i, X[i, j], j)
++    return heap.get_arrays()
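++
++# Example (an illustrative sketch; assumes DTYPE/ITYPE are the
++# module-level numpy dtype aliases used throughout this file):
++#
++#   >>> X = np.random.random((5, 10)).astype(DTYPE)
++#   >>> dist, ind = load_heap(X, k=3)
++#   >>> dist.shape, ind.shape    # -> ((5, 3), (5, 3))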
++
++
++def simultaneous_sort(DTYPE_t[:, ::1] distances, ITYPE_t[:, ::1] indices):
++    """In-place simultaneous sort the given row of the arrays
++
++    This python wrapper exists primarily to enable unit testing
++    of the _simultaneous_sort C routine.
++    """
++    assert distances.shape[0] == indices.shape[0]
++    assert distances.shape[1] == indices.shape[1]
++    cdef ITYPE_t row
++    for row in range(distances.shape[0]):
++        _simultaneous_sort(&distances[row, 0],
++                           &indices[row, 0],
++                           distances.shape[1])
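++
++# Example (an illustrative sketch; assumes DTYPE/ITYPE as above):
++#
++#   >>> d = np.random.random((2, 4)).astype(DTYPE)
++#   >>> i = np.arange(8, dtype=ITYPE).reshape((2, 4))
++#   >>> simultaneous_sort(d, i)  # rows of d now ascending; i permuted to match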
++
++
++def nodeheap_sort(DTYPE_t[::1] vals):
++    """In-place reverse sort of vals using NodeHeap"""
++    cdef ITYPE_t[::1] indices = np.zeros(vals.shape[0], dtype=ITYPE)
++    cdef DTYPE_t[::1] vals_sorted = np.zeros_like(vals)
++
++    # use initial size 0 to check corner case
++    cdef NodeHeap heap = NodeHeap(0)
++    cdef NodeHeapData_t data
++    cdef ITYPE_t i
++    for i in range(vals.shape[0]):
++        data.val = vals[i]
++        data.i1 = i
++        data.i2 = i + 1
++        heap.push(data)
++
++    for i in range(vals.shape[0]):
++        data = heap.pop()
++        vals_sorted[i] = data.val
++        indices[i] = data.i1
++
++    return np.asarray(vals_sorted), np.asarray(indices)
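++
++# Example (an illustrative sketch): NodeHeap is a min-heap, so values
++# pop smallest-first and the returned copy is ascending:
++#
++#   >>> vals = np.array([3.0, 1.0, 2.0])
++#   >>> vals_sorted, idx = nodeheap_sort(vals)  # -> [1. 2. 3.], [1 2 0]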
++
++# Reimplementation of the C99 fmin/fmax math functions, which MSVC lacks
++cdef inline double fmin(double a, double b):
++    return min(a, b)
++
++cdef inline double fmax(double a, double b):
++    return max(a, b)