Source code for objscale._fractal_dimensions

from __future__ import annotations

import numpy as np
from numpy.typing import NDArray
import numba
from numba.typed import List
from scipy.ndimage import label
import warnings
from warnings import warn
from ._object_analysis import (
    label_structures,
    _merge_periodic_labels,
    remove_structures_touching_border_nan,
    remove_structure_holes,
    get_structure_areas,
    get_structure_perimeters,
    get_structure_height_width,
)
from ._utils import linear_regression, encase_in_value

__all__ = [
    'ensemble_correlation_dimension',
    'ensemble_box_dimension',
    'ensemble_information_dimension',
    'ensemble_box_renyi_dimension',
    'ensemble_sandbox_renyi_dimension',
    'individual_fractal_dimension',
    'get_coords_of_boundaries',
    'get_locations_from_pixel_sizes',
    'coarsen_array',
    'total_perimeter',
    'total_number',
    'individual_correlation_dimension',
    'isolate_nth_largest_structure',
    'label_size',
]


@numba.njit(parallel=True, fastmath=True)
def _box_sum_2d(arr: NDArray, factor: int) -> NDArray:
    """Sum a 2D array into ``(h//factor, w//factor)`` boxes of size ``factor``.

    Parallel over output rows; each thread reads a contiguous strip of input
    rows so the inner loop runs against private L2 (no strided cache aliasing
    on Zen 5, no thread contention). Caller is responsible for trimming the
    input so ``arr.shape[0]`` and ``arr.shape[1]`` are exact multiples of
    ``factor``.
    """
    h, w = arr.shape
    out_h = h // factor
    out_w = w // factor
    out = np.zeros((out_h, out_w), dtype=np.int64)
    for i in numba.prange(out_h):
        i0 = i * factor
        for j in range(out_w):
            j0 = j * factor
            s = 0
            for di in range(factor):
                for dj in range(factor):
                    s += arr[i0 + di, j0 + dj]
            out[i, j] = s
    return out



[docs]
def ensemble_correlation_dimension(
    arrays: NDArray | list[NDArray],
    x_sizes: NDArray | None = None,
    y_sizes: NDArray | None = None,
    minlength: str | float = 'auto',
    maxlength: str | float = 'auto',
    interior_circles_only: bool = False,
    return_C_l: bool = False,
    bins: NDArray | int | None = None,
    point_reduction_factor: float = 1,
    nbins: int = 50
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
    """
    Calculate the correlation dimension D where C_l ∝ l^D for binary arrays.

    The resulting dimension is for the set of object edge points.

    This function is a thin wrapper around
    :func:`ensemble_sandbox_renyi_dimension` at ``q=2``: the
    Grassberger-Procaccia correlation integral is exactly the ``q=2`` case
    of the sandbox partition function (``M_i^{q-1} = M_i`` and
    ``sum_i M_i`` is the pair count). The function is preserved under its
    historical name because Grassberger-Procaccia is the standard
    designation for the ``q=2`` correlation dimension. For arbitrary
    ``q``, call :func:`ensemble_sandbox_renyi_dimension` directly.

    Parameters
    ----------
    arrays : list or np.ndarray
        List of binary arrays to calculate correlation dimension of.
    x_sizes : np.ndarray, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
        This single grid is used for every array in ``arrays``.
    y_sizes : np.ndarray, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
        This single grid is used for every array in ``arrays``.
    minlength : str or float, default='auto'
        Minimum length scale for correlation calculation. If 'auto', uses 8 times
        the minimum pixel size.
    maxlength : str or float, default='auto'
        Maximum length scale for correlation calculation. If 'auto', uses 0.33 times
        the minimum array dimension.
    interior_circles_only : bool, default=False
        If True, only use circle centers that are at least maxlength distance from
        all array edges to avoid boundary effects. Defaults to False; see the
        section "Correlation dimension and domain boundary effects" at
        https://thomasddewitt.com/thought-cloud/too-many-exponents/index.html
        for the rationale.
    return_C_l : bool, default=False
        If True, return dimension, error, bins, C_l. Otherwise, return dimension, error.
    bins : None, int, or array-like, optional
        Values of l to use for the regression. Can be:
        - None: automatically calculate as logarithmically spaced intervals between
          3*minimum length and the array width or height using nbins points
        - int: number of logarithmically spaced bins to generate automatically
        - array-like: explicit bin edges to use
    point_reduction_factor : float, default=1
        Draw N/point_reduction_factor circles, where N is the total number of
        available circles. Choose the circle centers randomly. Must be >= 1.
    nbins : int, default=50
        Number of bins to use when bins=None or when bins is an int. Only used
        for automatic bin generation.

    Returns
    -------
    dimension : float
        The correlation dimension.
    error : float
        Error estimate for the dimension (95% CI half-width).
    bins : np.ndarray, optional
        The bins used for calculation. Only returned if return_C_l=True.
    C_l : np.ndarray, optional
        The correlation integral values (= sandbox ``Z_{q=2}`` =
        ``sum_i M_i``). Only returned if return_C_l=True.

    Raises
    ------
    ValueError
        If arrays contain NaN values, if pixel sizes are invalid, or if scale
        range is insufficient.

    See Also
    --------
    ensemble_sandbox_renyi_dimension : The general sandbox-method Rényi
        family for arbitrary ``q``. This function is the ``q=2`` case.
    ensemble_box_renyi_dimension : The box-counting Rényi family.
    """
    result = ensemble_sandbox_renyi_dimension(
        binary_arrays=arrays,
        q=2.0,
        set='edge',
        x_sizes=x_sizes,
        y_sizes=y_sizes,
        minlength=minlength,
        maxlength=maxlength,
        interior_circles_only=interior_circles_only,
        nbins=nbins,
        bins=bins,
        point_reduction_factor=point_reduction_factor,
        return_values=return_C_l,
    )
    # Preserve the pre-refactor failure-mode contract: when the fit
    # returns nan, the historical `ensemble_correlation_dimension` emitted
    # a warning and returned ``(nan, nan, [nan], [nan])`` from
    # ``return_C_l=True``. The sandbox wrapper instead returns
    # ``(nan, nan, full_bins, full_Z)``; rewrap here so that
    # downstream code keyed to the old contract keeps working.
    if return_C_l:
        dimension, error, bins_used, Z = result
        if not np.isfinite(dimension):
            warn('Not enough data to estimate correlation dimension, returning nan')
            return np.nan, np.nan, np.array([np.nan]), np.array([np.nan])
        return dimension, error, bins_used, Z
    dimension, error = result
    if not np.isfinite(dimension):
        warn('Not enough data to estimate correlation dimension, returning nan')
    return dimension, error




[docs]
def individual_correlation_dimension(
    array: NDArray,
    n: int = 1,
    x_sizes: NDArray | None = None,
    y_sizes: NDArray | None = None,
    minlength: str | float = 'auto',
    maxlength: str | float = 'auto',
    return_C_l: bool = False,
    point_reduction_factor: float = 1,
    nbins: int = 50,
    filled: bool = True,
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
    """
    Calculate the correlation dimension of the Nth largest structure in an array.

    Removes border-touching structures, isolates the Nth largest remaining
    structure, crops to its bounding box, and computes the correlation dimension
    via :func:`ensemble_correlation_dimension`.

    Parameters
    ----------
    array : np.ndarray
        2-D binary array. May optionally have np.nan at borders; if not, a NaN
        border is added internally.
    n : int, default=1
        Which structure to analyze, ranked by pixel count (1 = largest).
    x_sizes : np.ndarray, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
    y_sizes : np.ndarray, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
    minlength : str or float, default='auto'
        Minimum length scale for correlation calculation. If 'auto', uses 8 times
        the minimum pixel size.
    maxlength : str or float, default='auto'
        Maximum length scale for correlation calculation. If 'auto', uses
        ``0.33 * max(bbox_height, bbox_width)`` of the isolated structure in
        physical units.
    return_C_l : bool, default=False
        If True, return dimension, error, bins, C_l. Otherwise, return dimension,
        error.
    filled : bool, default=True
        If True, fill interior holes in the isolated structure before computing
        the correlation dimension, so that only the outer boundary contributes.
        If False, holes are left as-is and interior boundaries are included.
    point_reduction_factor : float, default=1
        Draw N/point_reduction_factor circles, where N is the total number of
        available circles. Must be >= 1.
    nbins : int, default=50
        Number of logarithmically spaced bins for the correlation integral.

    Returns
    -------
    dimension : float
        The correlation dimension.
    error : float
        Error estimate for the dimension (95% confidence interval).
    bins : np.ndarray, optional
        The bins used for calculation. Only returned if return_C_l=True.
    C_l : np.ndarray, optional
        The correlation integral values. Only returned if return_C_l=True.

    Raises
    ------
    ValueError
        If array is not 2-D, n < 1, or n exceeds the number of available
        structures after border removal.
    """
    if array.ndim != 2:
        raise ValueError('array must be 2-dimensional')
    if n < 1:
        raise ValueError('n must be >= 1')

    # Pad with NaN border if not already present
    if not np.any(np.isnan(array)):
        array = np.pad(array.astype(np.float32), pad_width=1, mode='constant',
                       constant_values=np.nan)
        if x_sizes is not None:
            x_sizes = np.pad(x_sizes, pad_width=1, mode='edge')
        if y_sizes is not None:
            y_sizes = np.pad(y_sizes, pad_width=1, mode='edge')

    # Remove border-touching structures and clean NaN
    cleaned = remove_structures_touching_border_nan(array)
    binary = np.nan_to_num(cleaned, nan=0.0).astype(bool)

    # Isolate the Nth largest structure
    isolated = isolate_nth_largest_structure(binary, n=n)

    # Optionally fill interior holes so only the outer boundary contributes
    if filled:
        isolated = remove_structure_holes(isolated.astype(float)).astype(bool)

    # Crop to bounding box
    rows = np.any(isolated, axis=1)
    cols = np.any(isolated, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]
    cropped = isolated[rmin:rmax + 1, cmin:cmax + 1].astype(np.float32)

    # Pad with 1px of zeros to prevent toroidal wrap artifacts
    padded = np.pad(cropped, pad_width=1, mode='constant', constant_values=0)

    # Handle pixel sizes
    if x_sizes is not None:
        cropped_x = x_sizes[rmin:rmax + 1, cmin:cmax + 1]
        cropped_x = np.pad(cropped_x, pad_width=1, mode='edge')
    else:
        cropped_x = None

    if y_sizes is not None:
        cropped_y = y_sizes[rmin:rmax + 1, cmin:cmax + 1]
        cropped_y = np.pad(cropped_y, pad_width=1, mode='edge')
    else:
        cropped_y = None

    # Compute maxlength from crop dimensions if auto
    if maxlength == 'auto':
        if cropped_x is not None and cropped_y is not None:
            locs_x, locs_y = get_locations_from_pixel_sizes(cropped_x, cropped_y)
            h, w = cropped_x.shape
            phys_width = locs_x[h // 2, w - 1] - locs_x[h // 2, 0]
            phys_height = locs_y[h - 1, w // 2] - locs_y[0, w // 2]
            maxlength = 0.33 * max(phys_width, phys_height)
        else:
            crop_h, crop_w = cropped.shape
            maxlength = 0.33 * float(max(crop_h, crop_w))

    return ensemble_correlation_dimension(
        [padded],
        x_sizes=cropped_x,
        y_sizes=cropped_y,
        minlength=minlength,
        maxlength=maxlength,
        interior_circles_only=False,
        return_C_l=return_C_l,
        point_reduction_factor=point_reduction_factor,
        nbins=nbins,
    )



def _one_sided_edge_mask(binary: NDArray) -> NDArray:
    """One-sided 4-connected edge mask: 1-pixels with at least one 0-neighbor.

    Pixels on the array boundary count as having a "0 neighbor" outside the
    domain, matching the convention of `set='ones'` for boundary handling.
    Returns an int8 mask.
    """
    b = binary.astype(np.int8, copy=False)
    h, w = b.shape
    # neighbor[i,j] = OR of (b[i,j] == 1 AND any 4-neighbor == 0)
    has_zero_nbr = np.zeros((h, w), dtype=bool)
    # Up neighbor (treat domain edge as 0)
    has_zero_nbr[1:, :] |= (b[:-1, :] == 0)
    has_zero_nbr[0, :] = True
    # Down
    has_zero_nbr[:-1, :] |= (b[1:, :] == 0)
    has_zero_nbr[-1, :] = True
    # Left
    has_zero_nbr[:, 1:] |= (b[:, :-1] == 0)
    has_zero_nbr[:, 0] = True
    # Right
    has_zero_nbr[:, :-1] |= (b[:, 1:] == 0)
    has_zero_nbr[:, -1] = True
    return ((b == 1) & has_zero_nbr).astype(np.int8)


def _box_renyi_from_set(
    set_arrays: list[NDArray],
    q_arr: NDArray,
    box_sizes: NDArray,
    box_origin_shift: tuple[float, float],
) -> tuple[NDArray, NDArray, NDArray]:
    """Core box-Rényi routine: count 1-pixels per box, fit slopes.

    Parameters
    ----------
    set_arrays : list of int8 arrays
        Binary arrays where 1 = pixel in the set being measured. The caller
        is responsible for whatever preprocessing turns the user's input into
        a "set" (e.g. one-sided edge mask for ``set='edge'``).
    q_arr : np.ndarray
        1-D array of Rényi orders.
    box_sizes : np.ndarray
        Integer box sizes (in pixels), already filtered to be valid for the
        input shapes.
    box_origin_shift : tuple of float
        ``(sx, sy)`` fractional shifts of the box grid origin, in units of
        the current box size.

    Returns
    -------
    D_q : np.ndarray, shape (len(q_arr),)
        Estimated Rényi dimension for each ``q``.
    err : np.ndarray, shape (len(q_arr),)
        95% CI half-width for each estimate.
    partition : np.ndarray, shape (len(q_arr), len(box_sizes))
        ``Z_q^(p)(eps)`` for q != 1, ``-S_1(eps)`` for q == 1, in the same
        order as ``q_arr``. Useful for plotting and diagnostics.
    """
    nq = q_arr.shape[0]
    nb = box_sizes.shape[0]
    partition = np.full((nq, nb), np.nan, dtype=np.float64)

    sx, sy = box_origin_shift

    for k, factor in enumerate(box_sizes):
        factor = int(factor)
        # Pool box counts and total interior pixels across the ensemble.
        # The caller (ensemble_box_renyi_dimension) is responsible for ensuring
        # that every array fits at every factor; if h_full or w_full ever
        # came out 0 here, the ensemble would silently shrink at that factor
        # and bias the slope. Treat that as an internal error.
        pooled_n: list[NDArray] = []
        V_total = 0  # total interior pixel area summed across arrays
        for arr_idx, arr in enumerate(set_arrays):
            sy_pix = int(round(sy * factor))
            sx_pix = int(round(sx * factor))
            shifted = arr[sy_pix:, sx_pix:]
            h_full = (shifted.shape[0] // factor) * factor
            w_full = (shifted.shape[1] // factor) * factor
            if h_full == 0 or w_full == 0:
                raise AssertionError(
                    f'Internal error: array {arr_idx} of shape {arr.shape} cannot '
                    f'fit a single box of size {factor} after shift {(sx_pix, sy_pix)}. '
                    f'The wrapper should have filtered this box size out.'
                )
            trimmed = shifted[:h_full, :w_full]
            if not trimmed.flags['C_CONTIGUOUS']:
                trimmed = np.ascontiguousarray(trimmed)
            box_counts = _box_sum_2d(trimmed, factor)
            pooled_n.append(box_counts.ravel())
            V_total += h_full * w_full

        if not pooled_n or V_total == 0:
            continue

        n = np.concatenate(pooled_n)
        n_sum = float(n.sum())
        if n_sum <= 0:
            continue

        for qi in range(nq):
            q = float(q_arr[qi])
            if abs(q - 1.0) < 1e-10:
                # Entropy form: requires probability normalization (sum = 1).
                # Use log10 so the slope vs log10(eps) is directly -D_1.
                p = n[n > 0] / n_sum
                S1 = -float(np.sum(p * np.log10(p)))
                partition[qi, k] = S1
            else:
                # Geometric / interior-pixel-area normalization.
                # Z_q = sum_i (n_i / V) ** q
                if q <= 0:
                    n_pos = n[n > 0]
                    Zq = float(np.sum((n_pos / V_total) ** q))
                else:
                    Zq = float(np.sum((n / V_total) ** q))
                partition[qi, k] = Zq

    # Fit slopes
    log_eps = np.log10(box_sizes.astype(np.float64))
    D_q = np.full(nq, np.nan, dtype=np.float64)
    err = np.full(nq, np.nan, dtype=np.float64)
    for qi in range(nq):
        q = float(q_arr[qi])
        y = partition[qi].copy()
        if abs(q - 1.0) < 1e-10:
            # S_1 itself is linear in log_eps with slope -D_1.
            yfit = y
        else:
            # Z_q is non-negative; log it.
            yfit = np.where(y > 0, np.log10(y), np.nan)
        (slope, _), (slope_err, _) = linear_regression(log_eps, yfit)
        if not np.isfinite(slope):
            continue
        if abs(q - 1.0) < 1e-10:
            D_q[qi] = -slope
            err[qi] = slope_err
        else:
            D_q[qi] = slope / (q - 1.0)
            err[qi] = slope_err / abs(q - 1.0)

    return D_q, err, partition


def ensemble_box_renyi_dimension(
    binary_arrays: NDArray | list[NDArray],
    q: float | NDArray = 0.0,
    set: str = 'edge',
    box_sizes: str | NDArray = 'default',
    max_box_size: int | None = None,
    min_box_size: int = 8,
    box_origin_shift: tuple[float, float] = (0.0, 0.0),
    return_values: bool = False,
):
    """Calculate the generalized Rényi dimension D_q via box counting.

    The Rényi dimensions form a one-parameter family of fractal dimensions
    indexed by an order ``q``, generalizing the box-counting dimension
    (``q=0``), information dimension (``q=1``), and correlation dimension
    (``q=2``). For a scale-invariant set, the partition function

    .. math::
        Z_q(\\varepsilon) = \\sum_i p_i^q \\propto \\varepsilon^{(q-1) D_q}

    where ``p_i`` is the (probability or density) measure of the set in box
    ``i`` of size ``\\varepsilon``. The slope of ``log Z_q`` vs
    ``log \\varepsilon`` is ``(q-1) D_q``. At ``q=1`` the formula has a
    removable singularity that resolves to the Shannon entropy form
    ``S_1 = -sum p_i log p_i``, which scales as ``-D_1 log \\varepsilon``.

    For monofractal sets (e.g. level sets of fractional Brownian motion),
    ``D_q`` is constant in ``q``. For multifractal sets, ``D_q`` is a
    monotonically decreasing function of ``q`` whose values quantify the
    distribution's heterogeneity at different moment orders.

    Parameters
    ----------
    binary_arrays : np.ndarray or list of np.ndarray
        2D binary arrays. May contain 0/1 integers, booleans, or floats.
    q : float or np.ndarray, default=0.0
        Rényi order(s). May be a scalar or a 1-D array of values. ``q=0``
        gives the box-counting dimension; ``q=1`` gives the information
        dimension; ``q=2`` gives the correlation dimension.
    set : {'edge', 'ones'}, default='edge'
        Which set is being measured.

        * ``'edge'``: the boundary of the binary array, computed as a
          one-sided 4-connected edge mask (1-pixels that have at least one
          0-neighbor; pixels on the array border count as having a "0
          neighbor" outside the domain).
        * ``'ones'``: the set of 1-pixels themselves, no preprocessing.
    box_sizes : 'default' or array-like, default='default'
        Integer box sizes in pixels. If 'default', powers of 2 from
        ``min_box_size`` up to ``max_box_size``.
    max_box_size : int or None, default=None
        Largest box size in pixels. If None, uses the smaller array
        dimension (i.e. the largest box that fits at all).
    min_box_size : int, default=8
        Smallest box size in pixels. The default of 8 keeps the fit
        away from pixel-discretization noise at the smallest scales.
    box_origin_shift : tuple of (float, float), default=(0.0, 0.0)
        Fractional shift ``(sx, sy)`` of the box-grid origin, in units of
        the current box size. At each box size ``factor``, the actual
        integer pixel shift is ``int(round(sx * factor))`` along x and
        ``int(round(sy * factor))`` along y. Used to probe sensitivity to
        the box-grid alignment.
    return_values : bool, default=False
        If True, also return the partition function values used in the fit.

    Returns
    -------
    D_q : float or np.ndarray
        Estimated Rényi dimension. Scalar if ``q`` was scalar, array
        otherwise.
    err : float or np.ndarray
        95% CI half-width for the estimate(s).
    box_sizes : np.ndarray, optional
        The box sizes used. Returned only if ``return_values=True``.
    partition : np.ndarray, optional
        The partition function values: ``Z_q^(p)(eps)`` for ``q != 1`` (with
        geometric normalization ``p_i = n_i / V``, where ``V`` is the total
        interior pixel area at this ``eps``), and the Shannon entropy
        ``S_1(eps)`` for ``q == 1``. Shape ``(len(q), len(box_sizes))`` if
        ``q`` is array, ``(len(box_sizes),)`` if scalar. Returned only if
        ``return_values=True``.

    Notes
    -----
    **Interior boxes only.** Boxes that would extend past the domain edge
    are not counted. The function trims each input array to a multiple of
    the current box size (after applying any ``box_origin_shift``) and
    discards the remainder. This matches the philosophy of
    ``ensemble_correlation_dimension(interior_circles_only=True)``.

    **Normalization.** For ``q != 1``, ``p_i = n_i / V(\\varepsilon)`` where
    ``V`` is the total interior pixel area at this box size — a purely
    geometric normalization that removes coverage-fluctuation bias from the
    fitted slope. For ``q == 1``, the Shannon entropy requires probability
    normalization ``p_i = n_i / sum_j n_j``; the two coincide for
    uniform-density fields.

    **Boundary convention for** ``set='edge'``. The edge mask is one-sided:
    a pixel is in the edge set iff it is 1 and has at least one 0-neighbor.
    This differs slightly from the convention used by older versions of
    ``ensemble_box_dimension``, which counted a coarsened cell as "edge"
    iff it contained both 0s and 1s.

    See Also
    --------
    ensemble_box_dimension : Equivalent to ``q=0``.
    ensemble_information_dimension : Equivalent to ``q=1``.
    ensemble_correlation_dimension : Theoretically equivalent to ``q=2`` for
        the same set, computed via the Grassberger-Procaccia pairwise method.
    """
    # Normalize inputs
    if isinstance(binary_arrays, np.ndarray):
        binary_arrays = [binary_arrays]
    if len(binary_arrays) == 0:
        raise ValueError('binary_arrays must be non-empty')

    if np.any([np.any(np.isnan(arr)) for arr in binary_arrays]):
        raise ValueError('arrays must not contain NaN values')

    if set not in ('edge', 'ones'):
        raise ValueError(f'set={set!r} not supported (supported values are "edge" or "ones")')

    q_scalar = np.isscalar(q)
    q_arr = np.atleast_1d(np.asarray(q, dtype=np.float64))

    # Resolve box_sizes. The maximum box size is bounded by the *smallest*
    # array in the ensemble (after subtracting any box_origin_shift overhead),
    # so that every array contributes a fully-tiled interior region at every
    # box size. Otherwise, smaller arrays would silently drop out at large
    # box sizes and bias the slope.
    if isinstance(box_sizes, str):
        if box_sizes != 'default':
            raise ValueError(f'box_sizes={box_sizes} not supported')
        box_sizes_arr = 2 ** np.arange(1, 15)
    else:
        box_sizes_arr = np.asarray(box_sizes)

    smallest_dim = min(min(arr.shape) for arr in binary_arrays)
    # box_origin_shift can push the start past the first row/col by up to
    # factor-1 pixels, so the effective "available" extent is one factor
    # short in the worst case. Require shape >= factor + (factor - 1) =
    # 2*factor - 1, i.e. factor <= (shape + 1) // 2 in the worst case.
    # For shift=(0, 0) (the common case), the bound is just factor <= shape.
    sx, sy = box_origin_shift
    max_shift_frac = max(abs(sx), abs(sy))
    if max_shift_frac >= 1.0:
        raise ValueError(
            f'box_origin_shift fractions must lie in [0, 1), got {box_origin_shift}'
        )
    # Worst-case shift in pixels at factor F is round(max_shift_frac * F).
    # Require F + round(max_shift_frac * F) <= smallest_dim.
    # Rearranging: F <= smallest_dim / (1 + max_shift_frac).
    auto_max = int(smallest_dim / (1.0 + max_shift_frac))
    if max_box_size is None:
        max_box_size_eff = auto_max
    else:
        max_box_size_eff = min(int(max_box_size), auto_max)

    box_sizes_arr = box_sizes_arr[box_sizes_arr <= max_box_size_eff]
    box_sizes_arr = box_sizes_arr[box_sizes_arr >= min_box_size]
    box_sizes_arr = np.unique(box_sizes_arr.astype(np.int64))
    if box_sizes_arr.size < 3:
        raise ValueError(
            f'Need at least 3 valid box sizes for a slope fit, got {box_sizes_arr.size}. '
            f'Smallest array dimension is {smallest_dim}, max usable box size is '
            f'{max_box_size_eff}. Reduce min_box_size, raise max_box_size, increase '
            f'array size, or pass an explicit box_sizes array.'
        )

    # Build the set arrays
    set_arrays: list[NDArray] = []
    for arr in binary_arrays:
        if arr.ndim != 2:
            raise ValueError('binary_arrays must be 2-dimensional')
        if set == 'ones':
            set_arrays.append((np.asarray(arr) > 0).astype(np.int8))
        else:  # 'edge'
            set_arrays.append(_one_sided_edge_mask(np.asarray(arr) > 0))

    D_q, err, partition = _box_renyi_from_set(
        set_arrays, q_arr, box_sizes_arr, box_origin_shift
    )

    if q_scalar:
        D_q_out = float(D_q[0])
        err_out = float(err[0])
        partition_out = partition[0]
    else:
        D_q_out = D_q
        err_out = err
        partition_out = partition

    if return_values:
        return D_q_out, err_out, box_sizes_arr, partition_out
    return D_q_out, err_out



[docs]
def ensemble_box_dimension(
    binary_arrays: NDArray | list[NDArray],
    set: str = 'edge',
    max_box_size: int | None = None,
    min_box_size: int = 8,
    box_sizes: str | NDArray = 'default',
    return_values: bool = False
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
    """
    Calculate the ensemble box-counting dimension of binary arrays.

    Equivalent to ``ensemble_box_renyi_dimension(..., q=0)``. Kept as a
    convenience wrapper because box counting is the most familiar special
    case.

    Parameters
    ----------
    binary_arrays : list of np.ndarray or np.ndarray
        A list of 2D binary arrays or a single 2D binary array.
    set : str, default='edge'
        Specifies which set to consider for box counting:
        - 'edge': Box dimension of the set of boundaries (1-pixels with at
          least one 0-neighbor).
        - 'ones': Box dimension of the set of 1-pixels.
    max_box_size : int or None, default=None
        Largest box size in pixels. If None, uses the smaller array dimension.
    min_box_size : int, default=8
        Smallest box size in pixels. The default of 8 keeps the fit
        away from pixel-discretization noise at the smallest scales.
    box_sizes : array-like or 'default', default='default'
        Box sizes used. If 'default', uses powers of 2 from ``min_box_size``
        up to ``max_box_size``.
    return_values : bool, default=False
        If True, return additional data used in the calculation.

    Returns
    -------
    dimension : float
        The estimated box-counting dimension.
    error : float
        The error of the estimate.
    box_sizes : np.ndarray, optional
        Box sizes used. Only returned if return_values=True.
    number_boxes : np.ndarray, optional
        Number of nonempty boxes (pooled across the input ensemble) at each
        box size. Only returned if return_values=True.

    Raises
    ------
    ValueError
        If an unsupported value is provided for 'set' or if arrays contain NaN
        values.
    """
    return ensemble_box_renyi_dimension(
        binary_arrays,
        q=0.0,
        set=set,
        box_sizes=box_sizes,
        max_box_size=max_box_size,
        min_box_size=min_box_size,
        return_values=return_values,
    )



def ensemble_information_dimension(
    binary_arrays: NDArray | list[NDArray],
    method: str = 'sandbox',
    set: str = 'edge',
    return_values: bool = False,
    **kwargs,
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
    """
    Calculate the ensemble information dimension D_1 of binary arrays.

    The information dimension is the ``q=1`` member of the Rényi-dimension
    family. It has a removable singularity at ``q=1`` that resolves to the
    Shannon entropy form

    .. math::
        S_1(\\varepsilon) = -\\sum_i p_i \\log p_i \\sim -D_1 \\log \\varepsilon
        \\qquad (\\text{box estimator})

    or, equivalently, the set-centered log average

    .. math::
        \\langle \\log M(r) \\rangle \\sim D_1 \\log r
        \\qquad (\\text{sandbox estimator}).

    Both estimators converge to the same ``D_1``.

    Two methods are supported:

    * ``method='sandbox'`` (default): forwards to
      :func:`ensemble_sandbox_renyi_dimension` at ``q=1``. Set-centered
      balls at continuous radii. Lower grid-quantization noise; recommended
      for most use cases.
    * ``method='box'``: forwards to :func:`ensemble_box_renyi_dimension`
      at ``q=1``. Fixed-grid box counting with the Shannon entropy form.

    Parameters
    ----------
    binary_arrays : list of np.ndarray or np.ndarray
        A list of 2D binary arrays or a single 2D binary array.
    method : {'sandbox', 'box'}, default='sandbox'
        Which estimator to use. ``'sandbox'`` is the default because it
        avoids the grid-alignment noise that box counting suffers from at
        ``q=1``.
    set : {'edge', 'ones'}, default='edge'
        Which set to measure. See :func:`ensemble_sandbox_renyi_dimension`
        or :func:`ensemble_box_renyi_dimension`.
    return_values : bool, default=False
        If True, also return the bins and partition values used in the fit.
    **kwargs
        Method-specific options forwarded to the underlying estimator.

        For ``method='sandbox'``: ``x_sizes``, ``y_sizes``, ``minlength``,
        ``maxlength``, ``interior_circles_only``, ``nbins``, ``bins``,
        ``point_reduction_factor``.

        For ``method='box'``: ``max_box_size``, ``min_box_size``,
        ``box_sizes``, ``box_origin_shift``.

    Returns
    -------
    dimension : float
        The estimated information dimension.
    error : float
        The 95% CI half-width.
    bins : np.ndarray, optional
        Distance bins (sandbox) or box sizes (box). Only returned if
        ``return_values=True``.
    partition : np.ndarray, optional
        Partition function values: ``\\langle log10 M(r)\\rangle``
        (sandbox) or Shannon entropy ``S_1(eps)`` (box). Only returned if
        ``return_values=True``.

    See Also
    --------
    ensemble_sandbox_renyi_dimension : Sandbox-method Rényi family.
    ensemble_box_renyi_dimension : Box-counting Rényi family.
    ensemble_box_dimension : The ``q=0`` box-counting wrapper.
    ensemble_correlation_dimension : The ``q=2`` sandbox-method wrapper.
    """
    if method == 'sandbox':
        return ensemble_sandbox_renyi_dimension(
            binary_arrays,
            q=1.0,
            set=set,
            return_values=return_values,
            **kwargs,
        )
    elif method == 'box':
        return ensemble_box_renyi_dimension(
            binary_arrays,
            q=1.0,
            set=set,
            return_values=return_values,
            **kwargs,
        )
    else:
        raise ValueError(
            f"method={method!r} not supported (use 'sandbox' or 'box')"
        )


def ensemble_sandbox_renyi_dimension(
    binary_arrays: NDArray | list[NDArray],
    q: float | NDArray = 2.0,
    set: str = 'edge',
    x_sizes: NDArray | None = None,
    y_sizes: NDArray | None = None,
    minlength: str | float = 'auto',
    maxlength: str | float = 'auto',
    interior_circles_only: bool = False,
    nbins: int = 50,
    bins: NDArray | int | None = None,
    point_reduction_factor: float = 1,
    return_values: bool = False,
):
    """Sandbox-method estimate of the Rényi dimension D_q of binary arrays.

    For each set point i (the "sandbox center"), count the number M_i(r) of
    other set points within distance r, then aggregate

    .. math::
        Z_q(r) = \\sum_i M_i(r)^{q-1}    \\quad (q \\ne 1)

        Z_1(r) = \\sum_i \\log_{10} M_i(r)  \\quad (q = 1, set-point average)

    For a multifractal set, ``\\langle M(r)^{q-1}\\rangle \\sim r^{(q-1) D_q}``,
    so the slope of ``log Z_q`` vs ``log r`` is ``(q-1) D_q`` for ``q != 1``,
    recovered as ``D_q = slope / (q - 1)``. At ``q = 1`` the formula has a
    removable singularity that resolves to ``\\langle \\log M(r)\\rangle \\sim
    D_1 \\log r``, fit directly.

    The sandbox method is a strict generalization of the
    Grassberger-Procaccia correlation integral to arbitrary ``q``: at
    ``q = 2``, ``M_i^{q-1} = M_i`` and ``sum_i M_i = `` the pair count, so
    ``D_2`` is the Grassberger-Procaccia correlation dimension. The function
    :func:`ensemble_correlation_dimension` is now a thin wrapper around this
    one at ``q=2``.

    Compared to the box-counting Rényi family
    (:func:`ensemble_box_renyi_dimension`), sandbox samples the partition
    function at continuous radii instead of fixed-grid tiles, and uses a
    set-centered measure that bounds ``M_i >= 1`` and so behaves better at
    ``q < 1`` where box counting suffers from grid-quantization noise.

    References
    ----------
    Tél, Fülöp, Vicsek 1989, *Physica A* 159, 155–166.
    Vicsek 1992, *Fractal Growth Phenomena*, Ch. 3.

    Parameters
    ----------
    binary_arrays : np.ndarray or list of np.ndarray
        2D binary arrays. May contain 0/1 integers, booleans, or floats.
    q : float or np.ndarray, default=2.0
        Rényi order(s). Scalar or 1-D array. ``q == 1`` (within ``1e-10``)
        is special-cased to the log form.
    set : {'edge', 'ones'}, default='edge'
        Which set is being measured. Same convention as
        :func:`ensemble_box_renyi_dimension`.

        * ``'edge'`` — one-sided 4-connected edge mask of the binary array
          (1-pixels with at least one 0-neighbor; pixels on the array border
          count as having a "0 neighbor" outside the domain).
        * ``'ones'`` — the set of 1-pixels itself.
    x_sizes, y_sizes : np.ndarray, optional
        Per-pixel physical sizes. If None, uniform unit pixels.
    minlength, maxlength : str or float, default='auto'
        Distance scale range. ``'auto'`` = ``8 * min pixel size`` and
        ``0.33 * min array dimension`` respectively. The default
        ``minlength`` of 8× pixel size keeps the fit away from
        pixel-discretization noise at the smallest scales.
    interior_circles_only : bool, default=False
        If True, only sandbox centers at least ``maxlength`` from every
        domain edge contribute, avoiding boundary truncation bias. Defaults
        to False; see the section "Correlation dimension and domain boundary
        effects" at
        https://thomasddewitt.com/thought-cloud/too-many-exponents/index.html
        for the rationale.
    nbins : int, default=50
        Number of log-spaced distance bins (used when ``bins`` is None).
    bins : np.ndarray or int, optional
        Explicit bin array (ascending physical distances) or an int
        (number of log-spaced bins).
    point_reduction_factor : float, default=1
        Subsample sandbox centers by this factor (must be ``>= 1``).
    return_values : bool, default=False
        If True, return ``(D_q, err, bins, Z)`` instead of ``(D_q, err)``.
        For scalar ``q``, ``Z`` has shape ``(B,)``. For vector ``q``, ``Z``
        has shape ``(len(q), B)``. At the ``q=1`` index, ``Z`` holds the
        per-center mean ``\\langle log10 M_i\\rangle`` (already normalized).

    Returns
    -------
    D_q : float or np.ndarray
        Rényi dimension estimate(s). Scalar if ``q`` is scalar.
    err : float or np.ndarray
        95% CI half-width.
    bins : np.ndarray, optional
        Distance bins used. Returned only if ``return_values=True``.
    Z : np.ndarray, optional
        Sandbox partition function values.

    See Also
    --------
    ensemble_box_renyi_dimension : Box-based Rényi dimension family.
    ensemble_correlation_dimension : Thin wrapper around this function at
        ``q=2``, kept under its historical Grassberger-Procaccia name.
    ensemble_information_dimension : ``q=1`` wrapper, supports both
        ``method='sandbox'`` (default) and ``method='box'``.
    """
    # ----- normalize inputs -----
    if isinstance(binary_arrays, np.ndarray):
        binary_arrays = [binary_arrays]
    if len(binary_arrays) == 0:
        raise ValueError('binary_arrays must be non-empty')

    if set not in ('edge', 'ones'):
        raise ValueError(f"set={set!r} not supported (use 'edge' or 'ones')")

    if point_reduction_factor < 1:
        raise ValueError('point_reduction_factor must be >= 1')

    q_scalar = np.isscalar(q)
    q_arr = np.atleast_1d(np.asarray(q, dtype=np.float64))
    Q = q_arr.shape[0]

    # Mask of q entries within tolerance of 1.0 (entropy-form special case).
    # Stored as a per-element bool so duplicate q==1 entries are all handled.
    is_q1 = np.abs(q_arr - 1.0) < 1e-10

    # Use the first array's shape to set up the physical grid (caller is
    # responsible for passing arrays of compatible shape; preserved from GP).
    if x_sizes is None:
        x_sizes = np.ones(binary_arrays[0].shape, dtype=np.float32)
    if y_sizes is None:
        y_sizes = np.ones(binary_arrays[0].shape, dtype=np.float32)
    locations_x, locations_y = get_locations_from_pixel_sizes(x_sizes, y_sizes)

    h = x_sizes.shape[0]
    w = x_sizes.shape[1]

    # ----- resolve scale range -----
    if maxlength == 'auto':
        maxlength = 0.33 * min(
            (locations_x[int(h / 2), w - 1] - locations_x[int(h / 2), 0]),
            (locations_y[h - 1, int(w / 2)] - locations_y[0, int(w / 2)]),
        )
    if minlength == 'auto':
        minlength = 8 * min(np.nanmin(x_sizes), np.nanmin(y_sizes))

    # ----- validate -----
    if np.any([np.any(np.isnan(arr)) for arr in binary_arrays]):
        raise ValueError('arrays must not contain NaN values')
    if np.any(np.isnan(x_sizes)) or np.any(np.isnan(y_sizes)):
        raise ValueError('x_sizes and y_sizes cannot contain NaN values')
    if np.any(x_sizes <= 0) or np.any(y_sizes <= 0):
        raise ValueError('x_sizes and y_sizes must be positive')

    if bins is None:
        bins = np.geomspace(minlength, maxlength, nbins)
    elif isinstance(bins, int):
        bins = np.geomspace(minlength, maxlength, bins)
    bins = np.asarray(bins, dtype=np.float64)

    if bins[-1] <= bins[0]:
        raise ValueError(
            f'bin maximum length ({bins[-1]:.3f}) must be greater than bin '
            f'minimum length ({bins[0]:.3f}); or if bins are passed, they '
            f'must be increasing. Did you pass invalid values for '
            f'minlength/maxlength?'
        )
    if bins[-1] / bins[0] < 10:
        raise ValueError(
            f'Available scale ratio ({maxlength / minlength:.2f}) is less '
            f'than 10. Need at least one order of magnitude separation for '
            f'reliable dimension estimation.'
        )

    if interior_circles_only:
        domain_width = locations_x[int(h / 2), w - 1] - locations_x[int(h / 2), 0]
        domain_height = locations_y[h - 1, int(w / 2)] - locations_y[0, int(w / 2)]
        interior_width = domain_width - 2 * maxlength
        interior_height = domain_height - 2 * maxlength
        min_pixel_x = np.min(x_sizes)
        min_pixel_y = np.min(y_sizes)
        interior_pixels_x = interior_width / min_pixel_x if min_pixel_x > 0 else 0
        interior_pixels_y = interior_height / min_pixel_y if min_pixel_y > 0 else 0
        if interior_pixels_x < 5 or interior_pixels_y < 5:
            raise ValueError(
                f'interior_circles_only=True requires that maxlength leaves a usable '
                f'interior region, but maxlength={maxlength:.1f} with a domain of '
                f'{domain_width:.1f} x {domain_height:.1f} leaves only '
                f'{max(interior_pixels_x, 0):.0f} x {max(interior_pixels_y, 0):.0f} '
                f'pixels of interior (need at least 5x5). Reduce maxlength or set '
                f'interior_circles_only=False.'
            )

    bins_sq = bins ** 2
    max_bin = float(bins[-1])

    # ----- per-array kernel call, accumulate Z_total -----
    Z_total = np.zeros((Q, bins.shape[0]), dtype=np.float64)
    n_centers_total = 0

    for array in binary_arrays:
        if np.any(array.shape != x_sizes.shape):
            raise ValueError(
                f'All arrays must be same shape as pixel sizes (currently '
                f'{array.shape} and {x_sizes.shape}, respectively)'
            )

        # Build the set mask, then extract pixel coordinates of set points.
        if set == 'edge':
            set_mask = _one_sided_edge_mask(np.asarray(array) > 0)
        else:  # set == 'ones'
            set_mask = (np.asarray(array) > 0).astype(np.int8)

        all_set_coords = np.argwhere(set_mask > 0)

        if all_set_coords.shape[0] == 0:
            continue

        # Interior filter: keep set points at least maxlength from every edge
        if interior_circles_only:
            coord_locations_x = locations_x[all_set_coords[:, 0], all_set_coords[:, 1]]
            coord_locations_y = locations_y[all_set_coords[:, 0], all_set_coords[:, 1]]
            dist_to_left = coord_locations_x - locations_x[int(h / 2), 0]
            dist_to_right = locations_x[int(h / 2), w - 1] - coord_locations_x
            dist_to_top = coord_locations_y - locations_y[0, int(w / 2)]
            dist_to_bottom = locations_y[h - 1, int(w / 2)] - coord_locations_y
            min_dist_to_any_edge = np.minimum.reduce(
                [dist_to_left, dist_to_right, dist_to_top, dist_to_bottom]
            )
            interior_mask = min_dist_to_any_edge >= maxlength
            sandbox_centers_idx = all_set_coords[interior_mask]
        else:
            sandbox_centers_idx = all_set_coords

        if sandbox_centers_idx.shape[0] == 0:
            continue

        # Subsample sandbox centers
        if point_reduction_factor > 1:
            n_keep = int(len(sandbox_centers_idx) / point_reduction_factor)
            if n_keep == 0:
                continue
            chosen = np.random.choice(
                np.arange(len(sandbox_centers_idx)), n_keep, replace=False
            )
            sandbox_centers_idx = sandbox_centers_idx[chosen]

        # Convert to physical coordinates
        boundary_phys_x = locations_x[all_set_coords[:, 0], all_set_coords[:, 1]]
        boundary_phys_y = locations_y[all_set_coords[:, 0], all_set_coords[:, 1]]
        boundary_phys = np.column_stack([boundary_phys_x, boundary_phys_y])
        del boundary_phys_x, boundary_phys_y

        center_phys_x = locations_x[sandbox_centers_idx[:, 0], sandbox_centers_idx[:, 1]]
        center_phys_y = locations_y[sandbox_centers_idx[:, 0], sandbox_centers_idx[:, 1]]
        center_phys = np.column_stack([center_phys_x, center_phys_y])
        n_centers_here = center_phys.shape[0]
        del set_mask, all_set_coords, sandbox_centers_idx
        del center_phys_x, center_phys_y

        # Sort boundary points by physical y for the kernel's bbox trick
        sort_order = np.argsort(boundary_phys[:, 1])
        sorted_boundary_phys = boundary_phys[sort_order]
        del sort_order, boundary_phys

        Z_total += _sandbox_partition(
            center_phys, sorted_boundary_phys, bins_sq, max_bin, q_arr, is_q1
        )
        n_centers_total += n_centers_here

    # At q=1 the kernel accumulates sum_i log10 M_i, which scales as
    # n_centers_total * D_1 * log10(r). Convert to per-center mean so the
    # slope vs log10(r) is D_1 directly. Apply to every q==1 entry.
    if n_centers_total > 0 and is_q1.any():
        Z_total[is_q1, :] /= float(n_centers_total)

    # ----- linear regression per q -----
    log_r = np.log10(bins)
    D_q = np.full(Q, np.nan, dtype=np.float64)
    err_arr = np.full(Q, np.nan, dtype=np.float64)
    for qi in range(Q):
        y = Z_total[qi].copy()
        q_val = float(q_arr[qi])
        if is_q1[qi]:
            # Z[qi] is already <log10 M_i> linear in log10 r.
            y_fit = y
        else:
            y_fit = np.full_like(y, np.nan)
            pos = y > 0
            y_fit[pos] = np.log10(y[pos])
        (slope, _), (slope_err, _) = linear_regression(log_r, y_fit)
        if not np.isfinite(slope):
            continue
        if is_q1[qi]:
            D_q[qi] = slope
            err_arr[qi] = slope_err
        else:
            D_q[qi] = slope / (q_val - 1.0)
            err_arr[qi] = slope_err / abs(q_val - 1.0)

    # Unbox return shape
    if q_scalar:
        D_out = float(D_q[0])
        err_out = float(err_arr[0])
        Z_out = Z_total[0]
    else:
        D_out = D_q
        err_out = err_arr
        Z_out = Z_total

    if return_values:
        return D_out, err_out, bins, Z_out
    return D_out, err_out


def ensemble_coarsening_dimension(
    arrays: NDArray | list[NDArray],
    x_sizes: NDArray | list[NDArray] | None = None,
    y_sizes: NDArray | list[NDArray] | None = None,
    cloudy_threshold: float = 0.5,
    min_pixels: int = 30,
    return_values: bool = False,
    coarsening_factors: str | NDArray = 'default',
    count_exterior: bool = False
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
    """
    NOT RECOMMENDED due to ambiguity in coarsening a binary array

    Calculate the ensemble fractal dimension by coarsening image resolution and 
    calculating total perimeter as a function of resolution.

    Parameters
    ----------
    arrays : np.ndarray or list of np.ndarray
        Array, or list of arrays, to coarsen, apply cloudy_threshold to make binary,
        then calculate total perimeter.
    x_sizes : np.ndarray or list, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        x_sizes[i] corresponds to arrays[i].
    y_sizes : np.ndarray or list, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        y_sizes[i] corresponds to arrays[i].
    cloudy_threshold : float, default=0.5
        Threshold for making arrays binary.
    min_pixels : int, default=30
        Limit the coarsening factors such that coarsened matrices always have
        shape >= (min_pixels, min_pixels).
    return_values : bool, default=False
        If True, return additional data used in the calculation.
    coarsening_factors : str or array-like, default='default'
        Coarsening factors to use. If 'default', automatically determined.
    count_exterior : bool, default=False
        Whether to count exterior perimeter.

    Returns
    -------
    D_e : float
        The ensemble fractal dimension.
    error : float
        Error estimate (95% confidence).
    coarsening_factors : np.ndarray, optional
        The coarsening factors used. Only returned if return_values=True.
    mean_total_perimeters : np.ndarray, optional
        Mean total perimeters. Only returned if return_values=True.
    """
    raise NotImplementedError(
        "ensemble_coarsening_dimension has been removed due to ambiguity in "
        "coarsening binary arrays. Use ensemble_correlation_dimension (q=2), "
        "ensemble_sandbox_renyi_dimension (general q), or "
        "ensemble_box_dimension (q=0) instead."
    )


_UNSET = object()

_INDIVIDUAL_METHODS = {
    'filled perimeter vs filled area',
    'summed perimeter vs unfilled area',
    'filled perimeter vs width',
    'filled perimeter vs height',
    'summed perimeter vs width',
    'summed perimeter vs height',
}



[docs]
def individual_fractal_dimension(
    arrays: NDArray | list[NDArray],
    x_sizes: NDArray | list[NDArray] | None = None,
    y_sizes: NDArray | list[NDArray] | None = None,
    min_length_scale: float = _UNSET,
    max_length_scale: float = _UNSET,
    bins: int | None = 30,
    return_values: bool = False,
    method: str = _UNSET,
    filled: bool = _UNSET,
    min_a: float = _UNSET,
    max_a: float = _UNSET,
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
    """
    Calculate the individual fractal dimension Df of objects within arrays.

    The method uses linear regression on log(length scale) vs. log(perimeter),
    omitting structures touching the array edge.

    Parameters
    ----------
    arrays : list of np.ndarray
        List of boolean 2D arrays.
    x_sizes : np.ndarray or list, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        x_sizes[i] corresponds to arrays[i].
    y_sizes : np.ndarray or list, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        y_sizes[i] corresponds to arrays[i].
    min_length_scale : float, default=3
        Minimum length scale to include. Filters on the x-axis quantity:
        sqrt(area) for area methods, width or height for those methods.
    max_length_scale : float, default=np.inf
        Maximum length scale to include.
    bins : int or None, default=30
        Number of bins along the log10 length scale for averaging. The
        regression is performed on the bin-averaged values. If None, fit on
        all individual points without binning.
    return_values : bool, default=False
        If True, return additional data used in the calculation.
    method : str, default='filled perimeter vs filled area'
        Which perimeter and length-scale combination to use. Options:

        - ``'filled perimeter vs filled area'``: fill holes, regress
          log(perimeter) vs log(sqrt(area)). Default.
        - ``'summed perimeter vs unfilled area'``: no hole filling, perimeter
          includes inner boundaries, area excludes holes.
        - ``'filled perimeter vs width'``: fill holes, regress log(perimeter)
          vs log(bounding-box width).
        - ``'filled perimeter vs height'``: fill holes, regress log(perimeter)
          vs log(bounding-box height).
        - ``'summed perimeter vs width'``: no hole filling, regress
          log(perimeter) vs log(bounding-box width).
        - ``'summed perimeter vs height'``: no hole filling, regress
          log(perimeter) vs log(bounding-box height).
    filled : bool, optional
        .. deprecated::
            Use ``method`` instead. ``filled=True`` maps to
            ``'filled perimeter vs filled area'``; ``filled=False`` maps to
            ``'summed perimeter vs unfilled area'``.
    min_a : float, optional
        .. deprecated::
            Use ``min_length_scale`` instead. Converted via ``sqrt(min_a)``.
    max_a : float, optional
        .. deprecated::
            Use ``max_length_scale`` instead. Converted via ``sqrt(max_a)``.

    Returns
    -------
    Df : float
        The individual fractal dimension.
    uncertainty : float
        Uncertainty estimate (95% confidence).
    log10_length_scale : np.ndarray, optional
        Log10 of the length-scale values (bin centers if bins is not None).
        Only returned if return_values=True.
    log10_p : np.ndarray, optional
        Log10 of perimeter values (bin means if bins is not None).
        Only returned if return_values=True.

    Raises
    ------
    ValueError
        If array shapes don't match pixel size shapes, if an invalid method
        is given, or if deprecated and new parameters are mixed.
    """
    # --- resolve deprecated 'filled' parameter ---
    if filled is not _UNSET and method is not _UNSET:
        raise ValueError("Cannot pass both 'filled' and 'method'.")
    if filled is not _UNSET:
        warnings.warn(
            "The 'filled' parameter is deprecated. Use method='filled perimeter"
            " vs filled area' or method='summed perimeter vs unfilled area'.",
            DeprecationWarning, stacklevel=2,
        )
        method = ('filled perimeter vs filled area' if filled
                  else 'summed perimeter vs unfilled area')
    elif method is _UNSET:
        method = 'filled perimeter vs filled area'

    if method not in _INDIVIDUAL_METHODS:
        raise ValueError(
            f"method={method!r} not recognized. Supported methods: "
            + ', '.join(sorted(_INDIVIDUAL_METHODS))
        )

    # --- resolve deprecated min_a / max_a ---
    if min_a is not _UNSET and min_length_scale is not _UNSET:
        raise ValueError("Cannot pass both 'min_a' and 'min_length_scale'.")
    if max_a is not _UNSET and max_length_scale is not _UNSET:
        raise ValueError("Cannot pass both 'max_a' and 'max_length_scale'.")
    if min_a is not _UNSET:
        warnings.warn(
            "The 'min_a' parameter is deprecated. Use 'min_length_scale' instead.",
            DeprecationWarning, stacklevel=2,
        )
        min_length_scale = np.sqrt(min_a)
    if max_a is not _UNSET:
        warnings.warn(
            "The 'max_a' parameter is deprecated. Use 'max_length_scale' instead.",
            DeprecationWarning, stacklevel=2,
        )
        max_length_scale = np.sqrt(max_a)
    if min_length_scale is _UNSET:
        min_length_scale = 3
    if max_length_scale is _UNSET:
        max_length_scale = np.inf

    # --- parse method ---
    fill_holes = method.startswith('filled perimeter')
    if 'vs filled area' in method or 'vs unfilled area' in method:
        length_scale_type = 'sqrt_area'
    elif 'vs width' in method:
        length_scale_type = 'width'
    else:
        length_scale_type = 'height'

    # --- collect per-structure data ---
    length_scales, perimeters = [], []
    if not isinstance(arrays, list):
        arrays = [arrays]

    if x_sizes is None:
        x_sizes = np.ones_like(arrays[0])
    if y_sizes is None:
        y_sizes = np.ones_like(arrays[0])

    for i in range(len(arrays)):
        array = arrays[i]
        if isinstance(x_sizes, list):
            xs = x_sizes[i]
        else:
            xs = x_sizes
        if isinstance(y_sizes, list):
            ys = y_sizes[i]
        else:
            ys = y_sizes

        if np.any(array.shape != xs.shape):
            raise ValueError('Each array shape must match corresponding pixel sizes shape')

        array = remove_structures_touching_border_nan(array)
        if fill_holes:
            array = remove_structure_holes(array)
        lab, nm, nl = label_structures(array, wrap='both')
        if lab is None:
            continue

        new_p = get_structure_perimeters(lab, nm, nl, xs, ys)
        if length_scale_type == 'sqrt_area':
            new_a = get_structure_areas(lab, nm, nl, xs, ys)
            new_ls = np.sqrt(new_a)
            valid = (new_a > 0) & (new_p > 0)
        else:
            heights, widths = get_structure_height_width(lab, nm, nl, xs, ys)
            new_ls = widths if length_scale_type == 'width' else heights
            valid = (new_ls > 0) & (new_p > 0)

        length_scales.extend(new_ls[valid])
        perimeters.extend(new_p[valid])

    length_scales = np.array(length_scales)
    perimeters = np.array(perimeters)
    mask = (length_scales > min_length_scale) & (length_scales < max_length_scale)
    length_scales, perimeters = length_scales[mask], perimeters[mask]

    log_ls = np.log10(length_scales)
    log_p = np.log10(perimeters)

    if bins is not None:
        bin_edges = np.linspace(log_ls.min(), log_ls.max(), bins + 1)
        bin_indices = np.digitize(log_ls, bin_edges) - 1
        bin_indices = np.clip(bin_indices, 0, bins - 1)
        bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
        bin_means = np.array([log_p[bin_indices == i].mean()
                              if np.any(bin_indices == i) else np.nan
                              for i in range(bins)])
        valid = np.isfinite(bin_means)
        log_ls = bin_centers[valid]
        log_p = bin_means[valid]

    (slope, _), (err, _) = linear_regression(log_ls, log_p)

    if return_values:
        return slope, err, log_ls, log_p
    else:
        return slope, err



# Helper functions for fractal dimension calculations


[docs]
def get_coords_of_boundaries(array: NDArray) -> NDArray:
    """
    Find coordinates of pixels with value 1 that are adjacent to pixels with value 0.

    Parameters
    ----------
    array : np.ndarray
        2-D binary array.

    Returns
    -------
    np.ndarray
        Array of shape (n_boundaries, 2) where each element is a pair of indices
        corresponding to the locations of pixels with value 1 that are adjacent
        to a pixel of value 0.

    Notes
    -----
    Topology is toroidal, so pixels on one edge are considered adjacent to pixels
    on the opposite edge.

    Examples
    --------
    >>> array1 = np.zeros((10, 10))
    >>> array1[6:8, 6:8] = 1
    >>> array1[3:4, 2:5] = 1
    >>> array2 = np.zeros((10, 10))
    >>> for i, j in get_coords_of_boundaries(array1):
    ...     array2[i, j] = 1
    >>> np.all(array2 == array1)
    True
    """
    array = array.astype(np.int16)
    shifted_right = np.roll(array, shift=1, axis=1)
    shifted_down = np.roll(array, 1, axis=0)
    diff_right = shifted_right - array
    diff_down = shifted_down - array
    del shifted_right, shifted_down

    right_side_of_pixel = np.argwhere(diff_right == 1)
    right_side_of_pixel[:, 1] -= 1
    left_side_of_pixel = np.argwhere(diff_right == -1)
    bottom_of_pixel = np.argwhere(diff_down == 1)
    bottom_of_pixel[:, 0] -= 1
    top_of_pixel = np.argwhere(diff_down == -1)

    all_coords = np.append(right_side_of_pixel, left_side_of_pixel, axis=0)
    all_coords = np.append(all_coords, top_of_pixel, axis=0)
    all_coords = np.append(all_coords, bottom_of_pixel, axis=0)

    # remove duplicates
    all_coords = np.unique(all_coords, axis=0)

    return all_coords




[docs]
def get_locations_from_pixel_sizes(
    pixel_sizes_x: NDArray,
    pixel_sizes_y: NDArray
) -> tuple[NDArray, NDArray]:
    """
    Convert pixel sizes to cumulative location coordinates.

    Parameters
    ----------
    pixel_sizes_x : np.ndarray
        2-D array of pixel sizes in x direction.
    pixel_sizes_y : np.ndarray
        2-D array of pixel sizes in y direction.

    Returns
    -------
    locations_x : np.ndarray
        Cumulative x locations.
    locations_y : np.ndarray
        Cumulative y locations.
    """
    return np.nancumsum(pixel_sizes_x, 1), np.nancumsum(pixel_sizes_y, 0)



@numba.njit(parallel=True, fastmath=True)
def _sandbox_partition(
    centers_phys,        # (N_c, 2) sandbox centers (physical x, y), drawn from set
    sorted_boundary,     # (N_s, 2) set points (physical x, y), sorted by y
    bins_sq,             # (B,) ascending squared distance bin edges
    max_bin,             # float: sqrt(bins_sq[-1])
    qs,                  # (Q,) Rényi orders
    is_q1,               # (Q,) bool: True where qs[qi] is within tolerance of 1.0
):
    """For each center, compute M_i(r_k) = #set points within distance r_k.
    Accumulate Z[qi, k] across centers:
        Z[qi, k] = sum_i M_i(r_k)^(qs[qi] - 1)        (q != 1)
        Z[qi, k] = sum_i log10(M_i(r_k))              (q == 1, set elementwise via is_q1)

    Centers are assumed to be drawn from the same set as sorted_boundary,
    so each center self-counts at distance 0 (M_i >= 1 always at any
    positive radius). This matches the convention of the legacy
    `correlation_integral` kernel and makes the q=2 output bit-identical
    to the previous Grassberger-Procaccia implementation:

        sum_i M_i(r_k) ^ (2 - 1) = sum_i M_i(r_k) = pair count = old C_l[k].

    Binning convention: `searchsorted(bins_sq, dist_sq, side='right')` —
    bin index is the smallest k with bins_sq[k] > dist_sq, i.e. the bin
    "first reached" by this pair. After per-center cumulative sum,
    M_i(bins[k]) = #neighbors with dist < bins[k]. Same as old kernel.
    """
    N_c = centers_phys.shape[0]
    B = bins_sq.shape[0]
    Q = qs.shape[0]
    n_threads = numba.config.NUMBA_NUM_THREADS

    # Per-thread accumulators (avoid races, no atomics needed)
    Z_per_thread = np.zeros((n_threads, Q, B), dtype=np.float64)
    # Per-thread scratch histograms, reused across centers
    hist_per_thread = np.zeros((n_threads, B), dtype=np.int64)

    sorted_y = sorted_boundary[:, 1].copy()

    for i in numba.prange(N_c):
        thread_id = numba.get_thread_id()
        cx = centers_phys[i, 0]
        cy = centers_phys[i, 1]

        # Zero this thread's scratch histogram
        for k in range(B):
            hist_per_thread[thread_id, k] = 0

        # y bounding box via binary search
        lo = np.searchsorted(sorted_y, cy - max_bin, side='left')
        hi = np.searchsorted(sorted_y, cy + max_bin, side='right')

        for j in range(lo, hi):
            bx = sorted_boundary[j, 0]
            if abs(bx - cx) > max_bin:
                continue
            dx = cx - bx
            dy = cy - sorted_boundary[j, 1]
            dist_sq = dx * dx + dy * dy
            bin_idx = np.searchsorted(bins_sq, dist_sq, side='right')
            if bin_idx < B:
                hist_per_thread[thread_id, bin_idx] += 1

        # Per-center cumulative sum gives M_i(bins[k]) at each k.
        # Then accumulate M_i^(q-1) (or log10(M_i) at q==1) into Z[qi, k].
        # is_q1 is checked per qi so duplicate q==1 entries all get the
        # log form (vs. a single q1_index, which only handles the first).
        M = 0
        for k in range(B):
            M += hist_per_thread[thread_id, k]
            if M > 0:
                M_f = float(M)
                for qi in range(Q):
                    if is_q1[qi]:
                        Z_per_thread[thread_id, qi, k] += np.log10(M_f)
                    else:
                        exponent = qs[qi] - 1.0
                        Z_per_thread[thread_id, qi, k] += M_f ** exponent

    # Reduce across threads
    Z = np.zeros((Q, B), dtype=np.float64)
    for t in range(n_threads):
        for qi in range(Q):
            for k in range(B):
                Z[qi, k] += Z_per_thread[t, qi, k]
    return Z



[docs]
def coarsen_array(array: NDArray, factor: int) -> NDArray:
    """
    Coarsen an array by averaging superpixel regions.

    Takes an input array and reduces it by a given factor along both the x and y
    dimensions. The coarsening is achieved by summing 'superpixel' regions of the
    original array and dividing by the number of pixels in each region.

    Parameters
    ----------
    array : np.ndarray
        The input array to be coarsened.
    factor : int
        The coarsening factor for reducing the array resolution. Must be a positive integer.

    Returns
    -------
    np.ndarray
        The coarsened array with reduced resolution.

    Examples
    --------
    >>> original_array = np.array([[1, 2], [3, 4]])
    >>> coarsened = coarsen_array(original_array, factor=2)
    >>> coarsened
    array([[2.5]])
    """
    coarsened_array = np.add.reduceat(array, np.arange(array.shape[0], step=factor), axis=0)
    coarsened_array = np.add.reduceat(coarsened_array, np.arange(array.shape[1], step=factor), axis=1)

    # The number of pixels that are coarsened is usually factor**2, but not for edge superpixels
    # if the factor does not evenly divide into array size. Solution:
    pixel_counts = np.add.reduceat(np.ones(array.shape), np.arange(array.shape[0], step=factor), axis=0)
    pixel_counts = np.add.reduceat(pixel_counts, np.arange(array.shape[1], step=factor), axis=1)

    coarsened_array = coarsened_array / pixel_counts

    return coarsened_array




[docs]
@numba.njit()
def total_perimeter(array, x_sizes, y_sizes):
    """
    Calculate the total perimeter of a binary array.

    Given a binary array, calculate the total perimeter. Only counts perimeter
    along edges between 1 and 0. Assumes periodic boundary conditions; for other
    boundary conditions, pad inputs with 0s or nans.

    Parameters
    ----------
    array : np.ndarray
        Binary array (0s and 1s).
    x_sizes : np.ndarray
        Pixel sizes in x direction.
    y_sizes : np.ndarray
        Pixel sizes in y direction.

    Returns
    -------
    float
        Total perimeter length.

    Raises
    ------
    ValueError
        If x_sizes or y_sizes is nan where array is 1.
    """
    perimeter = 0
    for (i, j), value in np.ndenumerate(array):
        if value == 1:
            if np.isnan(x_sizes[i, j]) or np.isnan(y_sizes[i, j]):
                raise ValueError('x_sizes or y_sizes is nan where array is 1')
            if i != array.shape[0] - 1 and array[i + 1, j] == 0:
                perimeter += x_sizes[i, j]
            elif i == array.shape[0] - 1 and array[0, j] == 0:
                perimeter += x_sizes[i, j]

            if i != 0 and array[i - 1, j] == 0:
                perimeter += x_sizes[i, j]
            elif i == 0 and array[array.shape[0] - 1, j] == 0:
                perimeter += x_sizes[i, j]

            if j != array.shape[1] - 1 and array[i, j + 1] == 0:
                perimeter += y_sizes[i, j]
            elif j == array.shape[1] - 1 and array[i, 0] == 0:
                perimeter += y_sizes[i, j]

            if j != 0 and array[i, j - 1] == 0:
                perimeter += y_sizes[i, j]
            elif j == 0 and array[i, array.shape[1] - 1] == 0:
                perimeter += y_sizes[i, j]

    return perimeter




[docs]
def total_number(
    array: NDArray,
    structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
) -> int:
    """
    Count the number of connected objects in an array.

    Given a 2-D array with 0's, nans, and 1's, calculate number of objects of
    connected 1's where connectivity is defined by structure.

    Parameters
    ----------
    array : np.ndarray
        2-D array with 0s, 1s, and optionally nans.
    structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
        Connectivity structure.

    Returns
    -------
    int
        Number of connected objects.
    """
    clean = np.where(np.isnan(array), 0, array)
    _, n_structures = label(clean.astype(bool), structure, output=np.float32)
    return n_structures




[docs]
def isolate_nth_largest_structure(
    binary_array: NDArray,
    n: int = 1,
    structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
) -> NDArray[np.bool_]:
    """
    Isolate the Nth largest connected structure in a binary array.

    Parameters
    ----------
    binary_array : np.ndarray
        Binary input array.
    n : int, default=1
        Which structure to return, ranked by pixel count (1 = largest).
    structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
        Connectivity structure.

    Returns
    -------
    np.ndarray
        Boolean array with only the Nth largest structure set to True.

    Raises
    ------
    ValueError
        If ``binary_array`` contains no structures or ``n`` exceeds the number
        of structures.
    """
    if n < 1:
        raise ValueError('n must be >= 1')
    labelled_array = label(binary_array, structure)[0]
    cloud_values = labelled_array[labelled_array != 0]  # remove background
    if cloud_values.size == 0:
        raise ValueError('binary_array contains no structures')
    values, counts = np.unique(cloud_values, return_counts=True)
    sorted_indices = np.argsort(counts)[::-1]
    if n > len(values):
        raise ValueError(f'Requested n={n} but only {len(values)} structures exist')
    selected = values[sorted_indices[n - 1]]
    return labelled_array == selected



def isolate_largest_structure(*args, **kwargs):
    """Removed. Use :func:`isolate_nth_largest_structure` instead."""
    raise NotImplementedError(
        'isolate_largest_structure has been renamed to '
        'isolate_nth_largest_structure(binary_array, n=1)'
    )



[docs]
def label_size(
    array: NDArray,
    variable: str = 'area',
    wrap: str | None = 'both',
    x_sizes: NDArray | None = None,
    y_sizes: NDArray | None = None
) -> NDArray:
    """
    Label structures with their size values.

    Creates a labelled array where each structure is labelled with its size
    (area, summed perimeter, width, or height) instead of a unique identifier.

    Parameters
    ----------
    array : np.ndarray
        Binary array of structures: 2-d array, padded with 0's or np.nan's.
    variable : str, default='area'
        Which variable to use for 'size'. Options: ``'area'``,
        ``'summed perimeter'``, ``'width'``, ``'height'``.
        ``'perimeter'`` is accepted but deprecated.
    wrap : str or None, default='both'
        Boundary wrapping options: None, 'sides', 'both'.
        If 'sides', connect structures that span the left/right edge.
        If 'both', connect structures that span all edges.
    x_sizes : np.ndarray, optional
        Pixel sizes in x direction. If None, assume all sizes are 1.
    y_sizes : np.ndarray, optional
        Pixel sizes in y direction. If None, assume all sizes are 1.

    Returns
    -------
    np.ndarray
        Array where structures are labelled with their size value and background is 0.

    Raises
    ------
    ValueError
        If variable or wrap is not a supported value.

    Notes
    -----
    If x_sizes or y_sizes are not uniform, the width will be the sum of the average
    pixel widths of the pixels in the column and in the object. Similarly, the height
    will be the sum of the average pixel heights of the pixels in the row and in the object.
    """
    binary = np.where(np.isnan(array), 0, array).astype(bool)
    labelled_array, n_structures = label(binary, output=np.float32)

    if variable == 'perimeter':
        warnings.warn(
            "variable='perimeter' is deprecated, use 'summed perimeter' instead.",
            DeprecationWarning, stacklevel=2,
        )
        variable = 'summed perimeter'
    if variable not in ['area', 'summed perimeter', 'width', 'height']:
        raise ValueError(f'variable={variable} not supported (supported values are "area", "summed perimeter", "width", "height")')

    if x_sizes is None:
        x_sizes = np.ones_like(labelled_array)
    if y_sizes is None:
        y_sizes = np.ones_like(labelled_array)

    if wrap is None:
        pass
    elif wrap == 'sides':
        # set those on right to the same i.d. as those on left
        for j, value in enumerate(labelled_array[:, 0]):
            if value != 0:
                if labelled_array[j, labelled_array.shape[1] - 1] != 0 and labelled_array[j, labelled_array.shape[1] - 1] != value:
                    labelled_array[labelled_array == labelled_array[j, labelled_array.shape[1] - 1]] = value

    if wrap is None:
        pass
    elif wrap == 'both' or wrap == 'sides':
        labelled_array = _merge_periodic_labels(labelled_array, wrap)
    else:
        raise ValueError(f'wrap={wrap} not supported')

    # Flatten arrays to find their indices.
    values = np.sort(labelled_array.ravel())
    original_locations = np.argsort(labelled_array.ravel())
    indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T

    labelled_array[np.isnan(array)] = np.nan  # Turn this back to nan so perimeter along it is not included
    split_here = np.roll(values, shift=-1) - values  # Split where the values changed.
    split_here[-1] = 0  # Last value rolled over from first

    separated_structure_indices = np.split(indices_2d, np.where(split_here != 0)[0] + 1)
    separated_structure_indices = separated_structure_indices[1:]  # Remove the locations that were 0 (not structure)
    if len(separated_structure_indices) == 0:
        return np.zeros(array.shape, dtype=int)

    labelled_with_sizes = np.zeros(array.shape, dtype=np.float32)

    # must use numba.typed.List here for Numba compatibility
    # https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list
    labelled_with_sizes = _label_size_helper(labelled_array, List(separated_structure_indices), labelled_with_sizes, variable, x_sizes, y_sizes)
    return labelled_with_sizes



@numba.njit()
def _label_size_helper(labelled_array, separated_structure_indices, labelled_with_sizes, variable, x_sizes, y_sizes):
    for indices in separated_structure_indices:
        perimeter = 0
        area = 0

        y_coords_structure = np.array([c[0] for c in indices])
        x_coords_structure = np.array([c[1] for c in indices])
        unique_y_coords = []
        unique_x_coords = []
        height = 0
        width = 0

        for (i, j) in indices:
            # Height, Width
            if i not in unique_y_coords:
                unique_y_coords.append(i)
                mask = (y_coords_structure == i)
                y_sizes_here = []
                for loc, take in enumerate(mask):
                    if take:
                        y_sizes_here.append(y_sizes[y_coords_structure[loc], x_coords_structure[loc]])
                y_sizes_here = np.array(y_sizes_here)
                height += np.mean(y_sizes_here)
            if j not in unique_x_coords:
                unique_x_coords.append(j)
                mask = (x_coords_structure == j)
                x_sizes_here = []
                for loc, take in enumerate(mask):
                    if take:
                        x_sizes_here.append(x_sizes[y_coords_structure[loc], x_coords_structure[loc]])
                x_sizes_here = np.array(x_sizes_here)
                width += np.mean(x_sizes_here)

            # Perimeter:
            if i != labelled_array.shape[0] - 1 and labelled_array[i + 1, j] == 0:
                perimeter += x_sizes[i, j]
            elif i == labelled_array.shape[0] - 1 and labelled_array[0, j] == 0:
                perimeter += x_sizes[i, j]

            if i != 0 and labelled_array[i - 1, j] == 0:
                perimeter += x_sizes[i, j]
            elif i == 0 and labelled_array[labelled_array.shape[0] - 1, j] == 0:
                perimeter += x_sizes[i, j]

            if j != labelled_array.shape[1] - 1 and labelled_array[i, j + 1] == 0:
                perimeter += y_sizes[i, j]
            elif j == labelled_array.shape[1] - 1 and labelled_array[i, 0] == 0:
                perimeter += y_sizes[i, j]

            if j != 0 and labelled_array[i, j - 1] == 0:
                perimeter += y_sizes[i, j]
            elif j == 0 and labelled_array[i, labelled_array.shape[1] - 1] == 0:
                perimeter += y_sizes[i, j]

            # Area:
            area += y_sizes[i, j] * x_sizes[i, j]

        for (i, j) in indices:
            if variable == 'summed perimeter':
                labelled_with_sizes[i, j] = perimeter
            elif variable == 'area':
                labelled_with_sizes[i, j] = area
            elif variable == 'width':
                labelled_with_sizes[i, j] = width
            elif variable == 'height':
                labelled_with_sizes[i, j] = height

    return labelled_with_sizes