Source code for objscale._fractal_dimensions

from __future__ import annotations

import numpy as np
from numpy.typing import NDArray
import numba
from numba.typed import List
from scipy.ndimage import label
import warnings
from warnings import warn
from ._object_analysis import (
    label_structures,
    _merge_periodic_labels,
    remove_structures_touching_border_nan,
    remove_structure_holes,
    get_structure_areas,
    get_structure_perimeters,
    get_structure_height_width,
)
from ._utils import linear_regression, encase_in_value

__all__ = [
    'ensemble_correlation_dimension',
    'ensemble_box_dimension',
    'ensemble_information_dimension',
    'ensemble_box_renyi_dimension',
    'ensemble_sandbox_renyi_dimension',
    'individual_fractal_dimension',
    'get_coords_of_boundaries',
    'get_locations_from_pixel_sizes',
    'coarsen_array',
    'total_perimeter',
    'total_number',
    'individual_correlation_dimension',
    'isolate_nth_largest_structure',
    'label_size',
]


@numba.njit(parallel=True, fastmath=True)
def _box_sum_2d(arr: NDArray, factor: int) -> NDArray:
    """Sum a 2D array into ``(h//factor, w//factor)`` boxes of size ``factor``.

    Parallel over output rows; each thread reads a contiguous strip of input
    rows so the inner loop runs against private L2 (no strided cache aliasing
    on Zen 5, no thread contention). Caller is responsible for trimming the
    input so ``arr.shape[0]`` and ``arr.shape[1]`` are exact multiples of
    ``factor``.
    """
    h, w = arr.shape
    out_h = h // factor
    out_w = w // factor
    out = np.zeros((out_h, out_w), dtype=np.int64)
    for i in numba.prange(out_h):
        i0 = i * factor
        for j in range(out_w):
            j0 = j * factor
            s = 0
            for di in range(factor):
                for dj in range(factor):
                    s += arr[i0 + di, j0 + dj]
            out[i, j] = s
    return out


[docs] def ensemble_correlation_dimension( arrays: NDArray | list[NDArray], x_sizes: NDArray | None = None, y_sizes: NDArray | None = None, minlength: str | float = 'auto', maxlength: str | float = 'auto', interior_circles_only: bool = False, return_C_l: bool = False, bins: NDArray | int | None = None, point_reduction_factor: float = 1, nbins: int = 50 ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]: """ Calculate the correlation dimension D where C_l ∝ l^D for binary arrays. The resulting dimension is for the set of object edge points. This function is a thin wrapper around :func:`ensemble_sandbox_renyi_dimension` at ``q=2``: the Grassberger-Procaccia correlation integral is exactly the ``q=2`` case of the sandbox partition function (``M_i^{q-1} = M_i`` and ``sum_i M_i`` is the pair count). The function is preserved under its historical name because Grassberger-Procaccia is the standard designation for the ``q=2`` correlation dimension. For arbitrary ``q``, call :func:`ensemble_sandbox_renyi_dimension` directly. Parameters ---------- arrays : list or np.ndarray List of binary arrays to calculate correlation dimension of. x_sizes : np.ndarray, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. This single grid is used for every array in ``arrays``. y_sizes : np.ndarray, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. This single grid is used for every array in ``arrays``. minlength : str or float, default='auto' Minimum length scale for correlation calculation. If 'auto', uses 8 times the minimum pixel size. maxlength : str or float, default='auto' Maximum length scale for correlation calculation. If 'auto', uses 0.33 times the minimum array dimension. interior_circles_only : bool, default=False If True, only use circle centers that are at least maxlength distance from all array edges to avoid boundary effects. Defaults to False; see the section "Correlation dimension and domain boundary effects" at https://thomasddewitt.com/thought-cloud/too-many-exponents/index.html for the rationale. return_C_l : bool, default=False If True, return dimension, error, bins, C_l. Otherwise, return dimension, error. bins : None, int, or array-like, optional Values of l to use for the regression. Can be: - None: automatically calculate as logarithmically spaced intervals between 3*minimum length and the array width or height using nbins points - int: number of logarithmically spaced bins to generate automatically - array-like: explicit bin edges to use point_reduction_factor : float, default=1 Draw N/point_reduction_factor circles, where N is the total number of available circles. Choose the circle centers randomly. Must be >= 1. nbins : int, default=50 Number of bins to use when bins=None or when bins is an int. Only used for automatic bin generation. Returns ------- dimension : float The correlation dimension. error : float Error estimate for the dimension (95% CI half-width). bins : np.ndarray, optional The bins used for calculation. Only returned if return_C_l=True. C_l : np.ndarray, optional The correlation integral values (= sandbox ``Z_{q=2}`` = ``sum_i M_i``). Only returned if return_C_l=True. Raises ------ ValueError If arrays contain NaN values, if pixel sizes are invalid, or if scale range is insufficient. See Also -------- ensemble_sandbox_renyi_dimension : The general sandbox-method Rényi family for arbitrary ``q``. This function is the ``q=2`` case. ensemble_box_renyi_dimension : The box-counting Rényi family. """ result = ensemble_sandbox_renyi_dimension( binary_arrays=arrays, q=2.0, set='edge', x_sizes=x_sizes, y_sizes=y_sizes, minlength=minlength, maxlength=maxlength, interior_circles_only=interior_circles_only, nbins=nbins, bins=bins, point_reduction_factor=point_reduction_factor, return_values=return_C_l, ) # Preserve the pre-refactor failure-mode contract: when the fit # returns nan, the historical `ensemble_correlation_dimension` emitted # a warning and returned ``(nan, nan, [nan], [nan])`` from # ``return_C_l=True``. The sandbox wrapper instead returns # ``(nan, nan, full_bins, full_Z)``; rewrap here so that # downstream code keyed to the old contract keeps working. if return_C_l: dimension, error, bins_used, Z = result if not np.isfinite(dimension): warn('Not enough data to estimate correlation dimension, returning nan') return np.nan, np.nan, np.array([np.nan]), np.array([np.nan]) return dimension, error, bins_used, Z dimension, error = result if not np.isfinite(dimension): warn('Not enough data to estimate correlation dimension, returning nan') return dimension, error
[docs] def individual_correlation_dimension( array: NDArray, n: int = 1, x_sizes: NDArray | None = None, y_sizes: NDArray | None = None, minlength: str | float = 'auto', maxlength: str | float = 'auto', return_C_l: bool = False, point_reduction_factor: float = 1, nbins: int = 50, filled: bool = True, ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]: """ Calculate the correlation dimension of the Nth largest structure in an array. Removes border-touching structures, isolates the Nth largest remaining structure, crops to its bounding box, and computes the correlation dimension via :func:`ensemble_correlation_dimension`. Parameters ---------- array : np.ndarray 2-D binary array. May optionally have np.nan at borders; if not, a NaN border is added internally. n : int, default=1 Which structure to analyze, ranked by pixel count (1 = largest). x_sizes : np.ndarray, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. y_sizes : np.ndarray, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. minlength : str or float, default='auto' Minimum length scale for correlation calculation. If 'auto', uses 8 times the minimum pixel size. maxlength : str or float, default='auto' Maximum length scale for correlation calculation. If 'auto', uses ``0.33 * max(bbox_height, bbox_width)`` of the isolated structure in physical units. return_C_l : bool, default=False If True, return dimension, error, bins, C_l. Otherwise, return dimension, error. filled : bool, default=True If True, fill interior holes in the isolated structure before computing the correlation dimension, so that only the outer boundary contributes. If False, holes are left as-is and interior boundaries are included. point_reduction_factor : float, default=1 Draw N/point_reduction_factor circles, where N is the total number of available circles. Must be >= 1. nbins : int, default=50 Number of logarithmically spaced bins for the correlation integral. Returns ------- dimension : float The correlation dimension. error : float Error estimate for the dimension (95% confidence interval). bins : np.ndarray, optional The bins used for calculation. Only returned if return_C_l=True. C_l : np.ndarray, optional The correlation integral values. Only returned if return_C_l=True. Raises ------ ValueError If array is not 2-D, n < 1, or n exceeds the number of available structures after border removal. """ if array.ndim != 2: raise ValueError('array must be 2-dimensional') if n < 1: raise ValueError('n must be >= 1') # Pad with NaN border if not already present if not np.any(np.isnan(array)): array = np.pad(array.astype(np.float32), pad_width=1, mode='constant', constant_values=np.nan) if x_sizes is not None: x_sizes = np.pad(x_sizes, pad_width=1, mode='edge') if y_sizes is not None: y_sizes = np.pad(y_sizes, pad_width=1, mode='edge') # Remove border-touching structures and clean NaN cleaned = remove_structures_touching_border_nan(array) binary = np.nan_to_num(cleaned, nan=0.0).astype(bool) # Isolate the Nth largest structure isolated = isolate_nth_largest_structure(binary, n=n) # Optionally fill interior holes so only the outer boundary contributes if filled: isolated = remove_structure_holes(isolated.astype(float)).astype(bool) # Crop to bounding box rows = np.any(isolated, axis=1) cols = np.any(isolated, axis=0) rmin, rmax = np.where(rows)[0][[0, -1]] cmin, cmax = np.where(cols)[0][[0, -1]] cropped = isolated[rmin:rmax + 1, cmin:cmax + 1].astype(np.float32) # Pad with 1px of zeros to prevent toroidal wrap artifacts padded = np.pad(cropped, pad_width=1, mode='constant', constant_values=0) # Handle pixel sizes if x_sizes is not None: cropped_x = x_sizes[rmin:rmax + 1, cmin:cmax + 1] cropped_x = np.pad(cropped_x, pad_width=1, mode='edge') else: cropped_x = None if y_sizes is not None: cropped_y = y_sizes[rmin:rmax + 1, cmin:cmax + 1] cropped_y = np.pad(cropped_y, pad_width=1, mode='edge') else: cropped_y = None # Compute maxlength from crop dimensions if auto if maxlength == 'auto': if cropped_x is not None and cropped_y is not None: locs_x, locs_y = get_locations_from_pixel_sizes(cropped_x, cropped_y) h, w = cropped_x.shape phys_width = locs_x[h // 2, w - 1] - locs_x[h // 2, 0] phys_height = locs_y[h - 1, w // 2] - locs_y[0, w // 2] maxlength = 0.33 * max(phys_width, phys_height) else: crop_h, crop_w = cropped.shape maxlength = 0.33 * float(max(crop_h, crop_w)) return ensemble_correlation_dimension( [padded], x_sizes=cropped_x, y_sizes=cropped_y, minlength=minlength, maxlength=maxlength, interior_circles_only=False, return_C_l=return_C_l, point_reduction_factor=point_reduction_factor, nbins=nbins, )
def _one_sided_edge_mask(binary: NDArray) -> NDArray: """One-sided 4-connected edge mask: 1-pixels with at least one 0-neighbor. Pixels on the array boundary count as having a "0 neighbor" outside the domain, matching the convention of `set='ones'` for boundary handling. Returns an int8 mask. """ b = binary.astype(np.int8, copy=False) h, w = b.shape # neighbor[i,j] = OR of (b[i,j] == 1 AND any 4-neighbor == 0) has_zero_nbr = np.zeros((h, w), dtype=bool) # Up neighbor (treat domain edge as 0) has_zero_nbr[1:, :] |= (b[:-1, :] == 0) has_zero_nbr[0, :] = True # Down has_zero_nbr[:-1, :] |= (b[1:, :] == 0) has_zero_nbr[-1, :] = True # Left has_zero_nbr[:, 1:] |= (b[:, :-1] == 0) has_zero_nbr[:, 0] = True # Right has_zero_nbr[:, :-1] |= (b[:, 1:] == 0) has_zero_nbr[:, -1] = True return ((b == 1) & has_zero_nbr).astype(np.int8) def _box_renyi_from_set( set_arrays: list[NDArray], q_arr: NDArray, box_sizes: NDArray, box_origin_shift: tuple[float, float], ) -> tuple[NDArray, NDArray, NDArray]: """Core box-Rényi routine: count 1-pixels per box, fit slopes. Parameters ---------- set_arrays : list of int8 arrays Binary arrays where 1 = pixel in the set being measured. The caller is responsible for whatever preprocessing turns the user's input into a "set" (e.g. one-sided edge mask for ``set='edge'``). q_arr : np.ndarray 1-D array of Rényi orders. box_sizes : np.ndarray Integer box sizes (in pixels), already filtered to be valid for the input shapes. box_origin_shift : tuple of float ``(sx, sy)`` fractional shifts of the box grid origin, in units of the current box size. Returns ------- D_q : np.ndarray, shape (len(q_arr),) Estimated Rényi dimension for each ``q``. err : np.ndarray, shape (len(q_arr),) 95% CI half-width for each estimate. partition : np.ndarray, shape (len(q_arr), len(box_sizes)) ``Z_q^(p)(eps)`` for q != 1, ``-S_1(eps)`` for q == 1, in the same order as ``q_arr``. Useful for plotting and diagnostics. """ nq = q_arr.shape[0] nb = box_sizes.shape[0] partition = np.full((nq, nb), np.nan, dtype=np.float64) sx, sy = box_origin_shift for k, factor in enumerate(box_sizes): factor = int(factor) # Pool box counts and total interior pixels across the ensemble. # The caller (ensemble_box_renyi_dimension) is responsible for ensuring # that every array fits at every factor; if h_full or w_full ever # came out 0 here, the ensemble would silently shrink at that factor # and bias the slope. Treat that as an internal error. pooled_n: list[NDArray] = [] V_total = 0 # total interior pixel area summed across arrays for arr_idx, arr in enumerate(set_arrays): sy_pix = int(round(sy * factor)) sx_pix = int(round(sx * factor)) shifted = arr[sy_pix:, sx_pix:] h_full = (shifted.shape[0] // factor) * factor w_full = (shifted.shape[1] // factor) * factor if h_full == 0 or w_full == 0: raise AssertionError( f'Internal error: array {arr_idx} of shape {arr.shape} cannot ' f'fit a single box of size {factor} after shift {(sx_pix, sy_pix)}. ' f'The wrapper should have filtered this box size out.' ) trimmed = shifted[:h_full, :w_full] if not trimmed.flags['C_CONTIGUOUS']: trimmed = np.ascontiguousarray(trimmed) box_counts = _box_sum_2d(trimmed, factor) pooled_n.append(box_counts.ravel()) V_total += h_full * w_full if not pooled_n or V_total == 0: continue n = np.concatenate(pooled_n) n_sum = float(n.sum()) if n_sum <= 0: continue for qi in range(nq): q = float(q_arr[qi]) if abs(q - 1.0) < 1e-10: # Entropy form: requires probability normalization (sum = 1). # Use log10 so the slope vs log10(eps) is directly -D_1. p = n[n > 0] / n_sum S1 = -float(np.sum(p * np.log10(p))) partition[qi, k] = S1 else: # Geometric / interior-pixel-area normalization. # Z_q = sum_i (n_i / V) ** q if q <= 0: n_pos = n[n > 0] Zq = float(np.sum((n_pos / V_total) ** q)) else: Zq = float(np.sum((n / V_total) ** q)) partition[qi, k] = Zq # Fit slopes log_eps = np.log10(box_sizes.astype(np.float64)) D_q = np.full(nq, np.nan, dtype=np.float64) err = np.full(nq, np.nan, dtype=np.float64) for qi in range(nq): q = float(q_arr[qi]) y = partition[qi].copy() if abs(q - 1.0) < 1e-10: # S_1 itself is linear in log_eps with slope -D_1. yfit = y else: # Z_q is non-negative; log it. yfit = np.where(y > 0, np.log10(y), np.nan) (slope, _), (slope_err, _) = linear_regression(log_eps, yfit) if not np.isfinite(slope): continue if abs(q - 1.0) < 1e-10: D_q[qi] = -slope err[qi] = slope_err else: D_q[qi] = slope / (q - 1.0) err[qi] = slope_err / abs(q - 1.0) return D_q, err, partition def ensemble_box_renyi_dimension( binary_arrays: NDArray | list[NDArray], q: float | NDArray = 0.0, set: str = 'edge', box_sizes: str | NDArray = 'default', max_box_size: int | None = None, min_box_size: int = 8, box_origin_shift: tuple[float, float] = (0.0, 0.0), return_values: bool = False, ): """Calculate the generalized Rényi dimension D_q via box counting. The Rényi dimensions form a one-parameter family of fractal dimensions indexed by an order ``q``, generalizing the box-counting dimension (``q=0``), information dimension (``q=1``), and correlation dimension (``q=2``). For a scale-invariant set, the partition function .. math:: Z_q(\\varepsilon) = \\sum_i p_i^q \\propto \\varepsilon^{(q-1) D_q} where ``p_i`` is the (probability or density) measure of the set in box ``i`` of size ``\\varepsilon``. The slope of ``log Z_q`` vs ``log \\varepsilon`` is ``(q-1) D_q``. At ``q=1`` the formula has a removable singularity that resolves to the Shannon entropy form ``S_1 = -sum p_i log p_i``, which scales as ``-D_1 log \\varepsilon``. For monofractal sets (e.g. level sets of fractional Brownian motion), ``D_q`` is constant in ``q``. For multifractal sets, ``D_q`` is a monotonically decreasing function of ``q`` whose values quantify the distribution's heterogeneity at different moment orders. Parameters ---------- binary_arrays : np.ndarray or list of np.ndarray 2D binary arrays. May contain 0/1 integers, booleans, or floats. q : float or np.ndarray, default=0.0 Rényi order(s). May be a scalar or a 1-D array of values. ``q=0`` gives the box-counting dimension; ``q=1`` gives the information dimension; ``q=2`` gives the correlation dimension. set : {'edge', 'ones'}, default='edge' Which set is being measured. * ``'edge'``: the boundary of the binary array, computed as a one-sided 4-connected edge mask (1-pixels that have at least one 0-neighbor; pixels on the array border count as having a "0 neighbor" outside the domain). * ``'ones'``: the set of 1-pixels themselves, no preprocessing. box_sizes : 'default' or array-like, default='default' Integer box sizes in pixels. If 'default', powers of 2 from ``min_box_size`` up to ``max_box_size``. max_box_size : int or None, default=None Largest box size in pixels. If None, uses the smaller array dimension (i.e. the largest box that fits at all). min_box_size : int, default=8 Smallest box size in pixels. The default of 8 keeps the fit away from pixel-discretization noise at the smallest scales. box_origin_shift : tuple of (float, float), default=(0.0, 0.0) Fractional shift ``(sx, sy)`` of the box-grid origin, in units of the current box size. At each box size ``factor``, the actual integer pixel shift is ``int(round(sx * factor))`` along x and ``int(round(sy * factor))`` along y. Used to probe sensitivity to the box-grid alignment. return_values : bool, default=False If True, also return the partition function values used in the fit. Returns ------- D_q : float or np.ndarray Estimated Rényi dimension. Scalar if ``q`` was scalar, array otherwise. err : float or np.ndarray 95% CI half-width for the estimate(s). box_sizes : np.ndarray, optional The box sizes used. Returned only if ``return_values=True``. partition : np.ndarray, optional The partition function values: ``Z_q^(p)(eps)`` for ``q != 1`` (with geometric normalization ``p_i = n_i / V``, where ``V`` is the total interior pixel area at this ``eps``), and the Shannon entropy ``S_1(eps)`` for ``q == 1``. Shape ``(len(q), len(box_sizes))`` if ``q`` is array, ``(len(box_sizes),)`` if scalar. Returned only if ``return_values=True``. Notes ----- **Interior boxes only.** Boxes that would extend past the domain edge are not counted. The function trims each input array to a multiple of the current box size (after applying any ``box_origin_shift``) and discards the remainder. This matches the philosophy of ``ensemble_correlation_dimension(interior_circles_only=True)``. **Normalization.** For ``q != 1``, ``p_i = n_i / V(\\varepsilon)`` where ``V`` is the total interior pixel area at this box size — a purely geometric normalization that removes coverage-fluctuation bias from the fitted slope. For ``q == 1``, the Shannon entropy requires probability normalization ``p_i = n_i / sum_j n_j``; the two coincide for uniform-density fields. **Boundary convention for** ``set='edge'``. The edge mask is one-sided: a pixel is in the edge set iff it is 1 and has at least one 0-neighbor. This differs slightly from the convention used by older versions of ``ensemble_box_dimension``, which counted a coarsened cell as "edge" iff it contained both 0s and 1s. See Also -------- ensemble_box_dimension : Equivalent to ``q=0``. ensemble_information_dimension : Equivalent to ``q=1``. ensemble_correlation_dimension : Theoretically equivalent to ``q=2`` for the same set, computed via the Grassberger-Procaccia pairwise method. """ # Normalize inputs if isinstance(binary_arrays, np.ndarray): binary_arrays = [binary_arrays] if len(binary_arrays) == 0: raise ValueError('binary_arrays must be non-empty') if np.any([np.any(np.isnan(arr)) for arr in binary_arrays]): raise ValueError('arrays must not contain NaN values') if set not in ('edge', 'ones'): raise ValueError(f'set={set!r} not supported (supported values are "edge" or "ones")') q_scalar = np.isscalar(q) q_arr = np.atleast_1d(np.asarray(q, dtype=np.float64)) # Resolve box_sizes. The maximum box size is bounded by the *smallest* # array in the ensemble (after subtracting any box_origin_shift overhead), # so that every array contributes a fully-tiled interior region at every # box size. Otherwise, smaller arrays would silently drop out at large # box sizes and bias the slope. if isinstance(box_sizes, str): if box_sizes != 'default': raise ValueError(f'box_sizes={box_sizes} not supported') box_sizes_arr = 2 ** np.arange(1, 15) else: box_sizes_arr = np.asarray(box_sizes) smallest_dim = min(min(arr.shape) for arr in binary_arrays) # box_origin_shift can push the start past the first row/col by up to # factor-1 pixels, so the effective "available" extent is one factor # short in the worst case. Require shape >= factor + (factor - 1) = # 2*factor - 1, i.e. factor <= (shape + 1) // 2 in the worst case. # For shift=(0, 0) (the common case), the bound is just factor <= shape. sx, sy = box_origin_shift max_shift_frac = max(abs(sx), abs(sy)) if max_shift_frac >= 1.0: raise ValueError( f'box_origin_shift fractions must lie in [0, 1), got {box_origin_shift}' ) # Worst-case shift in pixels at factor F is round(max_shift_frac * F). # Require F + round(max_shift_frac * F) <= smallest_dim. # Rearranging: F <= smallest_dim / (1 + max_shift_frac). auto_max = int(smallest_dim / (1.0 + max_shift_frac)) if max_box_size is None: max_box_size_eff = auto_max else: max_box_size_eff = min(int(max_box_size), auto_max) box_sizes_arr = box_sizes_arr[box_sizes_arr <= max_box_size_eff] box_sizes_arr = box_sizes_arr[box_sizes_arr >= min_box_size] box_sizes_arr = np.unique(box_sizes_arr.astype(np.int64)) if box_sizes_arr.size < 3: raise ValueError( f'Need at least 3 valid box sizes for a slope fit, got {box_sizes_arr.size}. ' f'Smallest array dimension is {smallest_dim}, max usable box size is ' f'{max_box_size_eff}. Reduce min_box_size, raise max_box_size, increase ' f'array size, or pass an explicit box_sizes array.' ) # Build the set arrays set_arrays: list[NDArray] = [] for arr in binary_arrays: if arr.ndim != 2: raise ValueError('binary_arrays must be 2-dimensional') if set == 'ones': set_arrays.append((np.asarray(arr) > 0).astype(np.int8)) else: # 'edge' set_arrays.append(_one_sided_edge_mask(np.asarray(arr) > 0)) D_q, err, partition = _box_renyi_from_set( set_arrays, q_arr, box_sizes_arr, box_origin_shift ) if q_scalar: D_q_out = float(D_q[0]) err_out = float(err[0]) partition_out = partition[0] else: D_q_out = D_q err_out = err partition_out = partition if return_values: return D_q_out, err_out, box_sizes_arr, partition_out return D_q_out, err_out
[docs] def ensemble_box_dimension( binary_arrays: NDArray | list[NDArray], set: str = 'edge', max_box_size: int | None = None, min_box_size: int = 8, box_sizes: str | NDArray = 'default', return_values: bool = False ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]: """ Calculate the ensemble box-counting dimension of binary arrays. Equivalent to ``ensemble_box_renyi_dimension(..., q=0)``. Kept as a convenience wrapper because box counting is the most familiar special case. Parameters ---------- binary_arrays : list of np.ndarray or np.ndarray A list of 2D binary arrays or a single 2D binary array. set : str, default='edge' Specifies which set to consider for box counting: - 'edge': Box dimension of the set of boundaries (1-pixels with at least one 0-neighbor). - 'ones': Box dimension of the set of 1-pixels. max_box_size : int or None, default=None Largest box size in pixels. If None, uses the smaller array dimension. min_box_size : int, default=8 Smallest box size in pixels. The default of 8 keeps the fit away from pixel-discretization noise at the smallest scales. box_sizes : array-like or 'default', default='default' Box sizes used. If 'default', uses powers of 2 from ``min_box_size`` up to ``max_box_size``. return_values : bool, default=False If True, return additional data used in the calculation. Returns ------- dimension : float The estimated box-counting dimension. error : float The error of the estimate. box_sizes : np.ndarray, optional Box sizes used. Only returned if return_values=True. number_boxes : np.ndarray, optional Number of nonempty boxes (pooled across the input ensemble) at each box size. Only returned if return_values=True. Raises ------ ValueError If an unsupported value is provided for 'set' or if arrays contain NaN values. """ return ensemble_box_renyi_dimension( binary_arrays, q=0.0, set=set, box_sizes=box_sizes, max_box_size=max_box_size, min_box_size=min_box_size, return_values=return_values, )
def ensemble_information_dimension( binary_arrays: NDArray | list[NDArray], method: str = 'sandbox', set: str = 'edge', return_values: bool = False, **kwargs, ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]: """ Calculate the ensemble information dimension D_1 of binary arrays. The information dimension is the ``q=1`` member of the Rényi-dimension family. It has a removable singularity at ``q=1`` that resolves to the Shannon entropy form .. math:: S_1(\\varepsilon) = -\\sum_i p_i \\log p_i \\sim -D_1 \\log \\varepsilon \\qquad (\\text{box estimator}) or, equivalently, the set-centered log average .. math:: \\langle \\log M(r) \\rangle \\sim D_1 \\log r \\qquad (\\text{sandbox estimator}). Both estimators converge to the same ``D_1``. Two methods are supported: * ``method='sandbox'`` (default): forwards to :func:`ensemble_sandbox_renyi_dimension` at ``q=1``. Set-centered balls at continuous radii. Lower grid-quantization noise; recommended for most use cases. * ``method='box'``: forwards to :func:`ensemble_box_renyi_dimension` at ``q=1``. Fixed-grid box counting with the Shannon entropy form. Parameters ---------- binary_arrays : list of np.ndarray or np.ndarray A list of 2D binary arrays or a single 2D binary array. method : {'sandbox', 'box'}, default='sandbox' Which estimator to use. ``'sandbox'`` is the default because it avoids the grid-alignment noise that box counting suffers from at ``q=1``. set : {'edge', 'ones'}, default='edge' Which set to measure. See :func:`ensemble_sandbox_renyi_dimension` or :func:`ensemble_box_renyi_dimension`. return_values : bool, default=False If True, also return the bins and partition values used in the fit. **kwargs Method-specific options forwarded to the underlying estimator. For ``method='sandbox'``: ``x_sizes``, ``y_sizes``, ``minlength``, ``maxlength``, ``interior_circles_only``, ``nbins``, ``bins``, ``point_reduction_factor``. For ``method='box'``: ``max_box_size``, ``min_box_size``, ``box_sizes``, ``box_origin_shift``. Returns ------- dimension : float The estimated information dimension. error : float The 95% CI half-width. bins : np.ndarray, optional Distance bins (sandbox) or box sizes (box). Only returned if ``return_values=True``. partition : np.ndarray, optional Partition function values: ``\\langle log10 M(r)\\rangle`` (sandbox) or Shannon entropy ``S_1(eps)`` (box). Only returned if ``return_values=True``. See Also -------- ensemble_sandbox_renyi_dimension : Sandbox-method Rényi family. ensemble_box_renyi_dimension : Box-counting Rényi family. ensemble_box_dimension : The ``q=0`` box-counting wrapper. ensemble_correlation_dimension : The ``q=2`` sandbox-method wrapper. """ if method == 'sandbox': return ensemble_sandbox_renyi_dimension( binary_arrays, q=1.0, set=set, return_values=return_values, **kwargs, ) elif method == 'box': return ensemble_box_renyi_dimension( binary_arrays, q=1.0, set=set, return_values=return_values, **kwargs, ) else: raise ValueError( f"method={method!r} not supported (use 'sandbox' or 'box')" ) def ensemble_sandbox_renyi_dimension( binary_arrays: NDArray | list[NDArray], q: float | NDArray = 2.0, set: str = 'edge', x_sizes: NDArray | None = None, y_sizes: NDArray | None = None, minlength: str | float = 'auto', maxlength: str | float = 'auto', interior_circles_only: bool = False, nbins: int = 50, bins: NDArray | int | None = None, point_reduction_factor: float = 1, return_values: bool = False, ): """Sandbox-method estimate of the Rényi dimension D_q of binary arrays. For each set point i (the "sandbox center"), count the number M_i(r) of other set points within distance r, then aggregate .. math:: Z_q(r) = \\sum_i M_i(r)^{q-1} \\quad (q \\ne 1) Z_1(r) = \\sum_i \\log_{10} M_i(r) \\quad (q = 1, set-point average) For a multifractal set, ``\\langle M(r)^{q-1}\\rangle \\sim r^{(q-1) D_q}``, so the slope of ``log Z_q`` vs ``log r`` is ``(q-1) D_q`` for ``q != 1``, recovered as ``D_q = slope / (q - 1)``. At ``q = 1`` the formula has a removable singularity that resolves to ``\\langle \\log M(r)\\rangle \\sim D_1 \\log r``, fit directly. The sandbox method is a strict generalization of the Grassberger-Procaccia correlation integral to arbitrary ``q``: at ``q = 2``, ``M_i^{q-1} = M_i`` and ``sum_i M_i = `` the pair count, so ``D_2`` is the Grassberger-Procaccia correlation dimension. The function :func:`ensemble_correlation_dimension` is now a thin wrapper around this one at ``q=2``. Compared to the box-counting Rényi family (:func:`ensemble_box_renyi_dimension`), sandbox samples the partition function at continuous radii instead of fixed-grid tiles, and uses a set-centered measure that bounds ``M_i >= 1`` and so behaves better at ``q < 1`` where box counting suffers from grid-quantization noise. References ---------- Tél, Fülöp, Vicsek 1989, *Physica A* 159, 155–166. Vicsek 1992, *Fractal Growth Phenomena*, Ch. 3. Parameters ---------- binary_arrays : np.ndarray or list of np.ndarray 2D binary arrays. May contain 0/1 integers, booleans, or floats. q : float or np.ndarray, default=2.0 Rényi order(s). Scalar or 1-D array. ``q == 1`` (within ``1e-10``) is special-cased to the log form. set : {'edge', 'ones'}, default='edge' Which set is being measured. Same convention as :func:`ensemble_box_renyi_dimension`. * ``'edge'`` — one-sided 4-connected edge mask of the binary array (1-pixels with at least one 0-neighbor; pixels on the array border count as having a "0 neighbor" outside the domain). * ``'ones'`` — the set of 1-pixels itself. x_sizes, y_sizes : np.ndarray, optional Per-pixel physical sizes. If None, uniform unit pixels. minlength, maxlength : str or float, default='auto' Distance scale range. ``'auto'`` = ``8 * min pixel size`` and ``0.33 * min array dimension`` respectively. The default ``minlength`` of 8× pixel size keeps the fit away from pixel-discretization noise at the smallest scales. interior_circles_only : bool, default=False If True, only sandbox centers at least ``maxlength`` from every domain edge contribute, avoiding boundary truncation bias. Defaults to False; see the section "Correlation dimension and domain boundary effects" at https://thomasddewitt.com/thought-cloud/too-many-exponents/index.html for the rationale. nbins : int, default=50 Number of log-spaced distance bins (used when ``bins`` is None). bins : np.ndarray or int, optional Explicit bin array (ascending physical distances) or an int (number of log-spaced bins). point_reduction_factor : float, default=1 Subsample sandbox centers by this factor (must be ``>= 1``). return_values : bool, default=False If True, return ``(D_q, err, bins, Z)`` instead of ``(D_q, err)``. For scalar ``q``, ``Z`` has shape ``(B,)``. For vector ``q``, ``Z`` has shape ``(len(q), B)``. At the ``q=1`` index, ``Z`` holds the per-center mean ``\\langle log10 M_i\\rangle`` (already normalized). Returns ------- D_q : float or np.ndarray Rényi dimension estimate(s). Scalar if ``q`` is scalar. err : float or np.ndarray 95% CI half-width. bins : np.ndarray, optional Distance bins used. Returned only if ``return_values=True``. Z : np.ndarray, optional Sandbox partition function values. See Also -------- ensemble_box_renyi_dimension : Box-based Rényi dimension family. ensemble_correlation_dimension : Thin wrapper around this function at ``q=2``, kept under its historical Grassberger-Procaccia name. ensemble_information_dimension : ``q=1`` wrapper, supports both ``method='sandbox'`` (default) and ``method='box'``. """ # ----- normalize inputs ----- if isinstance(binary_arrays, np.ndarray): binary_arrays = [binary_arrays] if len(binary_arrays) == 0: raise ValueError('binary_arrays must be non-empty') if set not in ('edge', 'ones'): raise ValueError(f"set={set!r} not supported (use 'edge' or 'ones')") if point_reduction_factor < 1: raise ValueError('point_reduction_factor must be >= 1') q_scalar = np.isscalar(q) q_arr = np.atleast_1d(np.asarray(q, dtype=np.float64)) Q = q_arr.shape[0] # Mask of q entries within tolerance of 1.0 (entropy-form special case). # Stored as a per-element bool so duplicate q==1 entries are all handled. is_q1 = np.abs(q_arr - 1.0) < 1e-10 # Use the first array's shape to set up the physical grid (caller is # responsible for passing arrays of compatible shape; preserved from GP). if x_sizes is None: x_sizes = np.ones(binary_arrays[0].shape, dtype=np.float32) if y_sizes is None: y_sizes = np.ones(binary_arrays[0].shape, dtype=np.float32) locations_x, locations_y = get_locations_from_pixel_sizes(x_sizes, y_sizes) h = x_sizes.shape[0] w = x_sizes.shape[1] # ----- resolve scale range ----- if maxlength == 'auto': maxlength = 0.33 * min( (locations_x[int(h / 2), w - 1] - locations_x[int(h / 2), 0]), (locations_y[h - 1, int(w / 2)] - locations_y[0, int(w / 2)]), ) if minlength == 'auto': minlength = 8 * min(np.nanmin(x_sizes), np.nanmin(y_sizes)) # ----- validate ----- if np.any([np.any(np.isnan(arr)) for arr in binary_arrays]): raise ValueError('arrays must not contain NaN values') if np.any(np.isnan(x_sizes)) or np.any(np.isnan(y_sizes)): raise ValueError('x_sizes and y_sizes cannot contain NaN values') if np.any(x_sizes <= 0) or np.any(y_sizes <= 0): raise ValueError('x_sizes and y_sizes must be positive') if bins is None: bins = np.geomspace(minlength, maxlength, nbins) elif isinstance(bins, int): bins = np.geomspace(minlength, maxlength, bins) bins = np.asarray(bins, dtype=np.float64) if bins[-1] <= bins[0]: raise ValueError( f'bin maximum length ({bins[-1]:.3f}) must be greater than bin ' f'minimum length ({bins[0]:.3f}); or if bins are passed, they ' f'must be increasing. Did you pass invalid values for ' f'minlength/maxlength?' ) if bins[-1] / bins[0] < 10: raise ValueError( f'Available scale ratio ({maxlength / minlength:.2f}) is less ' f'than 10. Need at least one order of magnitude separation for ' f'reliable dimension estimation.' ) if interior_circles_only: domain_width = locations_x[int(h / 2), w - 1] - locations_x[int(h / 2), 0] domain_height = locations_y[h - 1, int(w / 2)] - locations_y[0, int(w / 2)] interior_width = domain_width - 2 * maxlength interior_height = domain_height - 2 * maxlength min_pixel_x = np.min(x_sizes) min_pixel_y = np.min(y_sizes) interior_pixels_x = interior_width / min_pixel_x if min_pixel_x > 0 else 0 interior_pixels_y = interior_height / min_pixel_y if min_pixel_y > 0 else 0 if interior_pixels_x < 5 or interior_pixels_y < 5: raise ValueError( f'interior_circles_only=True requires that maxlength leaves a usable ' f'interior region, but maxlength={maxlength:.1f} with a domain of ' f'{domain_width:.1f} x {domain_height:.1f} leaves only ' f'{max(interior_pixels_x, 0):.0f} x {max(interior_pixels_y, 0):.0f} ' f'pixels of interior (need at least 5x5). Reduce maxlength or set ' f'interior_circles_only=False.' ) bins_sq = bins ** 2 max_bin = float(bins[-1]) # ----- per-array kernel call, accumulate Z_total ----- Z_total = np.zeros((Q, bins.shape[0]), dtype=np.float64) n_centers_total = 0 for array in binary_arrays: if np.any(array.shape != x_sizes.shape): raise ValueError( f'All arrays must be same shape as pixel sizes (currently ' f'{array.shape} and {x_sizes.shape}, respectively)' ) # Build the set mask, then extract pixel coordinates of set points. if set == 'edge': set_mask = _one_sided_edge_mask(np.asarray(array) > 0) else: # set == 'ones' set_mask = (np.asarray(array) > 0).astype(np.int8) all_set_coords = np.argwhere(set_mask > 0) if all_set_coords.shape[0] == 0: continue # Interior filter: keep set points at least maxlength from every edge if interior_circles_only: coord_locations_x = locations_x[all_set_coords[:, 0], all_set_coords[:, 1]] coord_locations_y = locations_y[all_set_coords[:, 0], all_set_coords[:, 1]] dist_to_left = coord_locations_x - locations_x[int(h / 2), 0] dist_to_right = locations_x[int(h / 2), w - 1] - coord_locations_x dist_to_top = coord_locations_y - locations_y[0, int(w / 2)] dist_to_bottom = locations_y[h - 1, int(w / 2)] - coord_locations_y min_dist_to_any_edge = np.minimum.reduce( [dist_to_left, dist_to_right, dist_to_top, dist_to_bottom] ) interior_mask = min_dist_to_any_edge >= maxlength sandbox_centers_idx = all_set_coords[interior_mask] else: sandbox_centers_idx = all_set_coords if sandbox_centers_idx.shape[0] == 0: continue # Subsample sandbox centers if point_reduction_factor > 1: n_keep = int(len(sandbox_centers_idx) / point_reduction_factor) if n_keep == 0: continue chosen = np.random.choice( np.arange(len(sandbox_centers_idx)), n_keep, replace=False ) sandbox_centers_idx = sandbox_centers_idx[chosen] # Convert to physical coordinates boundary_phys_x = locations_x[all_set_coords[:, 0], all_set_coords[:, 1]] boundary_phys_y = locations_y[all_set_coords[:, 0], all_set_coords[:, 1]] boundary_phys = np.column_stack([boundary_phys_x, boundary_phys_y]) del boundary_phys_x, boundary_phys_y center_phys_x = locations_x[sandbox_centers_idx[:, 0], sandbox_centers_idx[:, 1]] center_phys_y = locations_y[sandbox_centers_idx[:, 0], sandbox_centers_idx[:, 1]] center_phys = np.column_stack([center_phys_x, center_phys_y]) n_centers_here = center_phys.shape[0] del set_mask, all_set_coords, sandbox_centers_idx del center_phys_x, center_phys_y # Sort boundary points by physical y for the kernel's bbox trick sort_order = np.argsort(boundary_phys[:, 1]) sorted_boundary_phys = boundary_phys[sort_order] del sort_order, boundary_phys Z_total += _sandbox_partition( center_phys, sorted_boundary_phys, bins_sq, max_bin, q_arr, is_q1 ) n_centers_total += n_centers_here # At q=1 the kernel accumulates sum_i log10 M_i, which scales as # n_centers_total * D_1 * log10(r). Convert to per-center mean so the # slope vs log10(r) is D_1 directly. Apply to every q==1 entry. if n_centers_total > 0 and is_q1.any(): Z_total[is_q1, :] /= float(n_centers_total) # ----- linear regression per q ----- log_r = np.log10(bins) D_q = np.full(Q, np.nan, dtype=np.float64) err_arr = np.full(Q, np.nan, dtype=np.float64) for qi in range(Q): y = Z_total[qi].copy() q_val = float(q_arr[qi]) if is_q1[qi]: # Z[qi] is already <log10 M_i> linear in log10 r. y_fit = y else: y_fit = np.full_like(y, np.nan) pos = y > 0 y_fit[pos] = np.log10(y[pos]) (slope, _), (slope_err, _) = linear_regression(log_r, y_fit) if not np.isfinite(slope): continue if is_q1[qi]: D_q[qi] = slope err_arr[qi] = slope_err else: D_q[qi] = slope / (q_val - 1.0) err_arr[qi] = slope_err / abs(q_val - 1.0) # Unbox return shape if q_scalar: D_out = float(D_q[0]) err_out = float(err_arr[0]) Z_out = Z_total[0] else: D_out = D_q err_out = err_arr Z_out = Z_total if return_values: return D_out, err_out, bins, Z_out return D_out, err_out def ensemble_coarsening_dimension( arrays: NDArray | list[NDArray], x_sizes: NDArray | list[NDArray] | None = None, y_sizes: NDArray | list[NDArray] | None = None, cloudy_threshold: float = 0.5, min_pixels: int = 30, return_values: bool = False, coarsening_factors: str | NDArray = 'default', count_exterior: bool = False ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]: """ NOT RECOMMENDED due to ambiguity in coarsening a binary array Calculate the ensemble fractal dimension by coarsening image resolution and calculating total perimeter as a function of resolution. Parameters ---------- arrays : np.ndarray or list of np.ndarray Array, or list of arrays, to coarsen, apply cloudy_threshold to make binary, then calculate total perimeter. x_sizes : np.ndarray or list, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume x_sizes[i] corresponds to arrays[i]. y_sizes : np.ndarray or list, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume y_sizes[i] corresponds to arrays[i]. cloudy_threshold : float, default=0.5 Threshold for making arrays binary. min_pixels : int, default=30 Limit the coarsening factors such that coarsened matrices always have shape >= (min_pixels, min_pixels). return_values : bool, default=False If True, return additional data used in the calculation. coarsening_factors : str or array-like, default='default' Coarsening factors to use. If 'default', automatically determined. count_exterior : bool, default=False Whether to count exterior perimeter. Returns ------- D_e : float The ensemble fractal dimension. error : float Error estimate (95% confidence). coarsening_factors : np.ndarray, optional The coarsening factors used. Only returned if return_values=True. mean_total_perimeters : np.ndarray, optional Mean total perimeters. Only returned if return_values=True. """ raise NotImplementedError( "ensemble_coarsening_dimension has been removed due to ambiguity in " "coarsening binary arrays. Use ensemble_correlation_dimension (q=2), " "ensemble_sandbox_renyi_dimension (general q), or " "ensemble_box_dimension (q=0) instead." ) _UNSET = object() _INDIVIDUAL_METHODS = { 'filled perimeter vs filled area', 'summed perimeter vs unfilled area', 'filled perimeter vs width', 'filled perimeter vs height', 'summed perimeter vs width', 'summed perimeter vs height', }
[docs] def individual_fractal_dimension( arrays: NDArray | list[NDArray], x_sizes: NDArray | list[NDArray] | None = None, y_sizes: NDArray | list[NDArray] | None = None, min_length_scale: float = _UNSET, max_length_scale: float = _UNSET, bins: int | None = 30, return_values: bool = False, method: str = _UNSET, filled: bool = _UNSET, min_a: float = _UNSET, max_a: float = _UNSET, ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]: """ Calculate the individual fractal dimension Df of objects within arrays. The method uses linear regression on log(length scale) vs. log(perimeter), omitting structures touching the array edge. Parameters ---------- arrays : list of np.ndarray List of boolean 2D arrays. x_sizes : np.ndarray or list, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume x_sizes[i] corresponds to arrays[i]. y_sizes : np.ndarray or list, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume y_sizes[i] corresponds to arrays[i]. min_length_scale : float, default=3 Minimum length scale to include. Filters on the x-axis quantity: sqrt(area) for area methods, width or height for those methods. max_length_scale : float, default=np.inf Maximum length scale to include. bins : int or None, default=30 Number of bins along the log10 length scale for averaging. The regression is performed on the bin-averaged values. If None, fit on all individual points without binning. return_values : bool, default=False If True, return additional data used in the calculation. method : str, default='filled perimeter vs filled area' Which perimeter and length-scale combination to use. Options: - ``'filled perimeter vs filled area'``: fill holes, regress log(perimeter) vs log(sqrt(area)). Default. - ``'summed perimeter vs unfilled area'``: no hole filling, perimeter includes inner boundaries, area excludes holes. - ``'filled perimeter vs width'``: fill holes, regress log(perimeter) vs log(bounding-box width). - ``'filled perimeter vs height'``: fill holes, regress log(perimeter) vs log(bounding-box height). - ``'summed perimeter vs width'``: no hole filling, regress log(perimeter) vs log(bounding-box width). - ``'summed perimeter vs height'``: no hole filling, regress log(perimeter) vs log(bounding-box height). filled : bool, optional .. deprecated:: Use ``method`` instead. ``filled=True`` maps to ``'filled perimeter vs filled area'``; ``filled=False`` maps to ``'summed perimeter vs unfilled area'``. min_a : float, optional .. deprecated:: Use ``min_length_scale`` instead. Converted via ``sqrt(min_a)``. max_a : float, optional .. deprecated:: Use ``max_length_scale`` instead. Converted via ``sqrt(max_a)``. Returns ------- Df : float The individual fractal dimension. uncertainty : float Uncertainty estimate (95% confidence). log10_length_scale : np.ndarray, optional Log10 of the length-scale values (bin centers if bins is not None). Only returned if return_values=True. log10_p : np.ndarray, optional Log10 of perimeter values (bin means if bins is not None). Only returned if return_values=True. Raises ------ ValueError If array shapes don't match pixel size shapes, if an invalid method is given, or if deprecated and new parameters are mixed. """ # --- resolve deprecated 'filled' parameter --- if filled is not _UNSET and method is not _UNSET: raise ValueError("Cannot pass both 'filled' and 'method'.") if filled is not _UNSET: warnings.warn( "The 'filled' parameter is deprecated. Use method='filled perimeter" " vs filled area' or method='summed perimeter vs unfilled area'.", DeprecationWarning, stacklevel=2, ) method = ('filled perimeter vs filled area' if filled else 'summed perimeter vs unfilled area') elif method is _UNSET: method = 'filled perimeter vs filled area' if method not in _INDIVIDUAL_METHODS: raise ValueError( f"method={method!r} not recognized. Supported methods: " + ', '.join(sorted(_INDIVIDUAL_METHODS)) ) # --- resolve deprecated min_a / max_a --- if min_a is not _UNSET and min_length_scale is not _UNSET: raise ValueError("Cannot pass both 'min_a' and 'min_length_scale'.") if max_a is not _UNSET and max_length_scale is not _UNSET: raise ValueError("Cannot pass both 'max_a' and 'max_length_scale'.") if min_a is not _UNSET: warnings.warn( "The 'min_a' parameter is deprecated. Use 'min_length_scale' instead.", DeprecationWarning, stacklevel=2, ) min_length_scale = np.sqrt(min_a) if max_a is not _UNSET: warnings.warn( "The 'max_a' parameter is deprecated. Use 'max_length_scale' instead.", DeprecationWarning, stacklevel=2, ) max_length_scale = np.sqrt(max_a) if min_length_scale is _UNSET: min_length_scale = 3 if max_length_scale is _UNSET: max_length_scale = np.inf # --- parse method --- fill_holes = method.startswith('filled perimeter') if 'vs filled area' in method or 'vs unfilled area' in method: length_scale_type = 'sqrt_area' elif 'vs width' in method: length_scale_type = 'width' else: length_scale_type = 'height' # --- collect per-structure data --- length_scales, perimeters = [], [] if not isinstance(arrays, list): arrays = [arrays] if x_sizes is None: x_sizes = np.ones_like(arrays[0]) if y_sizes is None: y_sizes = np.ones_like(arrays[0]) for i in range(len(arrays)): array = arrays[i] if isinstance(x_sizes, list): xs = x_sizes[i] else: xs = x_sizes if isinstance(y_sizes, list): ys = y_sizes[i] else: ys = y_sizes if np.any(array.shape != xs.shape): raise ValueError('Each array shape must match corresponding pixel sizes shape') array = remove_structures_touching_border_nan(array) if fill_holes: array = remove_structure_holes(array) lab, nm, nl = label_structures(array, wrap='both') if lab is None: continue new_p = get_structure_perimeters(lab, nm, nl, xs, ys) if length_scale_type == 'sqrt_area': new_a = get_structure_areas(lab, nm, nl, xs, ys) new_ls = np.sqrt(new_a) valid = (new_a > 0) & (new_p > 0) else: heights, widths = get_structure_height_width(lab, nm, nl, xs, ys) new_ls = widths if length_scale_type == 'width' else heights valid = (new_ls > 0) & (new_p > 0) length_scales.extend(new_ls[valid]) perimeters.extend(new_p[valid]) length_scales = np.array(length_scales) perimeters = np.array(perimeters) mask = (length_scales > min_length_scale) & (length_scales < max_length_scale) length_scales, perimeters = length_scales[mask], perimeters[mask] log_ls = np.log10(length_scales) log_p = np.log10(perimeters) if bins is not None: bin_edges = np.linspace(log_ls.min(), log_ls.max(), bins + 1) bin_indices = np.digitize(log_ls, bin_edges) - 1 bin_indices = np.clip(bin_indices, 0, bins - 1) bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:]) bin_means = np.array([log_p[bin_indices == i].mean() if np.any(bin_indices == i) else np.nan for i in range(bins)]) valid = np.isfinite(bin_means) log_ls = bin_centers[valid] log_p = bin_means[valid] (slope, _), (err, _) = linear_regression(log_ls, log_p) if return_values: return slope, err, log_ls, log_p else: return slope, err
# Helper functions for fractal dimension calculations
[docs] def get_coords_of_boundaries(array: NDArray) -> NDArray: """ Find coordinates of pixels with value 1 that are adjacent to pixels with value 0. Parameters ---------- array : np.ndarray 2-D binary array. Returns ------- np.ndarray Array of shape (n_boundaries, 2) where each element is a pair of indices corresponding to the locations of pixels with value 1 that are adjacent to a pixel of value 0. Notes ----- Topology is toroidal, so pixels on one edge are considered adjacent to pixels on the opposite edge. Examples -------- >>> array1 = np.zeros((10, 10)) >>> array1[6:8, 6:8] = 1 >>> array1[3:4, 2:5] = 1 >>> array2 = np.zeros((10, 10)) >>> for i, j in get_coords_of_boundaries(array1): ... array2[i, j] = 1 >>> np.all(array2 == array1) True """ array = array.astype(np.int16) shifted_right = np.roll(array, shift=1, axis=1) shifted_down = np.roll(array, 1, axis=0) diff_right = shifted_right - array diff_down = shifted_down - array del shifted_right, shifted_down right_side_of_pixel = np.argwhere(diff_right == 1) right_side_of_pixel[:, 1] -= 1 left_side_of_pixel = np.argwhere(diff_right == -1) bottom_of_pixel = np.argwhere(diff_down == 1) bottom_of_pixel[:, 0] -= 1 top_of_pixel = np.argwhere(diff_down == -1) all_coords = np.append(right_side_of_pixel, left_side_of_pixel, axis=0) all_coords = np.append(all_coords, top_of_pixel, axis=0) all_coords = np.append(all_coords, bottom_of_pixel, axis=0) # remove duplicates all_coords = np.unique(all_coords, axis=0) return all_coords
[docs] def get_locations_from_pixel_sizes( pixel_sizes_x: NDArray, pixel_sizes_y: NDArray ) -> tuple[NDArray, NDArray]: """ Convert pixel sizes to cumulative location coordinates. Parameters ---------- pixel_sizes_x : np.ndarray 2-D array of pixel sizes in x direction. pixel_sizes_y : np.ndarray 2-D array of pixel sizes in y direction. Returns ------- locations_x : np.ndarray Cumulative x locations. locations_y : np.ndarray Cumulative y locations. """ return np.nancumsum(pixel_sizes_x, 1), np.nancumsum(pixel_sizes_y, 0)
@numba.njit(parallel=True, fastmath=True) def _sandbox_partition( centers_phys, # (N_c, 2) sandbox centers (physical x, y), drawn from set sorted_boundary, # (N_s, 2) set points (physical x, y), sorted by y bins_sq, # (B,) ascending squared distance bin edges max_bin, # float: sqrt(bins_sq[-1]) qs, # (Q,) Rényi orders is_q1, # (Q,) bool: True where qs[qi] is within tolerance of 1.0 ): """For each center, compute M_i(r_k) = #set points within distance r_k. Accumulate Z[qi, k] across centers: Z[qi, k] = sum_i M_i(r_k)^(qs[qi] - 1) (q != 1) Z[qi, k] = sum_i log10(M_i(r_k)) (q == 1, set elementwise via is_q1) Centers are assumed to be drawn from the same set as sorted_boundary, so each center self-counts at distance 0 (M_i >= 1 always at any positive radius). This matches the convention of the legacy `correlation_integral` kernel and makes the q=2 output bit-identical to the previous Grassberger-Procaccia implementation: sum_i M_i(r_k) ^ (2 - 1) = sum_i M_i(r_k) = pair count = old C_l[k]. Binning convention: `searchsorted(bins_sq, dist_sq, side='right')` — bin index is the smallest k with bins_sq[k] > dist_sq, i.e. the bin "first reached" by this pair. After per-center cumulative sum, M_i(bins[k]) = #neighbors with dist < bins[k]. Same as old kernel. """ N_c = centers_phys.shape[0] B = bins_sq.shape[0] Q = qs.shape[0] n_threads = numba.config.NUMBA_NUM_THREADS # Per-thread accumulators (avoid races, no atomics needed) Z_per_thread = np.zeros((n_threads, Q, B), dtype=np.float64) # Per-thread scratch histograms, reused across centers hist_per_thread = np.zeros((n_threads, B), dtype=np.int64) sorted_y = sorted_boundary[:, 1].copy() for i in numba.prange(N_c): thread_id = numba.get_thread_id() cx = centers_phys[i, 0] cy = centers_phys[i, 1] # Zero this thread's scratch histogram for k in range(B): hist_per_thread[thread_id, k] = 0 # y bounding box via binary search lo = np.searchsorted(sorted_y, cy - max_bin, side='left') hi = np.searchsorted(sorted_y, cy + max_bin, side='right') for j in range(lo, hi): bx = sorted_boundary[j, 0] if abs(bx - cx) > max_bin: continue dx = cx - bx dy = cy - sorted_boundary[j, 1] dist_sq = dx * dx + dy * dy bin_idx = np.searchsorted(bins_sq, dist_sq, side='right') if bin_idx < B: hist_per_thread[thread_id, bin_idx] += 1 # Per-center cumulative sum gives M_i(bins[k]) at each k. # Then accumulate M_i^(q-1) (or log10(M_i) at q==1) into Z[qi, k]. # is_q1 is checked per qi so duplicate q==1 entries all get the # log form (vs. a single q1_index, which only handles the first). M = 0 for k in range(B): M += hist_per_thread[thread_id, k] if M > 0: M_f = float(M) for qi in range(Q): if is_q1[qi]: Z_per_thread[thread_id, qi, k] += np.log10(M_f) else: exponent = qs[qi] - 1.0 Z_per_thread[thread_id, qi, k] += M_f ** exponent # Reduce across threads Z = np.zeros((Q, B), dtype=np.float64) for t in range(n_threads): for qi in range(Q): for k in range(B): Z[qi, k] += Z_per_thread[t, qi, k] return Z
[docs] def coarsen_array(array: NDArray, factor: int) -> NDArray: """ Coarsen an array by averaging superpixel regions. Takes an input array and reduces it by a given factor along both the x and y dimensions. The coarsening is achieved by summing 'superpixel' regions of the original array and dividing by the number of pixels in each region. Parameters ---------- array : np.ndarray The input array to be coarsened. factor : int The coarsening factor for reducing the array resolution. Must be a positive integer. Returns ------- np.ndarray The coarsened array with reduced resolution. Examples -------- >>> original_array = np.array([[1, 2], [3, 4]]) >>> coarsened = coarsen_array(original_array, factor=2) >>> coarsened array([[2.5]]) """ coarsened_array = np.add.reduceat(array, np.arange(array.shape[0], step=factor), axis=0) coarsened_array = np.add.reduceat(coarsened_array, np.arange(array.shape[1], step=factor), axis=1) # The number of pixels that are coarsened is usually factor**2, but not for edge superpixels # if the factor does not evenly divide into array size. Solution: pixel_counts = np.add.reduceat(np.ones(array.shape), np.arange(array.shape[0], step=factor), axis=0) pixel_counts = np.add.reduceat(pixel_counts, np.arange(array.shape[1], step=factor), axis=1) coarsened_array = coarsened_array / pixel_counts return coarsened_array
[docs] @numba.njit() def total_perimeter(array, x_sizes, y_sizes): """ Calculate the total perimeter of a binary array. Given a binary array, calculate the total perimeter. Only counts perimeter along edges between 1 and 0. Assumes periodic boundary conditions; for other boundary conditions, pad inputs with 0s or nans. Parameters ---------- array : np.ndarray Binary array (0s and 1s). x_sizes : np.ndarray Pixel sizes in x direction. y_sizes : np.ndarray Pixel sizes in y direction. Returns ------- float Total perimeter length. Raises ------ ValueError If x_sizes or y_sizes is nan where array is 1. """ perimeter = 0 for (i, j), value in np.ndenumerate(array): if value == 1: if np.isnan(x_sizes[i, j]) or np.isnan(y_sizes[i, j]): raise ValueError('x_sizes or y_sizes is nan where array is 1') if i != array.shape[0] - 1 and array[i + 1, j] == 0: perimeter += x_sizes[i, j] elif i == array.shape[0] - 1 and array[0, j] == 0: perimeter += x_sizes[i, j] if i != 0 and array[i - 1, j] == 0: perimeter += x_sizes[i, j] elif i == 0 and array[array.shape[0] - 1, j] == 0: perimeter += x_sizes[i, j] if j != array.shape[1] - 1 and array[i, j + 1] == 0: perimeter += y_sizes[i, j] elif j == array.shape[1] - 1 and array[i, 0] == 0: perimeter += y_sizes[i, j] if j != 0 and array[i, j - 1] == 0: perimeter += y_sizes[i, j] elif j == 0 and array[i, array.shape[1] - 1] == 0: perimeter += y_sizes[i, j] return perimeter
[docs] def total_number( array: NDArray, structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]) ) -> int: """ Count the number of connected objects in an array. Given a 2-D array with 0's, nans, and 1's, calculate number of objects of connected 1's where connectivity is defined by structure. Parameters ---------- array : np.ndarray 2-D array with 0s, 1s, and optionally nans. structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]) Connectivity structure. Returns ------- int Number of connected objects. """ clean = np.where(np.isnan(array), 0, array) _, n_structures = label(clean.astype(bool), structure, output=np.float32) return n_structures
[docs] def isolate_nth_largest_structure( binary_array: NDArray, n: int = 1, structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]) ) -> NDArray[np.bool_]: """ Isolate the Nth largest connected structure in a binary array. Parameters ---------- binary_array : np.ndarray Binary input array. n : int, default=1 Which structure to return, ranked by pixel count (1 = largest). structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]) Connectivity structure. Returns ------- np.ndarray Boolean array with only the Nth largest structure set to True. Raises ------ ValueError If ``binary_array`` contains no structures or ``n`` exceeds the number of structures. """ if n < 1: raise ValueError('n must be >= 1') labelled_array = label(binary_array, structure)[0] cloud_values = labelled_array[labelled_array != 0] # remove background if cloud_values.size == 0: raise ValueError('binary_array contains no structures') values, counts = np.unique(cloud_values, return_counts=True) sorted_indices = np.argsort(counts)[::-1] if n > len(values): raise ValueError(f'Requested n={n} but only {len(values)} structures exist') selected = values[sorted_indices[n - 1]] return labelled_array == selected
def isolate_largest_structure(*args, **kwargs): """Removed. Use :func:`isolate_nth_largest_structure` instead.""" raise NotImplementedError( 'isolate_largest_structure has been renamed to ' 'isolate_nth_largest_structure(binary_array, n=1)' )
[docs] def label_size( array: NDArray, variable: str = 'area', wrap: str | None = 'both', x_sizes: NDArray | None = None, y_sizes: NDArray | None = None ) -> NDArray: """ Label structures with their size values. Creates a labelled array where each structure is labelled with its size (area, summed perimeter, width, or height) instead of a unique identifier. Parameters ---------- array : np.ndarray Binary array of structures: 2-d array, padded with 0's or np.nan's. variable : str, default='area' Which variable to use for 'size'. Options: ``'area'``, ``'summed perimeter'``, ``'width'``, ``'height'``. ``'perimeter'`` is accepted but deprecated. wrap : str or None, default='both' Boundary wrapping options: None, 'sides', 'both'. If 'sides', connect structures that span the left/right edge. If 'both', connect structures that span all edges. x_sizes : np.ndarray, optional Pixel sizes in x direction. If None, assume all sizes are 1. y_sizes : np.ndarray, optional Pixel sizes in y direction. If None, assume all sizes are 1. Returns ------- np.ndarray Array where structures are labelled with their size value and background is 0. Raises ------ ValueError If variable or wrap is not a supported value. Notes ----- If x_sizes or y_sizes are not uniform, the width will be the sum of the average pixel widths of the pixels in the column and in the object. Similarly, the height will be the sum of the average pixel heights of the pixels in the row and in the object. """ binary = np.where(np.isnan(array), 0, array).astype(bool) labelled_array, n_structures = label(binary, output=np.float32) if variable == 'perimeter': warnings.warn( "variable='perimeter' is deprecated, use 'summed perimeter' instead.", DeprecationWarning, stacklevel=2, ) variable = 'summed perimeter' if variable not in ['area', 'summed perimeter', 'width', 'height']: raise ValueError(f'variable={variable} not supported (supported values are "area", "summed perimeter", "width", "height")') if x_sizes is None: x_sizes = np.ones_like(labelled_array) if y_sizes is None: y_sizes = np.ones_like(labelled_array) if wrap is None: pass elif wrap == 'sides': # set those on right to the same i.d. as those on left for j, value in enumerate(labelled_array[:, 0]): if value != 0: if labelled_array[j, labelled_array.shape[1] - 1] != 0 and labelled_array[j, labelled_array.shape[1] - 1] != value: labelled_array[labelled_array == labelled_array[j, labelled_array.shape[1] - 1]] = value if wrap is None: pass elif wrap == 'both' or wrap == 'sides': labelled_array = _merge_periodic_labels(labelled_array, wrap) else: raise ValueError(f'wrap={wrap} not supported') # Flatten arrays to find their indices. values = np.sort(labelled_array.ravel()) original_locations = np.argsort(labelled_array.ravel()) indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T labelled_array[np.isnan(array)] = np.nan # Turn this back to nan so perimeter along it is not included split_here = np.roll(values, shift=-1) - values # Split where the values changed. split_here[-1] = 0 # Last value rolled over from first separated_structure_indices = np.split(indices_2d, np.where(split_here != 0)[0] + 1) separated_structure_indices = separated_structure_indices[1:] # Remove the locations that were 0 (not structure) if len(separated_structure_indices) == 0: return np.zeros(array.shape, dtype=int) labelled_with_sizes = np.zeros(array.shape, dtype=np.float32) # must use numba.typed.List here for Numba compatibility # https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list labelled_with_sizes = _label_size_helper(labelled_array, List(separated_structure_indices), labelled_with_sizes, variable, x_sizes, y_sizes) return labelled_with_sizes
@numba.njit() def _label_size_helper(labelled_array, separated_structure_indices, labelled_with_sizes, variable, x_sizes, y_sizes): for indices in separated_structure_indices: perimeter = 0 area = 0 y_coords_structure = np.array([c[0] for c in indices]) x_coords_structure = np.array([c[1] for c in indices]) unique_y_coords = [] unique_x_coords = [] height = 0 width = 0 for (i, j) in indices: # Height, Width if i not in unique_y_coords: unique_y_coords.append(i) mask = (y_coords_structure == i) y_sizes_here = [] for loc, take in enumerate(mask): if take: y_sizes_here.append(y_sizes[y_coords_structure[loc], x_coords_structure[loc]]) y_sizes_here = np.array(y_sizes_here) height += np.mean(y_sizes_here) if j not in unique_x_coords: unique_x_coords.append(j) mask = (x_coords_structure == j) x_sizes_here = [] for loc, take in enumerate(mask): if take: x_sizes_here.append(x_sizes[y_coords_structure[loc], x_coords_structure[loc]]) x_sizes_here = np.array(x_sizes_here) width += np.mean(x_sizes_here) # Perimeter: if i != labelled_array.shape[0] - 1 and labelled_array[i + 1, j] == 0: perimeter += x_sizes[i, j] elif i == labelled_array.shape[0] - 1 and labelled_array[0, j] == 0: perimeter += x_sizes[i, j] if i != 0 and labelled_array[i - 1, j] == 0: perimeter += x_sizes[i, j] elif i == 0 and labelled_array[labelled_array.shape[0] - 1, j] == 0: perimeter += x_sizes[i, j] if j != labelled_array.shape[1] - 1 and labelled_array[i, j + 1] == 0: perimeter += y_sizes[i, j] elif j == labelled_array.shape[1] - 1 and labelled_array[i, 0] == 0: perimeter += y_sizes[i, j] if j != 0 and labelled_array[i, j - 1] == 0: perimeter += y_sizes[i, j] elif j == 0 and labelled_array[i, labelled_array.shape[1] - 1] == 0: perimeter += y_sizes[i, j] # Area: area += y_sizes[i, j] * x_sizes[i, j] for (i, j) in indices: if variable == 'summed perimeter': labelled_with_sizes[i, j] = perimeter elif variable == 'area': labelled_with_sizes[i, j] = area elif variable == 'width': labelled_with_sizes[i, j] = width elif variable == 'height': labelled_with_sizes[i, j] = height return labelled_with_sizes