Source code for objscale._fractal_dimensions

import numpy as np
import numba
from numba.typed import List
from scipy.ndimage import label
from skimage.segmentation import clear_border
from warnings import warn
from ._object_analysis import remove_structures_touching_border_nan, remove_structure_holes
from ._utils import linear_regression, encase_in_value



[docs]
def ensemble_correlation_dimension(arrays, x_sizes=None, y_sizes=None, minlength='auto', maxlength='auto', interior_circles_only=True, return_C_l=False, bins=None, point_reduction_factor=1, nbins=50):
    """
    Calculate the correlation dimension D where C_l ∝ l^D for binary arrays.

    Requires that each array has the same pixel sizes, although across the array 
    they may be nonuniform (e.g. increasing from left to right)

    Note that the resulting dimension is for the set of *object edge* points.

    Parameters
    ----------
    arrays : list or np.ndarray
        List of binary arrays to calculate correlation dimension of.
    x_sizes : np.ndarray or list, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        x_sizes[i] corresponds to arrays[i].
    y_sizes : np.ndarray or list, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        y_sizes[i] corresponds to arrays[i].
    minlength : str or float, default='auto'
        Minimum length scale for correlation calculation. If 'auto', uses 3 times
        the minimum pixel size.
    maxlength : str or float, default='auto'
        Maximum length scale for correlation calculation. If 'auto', uses 0.1 times
        the minimum array dimension.
    interior_circles_only : bool, default=True
        If True, only use circle centers that are at least maxlength distance from
        all array edges to avoid boundary effects. In other words, only use circles
        that are fully contained within the array. Recommended!
    return_C_l : bool, default=False
        If True, return dimension, error, bins, C_l. Otherwise, return dimension, error.
    bins : None, int, or array-like, optional
        Values of l to use for the regression. Can be:
        - None: automatically calculate as logarithmically spaced intervals between
          3*minimum length and the array width or height using nbins points
        - int: number of logarithmically spaced bins to generate automatically
        - array-like: explicit bin edges to use
    point_reduction_factor : float, default=1
        Draw N/point_reduction_factor circles, where N is the total number of
        available circles. Choose the circle centers randomly. Must be >= 1.
    nbins : int, default=50
        Number of bins to use when bins=None or when bins is an int. Only used
        for automatic bin generation.

    Returns
    -------
    dimension : float
        The correlation dimension.
    error : float
        Error estimate for the dimension.
    bins : np.ndarray, optional
        The bins used for calculation. Only returned if return_C_l=True.
    C_l : np.ndarray, optional
        The correlation integral values. Only returned if return_C_l=True.
    """

    if type(arrays) == np.ndarray: arrays = [arrays]

    if x_sizes is None: x_sizes = np.ones(arrays[0].shape, dtype=np.float32)
    if y_sizes is None: y_sizes = np.ones(arrays[0].shape, dtype=np.float32)
    locations_x, locations_y = get_locations_from_pixel_sizes(x_sizes, y_sizes)

    h = x_sizes.shape[0]
    w = x_sizes.shape[1]

    if maxlength == 'auto': 
          # One tenth of min(width, height) of entire array, where width, height are calculated in the center
        maxlength =  0.1 * min((locations_x[int(h/2), w-1]-locations_x[int(h/2), 0]), (locations_y[h-1, int(w/2)]-locations_y[0, int(w/2)]))

    if minlength == 'auto': minlength = 3*min(np.nanmin(x_sizes), np.nanmin(y_sizes))

    # Basic validation checks
    if np.any(np.isnan(arrays)):
        raise ValueError('arrays must not contain NaN values')
    
    if np.any(np.isnan(x_sizes)) or np.any(np.isnan(y_sizes)):
        raise ValueError("x_sizes and y_sizes cannot contain NaN values")

    if np.any(x_sizes <= 0) or np.any(y_sizes <= 0):
        raise ValueError("x_sizes and y_sizes must be positive")
    

    if bins is None: 
        bins = np.geomspace(minlength, maxlength, nbins)
    elif isinstance(bins, int):
        bins = np.geomspace(minlength, maxlength, bins)

    # range of scale checks
    # check these with the actual calculated bins for greatest relevance
    if bins[-1] <= bins[0]:
        raise ValueError(f"bin maximum length ({bins[-1]:.3f}) must be greater than bin minimum length ({bins[0]:.3f}); or if bins are passed, they must be increasing. Did you pass invalid values for minlength/maxlength?")

    if bins[-1] / bins[0] < 10:
        raise ValueError(f"Available scale ratio ({maxlength/minlength:.2f}) is less than 10. "
                        f"Need at least one order of magnitude separation for reliable dimension estimation.")


    C_l = np.zeros(bins.shape)

    for array in arrays:
        if np.any(array.shape != x_sizes.shape): raise ValueError(f'All arrays must be same shape as pixel sizes (currently {array.shape} and {x_sizes.shape}, respectively)')

        array = array.astype(np.float16)

        all_boundary_coordinates = get_coords_of_boundaries(array)

        if interior_circles_only:
            # Calculate distance from each boundary coordinate to all array edges
            coord_locations_x = locations_x[all_boundary_coordinates[:,0], all_boundary_coordinates[:,1]]
            coord_locations_y = locations_y[all_boundary_coordinates[:,0], all_boundary_coordinates[:,1]]
            
            # Distance to each edge for all coordinates
            dist_to_left = coord_locations_x - locations_x[int(h/2), 0]
            dist_to_right = locations_x[int(h/2), w-1] - coord_locations_x
            dist_to_top = coord_locations_y - locations_y[0, int(w/2)]
            dist_to_bottom = locations_y[h-1, int(w/2)] - coord_locations_y
            
            # Find coordinates that are at least maxlength from ALL edges
            min_dist_to_any_edge = np.minimum.reduce([dist_to_left, dist_to_right, dist_to_top, dist_to_bottom])
            interior_mask = min_dist_to_any_edge >= maxlength
            
            circle_centers = all_boundary_coordinates[interior_mask]
                
        else:
            circle_centers = all_boundary_coordinates

        if len(circle_centers) == 0: continue

        if point_reduction_factor>1:
            circle_centers = circle_centers[np.random.choice(np.arange(len(circle_centers)), int(len(circle_centers)/point_reduction_factor), replace=False)]
        elif point_reduction_factor<1: raise ValueError('point_reduction_factor must be >= 1')

        if len(circle_centers) == 0: continue

        C_l += correlation_integral(circle_centers, all_boundary_coordinates, locations_x, locations_y, bins)

    # Perform linear regression to estimate dimension
    x,y = np.log10(bins), np.log10(C_l)
    index = np.isfinite(x) & np.isfinite(y)
    if len(x[index]) <3:   
        warn('Not enough data to estimate correlation dimension, returning nan')
        if return_C_l:
            return np.nan,np.nan,np.array([np.nan]),np.array([np.nan])
        else:
            return np.nan,np.nan
    coefficients, cov = np.polyfit(x[index], y[index], 1, cov=True)
    fit_error = np.sqrt(np.diag(cov))
    dimension, error = coefficients[0], 2*fit_error[0]  # fit error is for 95% conf. int.

    if return_C_l:
        return dimension, error, bins, C_l
    else:
        return dimension, error




[docs]
def ensemble_box_dimension(binary_arrays, set = 'edge', min_pixels=1, min_box_size=2, box_sizes = 'default', return_values=False):
    """
    Calculate the ensemble box-counting dimension of binary arrays.

    This function estimates the box-counting dimension (also known as Minkowski-Bouligand dimension)
    for a list of binary arrays. It averages the results across multiple arrays.

    Parameters:
    -----------
    binary_arrays : list of numpy.ndarray or numpy.ndarray
        A list of 2D binary arrays or a single 2D binary array.
    set : str, optional (default='edge')
        Specifies which set to consider for box counting:
        - 'edge': Box dimension of the set of boundaries between 0 and 1: Count boxes that contain at least one pixel with value 1 AND one pixel with value 0.
        - 'ones': Box dimension of the set of values equal to 1: Count boxes that contain at least one pixel with value 1.
    min_pixels : int, optional (default=1)
        Largest box size, in units of the number of boxes required to cover the array in the smaller direction
    min_box_size : int, optional (default=2)
        Smallest box size
    box_sizes : array-like or 'default', optional (default='default')
        Box sizes used. If 'default', uses those powers of 2 up to 2^14 that satisfy the above criteria.
    return_values : bool, optional (default=False)
        If True, return additional data used in the calculation.

    Returns:
    --------
    float
        The estimated box-counting dimension.
    float
        The error of the estimate.
    tuple, optional
        If return_values is True, also returns:
        box_sizes
        mean_number_boxes

    Raises:
    -------
    ValueError
        If an unsupported value is provided for 'set' or 'coarsening_factors'.
        If an array contains nan values

    Notes:
    ------
    The function uses linear regression on log-log plot of box counts vs. scale
    to estimate the box-counting dimension. The slope of this regression gives
    the negative of the box-counting dimension.

    """

    if type(binary_arrays) == np.ndarray: binary_arrays = [binary_arrays]

    if np.any(np.isnan(binary_arrays)):
        raise ValueError('arrays must not contain NaN values')

    if type(box_sizes) == str:  
        if box_sizes != 'default': raise ValueError(f'box_sizes={box_sizes} not supported')
        
        box_sizes = 2**np.arange(1,15)    # assumed any array is smaller than 32768 pixels
    else: box_sizes = np.array(box_sizes)
    
    max_coarsening_factor = min(binary_arrays[0].shape)/min_pixels
    box_sizes = box_sizes[box_sizes<=max_coarsening_factor]
    box_sizes = box_sizes[box_sizes>=min_box_size]

    mean_number_boxes = np.empty((0,box_sizes.size), dtype=np.float32)
    
    for array in binary_arrays:
        number_boxes = []
        for factor in box_sizes:

            # Coarsen
            coarsened_array = encase_in_value(coarsen_array(array, factor), np.nan)
            
            # Count boxes
            if set == 'edge':
                nboxes = np.count_nonzero((coarsened_array>0) & (coarsened_array<1))
            elif set == 'ones':
                nboxes = np.count_nonzero(coarsened_array>0)
            else: raise ValueError(f'set={set} not supported (supported values are "edge" or "ones")')

            number_boxes.append(nboxes)

        mean_number_boxes = np.append(mean_number_boxes, [number_boxes], axis=0)

    mean_number_boxes = np.mean(mean_number_boxes, axis=0)
    mean_number_boxes[mean_number_boxes==0] = np.nan # eliminate warning when logging 0

    (slope, _), (error,_) = linear_regression(np.log10(box_sizes), np.log10(mean_number_boxes))

    if return_values: return -slope, error, box_sizes, mean_number_boxes
    return -slope, error




[docs]
def ensemble_coarsening_dimension(arrays, x_sizes=None, y_sizes=None, cloudy_threshold=0.5, min_pixels=30, return_values=False, coarsening_factors = 'default', count_exterior=False):
    """
    Calculate the ensemble fractal dimension by coarsening resolution and calculating total perimeter.

    Parameters
    ----------
    arrays : np.ndarray or list of np.ndarray
        Array, or list of arrays, to coarsen, apply cloudy_threshold to make binary, 
        then calculate total perimeter.
    x_sizes : np.ndarray or list, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        x_sizes[i] corresponds to arrays[i].
    y_sizes : np.ndarray or list, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        y_sizes[i] corresponds to arrays[i].
    cloudy_threshold : float, default=0.5
        Threshold for making arrays binary.
    min_pixels : int, default=30
        Limit the coarsening factors such that coarsened matrices always have 
        shape >= (min_pixels, min_pixels).
    return_values : bool, default=False
        If True, return (D_e, error), (coarsening_factors, mean_total_perimeters).
    coarsening_factors : str or array-like, default='default'
        Coarsening factors to use. If 'default', automatically determined.
    count_exterior : bool, default=False
        Whether to count exterior perimeter.

    Returns
    -------
    D_e : float
        The ensemble fractal dimension.
    error : float
        Error estimate (95% confidence).
    coarsening_factors : np.ndarray, optional
        The coarsening factors used. Only returned if return_values=True.
    mean_total_perimeters : np.ndarray, optional
        Mean total perimeters. Only returned if return_values=True.
    """
    if type(arrays) == np.ndarray: arrays = [arrays]
    if x_sizes is None: x_sizes = np.ones(arrays[0].shape, dtype=np.float32)
    if y_sizes is None: y_sizes = np.ones(arrays[0].shape, dtype=np.float32)

    if type(coarsening_factors) == str:    # to not try elementwise comparison
        if coarsening_factors != 'default': raise ValueError(f'coarsening_factors={coarsening_factors} not supported')
        if np.count_nonzero((arrays[0]<1) & (arrays[0]>0))==0:
            # If a binary array, even coarsening factors can be ambiguous because sometimes the superpixel is half cloudy. In this case, use the following
            coarsening_factors = 3**np.arange(0,10)
        else:
            # Otherwise, use more coarsening factors:
            coarsening_factors = 2**np.arange(0,15)
    else: coarsening_factors = np.array(coarsening_factors)
    
    max_coarsening_factor = min(arrays[0].shape)/min_pixels
    coarsening_factors = coarsening_factors[coarsening_factors<=max_coarsening_factor]

    mean_total_perimeters = np.empty((0,coarsening_factors.size), dtype=np.float32)
    
    for i in range(len(arrays)):
        array = arrays[i]
        if type(x_sizes) == list:
            xs = x_sizes[i]
        else:
            xs = x_sizes
        if type(y_sizes) == list:
            ys = y_sizes[i]
        else:
            ys = y_sizes

        total_perimeters = []
        for factor in coarsening_factors:

            # Coarsen
            coarsened_array = encase_in_value(coarsen_array(array, factor), np.nan)
            coarsened_x_sizes = factor*encase_in_value(coarsen_array(xs, factor), 0)    # the value appended here is irrelevant since the array is 0 here, just to make the shape the same
            coarsened_y_sizes = factor*encase_in_value(coarsen_array(ys, factor), 0)    # the value appended here is irrelevant since the array is 0 here, just to make the shape the same

            nanmask = (np.isnan(coarsened_array) | np.isnan(coarsened_x_sizes) | np.isnan(coarsened_y_sizes))
            # Make binary
            coarsened_array_binary = (coarsened_array>cloudy_threshold).astype(np.float32)

            # To not count exterior perimeter, set to nan, to count it, set to 0
            if count_exterior: padding = 0
            else: padding = np.nan
            coarsened_array_binary[nanmask] = padding

            total_p = total_perimeter(coarsened_array_binary, coarsened_x_sizes, coarsened_y_sizes)
            total_perimeters.append(total_p)
        mean_total_perimeters = np.append(mean_total_perimeters, [total_perimeters], axis=0)

    mean_total_perimeters = np.mean(mean_total_perimeters, axis=0)
    mean_total_perimeters[mean_total_perimeters==0] = np.nan # eliminate warning when logging 0

    (slope, _), (error,_) = linear_regression(np.log10(coarsening_factors), np.log10(mean_total_perimeters))
    if return_values: return 1-slope, error, coarsening_factors, mean_total_perimeters

    return 1-slope, error




[docs]
def individual_fractal_dimension(arrays, x_sizes=None, y_sizes=None, min_a=10, max_a=np.inf, return_values=False):
    """
    Calculate the individual fractal dimension Df of objects within arrays.

    The method uses linear regression on log a vs. log p, where a and p are 
    calculated not including structure holes, and omitting structures touching 
    the array edge.

    Parameters
    ----------
    arrays : list of np.ndarray
        List of boolean 2D arrays.
    x_sizes : np.ndarray or list, optional
        Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        x_sizes[i] corresponds to arrays[i].
    y_sizes : np.ndarray or list, optional
        Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
        If np.ndarray, use these for each array in 'arrays'. If list, assume
        y_sizes[i] corresponds to arrays[i].
    min_a : float, default=10
        Minimum structure area to include in calculation.
    max_a : float, default=np.inf
        Maximum structure area to include in calculation.
    return_values : bool, default=False
        If True, return (Df, err), (log10(sqrt(a)), log10(p)).

    Returns
    -------
    Df : float
        The individual fractal dimension.
    uncertainty : float
        Uncertainty estimate (95% confidence).
    log10_sqrt_a : np.ndarray, optional
        Log10 of sqrt(area) values. Only returned if return_values=True.
    log10_p : np.ndarray, optional
        Log10 of perimeter values. Only returned if return_values=True.
    """
    from ._object_analysis import get_structure_props

    areas, perimeters = [], []
    if (type(arrays) != list): arrays = [arrays]

    if x_sizes is None: x_sizes = np.ones_like(arrays[0])
    if y_sizes is None: y_sizes = np.ones_like(arrays[0])

    for i in range(len(arrays)):
        array = arrays[i]
        if type(x_sizes) == list:
            xs = x_sizes[i]
        else:
            xs = x_sizes
        if type(y_sizes) == list:
            ys = y_sizes[i]
        else:
            ys = y_sizes

        if np.any(array.shape != xs.shape): raise ValueError('Each array shape must match corresponding pixel sizes shape')
            
        array = remove_structures_touching_border_nan(array)
        array = remove_structure_holes(array)
        new_p, new_a, _, _ = get_structure_props(array, xs, ys)
        areas.extend(new_a)
        perimeters.extend(new_p)

    areas, perimeters = np.array(areas), np.array(perimeters)
    areas, perimeters = areas[(areas>min_a) & (areas<max_a)], perimeters[(areas>min_a) & (areas<max_a)]

    (slope, _), (err, _) = linear_regression(np.log10(np.sqrt(areas)), np.log10(perimeters))

    if return_values: return slope, err, np.log10(np.sqrt(areas)), np.log10(perimeters)
    else: return slope, err



# Helper functions for fractal dimension calculations

def get_coords_of_boundaries(array):
    """
        Find coordinates of pixels of value 1 that are adjacent to pixels of value 0
        Input: 
            -array: 2-D binary np.ndarray
        Output: 
            np.ndarray of shape(n_boundaries, 2)
                where each element is a pair of indicies corresponding to the locations
                of pixels with value 1 that are adjacent to a pixel of value 0
                
        Note:  
            Topology is toroidal, so pixels on one edge are considered adjacent to pixels
            on the opposite edge.
        Example:

            array1 = np.zeros((10,10))
            array1[6:8,6:8] = 1
            array1[3:4,2:5] = 1
            array2 = np.zeros((10,10))
            for i,j in get_coords_of_boundaries(array1):
                array2[i,j] = 1
            print(np.all(array2==array1))

            >>> True
    """
    array = array.astype(np.int16)
    shifted_right = np.roll(array, shift=1, axis=1)
    shifted_down = np.roll(array, 1, axis=0)
    diff_right = shifted_right-array
    diff_down = shifted_down-array

    right_side_of_pixel = np.argwhere(diff_right==1)
    right_side_of_pixel[:,1] -= 1
    left_side_of_pixel = np.argwhere(diff_right==-1)
    bottom_of_pixel = np.argwhere(diff_down==1)
    bottom_of_pixel[:,0] -= 1
    top_of_pixel = np.argwhere(diff_down==-1)

    all_coords = np.append(right_side_of_pixel, left_side_of_pixel, axis=0)
    all_coords = np.append(all_coords, top_of_pixel, axis=0)
    all_coords = np.append(all_coords, bottom_of_pixel, axis=0)

    # remove duplicates
    all_coords = np.unique(all_coords, axis=0)

    return all_coords


def get_locations_from_pixel_sizes(pixel_sizes_x, pixel_sizes_y):
    return np.nancumsum(pixel_sizes_x, 1), np.nancumsum(pixel_sizes_y, 0)


@numba.njit(parallel=True)
def correlation_integral(coordinates_to_check, coordinates_to_count, locations_x, locations_y, bins):
    """
    Calculate correlation integral for boundary coordinates.

    For each coordinates_to_check, calculate how many coordinates_to_count are 
    less than a distance bin_i of the chosen coordinate, for each bin_i in bins.

    Parameters
    ----------
    coordinates_to_check : np.ndarray
        Array of coordinates of boundaries of shape [[x1,y1], [x2,y2], [x3,y3], ...].
    coordinates_to_count : np.ndarray
        Array of coordinates to count within distance bins.
    locations_x : np.ndarray
        X locations of each pixel.
    locations_y : np.ndarray
        Y locations of each pixel.
    bins : np.ndarray
        1-D array of distances to bin.

    Returns
    -------
    C_l : np.ndarray
        Correlation integral values, same shape as bins.

    Notes
    -----
    For example, np.sqrt((locations_x[i,j]-locations_x[p,q])**2 + 
    (locations_y[i,j]-locations_y[p,q])**2) should represent the physical 
    distance between pixel locations at i,j and p,q.
    """    
    # Each thread gets its own copy
    # This is to eliminate race condition during paralellization
    C_l_per_thread = np.zeros((numba.config.NUMBA_NUM_THREADS, bins.shape[0]))
    
    
    for i in numba.prange(coordinates_to_check.shape[0]):
        thread_id = numba.get_thread_id()
        for j in range(coordinates_to_count.shape[0]):
            p,q = coordinates_to_check[i]
            r,s = coordinates_to_count[j]
        
            dx = (locations_x[p,q]-locations_x[r,s])
            dy = (locations_y[p,q]-locations_y[r,s])
            
            distance = np.sqrt((dx**2)+(dy**2))
            for bin_index, bin in enumerate(bins):
                if distance<bin: 
                    C_l_per_thread[thread_id, bin_index] += 1

    # Return total over all threads
    return np.sum(C_l_per_thread, axis=0)



[docs]
def coarsen_array(array, factor):
    """
    Coarsen a given array by a specified factor by averaging along both dimensions.

    This function takes an input array and reduces it by a given factor along both 
    the x and y dimensions. The coarsening is achieved by summing 'superpixel' regions 
    of the original array and dividing by the number of pixels in each region. The 
    resulting coarsened array has reduced resolution.

    Parameters
    ----------
    array : np.ndarray
        The input array to be coarsened.
    factor : int
        The coarsening factor for reducing the array resolution. Must be a positive integer.

    Returns
    -------
    np.ndarray
        The coarsened array with reduced resolution.

    Raises
    ------
    ValueError
        If an even coarsening factor is provided while attempting to create a binary 
        array, as this may lead to rounding issues.

    Examples
    --------
    >>> original_array = np.array([[1, 2], [3, 4]])
    >>> coarsened = coarsen_array(original_array, factor=2)
    >>> coarsened
    array([2.5])
    """

    coarsened_array = np.add.reduceat(array, np.arange(array.shape[0], step=factor), axis=0)     # add points in x direction
    coarsened_array = np.add.reduceat(coarsened_array, np.arange(array.shape[1], step=factor), axis=1)     # add points in y direction

    # The number of pixels that are coarsened is usually factor**2, but not for edge superpixels if the factor does not evenly divide into array size. Solution:
    pixel_counts = np.add.reduceat(np.ones(array.shape), np.arange(array.shape[0], step=factor), axis=0)     # add points in x direction
    pixel_counts = np.add.reduceat(pixel_counts, np.arange(array.shape[1], step=factor), axis=1)     # add points in y direction

    coarsened_array = coarsened_array/pixel_counts

    return coarsened_array




[docs]
@numba.njit()
def total_perimeter(array, x_sizes, y_sizes):
    """
        Given a binary array, calculate the total perimeter. Boundary conditions are assumed periodic. 
        Only counts perimeter along edges between 1 and 0. (so that for different b.c. the array could be encased in any other value)
        Assumes periodic B.C.; for something else, padd inputs with 0s or nans.

        Raises ValueError if x_sizes or y_sizes is nan where array is 1
    """
    perimeter = 0
    for (i, j), value in np.ndenumerate(array):
        if value == 1:
            if np.isnan(x_sizes[i,j]) or np.isnan(y_sizes[i,j]): 
                raise ValueError('x_sizes or y_sizes is nan where array is 1')
            if i != array.shape[0]-1 and array[i+1, j] == 0: perimeter += x_sizes[i,j]
            elif i == array.shape[0]-1 and array[0, j] == 0: perimeter += x_sizes[i,j]

            if i != 0 and array[i-1, j] == 0: perimeter += x_sizes[i,j]
            elif i == 0 and array[array.shape[0]-1, j] == 0: perimeter += x_sizes[i,j]

            if j != array.shape[1]-1 and array[i, j+1] == 0: perimeter += y_sizes[i,j]
            elif j == array.shape[1]-1 and array[i, 0] == 0: perimeter += y_sizes[i,j]

            if j != 0 and array[i, j-1] == 0: perimeter += y_sizes[i,j]
            elif j == 0 and array[i, array.shape[1]-1] == 0: perimeter += y_sizes[i,j]

    return perimeter




[docs]
def total_number(array, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])):
    """
        Given a 2-D array with 0's, nans, and 1's, calculate number of objects of connected 1's where connectivity is defined by structure
    """
    array_copy = array.copy()
    array_copy[np.isnan(array_copy)] = 0
    _, n_structures = label(array_copy.astype(bool), structure, output=np.float32)
    return n_structures




[docs]
def isolate_largest_structure(binary_array, structure=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])):

    labelled_array = label(binary_array, structure)[0]
    cloud_values = labelled_array[labelled_array!= 0] # remove background
    values, counts = np.unique(cloud_values, return_counts=True)
    most_common = values[np.argmax(counts)]
    return labelled_array == most_common



def label_size(array, variable='area',wrap='both', x_sizes=None, y_sizes=None):
    """
        Input:
            array - Binary array of strc: 2-d np.ndarray, padded with 0's or np.nan's
            variable - 'area', 'perimeter', 'width', 'height'
                        What variable to use for 'size'
            x_sizes, y_sizes - 2-D np.ndarray of pixel sizes in the x and y directions
            wrap = None, 'sides', 'both': 
                if 'sides', connect structures that span the left/right edge
                if 'both', connect structures that span the left/right edge and top/bottom edge
        Output:
            labelled_array, total_P
            : label where structures are labelled with their perimeter, and the background is 0

        Note: if x_sizes or y_sizes are not uniform, the width will be the sum of the average pixel widths of the pixels in the column and in the object.
        Similarly, the height will be the sum of the average pixel heights of the pixels in the row and in the object.
        Given a array and the sizes of each pixel in each direction, calculate properties of structures. 
        Any perimeter between structure and nan is not counted. 
    """
    from ._object_analysis import label_periodic_boundaries

    labelled_array, n_structures = label(array.astype(bool), output=np.float32)      # creates array where every unique structure is composed of a unique number, 1 to n_structures

    if variable not in ['area', 'perimeter', 'width', 'height']:
        raise ValueError(f'variable={variable} not supported (supported values are "area", "perimeter", "width", "height")')

    if x_sizes is None:
        x_sizes = np.ones_like(labelled_array)
    if y_sizes is None:
        y_sizes = np.ones_like(labelled_array)

    if wrap is None: pass
    elif wrap == 'sides':
        # set those on right to the same i.d. as those on left
        for j,value in enumerate(labelled_array[:,0]):
            if value != 0:
                if labelled_array[j, labelled_array.shape[1]-1] != 0 and labelled_array[j, labelled_array.shape[1]-1] != value:
                    # want not a structure and not already changed
                    labelled_array[labelled_array == labelled_array[j, labelled_array.shape[1]-1]] = value  # set to same identification number
    
    if wrap is None: pass
    elif wrap == 'both' or wrap == 'sides': 
        labelled_array = label_periodic_boundaries(labelled_array, wrap)
    else: raise ValueError(f'wrap={wrap} not supported')

    # Flatten arrays to find their indices.
    values = np.sort(labelled_array.flatten())
    original_locations = np.argsort(labelled_array.flatten())  # Get indices where the original values were
    indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T    # convert flattened indices to 2-d
    
    labelled_array[np.isnan(array)] = np.nan      # Turn this back to nan so perimeter along it is not included
    split_here = np.roll(values, shift=-1)-values   # Split where the values changed.
    split_here[-1] = 0                  # Last value rolled over from first

    separated_structure_indices = np.split(indices_2d, np.where(split_here!=0)[0]+1)
    separated_structure_indices = separated_structure_indices[1:]   # Remove the locations that were 0 (not structure)
    if len(separated_structure_indices) == 0: return np.zeros(array.shape),0

    labelled_with_sizes = np.zeros(array.shape, dtype=int)

    # must use numba.typed.List here for some reason https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list   
    labelled_with_sizes = _label_size_helper(labelled_array, List(separated_structure_indices), labelled_with_sizes, variable, x_sizes, y_sizes) 
    return labelled_with_sizes


@numba.njit()
def _label_size_helper(labelled_array, separated_structure_indices, labelled_with_sizes, variable, x_sizes, y_sizes):
    
    for indices in separated_structure_indices:
        perimeter = 0
        area = 0

        y_coords_structure = np.array([c[0] for c in indices])
        x_coords_structure = np.array([c[1] for c in indices])
        unique_y_coords = []
        unique_x_coords = []
        height = 0
        width = 0

        for (i,j) in indices:
            # Height, Width
            if i not in unique_y_coords:
                unique_y_coords.append(i)
                mask = (y_coords_structure==i)
                y_sizes_here = []
                for loc,take in enumerate(mask):
                    if take: y_sizes_here.append(y_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                y_sizes_here = np.array(y_sizes_here)
                height += np.mean(y_sizes_here)
            if j not in unique_x_coords:
                unique_x_coords.append(j)
                mask = (x_coords_structure==j)
                x_sizes_here = []
                for loc,take in enumerate(mask):
                    if take: x_sizes_here.append(x_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                x_sizes_here = np.array(x_sizes_here)
                width += np.mean(x_sizes_here)

            # Perimeter:
            if i != labelled_array.shape[0]-1 and labelled_array[i+1, j] == 0: perimeter += x_sizes[i,j]
            elif i == labelled_array.shape[0]-1 and labelled_array[0, j] == 0: perimeter += x_sizes[i,j]

            if i != 0 and labelled_array[i-1, j] == 0: perimeter += x_sizes[i,j]
            elif i == 0 and labelled_array[labelled_array.shape[0]-1, j] == 0: perimeter += x_sizes[i,j]

            if j != labelled_array.shape[1]-1 and labelled_array[i, j+1] == 0: perimeter += y_sizes[i,j]
            elif j == labelled_array.shape[1]-1 and labelled_array[i, 0] == 0: perimeter += y_sizes[i,j]

            if j != 0 and labelled_array[i, j-1] == 0: perimeter += y_sizes[i,j]
            elif j == 0 and labelled_array[i, labelled_array.shape[1]-1] == 0: perimeter += y_sizes[i,j]

            # Area:
            area += y_sizes[i,j] * x_sizes[i,j]
        for (i,j) in indices:
            if variable == 'perimeter':
                labelled_with_sizes[i,j] = perimeter
            elif variable == 'area':
                labelled_with_sizes[i,j] = area
            elif variable == 'width':
                labelled_with_sizes[i,j] = width
            elif variable == 'height':
                labelled_with_sizes[i,j] = height
        

    return labelled_with_sizes