Source code for objscale._object_analysis

import numpy as np
from scipy.ndimage import label
from numba import njit, prange
from numba.typed import List
from warnings import warn
from skimage.segmentation import clear_border
from ._utils import encase_in_value



[docs]
def get_structure_props(array, x_sizes, y_sizes, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]), print_none=False, wrap=None):
    """
    Calculate properties of structures in a binary array.

    Given an array and the sizes of each pixel in each direction, calculate 
    properties of structures. Any perimeter between structure and nan is not counted.

    Parameters
    ----------
    array : np.ndarray
        Binary array of structures: 2-d array, padded with 0's or np.nan's.
    x_sizes : np.ndarray
        Sizes of pixels in horizontal direction, same shape as array.
    y_sizes : np.ndarray
        Sizes of pixels in vertical direction, same shape as array.
    structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
        Defines connectivity.
    print_none : bool, default=False
        Print message if no structures found.
    wrap : str or None, default=None
        Boundary wrapping options: None, 'sides', 'both'.
        If 'sides', connect structures that span the left/right edge.
        If 'both', connect structures that span the left/right edge and top/bottom edge.

    Returns
    -------
    perimeter : np.ndarray
        1-D array, each element the perimeter of an individual structure.
    area : np.ndarray
        1-D array, each element the area of an individual structure.
    height : np.ndarray
        1-D array, each element the height of an individual structure.
    width : np.ndarray
        1-D array, each element the width of an individual structure.

    Notes
    -----
    If x_sizes or y_sizes are not uniform, the width will be the sum of the average 
    pixel widths of the pixels in the column and in the object. Similarly, the height 
    will be the sum of the average pixel heights of the pixels in the row and in the object.
    """

    if array.shape != x_sizes.shape or array.shape != y_sizes.shape: 
        raise ValueError('array, x_sizes, and y_sizes must all be same shape. Currently {},{},{}'.format(array.shape, x_sizes.shape, y_sizes.shape))

    if np.count_nonzero((np.isnan(x_sizes) | np.isnan(y_sizes)) & np.isfinite(array)): 
        raise ValueError('x or y sizes are nan in locations where array is not')
    
    # if 1 in array[0] or 1 in array[:,0] or 1 in array[-1] or 1 in array[:,-1]: raise ValueError('array must be padded with 0s or nans.')
    no_nans = array.copy()
    no_nans[np.isnan(array)] = 0   # so we don't consider nans structures and also so they don't connect multiple structures
    if np.count_nonzero(no_nans) == 0: 
        if print_none: print('No structures found')
        return np.array([]),np.array([]),np.array([]),np.array([])
    labelled_array, n_structures = label(no_nans.astype(bool), structure, output=np.float32)      # creates array where every unique structure is composed of a unique number, 1 to n_structures

    if wrap is None: pass
    elif wrap == 'both' or wrap == 'sides': 
        labelled_array = label_periodic_boundaries(labelled_array, wrap)
    else: raise ValueError(f'wrap={wrap} not supported')

    # Flatten arrays to find their indices.
    values = np.sort(labelled_array.flatten())
    original_locations = np.argsort(labelled_array.flatten())  # Get indices where the original values were
    indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T    # convert flattened indices to 2-d
    
    labelled_array[np.isnan(array)] = np.nan      # Turn this back to nan so perimeter along it is not included
    split_here = np.roll(values, shift=-1)-values   # Split where the values changed.
    split_here[-1] = 0                  # Last value rolled over from first

    separated_structure_indices = np.split(indices_2d, np.where(split_here!=0)[0]+1)
    separated_structure_indices = separated_structure_indices[1:]   # Remove the locations that were 0 (not structure)
    if len(separated_structure_indices) == 0: return np.array([]),np.array([]),np.array([]),np.array([])

    # must use numba.typed.List here for some reason https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list   
    p, a, h, w = _get_structure_props_helper(labelled_array, List(separated_structure_indices), x_sizes, y_sizes) 
    nanmask = np.logical_or(np.logical_or(np.isnan(p), np.isnan(a)), np.logical_or(np.isnan(h), np.isnan(w)))
    if np.count_nonzero(nanmask) > 0: raise ValueError('Nan values found: {} out of {}'.format(np.count_nonzero(nanmask), len(p)))
    p, a, h, w = np.array(p), np.array(a), np.array(h), np.array(w)
    p, a, h, w = p[~nanmask], a[~nanmask],h[~nanmask], w[~nanmask]
    return p,a,h,w



@njit(parallel=True)
def _get_structure_props_helper(labelled_array, separated_structure_indices, x_sizes, y_sizes):


    # Preallocate arrays
    n_structures = len(separated_structure_indices)
    p = np.empty(n_structures, dtype=np.float32)
    a = np.empty(n_structures, dtype=np.float32)
    h = np.empty(n_structures, dtype=np.float32)
    w = np.empty(n_structures, dtype=np.float32)


    for iteration in prange(len(separated_structure_indices)):
        iteration = np.int64(iteration) 
        structure_coords = separated_structure_indices[iteration]
        perimeter = 0
        area = 0

        y_coords_structure = np.array([c[0] for c in structure_coords])
        x_coords_structure = np.array([c[1] for c in structure_coords])
        unique_y_coords = []
        unique_x_coords = []
        height = 0
        width = 0

        for i,j in structure_coords:

            # Height, Width
            if i not in unique_y_coords:
                unique_y_coords.append(i)
                indices = (y_coords_structure==i)
                y_sizes_here = []
                for loc,take in enumerate(indices):
                    if take: y_sizes_here.append(y_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                y_sizes_here = np.array(y_sizes_here)
                height += np.mean(y_sizes_here)
            if j not in unique_x_coords:
                unique_x_coords.append(j)
                indices = (x_coords_structure==j)
                x_sizes_here = []
                for loc,take in enumerate(indices):
                    if take: x_sizes_here.append(x_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                x_sizes_here = np.array(x_sizes_here)
                width += np.mean(x_sizes_here)

            # Perimeter:
            if i != labelled_array.shape[0]-1 and labelled_array[i+1, j] == 0: perimeter += x_sizes[i,j]
            elif i == labelled_array.shape[0]-1 and labelled_array[0, j] == 0: perimeter += x_sizes[i,j]

            if i != 0 and labelled_array[i-1, j] == 0: perimeter += x_sizes[i,j]
            elif i == 0 and labelled_array[labelled_array.shape[0]-1, j] == 0: perimeter += x_sizes[i,j]

            if j != labelled_array.shape[1]-1 and labelled_array[i, j+1] == 0: perimeter += y_sizes[i,j]
            elif j == labelled_array.shape[1]-1 and labelled_array[i, 0] == 0: perimeter += y_sizes[i,j]

            if j != 0 and labelled_array[i, j-1] == 0: perimeter += y_sizes[i,j]
            elif j == 0 and labelled_array[i, 0] == 0: perimeter += y_sizes[i,j]

            # Area:
            area += y_sizes[i,j] * x_sizes[i,j]


        if area != 0:
            p[iteration] = perimeter
            a[iteration] = area
            h[iteration] = height
            w[iteration] = width
            # valid_count += 1
    
    # Return only the valid entries
    valid_mask = (a>0)
    return p[valid_mask], a[valid_mask], h[valid_mask], w[valid_mask]



[docs]
def label_periodic_boundaries(labelled_array, wrap):
    """
        This functions makes labelled structures that span the edge have the same label.

        Parameters:
        labelled_array (numpy.ndarray): A 2D array where each unique non-zero element represents a distinct label. Should be the output of scipy.ndimage.label().
        wrap (str): A string that determines how the boundaries of the array should be wrapped. 
                    It can take three values: 'sides', 'both', or any other string.

        If 'wrap' is 'sides' or 'both':
            The function sets the labels on the right boundary to be the same as those on the left boundary.

        If 'wrap' is 'both':
            The function also sets the labels on the top boundary to be the same as those on the bottom boundary.

        If 'wrap' is neither 'sides' nor 'both':
            The function raises a ValueError.

        Returns:
        labelled_array (numpy.ndarray): The input array with its periodic boundaries labelled as per the 'wrap' parameter.

        Raises:
        ValueError: If 'wrap' is neither 'sides' nor 'both'.
    """
    if wrap == 'sides' or wrap == 'both':
        # set those on right to the same i.d. as those on left
        for j,value in enumerate(labelled_array[:,0]):
            if value != 0:
                if labelled_array[j, labelled_array.shape[1]-1] != 0 and labelled_array[j, labelled_array.shape[1]-1] != value:
                    # want not a structure and not already changed
                    labelled_array[labelled_array == labelled_array[j, labelled_array.shape[1]-1]] = value  # set to same identification number
    
    if wrap == 'both': 
        # set those on top to the same i.d. as those on bottom
        for i,value in enumerate(labelled_array[0,:]):
            if value != 0:
                if labelled_array[labelled_array.shape[0]-1,i] != 0 and labelled_array[labelled_array.shape[0]-1,i] != value:
                    # want not a structure and not already changed
                    labelled_array[labelled_array == labelled_array[labelled_array.shape[0]-1,i]] = value  # set to same identification number
    if wrap not in ['sides','both']: raise ValueError(f'wrap = {wrap} not supported')
    return labelled_array



def get_every_boundary_perimeter(array, x_sizes, y_sizes, return_nlevels=False):
    """
        Return perimeters of each boundary between 0s and 1s.
        where each individual boundary between 0s and 1s in the array is a unique value.
        Ex: a donut of 1s gives 2 values for interior perimeters

        Array should only contain 0s and 1s
    """
    perimeters = []
    counter = 0
    while np.nansum(array) != 0:
        counter += 1
        if counter > 100: raise ValueError('Hole layer limit reached: 100 layers')
        all_holes_filled = remove_structure_holes(array)
        exterior_perimeters, _, _, _ = get_structure_props(encase_in_value(all_holes_filled), encase_in_value(x_sizes), encase_in_value(y_sizes))
        perimeters.extend(exterior_perimeters)

        # remove one layer
        new_array = all_holes_filled - array
        new_array[all_holes_filled == 0] = 0
        array = new_array
        # Now what were previously holes are clouds. What were previously clouds in holes are now holes in the "new" clouds.
    if return_nlevels: return perimeters, counter
    return perimeters



[docs]
def remove_structures_touching_border_nan(array):
    """
        Input:
            array: 2-D np.ndarray consisting of 0s, 1s, and np.nan. All values at the array edge should be np.nan
        Output:
            2-D np.ndarray consisting of 0s, 1s, and np.nan with any structure in contact with the nan 
            values around the outer edge of the good data removed
            "in contact" is defined using adjacent connectivity, i.e. 4-connectivity
        
    """
    if array.ndim != 2: raise ValueError('array not 2-dimensional')

    nanmask = np.isnan(array).astype(int)
    edge_nan_mask  = (nanmask - clear_border_adjacent(nanmask)).astype(bool)

    with_edge = array.copy()
    with_edge[edge_nan_mask] = 1

    cleared = clear_border_adjacent(with_edge).astype(float)
    cleared[edge_nan_mask] = np.nan
    cleared[np.isnan(array)] = np.nan
    return cleared




[docs]
def clear_border_adjacent(array, structure=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])):
    """
        Input:
            array: 2-D np.ndarray consisting of 0s and 1s 
        Output:
            2-D np.ndarray consisting of 0s and 1s with border structures removed

        Remove connected regions that touch the edge, using a connectivity determined 
        by structure. Similar to skimage.segmentation.clear_border but structure
        can be changed.

            Examples:
                    [[0,0,0,0],                [[0,0,0,0],                [[0,0,0,0], 
                    [0,1,1,0],                 [0,1,1,0],                 [0,1,0,0], 
                    [0,0,0,1],                 [0,0,1,1],                 [1,0,0,0],
                    [0,0,0,0]]                 [0,0,0,0]]                 [0,0,0,0]]
            so ex 1 and 3 would still have one cloud in output but ex 2 would have 0
            for a structure of np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]).
    """
    border_cleared = clear_border(label(array.astype(bool), structure)[0])
    border_cleared[border_cleared > 0] = 1
    return border_cleared.astype(bool)




[docs]
def remove_structure_holes(array, periodic=False):
    """
        Fills in all holes in all structures within array.

        Set any value of 0 that is not connected to the largest connected structure of 0s (the background) to 1.

        Assume the largest contiguous area of 0s is the "background".


        Input:
            array: 2D np.ndarray with values either 0,1, or np.nan
            periodic: False, 'both', 'sides':
                For structures lying along the boundary, if periodic=False, the behavior is as if the array was padded with 1's, i.e. holes that are connected to the edge are filled.

        Output: filled array
    """
    if type(array) != np.ndarray: raise ValueError('array must be a np.ndarray object')
    filled = array.copy()
    filled[np.isnan(filled)] = 0
    if np.any(filled>1): raise ValueError('array can only have values 0, 1, or np.nan')
    
    # invert and label
    labelled, _ = label((1-filled))
    if periodic != False: labelled = label_periodic_boundaries(labelled, periodic)
    # largest structure will be the background or the cloudy areas.
    unique_values, unique_counts = np.unique(labelled.flatten(), return_counts=True)
    # Make sure we don't identify the cloudy areas as the background.
    unique_counts, unique_values = unique_counts[unique_values!=0], unique_values[unique_values!=0]
    label_of_background = unique_values[unique_counts.argmax()]

    filled[(labelled != 0) & (labelled != label_of_background)] = 1

    if np.count_nonzero(np.isnan(array))>0: filled[np.isnan(array)] = np.nan

    return filled