Source code for objscale._fractal_dimensions

import numpy as np
import numba
from numba.typed import List
from scipy.ndimage import label
from skimage.segmentation import clear_border
from warnings import warn
from ._object_analysis import remove_structures_touching_border_nan, remove_structure_holes
from ._utils import linear_regression, encase_in_value


[docs] def ensemble_correlation_dimension(arrays, x_sizes=None, y_sizes=None, minlength='auto', maxlength='auto', interior_circles_only=True, return_C_l=False, bins=None, point_reduction_factor=1, nbins=50): """ Calculate the correlation dimension D where C_l ∝ l^D for binary arrays. Requires that each array has the same pixel sizes, although across the array they may be nonuniform (e.g. increasing from left to right) Note that the resulting dimension is for the set of *object edge* points. Parameters ---------- arrays : list or np.ndarray List of binary arrays to calculate correlation dimension of. x_sizes : np.ndarray or list, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume x_sizes[i] corresponds to arrays[i]. y_sizes : np.ndarray or list, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume y_sizes[i] corresponds to arrays[i]. minlength : str or float, default='auto' Minimum length scale for correlation calculation. If 'auto', uses 3 times the minimum pixel size. maxlength : str or float, default='auto' Maximum length scale for correlation calculation. If 'auto', uses 0.1 times the minimum array dimension. interior_circles_only : bool, default=True If True, only use circle centers that are at least maxlength distance from all array edges to avoid boundary effects. In other words, only use circles that are fully contained within the array. Recommended! return_C_l : bool, default=False If True, return dimension, error, bins, C_l. Otherwise, return dimension, error. bins : None, int, or array-like, optional Values of l to use for the regression. Can be: - None: automatically calculate as logarithmically spaced intervals between 3*minimum length and the array width or height using nbins points - int: number of logarithmically spaced bins to generate automatically - array-like: explicit bin edges to use point_reduction_factor : float, default=1 Draw N/point_reduction_factor circles, where N is the total number of available circles. Choose the circle centers randomly. Must be >= 1. nbins : int, default=50 Number of bins to use when bins=None or when bins is an int. Only used for automatic bin generation. Returns ------- dimension : float The correlation dimension. error : float Error estimate for the dimension. bins : np.ndarray, optional The bins used for calculation. Only returned if return_C_l=True. C_l : np.ndarray, optional The correlation integral values. Only returned if return_C_l=True. """ if type(arrays) == np.ndarray: arrays = [arrays] if x_sizes is None: x_sizes = np.ones(arrays[0].shape, dtype=np.float32) if y_sizes is None: y_sizes = np.ones(arrays[0].shape, dtype=np.float32) locations_x, locations_y = get_locations_from_pixel_sizes(x_sizes, y_sizes) h = x_sizes.shape[0] w = x_sizes.shape[1] if maxlength == 'auto': # One tenth of min(width, height) of entire array, where width, height are calculated in the center maxlength = 0.1 * min((locations_x[int(h/2), w-1]-locations_x[int(h/2), 0]), (locations_y[h-1, int(w/2)]-locations_y[0, int(w/2)])) if minlength == 'auto': minlength = 3*min(np.nanmin(x_sizes), np.nanmin(y_sizes)) # Basic validation checks if np.any(np.isnan(arrays)): raise ValueError('arrays must not contain NaN values') if np.any(np.isnan(x_sizes)) or np.any(np.isnan(y_sizes)): raise ValueError("x_sizes and y_sizes cannot contain NaN values") if np.any(x_sizes <= 0) or np.any(y_sizes <= 0): raise ValueError("x_sizes and y_sizes must be positive") if bins is None: bins = np.geomspace(minlength, maxlength, nbins) elif isinstance(bins, int): bins = np.geomspace(minlength, maxlength, bins) # range of scale checks # check these with the actual calculated bins for greatest relevance if bins[-1] <= bins[0]: raise ValueError(f"bin maximum length ({bins[-1]:.3f}) must be greater than bin minimum length ({bins[0]:.3f}); or if bins are passed, they must be increasing. Did you pass invalid values for minlength/maxlength?") if bins[-1] / bins[0] < 10: raise ValueError(f"Available scale ratio ({maxlength/minlength:.2f}) is less than 10. " f"Need at least one order of magnitude separation for reliable dimension estimation.") C_l = np.zeros(bins.shape) for array in arrays: if np.any(array.shape != x_sizes.shape): raise ValueError(f'All arrays must be same shape as pixel sizes (currently {array.shape} and {x_sizes.shape}, respectively)') array = array.astype(np.float16) all_boundary_coordinates = get_coords_of_boundaries(array) if interior_circles_only: # Calculate distance from each boundary coordinate to all array edges coord_locations_x = locations_x[all_boundary_coordinates[:,0], all_boundary_coordinates[:,1]] coord_locations_y = locations_y[all_boundary_coordinates[:,0], all_boundary_coordinates[:,1]] # Distance to each edge for all coordinates dist_to_left = coord_locations_x - locations_x[int(h/2), 0] dist_to_right = locations_x[int(h/2), w-1] - coord_locations_x dist_to_top = coord_locations_y - locations_y[0, int(w/2)] dist_to_bottom = locations_y[h-1, int(w/2)] - coord_locations_y # Find coordinates that are at least maxlength from ALL edges min_dist_to_any_edge = np.minimum.reduce([dist_to_left, dist_to_right, dist_to_top, dist_to_bottom]) interior_mask = min_dist_to_any_edge >= maxlength circle_centers = all_boundary_coordinates[interior_mask] else: circle_centers = all_boundary_coordinates if len(circle_centers) == 0: continue if point_reduction_factor>1: circle_centers = circle_centers[np.random.choice(np.arange(len(circle_centers)), int(len(circle_centers)/point_reduction_factor), replace=False)] elif point_reduction_factor<1: raise ValueError('point_reduction_factor must be >= 1') if len(circle_centers) == 0: continue C_l += correlation_integral(circle_centers, all_boundary_coordinates, locations_x, locations_y, bins) # Perform linear regression to estimate dimension x,y = np.log10(bins), np.log10(C_l) index = np.isfinite(x) & np.isfinite(y) if len(x[index]) <3: warn('Not enough data to estimate correlation dimension, returning nan') if return_C_l: return np.nan,np.nan,np.array([np.nan]),np.array([np.nan]) else: return np.nan,np.nan coefficients, cov = np.polyfit(x[index], y[index], 1, cov=True) fit_error = np.sqrt(np.diag(cov)) dimension, error = coefficients[0], 2*fit_error[0] # fit error is for 95% conf. int. if return_C_l: return dimension, error, bins, C_l else: return dimension, error
[docs] def ensemble_box_dimension(binary_arrays, set = 'edge', min_pixels=1, min_box_size=2, box_sizes = 'default', return_values=False): """ Calculate the ensemble box-counting dimension of binary arrays. This function estimates the box-counting dimension (also known as Minkowski-Bouligand dimension) for a list of binary arrays. It averages the results across multiple arrays. Parameters: ----------- binary_arrays : list of numpy.ndarray or numpy.ndarray A list of 2D binary arrays or a single 2D binary array. set : str, optional (default='edge') Specifies which set to consider for box counting: - 'edge': Box dimension of the set of boundaries between 0 and 1: Count boxes that contain at least one pixel with value 1 AND one pixel with value 0. - 'ones': Box dimension of the set of values equal to 1: Count boxes that contain at least one pixel with value 1. min_pixels : int, optional (default=1) Largest box size, in units of the number of boxes required to cover the array in the smaller direction min_box_size : int, optional (default=2) Smallest box size box_sizes : array-like or 'default', optional (default='default') Box sizes used. If 'default', uses those powers of 2 up to 2^14 that satisfy the above criteria. return_values : bool, optional (default=False) If True, return additional data used in the calculation. Returns: -------- float The estimated box-counting dimension. float The error of the estimate. tuple, optional If return_values is True, also returns: box_sizes mean_number_boxes Raises: ------- ValueError If an unsupported value is provided for 'set' or 'coarsening_factors'. If an array contains nan values Notes: ------ The function uses linear regression on log-log plot of box counts vs. scale to estimate the box-counting dimension. The slope of this regression gives the negative of the box-counting dimension. """ if type(binary_arrays) == np.ndarray: binary_arrays = [binary_arrays] if np.any(np.isnan(binary_arrays)): raise ValueError('arrays must not contain NaN values') if type(box_sizes) == str: if box_sizes != 'default': raise ValueError(f'box_sizes={box_sizes} not supported') box_sizes = 2**np.arange(1,15) # assumed any array is smaller than 32768 pixels else: box_sizes = np.array(box_sizes) max_coarsening_factor = min(binary_arrays[0].shape)/min_pixels box_sizes = box_sizes[box_sizes<=max_coarsening_factor] box_sizes = box_sizes[box_sizes>=min_box_size] mean_number_boxes = np.empty((0,box_sizes.size), dtype=np.float32) for array in binary_arrays: number_boxes = [] for factor in box_sizes: # Coarsen coarsened_array = encase_in_value(coarsen_array(array, factor), np.nan) # Count boxes if set == 'edge': nboxes = np.count_nonzero((coarsened_array>0) & (coarsened_array<1)) elif set == 'ones': nboxes = np.count_nonzero(coarsened_array>0) else: raise ValueError(f'set={set} not supported (supported values are "edge" or "ones")') number_boxes.append(nboxes) mean_number_boxes = np.append(mean_number_boxes, [number_boxes], axis=0) mean_number_boxes = np.mean(mean_number_boxes, axis=0) mean_number_boxes[mean_number_boxes==0] = np.nan # eliminate warning when logging 0 (slope, _), (error,_) = linear_regression(np.log10(box_sizes), np.log10(mean_number_boxes)) if return_values: return -slope, error, box_sizes, mean_number_boxes return -slope, error
[docs] def ensemble_coarsening_dimension(arrays, x_sizes=None, y_sizes=None, cloudy_threshold=0.5, min_pixels=30, return_values=False, coarsening_factors = 'default', count_exterior=False): """ Calculate the ensemble fractal dimension by coarsening resolution and calculating total perimeter. Parameters ---------- arrays : np.ndarray or list of np.ndarray Array, or list of arrays, to coarsen, apply cloudy_threshold to make binary, then calculate total perimeter. x_sizes : np.ndarray or list, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume x_sizes[i] corresponds to arrays[i]. y_sizes : np.ndarray or list, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume y_sizes[i] corresponds to arrays[i]. cloudy_threshold : float, default=0.5 Threshold for making arrays binary. min_pixels : int, default=30 Limit the coarsening factors such that coarsened matrices always have shape >= (min_pixels, min_pixels). return_values : bool, default=False If True, return (D_e, error), (coarsening_factors, mean_total_perimeters). coarsening_factors : str or array-like, default='default' Coarsening factors to use. If 'default', automatically determined. count_exterior : bool, default=False Whether to count exterior perimeter. Returns ------- D_e : float The ensemble fractal dimension. error : float Error estimate (95% confidence). coarsening_factors : np.ndarray, optional The coarsening factors used. Only returned if return_values=True. mean_total_perimeters : np.ndarray, optional Mean total perimeters. Only returned if return_values=True. """ if type(arrays) == np.ndarray: arrays = [arrays] if x_sizes is None: x_sizes = np.ones(arrays[0].shape, dtype=np.float32) if y_sizes is None: y_sizes = np.ones(arrays[0].shape, dtype=np.float32) if type(coarsening_factors) == str: # to not try elementwise comparison if coarsening_factors != 'default': raise ValueError(f'coarsening_factors={coarsening_factors} not supported') if np.count_nonzero((arrays[0]<1) & (arrays[0]>0))==0: # If a binary array, even coarsening factors can be ambiguous because sometimes the superpixel is half cloudy. In this case, use the following coarsening_factors = 3**np.arange(0,10) else: # Otherwise, use more coarsening factors: coarsening_factors = 2**np.arange(0,15) else: coarsening_factors = np.array(coarsening_factors) max_coarsening_factor = min(arrays[0].shape)/min_pixels coarsening_factors = coarsening_factors[coarsening_factors<=max_coarsening_factor] mean_total_perimeters = np.empty((0,coarsening_factors.size), dtype=np.float32) for i in range(len(arrays)): array = arrays[i] if type(x_sizes) == list: xs = x_sizes[i] else: xs = x_sizes if type(y_sizes) == list: ys = y_sizes[i] else: ys = y_sizes total_perimeters = [] for factor in coarsening_factors: # Coarsen coarsened_array = encase_in_value(coarsen_array(array, factor), np.nan) coarsened_x_sizes = factor*encase_in_value(coarsen_array(xs, factor), 0) # the value appended here is irrelevant since the array is 0 here, just to make the shape the same coarsened_y_sizes = factor*encase_in_value(coarsen_array(ys, factor), 0) # the value appended here is irrelevant since the array is 0 here, just to make the shape the same nanmask = (np.isnan(coarsened_array) | np.isnan(coarsened_x_sizes) | np.isnan(coarsened_y_sizes)) # Make binary coarsened_array_binary = (coarsened_array>cloudy_threshold).astype(np.float32) # To not count exterior perimeter, set to nan, to count it, set to 0 if count_exterior: padding = 0 else: padding = np.nan coarsened_array_binary[nanmask] = padding total_p = total_perimeter(coarsened_array_binary, coarsened_x_sizes, coarsened_y_sizes) total_perimeters.append(total_p) mean_total_perimeters = np.append(mean_total_perimeters, [total_perimeters], axis=0) mean_total_perimeters = np.mean(mean_total_perimeters, axis=0) mean_total_perimeters[mean_total_perimeters==0] = np.nan # eliminate warning when logging 0 (slope, _), (error,_) = linear_regression(np.log10(coarsening_factors), np.log10(mean_total_perimeters)) if return_values: return 1-slope, error, coarsening_factors, mean_total_perimeters return 1-slope, error
[docs] def individual_fractal_dimension(arrays, x_sizes=None, y_sizes=None, min_a=10, max_a=np.inf, return_values=False): """ Calculate the individual fractal dimension Df of objects within arrays. The method uses linear regression on log a vs. log p, where a and p are calculated not including structure holes, and omitting structures touching the array edge. Parameters ---------- arrays : list of np.ndarray List of boolean 2D arrays. x_sizes : np.ndarray or list, optional Pixel sizes in the x direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume x_sizes[i] corresponds to arrays[i]. y_sizes : np.ndarray or list, optional Pixel sizes in the y direction. If None, assume all pixel dimensions are 1. If np.ndarray, use these for each array in 'arrays'. If list, assume y_sizes[i] corresponds to arrays[i]. min_a : float, default=10 Minimum structure area to include in calculation. max_a : float, default=np.inf Maximum structure area to include in calculation. return_values : bool, default=False If True, return (Df, err), (log10(sqrt(a)), log10(p)). Returns ------- Df : float The individual fractal dimension. uncertainty : float Uncertainty estimate (95% confidence). log10_sqrt_a : np.ndarray, optional Log10 of sqrt(area) values. Only returned if return_values=True. log10_p : np.ndarray, optional Log10 of perimeter values. Only returned if return_values=True. """ from ._object_analysis import get_structure_props areas, perimeters = [], [] if (type(arrays) != list): arrays = [arrays] if x_sizes is None: x_sizes = np.ones_like(arrays[0]) if y_sizes is None: y_sizes = np.ones_like(arrays[0]) for i in range(len(arrays)): array = arrays[i] if type(x_sizes) == list: xs = x_sizes[i] else: xs = x_sizes if type(y_sizes) == list: ys = y_sizes[i] else: ys = y_sizes if np.any(array.shape != xs.shape): raise ValueError('Each array shape must match corresponding pixel sizes shape') array = remove_structures_touching_border_nan(array) array = remove_structure_holes(array) new_p, new_a, _, _ = get_structure_props(array, xs, ys) areas.extend(new_a) perimeters.extend(new_p) areas, perimeters = np.array(areas), np.array(perimeters) areas, perimeters = areas[(areas>min_a) & (areas<max_a)], perimeters[(areas>min_a) & (areas<max_a)] (slope, _), (err, _) = linear_regression(np.log10(np.sqrt(areas)), np.log10(perimeters)) if return_values: return slope, err, np.log10(np.sqrt(areas)), np.log10(perimeters) else: return slope, err
# Helper functions for fractal dimension calculations def get_coords_of_boundaries(array): """ Find coordinates of pixels of value 1 that are adjacent to pixels of value 0 Input: -array: 2-D binary np.ndarray Output: np.ndarray of shape(n_boundaries, 2) where each element is a pair of indicies corresponding to the locations of pixels with value 1 that are adjacent to a pixel of value 0 Note: Topology is toroidal, so pixels on one edge are considered adjacent to pixels on the opposite edge. Example: array1 = np.zeros((10,10)) array1[6:8,6:8] = 1 array1[3:4,2:5] = 1 array2 = np.zeros((10,10)) for i,j in get_coords_of_boundaries(array1): array2[i,j] = 1 print(np.all(array2==array1)) >>> True """ array = array.astype(np.int16) shifted_right = np.roll(array, shift=1, axis=1) shifted_down = np.roll(array, 1, axis=0) diff_right = shifted_right-array diff_down = shifted_down-array right_side_of_pixel = np.argwhere(diff_right==1) right_side_of_pixel[:,1] -= 1 left_side_of_pixel = np.argwhere(diff_right==-1) bottom_of_pixel = np.argwhere(diff_down==1) bottom_of_pixel[:,0] -= 1 top_of_pixel = np.argwhere(diff_down==-1) all_coords = np.append(right_side_of_pixel, left_side_of_pixel, axis=0) all_coords = np.append(all_coords, top_of_pixel, axis=0) all_coords = np.append(all_coords, bottom_of_pixel, axis=0) # remove duplicates all_coords = np.unique(all_coords, axis=0) return all_coords def get_locations_from_pixel_sizes(pixel_sizes_x, pixel_sizes_y): return np.nancumsum(pixel_sizes_x, 1), np.nancumsum(pixel_sizes_y, 0) @numba.njit(parallel=True) def correlation_integral(coordinates_to_check, coordinates_to_count, locations_x, locations_y, bins): """ Calculate correlation integral for boundary coordinates. For each coordinates_to_check, calculate how many coordinates_to_count are less than a distance bin_i of the chosen coordinate, for each bin_i in bins. Parameters ---------- coordinates_to_check : np.ndarray Array of coordinates of boundaries of shape [[x1,y1], [x2,y2], [x3,y3], ...]. coordinates_to_count : np.ndarray Array of coordinates to count within distance bins. locations_x : np.ndarray X locations of each pixel. locations_y : np.ndarray Y locations of each pixel. bins : np.ndarray 1-D array of distances to bin. Returns ------- C_l : np.ndarray Correlation integral values, same shape as bins. Notes ----- For example, np.sqrt((locations_x[i,j]-locations_x[p,q])**2 + (locations_y[i,j]-locations_y[p,q])**2) should represent the physical distance between pixel locations at i,j and p,q. """ # Each thread gets its own copy # This is to eliminate race condition during paralellization C_l_per_thread = np.zeros((numba.config.NUMBA_NUM_THREADS, bins.shape[0])) for i in numba.prange(coordinates_to_check.shape[0]): thread_id = numba.get_thread_id() for j in range(coordinates_to_count.shape[0]): p,q = coordinates_to_check[i] r,s = coordinates_to_count[j] dx = (locations_x[p,q]-locations_x[r,s]) dy = (locations_y[p,q]-locations_y[r,s]) distance = np.sqrt((dx**2)+(dy**2)) for bin_index, bin in enumerate(bins): if distance<bin: C_l_per_thread[thread_id, bin_index] += 1 # Return total over all threads return np.sum(C_l_per_thread, axis=0)
[docs] def coarsen_array(array, factor): """ Coarsen a given array by a specified factor by averaging along both dimensions. This function takes an input array and reduces it by a given factor along both the x and y dimensions. The coarsening is achieved by summing 'superpixel' regions of the original array and dividing by the number of pixels in each region. The resulting coarsened array has reduced resolution. Parameters ---------- array : np.ndarray The input array to be coarsened. factor : int The coarsening factor for reducing the array resolution. Must be a positive integer. Returns ------- np.ndarray The coarsened array with reduced resolution. Raises ------ ValueError If an even coarsening factor is provided while attempting to create a binary array, as this may lead to rounding issues. Examples -------- >>> original_array = np.array([[1, 2], [3, 4]]) >>> coarsened = coarsen_array(original_array, factor=2) >>> coarsened array([2.5]) """ coarsened_array = np.add.reduceat(array, np.arange(array.shape[0], step=factor), axis=0) # add points in x direction coarsened_array = np.add.reduceat(coarsened_array, np.arange(array.shape[1], step=factor), axis=1) # add points in y direction # The number of pixels that are coarsened is usually factor**2, but not for edge superpixels if the factor does not evenly divide into array size. Solution: pixel_counts = np.add.reduceat(np.ones(array.shape), np.arange(array.shape[0], step=factor), axis=0) # add points in x direction pixel_counts = np.add.reduceat(pixel_counts, np.arange(array.shape[1], step=factor), axis=1) # add points in y direction coarsened_array = coarsened_array/pixel_counts return coarsened_array
[docs] @numba.njit() def total_perimeter(array, x_sizes, y_sizes): """ Given a binary array, calculate the total perimeter. Boundary conditions are assumed periodic. Only counts perimeter along edges between 1 and 0. (so that for different b.c. the array could be encased in any other value) Assumes periodic B.C.; for something else, padd inputs with 0s or nans. Raises ValueError if x_sizes or y_sizes is nan where array is 1 """ perimeter = 0 for (i, j), value in np.ndenumerate(array): if value == 1: if np.isnan(x_sizes[i,j]) or np.isnan(y_sizes[i,j]): raise ValueError('x_sizes or y_sizes is nan where array is 1') if i != array.shape[0]-1 and array[i+1, j] == 0: perimeter += x_sizes[i,j] elif i == array.shape[0]-1 and array[0, j] == 0: perimeter += x_sizes[i,j] if i != 0 and array[i-1, j] == 0: perimeter += x_sizes[i,j] elif i == 0 and array[array.shape[0]-1, j] == 0: perimeter += x_sizes[i,j] if j != array.shape[1]-1 and array[i, j+1] == 0: perimeter += y_sizes[i,j] elif j == array.shape[1]-1 and array[i, 0] == 0: perimeter += y_sizes[i,j] if j != 0 and array[i, j-1] == 0: perimeter += y_sizes[i,j] elif j == 0 and array[i, array.shape[1]-1] == 0: perimeter += y_sizes[i,j] return perimeter
[docs] def total_number(array, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])): """ Given a 2-D array with 0's, nans, and 1's, calculate number of objects of connected 1's where connectivity is defined by structure """ array_copy = array.copy() array_copy[np.isnan(array_copy)] = 0 _, n_structures = label(array_copy.astype(bool), structure, output=np.float32) return n_structures
[docs] def isolate_largest_structure(binary_array, structure=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])): labelled_array = label(binary_array, structure)[0] cloud_values = labelled_array[labelled_array!= 0] # remove background values, counts = np.unique(cloud_values, return_counts=True) most_common = values[np.argmax(counts)] return labelled_array == most_common
def label_size(array, variable='area',wrap='both', x_sizes=None, y_sizes=None): """ Input: array - Binary array of strc: 2-d np.ndarray, padded with 0's or np.nan's variable - 'area', 'perimeter', 'width', 'height' What variable to use for 'size' x_sizes, y_sizes - 2-D np.ndarray of pixel sizes in the x and y directions wrap = None, 'sides', 'both': if 'sides', connect structures that span the left/right edge if 'both', connect structures that span the left/right edge and top/bottom edge Output: labelled_array, total_P : label where structures are labelled with their perimeter, and the background is 0 Note: if x_sizes or y_sizes are not uniform, the width will be the sum of the average pixel widths of the pixels in the column and in the object. Similarly, the height will be the sum of the average pixel heights of the pixels in the row and in the object. Given a array and the sizes of each pixel in each direction, calculate properties of structures. Any perimeter between structure and nan is not counted. """ from ._object_analysis import label_periodic_boundaries labelled_array, n_structures = label(array.astype(bool), output=np.float32) # creates array where every unique structure is composed of a unique number, 1 to n_structures if variable not in ['area', 'perimeter', 'width', 'height']: raise ValueError(f'variable={variable} not supported (supported values are "area", "perimeter", "width", "height")') if x_sizes is None: x_sizes = np.ones_like(labelled_array) if y_sizes is None: y_sizes = np.ones_like(labelled_array) if wrap is None: pass elif wrap == 'sides': # set those on right to the same i.d. as those on left for j,value in enumerate(labelled_array[:,0]): if value != 0: if labelled_array[j, labelled_array.shape[1]-1] != 0 and labelled_array[j, labelled_array.shape[1]-1] != value: # want not a structure and not already changed labelled_array[labelled_array == labelled_array[j, labelled_array.shape[1]-1]] = value # set to same identification number if wrap is None: pass elif wrap == 'both' or wrap == 'sides': labelled_array = label_periodic_boundaries(labelled_array, wrap) else: raise ValueError(f'wrap={wrap} not supported') # Flatten arrays to find their indices. values = np.sort(labelled_array.flatten()) original_locations = np.argsort(labelled_array.flatten()) # Get indices where the original values were indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T # convert flattened indices to 2-d labelled_array[np.isnan(array)] = np.nan # Turn this back to nan so perimeter along it is not included split_here = np.roll(values, shift=-1)-values # Split where the values changed. split_here[-1] = 0 # Last value rolled over from first separated_structure_indices = np.split(indices_2d, np.where(split_here!=0)[0]+1) separated_structure_indices = separated_structure_indices[1:] # Remove the locations that were 0 (not structure) if len(separated_structure_indices) == 0: return np.zeros(array.shape),0 labelled_with_sizes = np.zeros(array.shape, dtype=int) # must use numba.typed.List here for some reason https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list labelled_with_sizes = _label_size_helper(labelled_array, List(separated_structure_indices), labelled_with_sizes, variable, x_sizes, y_sizes) return labelled_with_sizes @numba.njit() def _label_size_helper(labelled_array, separated_structure_indices, labelled_with_sizes, variable, x_sizes, y_sizes): for indices in separated_structure_indices: perimeter = 0 area = 0 y_coords_structure = np.array([c[0] for c in indices]) x_coords_structure = np.array([c[1] for c in indices]) unique_y_coords = [] unique_x_coords = [] height = 0 width = 0 for (i,j) in indices: # Height, Width if i not in unique_y_coords: unique_y_coords.append(i) mask = (y_coords_structure==i) y_sizes_here = [] for loc,take in enumerate(mask): if take: y_sizes_here.append(y_sizes[y_coords_structure[loc],x_coords_structure[loc]]) y_sizes_here = np.array(y_sizes_here) height += np.mean(y_sizes_here) if j not in unique_x_coords: unique_x_coords.append(j) mask = (x_coords_structure==j) x_sizes_here = [] for loc,take in enumerate(mask): if take: x_sizes_here.append(x_sizes[y_coords_structure[loc],x_coords_structure[loc]]) x_sizes_here = np.array(x_sizes_here) width += np.mean(x_sizes_here) # Perimeter: if i != labelled_array.shape[0]-1 and labelled_array[i+1, j] == 0: perimeter += x_sizes[i,j] elif i == labelled_array.shape[0]-1 and labelled_array[0, j] == 0: perimeter += x_sizes[i,j] if i != 0 and labelled_array[i-1, j] == 0: perimeter += x_sizes[i,j] elif i == 0 and labelled_array[labelled_array.shape[0]-1, j] == 0: perimeter += x_sizes[i,j] if j != labelled_array.shape[1]-1 and labelled_array[i, j+1] == 0: perimeter += y_sizes[i,j] elif j == labelled_array.shape[1]-1 and labelled_array[i, 0] == 0: perimeter += y_sizes[i,j] if j != 0 and labelled_array[i, j-1] == 0: perimeter += y_sizes[i,j] elif j == 0 and labelled_array[i, labelled_array.shape[1]-1] == 0: perimeter += y_sizes[i,j] # Area: area += y_sizes[i,j] * x_sizes[i,j] for (i,j) in indices: if variable == 'perimeter': labelled_with_sizes[i,j] = perimeter elif variable == 'area': labelled_with_sizes[i,j] = area elif variable == 'width': labelled_with_sizes[i,j] = width elif variable == 'height': labelled_with_sizes[i,j] = height return labelled_with_sizes