from __future__ import annotations
import warnings
import numpy as np
from numpy.typing import NDArray
from scipy.ndimage import label
import numba
from numba import njit, prange
from skimage.segmentation import clear_border
from ._utils import encase_in_value
__all__ = [
'label_structures',
'get_structure_props',
'get_structure_areas',
'get_structure_perimeters',
'get_structure_height_width',
'get_every_boundary_perimeter',
'remove_structures_touching_border_nan',
'clear_border_adjacent',
'remove_structure_holes',
]
DEFAULT_STRUCTURE = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
# =============================================================================
# Labeling
# =============================================================================
[docs]
def label_structures(
array: NDArray,
structure: NDArray = DEFAULT_STRUCTURE,
wrap: str | None = 'both',
) -> tuple[NDArray | None, NDArray | None, int]:
"""
Label connected components in a binary array.
Wrapper on ``scipy.ndimage.label`` with NaN handling and optional
periodic boundary merging.
Parameters
----------
array : np.ndarray
2-D binary array (0s, 1s, and optionally NaN).
structure : np.ndarray, default=4-connectivity cross
Connectivity kernel passed to ``scipy.ndimage.label``.
wrap : str or None, default='both'
Periodic boundary handling:
- ``'both'``: merge labels across left-right and top-bottom edges.
- ``'sides'``: merge labels across left-right edges only.
- ``None``: no periodic merging.
Returns
-------
labelled_array : np.ndarray or None
Float32 array where each unique positive value is a connected
component label. Pixels that were NaN in the input are 0.
``None`` if no structures exist.
nan_mask : np.ndarray or None
Boolean array indicating NaN locations in the input.
``None`` if no structures exist.
n_labels : int
Number of connected components found (0 if none).
"""
nan_mask = np.isnan(array)
if nan_mask.any():
no_nans = array.copy()
no_nans[nan_mask] = 0
else:
no_nans = array
if np.count_nonzero(no_nans) == 0:
return None, None, 0
labelled_array, n_labels = label(no_nans.astype(bool), structure, output=np.float32)
if wrap == 'both' or wrap == 'sides':
labelled_array = _merge_periodic_labels(labelled_array, wrap)
elif wrap is not None:
raise ValueError(f'wrap={wrap!r} not supported')
return labelled_array, nan_mask, n_labels
def _merge_periodic_labels(labelled_array: NDArray, wrap: str) -> NDArray:
"""Merge labels that span periodic boundaries (internal helper)."""
if wrap == 'sides' or wrap == 'both':
for j, value in enumerate(labelled_array[:, 0]):
if value != 0:
if labelled_array[j, labelled_array.shape[1] - 1] != 0 and labelled_array[j, labelled_array.shape[1] - 1] != value:
labelled_array[labelled_array == labelled_array[j, labelled_array.shape[1] - 1]] = value
if wrap == 'both':
for i, value in enumerate(labelled_array[0, :]):
if value != 0:
if labelled_array[labelled_array.shape[0] - 1, i] != 0 and labelled_array[labelled_array.shape[0] - 1, i] != value:
labelled_array[labelled_array == labelled_array[labelled_array.shape[0] - 1, i]] = value
return labelled_array
def _validate_inputs(array, x_sizes, y_sizes):
"""Validate that array, x_sizes, y_sizes have matching shapes and no bad nans."""
if array.shape != x_sizes.shape or array.shape != y_sizes.shape:
raise ValueError(
f'array, x_sizes, and y_sizes must all be same shape. '
f'Currently {array.shape},{x_sizes.shape},{y_sizes.shape}'
)
if np.count_nonzero((np.isnan(x_sizes) | np.isnan(y_sizes)) & np.isfinite(array)):
raise ValueError('x or y sizes are nan in locations where array is not')
def _validate_labelled(labelled_array):
"""Raise TypeError if the array looks binary instead of labelled."""
if labelled_array.dtype == bool:
raise TypeError(
'labelled_array is boolean. '
'Pass a labelled array from label_structures() instead.'
)
# =============================================================================
# Area: pure numpy bincount — O(n)
# =============================================================================
[docs]
def get_structure_areas(
labelled_array: NDArray,
nan_mask: NDArray,
n_labels: int,
x_sizes: NDArray,
y_sizes: NDArray,
) -> NDArray:
"""
Calculate areas of labelled structures.
Parameters
----------
labelled_array : np.ndarray
Labelled array from :func:`label_structures`.
nan_mask : np.ndarray
Boolean NaN mask from :func:`label_structures`.
n_labels : int
Number of labels from :func:`label_structures`.
x_sizes : np.ndarray
Pixel sizes in horizontal direction, same shape as labelled_array.
y_sizes : np.ndarray
Pixel sizes in vertical direction, same shape as labelled_array.
Returns
-------
areas : np.ndarray
1-D array of shape ``(n_labels,)`` where ``areas[i]`` is the area of
label ``i + 1``. Guarantees index alignment with other
``get_structure_*`` functions called on the same labelled array.
"""
_validate_labelled(labelled_array)
return _compute_areas(labelled_array, x_sizes, y_sizes, n_labels)
def _compute_areas(labelled_array, x_sizes, y_sizes, n_labels):
"""Compute per-label areas via np.bincount.
Returns array of shape (n_labels,) — index i is label i+1.
Merged labels from periodic wrapping may have area 0.
"""
pixel_areas = (x_sizes * y_sizes).ravel()
labels_flat = labelled_array.ravel()
mask = labels_flat > 0
areas = np.bincount(
labels_flat[mask].astype(np.intp),
weights=pixel_areas[mask],
minlength=n_labels + 1,
)
return areas[1:].astype(np.float32)
# =============================================================================
# Perimeter: Numba parallel row-chunked — O(n)
# =============================================================================
[docs]
def get_structure_perimeters(
labelled_array: NDArray,
nan_mask: NDArray,
n_labels: int,
x_sizes: NDArray,
y_sizes: NDArray,
) -> NDArray:
"""
Calculate perimeters of labelled structures.
Perimeter between a structure and NaN is not counted.
Parameters
----------
labelled_array : np.ndarray
Labelled array from :func:`label_structures`.
nan_mask : np.ndarray
Boolean NaN mask from :func:`label_structures`.
n_labels : int
Number of labels from :func:`label_structures`.
x_sizes : np.ndarray
Pixel sizes in horizontal direction, same shape as labelled_array.
y_sizes : np.ndarray
Pixel sizes in vertical direction, same shape as labelled_array.
Returns
-------
perimeters : np.ndarray
1-D array of shape ``(n_labels,)`` where ``perimeters[i]`` is the
perimeter of label ``i + 1``. Guarantees index alignment with other
``get_structure_*`` functions called on the same labelled array.
"""
_validate_labelled(labelled_array)
return _compute_perimeters(labelled_array, nan_mask, x_sizes, y_sizes, n_labels)
@njit(parallel=True)
def _compute_perimeters(labelled_array, nan_mask, x_sizes, y_sizes, n_labels):
"""Compute per-label perimeters via parallel row-chunked scan.
Only counts edges where a structure pixel neighbors a 0 (background).
Edges adjacent to NaN (outside domain) are not counted as perimeter.
"""
nrows, ncols = labelled_array.shape
n_pixels = nrows * ncols
# Memory guard: per-thread buffers are n_threads × (n_labels+1) floats.
# Cap n_threads so total buffer stays under 4× array size.
n_threads = numba.config.NUMBA_NUM_THREADS
max_threads = max(1, (4 * n_pixels) // (n_labels + 1))
n_threads = min(n_threads, max_threads)
chunk_size = (nrows + n_threads - 1) // n_threads
local_p = np.zeros((n_threads, n_labels + 1), dtype=np.float32)
for t in prange(n_threads):
start = t * chunk_size
end = min(start + chunk_size, nrows)
for i in range(start, end):
for j in range(ncols):
lab = labelled_array[i, j]
if lab <= 0:
continue
lab_int = np.intp(lab)
# Down neighbor: count if neighbor is 0 and not nan
ni = i + 1 if i < nrows - 1 else 0
if labelled_array[ni, j] == 0 and not nan_mask[ni, j]:
local_p[t, lab_int] += x_sizes[i, j]
# Up neighbor
ni = i - 1 if i > 0 else nrows - 1
if labelled_array[ni, j] == 0 and not nan_mask[ni, j]:
local_p[t, lab_int] += x_sizes[i, j]
# Right neighbor
nj = j + 1 if j < ncols - 1 else 0
if labelled_array[i, nj] == 0 and not nan_mask[i, nj]:
local_p[t, lab_int] += y_sizes[i, j]
# Left neighbor
nj = j - 1 if j > 0 else ncols - 1
if labelled_array[i, nj] == 0 and not nan_mask[i, nj]:
local_p[t, lab_int] += y_sizes[i, j]
# Sum across threads
perimeters = np.zeros(n_labels + 1, dtype=np.float32)
for t in range(n_threads):
for lab in range(n_labels + 1):
perimeters[lab] += local_p[t, lab]
# Skip label 0
return perimeters[1:]
# =============================================================================
# Height/width: parallel row-chunked bounding-box kernel
# =============================================================================
[docs]
def get_structure_height_width(
labelled_array: NDArray,
nan_mask: NDArray,
n_labels: int,
x_sizes: NDArray,
y_sizes: NDArray,
) -> tuple[NDArray, NDArray]:
"""
Calculate heights and widths of labelled structures as bounding-box extents.
The height of a structure is the physical extent spanned by its
bounding-box rows; the width is the physical extent spanned by its
bounding-box columns. For structures that span a periodic boundary (when
``labelled_array`` was produced with ``wrap='both'`` or ``'sides'``), the
smallest wrap-aware extent is used.
Parameters
----------
labelled_array : np.ndarray
Labelled array from :func:`label_structures`.
nan_mask : np.ndarray
Boolean NaN mask from :func:`label_structures`. Currently unused; kept
for signature compatibility with the other ``get_structure_*`` functions.
n_labels : int
Number of labels from :func:`label_structures`.
x_sizes : np.ndarray
Pixel sizes in horizontal direction, same shape as labelled_array.
y_sizes : np.ndarray
Pixel sizes in vertical direction, same shape as labelled_array.
Returns
-------
heights : np.ndarray
1-D array of shape ``(n_labels,)`` where ``heights[i]`` is the height
of label ``i + 1``.
widths : np.ndarray
1-D array of shape ``(n_labels,)`` where ``widths[i]`` is the width
of label ``i + 1``.
Notes
-----
Widths are well-defined only when ``x_sizes`` is constant within each
column (``x_sizes[:, j]`` does not vary for any ``j``); heights are
well-defined only when ``y_sizes`` is constant within each row. If either
invariant is violated, a :class:`UserWarning` is emitted and a per-row /
per-column nanmean is used as the canonical pixel size.
"""
_validate_labelled(labelled_array)
# Ambiguity check for non-uniform pixel sizes within columns/rows.
# All-NaN pad rows/cols are treated as "no information" and skipped.
with warnings.catch_warnings():
warnings.simplefilter('ignore', category=RuntimeWarning)
xmin_col = np.nanmin(x_sizes, axis=0)
xmax_col = np.nanmax(x_sizes, axis=0)
ymin_row = np.nanmin(y_sizes, axis=1)
ymax_row = np.nanmax(y_sizes, axis=1)
x_both_finite = np.isfinite(xmin_col) & np.isfinite(xmax_col)
y_both_finite = np.isfinite(ymin_row) & np.isfinite(ymax_row)
if np.any((xmin_col != xmax_col) & x_both_finite):
warnings.warn(
'x_sizes varies within at least one column; widths are ambiguous. '
'Using per-column nanmean of x_sizes as the canonical column width.',
stacklevel=2,
)
if np.any((ymin_row != ymax_row) & y_both_finite):
warnings.warn(
'y_sizes varies within at least one row; heights are ambiguous. '
'Using per-row nanmean of y_sizes as the canonical row height.',
stacklevel=2,
)
nrows, ncols = labelled_array.shape
# Canonical per-row y_size and per-col x_size, NaN→0 for pad rows/cols.
with warnings.catch_warnings():
warnings.simplefilter('ignore', category=RuntimeWarning)
y_per_row = np.nanmean(y_sizes, axis=1).astype(np.float32)
x_per_col = np.nanmean(x_sizes, axis=0).astype(np.float32)
y_per_row = np.nan_to_num(y_per_row, nan=0.0)
x_per_col = np.nan_to_num(x_per_col, nan=0.0)
# Cumulative sums for O(1) range extent lookup.
cumy = np.concatenate(([np.float32(0.0)], np.cumsum(y_per_row, dtype=np.float32)))
cumx = np.concatenate(([np.float32(0.0)], np.cumsum(x_per_col, dtype=np.float32)))
if n_labels == 0:
return np.zeros(0, dtype=np.float32), np.zeros(0, dtype=np.float32)
min_r, max_r, min_c, max_c, t_r0, t_rN, t_c0, t_cN = _compute_bbox(
labelled_array, n_labels
)
heights = np.zeros(n_labels, dtype=np.float32)
widths = np.zeros(n_labels, dtype=np.float32)
for l in range(n_labels):
if max_r[l] < 0:
# no pixels for this label (e.g. merged away by periodic wrap)
continue
if t_r0[l] and t_rN[l]:
heights[l] = _wrap_aware_extent(
labelled_array, np.float32(l + 1), axis=0, cum=cumy, length=nrows
)
else:
heights[l] = cumy[max_r[l] + 1] - cumy[min_r[l]]
if t_c0[l] and t_cN[l]:
widths[l] = _wrap_aware_extent(
labelled_array, np.float32(l + 1), axis=1, cum=cumx, length=ncols
)
else:
widths[l] = cumx[max_c[l] + 1] - cumx[min_c[l]]
return heights, widths
@njit(parallel=True)
def _compute_bbox(labelled_array, n_labels):
"""Parallel row-chunked bbox kernel.
Returns per-label min/max row/col and flags for whether the label touches
the first/last row or column. Index ``l`` corresponds to label value ``l+1``.
"""
nrows, ncols = labelled_array.shape
n_pixels = nrows * ncols
# Memory guard: per-thread buffers are 20 bytes per label (4×int32 + 4×bool).
# Same "<4× array bytes" budget as the perimeter kernel.
n_threads = numba.config.NUMBA_NUM_THREADS
max_threads = max(1, (4 * n_pixels) // ((n_labels + 1) * 5))
n_threads = min(n_threads, max_threads)
if n_threads < 1:
n_threads = 1
chunk_size = (nrows + n_threads - 1) // n_threads
local_min_r = np.full((n_threads, n_labels + 1), nrows, dtype=np.int32)
local_max_r = np.full((n_threads, n_labels + 1), -1, dtype=np.int32)
local_min_c = np.full((n_threads, n_labels + 1), ncols, dtype=np.int32)
local_max_c = np.full((n_threads, n_labels + 1), -1, dtype=np.int32)
local_t_r0 = np.zeros((n_threads, n_labels + 1), dtype=np.bool_)
local_t_rN = np.zeros((n_threads, n_labels + 1), dtype=np.bool_)
local_t_c0 = np.zeros((n_threads, n_labels + 1), dtype=np.bool_)
local_t_cN = np.zeros((n_threads, n_labels + 1), dtype=np.bool_)
for t in prange(n_threads):
start = t * chunk_size
end = min(start + chunk_size, nrows)
for i in range(start, end):
for j in range(ncols):
lab = labelled_array[i, j]
if lab <= 0:
continue
li = np.intp(lab)
if i < local_min_r[t, li]:
local_min_r[t, li] = i
if i > local_max_r[t, li]:
local_max_r[t, li] = i
if j < local_min_c[t, li]:
local_min_c[t, li] = j
if j > local_max_c[t, li]:
local_max_c[t, li] = j
if i == 0:
local_t_r0[t, li] = True
if i == nrows - 1:
local_t_rN[t, li] = True
if j == 0:
local_t_c0[t, li] = True
if j == ncols - 1:
local_t_cN[t, li] = True
# Reduction across threads (index l corresponds to label value l+1).
min_r = np.full(n_labels, nrows, dtype=np.int32)
max_r = np.full(n_labels, -1, dtype=np.int32)
min_c = np.full(n_labels, ncols, dtype=np.int32)
max_c = np.full(n_labels, -1, dtype=np.int32)
t_r0 = np.zeros(n_labels, dtype=np.bool_)
t_rN = np.zeros(n_labels, dtype=np.bool_)
t_c0 = np.zeros(n_labels, dtype=np.bool_)
t_cN = np.zeros(n_labels, dtype=np.bool_)
for l in range(n_labels):
li = l + 1
for t in range(n_threads):
if local_min_r[t, li] < min_r[l]:
min_r[l] = local_min_r[t, li]
if local_max_r[t, li] > max_r[l]:
max_r[l] = local_max_r[t, li]
if local_min_c[t, li] < min_c[l]:
min_c[l] = local_min_c[t, li]
if local_max_c[t, li] > max_c[l]:
max_c[l] = local_max_c[t, li]
if local_t_r0[t, li]:
t_r0[l] = True
if local_t_rN[t, li]:
t_rN[l] = True
if local_t_c0[t, li]:
t_c0[l] = True
if local_t_cN[t, li]:
t_cN[l] = True
return min_r, max_r, min_c, max_c, t_r0, t_rN, t_c0, t_cN
def _wrap_aware_extent(labelled_array, lab_value, axis, cum, length):
"""Compute wrap-aware bbox extent for a label that touches both edges of ``axis``.
``axis=0`` → row extent (height), ``axis=1`` → column extent (width).
The structure is assumed to occupy the first and last index along the axis,
so the largest *non-wrap* gap between consecutive occupied indices identifies
the seam to wrap around.
"""
if axis == 0:
mask = (labelled_array == lab_value).any(axis=1)
else:
mask = (labelled_array == lab_value).any(axis=0)
occ = np.nonzero(mask)[0]
if len(occ) == 0:
return np.float32(0.0)
if len(occ) == 1:
return cum[occ[0] + 1] - cum[occ[0]]
gaps = occ[1:] - occ[:-1] - 1
k = int(np.argmax(gaps))
# bbox wraps around the seam: covers occ[k+1]..length-1 then 0..occ[k]
return (cum[length] - cum[occ[k + 1]]) + (cum[occ[k] + 1] - cum[0])
# =============================================================================
# Backward-compatible wrapper
# =============================================================================
[docs]
def get_structure_props(
array: NDArray,
x_sizes: NDArray,
y_sizes: NDArray,
structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
print_none: bool = False,
) -> tuple[NDArray, NDArray, NDArray, NDArray]:
"""
Calculate properties of structures in a binary array.
Assumes toroidal (periodic) boundary conditions. For non-periodic domains,
pad edges with 0 or np.nan before calling. Any perimeter between structure
and nan is not counted.
Parameters
----------
array : np.ndarray
Binary array of structures: 2-d array, padded with 0's or np.nan's.
x_sizes : np.ndarray
Sizes of pixels in horizontal direction, same shape as array.
y_sizes : np.ndarray
Sizes of pixels in vertical direction, same shape as array.
structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
Defines connectivity.
print_none : bool, default=False
Print message if no structures found.
Returns
-------
perimeter : np.ndarray
1-D array, each element the perimeter of an individual structure.
area : np.ndarray
1-D array, each element the area of an individual structure.
height : np.ndarray
1-D array, each element the height of an individual structure.
width : np.ndarray
1-D array, each element the width of an individual structure.
Raises
------
ValueError
If array, x_sizes, and y_sizes are not the same shape.
If x or y sizes are nan where array is not nan.
Notes
-----
If x_sizes or y_sizes are not uniform, the width will be the sum of the average
pixel widths of the pixels in the column and in the object. Similarly, the height
will be the sum of the average pixel heights of the pixels in the row and in the object.
For better performance when only a subset of properties is needed, use the
individual functions: get_structure_areas, get_structure_perimeters,
get_structure_height_width.
"""
_validate_inputs(array, x_sizes, y_sizes)
labelled_array, nan_mask, n_labels = label_structures(array, structure, wrap='both')
if labelled_array is None:
if print_none:
print('No structures found')
return np.array([]), np.array([]), np.array([]), np.array([])
a = get_structure_areas(labelled_array, nan_mask, n_labels, x_sizes, y_sizes)
p = get_structure_perimeters(labelled_array, nan_mask, n_labels, x_sizes, y_sizes)
h, w = get_structure_height_width(labelled_array, nan_mask, n_labels, x_sizes, y_sizes)
# Filter out labels with zero area (from periodic wrapping merging labels)
valid = a > 0
return p[valid], a[valid], h[valid], w[valid]
[docs]
def get_every_boundary_perimeter(
array: NDArray,
x_sizes: NDArray,
y_sizes: NDArray,
return_nlevels: bool = False
) -> list | tuple[list, int]:
"""
Return perimeters of each boundary between 0s and 1s.
Each individual boundary between 0s and 1s in the array is treated as a unique
perimeter. For example, a donut of 1s gives 2 values: one for the inner circle
and one for the outer circle.
Parameters
----------
array : np.ndarray
2-D binary array containing only 0s and 1s.
x_sizes : np.ndarray
Sizes of pixels in horizontal direction, same shape as array.
y_sizes : np.ndarray
Sizes of pixels in vertical direction, same shape as array.
return_nlevels : bool, default=False
If True, also return the number of nesting levels processed.
Returns
-------
perimeters : list
List of perimeter values for each boundary.
nlevels : int, optional
Number of nesting levels. Only returned if return_nlevels=True.
Raises
------
ValueError
If more than 100 nesting levels are found (likely infinite loop).
"""
# Encase once up front: pad + pixel-size arrays never change shape or content.
# Interior NaNs (e.g. satellite "no data" inside a cloud) MUST be preserved
# across iterations — `remove_structure_holes` treats NaN as not-a-hole, and
# `NaN - NaN = NaN` keeps the pattern through the layer subtraction. If we
# collapsed them to 0, a surrounded NaN would be filled on iteration 1 and
# then emerge as a spurious structure on iteration 2.
work = encase_in_value(array).astype(np.float32, copy=False)
enc_xs = encase_in_value(x_sizes)
enc_ys = encase_in_value(y_sizes)
# NaN mask (pad + interior NaN) is static across iterations because NaN
# positions never move under the layer-subtract.
nan_mask_fixed = np.isnan(work)
perimeters = []
counter = 0
# (work == 1).any() is NaN-safe (NaN == 1 is False) and avoids the O(n)
# reduction np.nansum would do.
while (work == 1).any():
counter += 1
if counter > 100:
raise ValueError('Hole layer limit reached: 100 layers')
all_holes_filled = remove_structure_holes(work)
lab, _, nl = label_structures(all_holes_filled, wrap='both')
if lab is not None:
# Pass the pre-computed NaN mask explicitly so the perim kernel
# correctly skips edges to NaN even though we reuse it each iteration.
p = get_structure_perimeters(lab, nan_mask_fixed, nl, enc_xs, enc_ys)
perimeters.extend(p[p > 0])
# Next layer: filled minus original. Since filled ≥ original on the
# finite pixels, the old `new_array[all_holes_filled == 0] = 0` masking
# was redundant. NaN pixels stay NaN via NaN - NaN = NaN.
work = all_holes_filled - work
# Now what were previously holes are clouds. What were previously clouds in holes are now holes in the "new" clouds.
if return_nlevels:
return perimeters, counter
return perimeters
[docs]
def remove_structures_touching_border_nan(array: NDArray) -> NDArray:
"""
Remove structures that touch the NaN border of an array.
Parameters
----------
array : np.ndarray
2-D array consisting of 0s, 1s, and np.nan. All values at the array edge
should be np.nan.
Returns
-------
np.ndarray
2-D array consisting of 0s, 1s, and np.nan with any structure in contact
with the nan values around the outer edge of the good data removed.
Contact is defined using adjacent connectivity (4-connectivity).
Raises
------
ValueError
If array is not 2-dimensional.
"""
if array.ndim != 2:
raise ValueError('array not 2-dimensional')
nanmask = np.isnan(array).astype(np.int8)
edge_nan_mask = (nanmask - clear_border_adjacent(nanmask)).astype(bool)
with_edge = array.copy()
with_edge[edge_nan_mask] = 1
cleared = clear_border_adjacent(with_edge).astype(np.float32)
cleared[edge_nan_mask] = np.nan
cleared[np.isnan(array)] = np.nan
return cleared
[docs]
def clear_border_adjacent(
array: NDArray,
structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
) -> NDArray[np.bool_]:
"""
Remove connected regions that touch the array border.
Similar to skimage.segmentation.clear_border but allows custom connectivity
structure.
Parameters
----------
array : np.ndarray
2-D array consisting of 0s and 1s.
structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
Defines connectivity for determining connected regions.
Returns
-------
np.ndarray
2-D boolean array with border-touching structures removed.
Examples
--------
For a structure of ``np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])``:
- ``[[0,0,0,0], [0,1,1,0], [0,0,0,1], [0,0,0,0]]`` -> keeps middle structure
- ``[[0,0,0,0], [0,1,1,0], [0,0,1,1], [0,0,0,0]]`` -> removes all (connected to border)
- ``[[0,0,0,0], [0,1,0,0], [1,0,0,0], [0,0,0,0]]`` -> keeps middle structure
"""
border_cleared = clear_border(label(array.astype(bool), structure)[0])
border_cleared[border_cleared > 0] = 1
return border_cleared.astype(bool)
[docs]
def remove_structure_holes(
array: NDArray,
periodic: bool | str = False
) -> NDArray:
"""
Fill in all holes in all structures within the array.
Sets any value of 0 that is not connected to the largest connected structure
of 0s (the background) to 1. Assumes the largest contiguous area of 0s is
the background.
Parameters
----------
array : np.ndarray
2D array with values either 0, 1, or np.nan.
periodic : bool or str, default=False
Boundary condition handling. Options:
- False: Holes connected to the edge are filled (as if padded with 1s).
- 'sides': Periodic boundary on left/right edges.
- 'both': Periodic boundary on all edges.
Returns
-------
np.ndarray
Array with all structure holes filled.
Raises
------
ValueError
If array is not a numpy array or contains values other than 0, 1, or np.nan.
"""
if not isinstance(array, np.ndarray):
raise ValueError('array must be a np.ndarray object')
filled = array.copy()
filled[np.isnan(filled)] = 0
if np.any(filled > 1):
raise ValueError('array can only have values 0, 1, or np.nan')
# invert and label
labelled, _ = label((1 - filled))
if periodic is not False:
labelled = _merge_periodic_labels(labelled, periodic)
# Find the background (largest 0-component) via bincount — O(n), no sort.
counts = np.bincount(labelled.ravel())
counts[0] = 0 # ignore the "labelled == 0" bucket (cloud pixels)
label_of_background = counts.argmax()
filled[(labelled != 0) & (labelled != label_of_background)] = 1
if np.count_nonzero(np.isnan(array)) > 0:
filled[np.isnan(array)] = np.nan
return filled