utils

`sleap_nn.data.utils` ¶

Miscellaneous utility functions for data processing.

Functions:

Name	Description
`check_cache_memory`	Check memory requirements for in-memory caching dataset pipeline.
`check_memory`	Return memory required for caching the image samples from a single labels object.
`ensure_list`	Convert the input into a list if it is not already.
`expand_to_rank`	Expand a tensor to a target rank by adding singleton dimensions in PyTorch.
`gaussian_pdf`	Compute the PDF of an unnormalized 0-centered Gaussian distribution.
`make_grid_vectors`	Make sampling grid vectors from image dimensions.

`check_cache_memory(train_labels, val_labels, memory_buffer=0.2)` ¶

Check memory requirements for in-memory caching dataset pipeline.

Parameters:

Name	Type	Description	Default
`train_labels`	`List[Labels]`	List of `sleap_io.Labels` objects for training data.	required
`val_labels`	`List[Labels]`	List of `sleap_io.Labels` objects for validation data.	required
`memory_buffer`	`float`	Fraction of the total image memory required for caching that should be reserved as a buffer.	`0.2`

Returns:

Name	Type	Description
`bool`	`bool`	True if the total memory required for caching is within available system memory, False otherwise.

Source code in sleap_nn/data/utils.py

def check_cache_memory(
    train_labels: List[sio.Labels],
    val_labels: List[sio.Labels],
    memory_buffer: float = 0.2,
) -> bool:
    """Check memory requirements for in-memory caching dataset pipeline.

    The image memory of every labels object in both lists is summed, inflated by
    `memory_buffer`, and compared against the memory currently reported as
    available by `psutil.virtual_memory()`.

    Args:
        train_labels: List of `sleap_io.Labels` objects for training data.
        val_labels: List of `sleap_io.Labels` objects for validation data. It does
            not need to have the same length as `train_labels`.
        memory_buffer: Fraction of the total image memory required for caching that
            should be reserved as a buffer.

    Returns:
        bool: True if the total memory required for caching is within available system
            memory, False otherwise.

    Raises:
        ValueError: Propagated from `check_memory` if any labeled frame has no image.
    """
    # Sum each split independently. Pairing the two lists with `zip` would stop at
    # the shorter one and silently leave datasets out of the estimate.
    train_cache_memory = sum(check_memory(labels) for labels in train_labels)
    val_cache_memory = sum(check_memory(labels) for labels in val_labels)

    total_cache_memory = train_cache_memory + val_cache_memory
    total_cache_memory += memory_buffer * total_cache_memory  # memory required in bytes
    available_memory = psutil.virtual_memory().available  # available memory in bytes

    return total_cache_memory <= available_memory

`check_memory(labels)` ¶

Return memory required for caching the image samples from a single labels object.

Parameters:

Name	Type	Description	Default
`labels`	`Labels`	A `sleap_io.Labels` object containing the labels for a single dataset.	required

Returns:

Type	Description
`int`	Memory in bytes required to cache the image samples from the labels object.

Source code in sleap_nn/data/utils.py

def check_memory(
    labels: sio.Labels,
) -> int:
    """Return memory required for caching the image samples from a single labels object.

    Args:
        labels: A `sleap_io.Labels` object containing the labels for a single dataset.

    Returns:
        Memory in bytes required to cache the image samples from the labels object.
        This is the sum of `image.nbytes` over all labeled frames (0 if there are
        none).

    Raises:
        ValueError: If any labeled frame has no image data.
    """
    total_bytes = 0
    for label in labels:
        # Read `.image` exactly once per frame.
        # NOTE(review): in sleap-io this attribute appears to be a property backed
        # by the video, so every access may decode the frame again -- confirm.
        img = label.image
        if img is None:
            raise ValueError(
                "Labels object contains a label with no image data, which is required for training."
            )
        total_bytes += img.nbytes
    return total_bytes

`ensure_list(x)` ¶

Convert the input into a list if it is not already.

Source code in sleap_nn/data/utils.py

def ensure_list(x: Any) -> List[Any]:
    """Return `x` unchanged when it is a list, otherwise wrap it as `[x]`.

    Only `list` instances (including subclasses) pass through as-is; tuples, sets,
    strings, `None` and every other value become a single-element list.
    """
    return x if isinstance(x, list) else [x]

`expand_to_rank(x, target_rank, prepend=True)` ¶

Expand a tensor to a target rank by adding singleton dimensions in PyTorch.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	Any `torch.Tensor` with rank <= `target_rank`. If the rank is higher than `target_rank`, the tensor will be returned with the same shape.	required
`target_rank`	`int`	Rank to expand the input to.	required
`prepend`	`bool`	If True, singleton dimensions are added before the first axis of the data. If False, singleton dimensions are added after the last axis.	`True`

Returns:

Type	Description
`Tensor`	The expanded tensor of the same dtype as the input, but with rank `target_rank`. The output has the same exact data as the input tensor and will be identical if they are both flattened.

Source code in sleap_nn/data/utils.py

def expand_to_rank(
    x: torch.Tensor, target_rank: int, prepend: bool = True
) -> torch.Tensor:
    """Pad the shape of a tensor with singleton dimensions up to a target rank.

    Args:
        x: A `torch.Tensor`, expected to have rank <= `target_rank`. When its rank
            already meets or exceeds `target_rank`, no dimensions are added and the
            result keeps the input shape.
        target_rank: Rank to expand the input to.
        prepend: If True, the singleton dimensions go in front of the existing axes.
            If False, they are appended after the last axis.

    Returns:
        A tensor with the same dtype and the same elements as `x` (identical when
        both are flattened), reshaped so that its rank is `target_rank` whenever the
        input rank was not already larger.
    """
    existing_dims = tuple(x.shape)
    # A non-positive deficit means there is nothing to add.
    padding = (1,) * max(target_rank - x.dim(), 0)
    new_shape = padding + existing_dims if prepend else existing_dims + padding
    return x.reshape(new_shape)

`gaussian_pdf(x, sigma)` ¶

Compute the PDF of an unnormalized 0-centered Gaussian distribution.

Parameters:

Name	Type	Description	Default
`x`	`Tensor`	A tensor of dtype torch.float32 with values to compute the PDF for.	required
`sigma`	`float`	Standard deviation of the Gaussian distribution.	required

Returns:

Type	Description
`Tensor`	A tensor of the same shape as `x`, but with values of a PDF of an unnormalized Gaussian distribution. Values of 0 have an unnormalized PDF value of 1.0.

Source code in sleap_nn/data/utils.py

def gaussian_pdf(x: torch.Tensor, sigma: float) -> torch.Tensor:
    """Evaluate an unnormalized, zero-centered Gaussian at each value of `x`.

    Computes `exp(-x^2 / (2 * sigma^2))` elementwise, i.e. the Gaussian PDF without
    its normalization constant.

    Args:
        x: A tensor of dtype torch.float32 with values to compute the PDF for.
        sigma: Standard deviation of the Gaussian distribution.

    Returns:
        A tensor with the same shape as `x` holding the unnormalized PDF values. An
        input value of 0 maps to 1.0.
    """
    two_variance = 2 * sigma**2
    # Same operation order as `exp(-(x**2) / two_variance)`, written as a chain.
    return x.pow(2).neg().div(two_variance).exp()

`make_grid_vectors(image_height, image_width, output_stride=1)` ¶

Make sampling grid vectors from image dimensions.

This is a useful function for creating the x- and y-vectors that define a sampling grid over an image space. These vectors can be used to generate a full meshgrid or for equivalent broadcasting operations.

Parameters:

Name	Type	Description	Default
`image_height`	`int`	Height of the image grid that will be sampled, specified as a scalar integer.	required
`image_width`	`int`	Width of the image grid that will be sampled, specified as a scalar integer.	required
`output_stride`	`int`	Sampling step size, specified as a scalar integer. This can be used to specify a sampling grid that has a smaller shape than the image grid but with values that span the same range. This can be thought of as the reciprocal of the output scale, i.e., it will induce subsampling when set to values greater than 1.	`1`

Returns:

Type	Description
`Tuple[Tensor, Tensor]`	Tuple of grid vectors (xv, yv). These are tensors of dtype torch.float32 with shapes (grid_width,) and (grid_height,) respectively. The grid dimensions are calculated as: grid_width = ceil(image_width / output_stride) and grid_height = ceil(image_height / output_stride).

Source code in sleap_nn/data/utils.py

def make_grid_vectors(
    image_height: int, image_width: int, output_stride: int = 1
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Make sampling grid vectors from image dimensions.

    This is a useful function for creating the x- and y-vectors that define a sampling
    grid over an image space. These vectors can be used to generate a full meshgrid or
    for equivalent broadcasting operations.

    Args:
        image_height: Height of the image grid that will be sampled, specified as a
            scalar integer.
        image_width: Width of the image grid that will be sampled, specified as a
            scalar integer.
        output_stride: Sampling step size, specified as a scalar integer. This can be
            used to specify a sampling grid that has a smaller shape than the image
            grid but with values that span the same range. This can be thought of as
            the reciprocal of the output scale, i.e., it will induce subsampling when
            set to values greater than 1.

    Returns:
        Tuple of grid vectors (xv, yv). These are tensors of dtype torch.float32 with
        shapes (grid_width,) and (grid_height,) respectively. The values start at 0
        and increase in steps of `output_stride`, stopping before the image size.

        The grid dimensions are calculated as:
            grid_width = ceil(image_width / output_stride)
            grid_height = ceil(image_height / output_stride)
    """
    # `torch.arange` excludes the end point, so a size that is not a multiple of the
    # stride still contributes one final partial step (hence ceil, not floor).
    xv = torch.arange(0, image_width, step=output_stride, dtype=torch.float32)
    yv = torch.arange(0, image_height, step=output_stride, dtype=torch.float32)
    return xv, yv

utils

sleap_nn.data.utils ¶

check_cache_memory(train_labels, val_labels, memory_buffer=0.2) ¶

check_memory(labels) ¶

ensure_list(x) ¶

expand_to_rank(x, target_rank, prepend=True) ¶

gaussian_pdf(x, sigma) ¶

make_grid_vectors(image_height, image_width, output_stride=1) ¶

`sleap_nn.data.utils` ¶

`check_cache_memory(train_labels, val_labels, memory_buffer=0.2)` ¶

`check_memory(labels)` ¶

`ensure_list(x)` ¶

`expand_to_rank(x, target_rank, prepend=True)` ¶

`gaussian_pdf(x, sigma)` ¶

`make_grid_vectors(image_height, image_width, output_stride=1)` ¶