Skip to content

pydvl.valuation.samplers.classwise

ClasswiseSampler

ClasswiseSampler(
    in_class: IndexSampler,
    out_of_class: PowersetSampler,
    *,
    min_elements_per_label: int = 1
)

Bases: IndexSampler

Samples indices in two stages according to their labels: the `in_class` sampler draws from the elements of a given label, while the `out_of_class` sampler draws from the elements with different labels, i.e., the complement set.

PARAMETER DESCRIPTION
in_class

Sampling scheme for elements of a given label.

TYPE: IndexSampler

out_of_class

Sampling scheme for elements of different labels, i.e., the complement set.

TYPE: PowersetSampler

min_elements_per_label

Minimum number of elements per label to sample from the complement set, i.e., out of class elements.

TYPE: int DEFAULT: 1

Source code in src/pydvl/valuation/samplers/classwise.py
def __init__(
    self,
    in_class: IndexSampler,
    out_of_class: PowersetSampler,
    *,
    min_elements_per_label: int = 1,
):
    """Store the wrapped samplers and the per-label minimum on the instance.

    Args:
        in_class: Sampling scheme for elements of a given label.
        out_of_class: Sampling scheme for elements of different labels,
            i.e., the complement set.
        min_elements_per_label: Minimum number of elements per label to
            sample from the complement set, i.e., out of class elements.
    """
    super().__init__()
    # Plain attribute storage; nothing is validated or copied here.
    self.min_elements_per_label = min_elements_per_label
    self.out_of_class = out_of_class
    self.in_class = in_class

generate_batches

generate_batches(indices: IndexSetT) -> BatchGenerator

Batches the samples and yields them.

Source code in src/pydvl/valuation/samplers/base.py
def generate_batches(self, indices: IndexSetT) -> BatchGenerator:
    """Yield the samples produced by `_generate` in chunks of `batch_size`.

    Nothing is yielded (and no state is reset) when `indices` is empty.
    The running sample counter is updated only after the consumer resumes
    the generator, i.e. after each yielded batch has been handed out.
    """
    # Because this function contains a `yield`, it is a generator function:
    # simply falling off the end for empty input produces an empty generator.
    if len(indices) > 0:
        self._n_samples = 0
        self._interrupted = False
        batches = chunked(self._generate(indices), self.batch_size)
        for current_batch in batches:
            yield current_batch
            self._n_samples += len(current_batch)
            # An interrupt requested while the batch was being consumed
            # stops the generation before the next batch is produced.
            if self._interrupted:
                return

interrupt

interrupt() -> None

Interrupts the current sampler as well as the passed-in samplers.

Source code in src/pydvl/valuation/samplers/classwise.py
def interrupt(self) -> None:
    """Interrupt this sampler, then the in-class and out-of-class samplers."""
    super().interrupt()
    # Propagate the interruption to both wrapped samplers, in-class first.
    for wrapped_sampler in (self.in_class, self.out_of_class):
        wrapped_sampler.interrupt()

roundrobin

roundrobin(
    batch_generators: Mapping[U, Iterable[V]]
) -> Generator[tuple[U, V], None, None]

Take samples from batch generators in order until all of them are exhausted.

This was heavily inspired by the roundrobin recipe in the official Python documentation for the itertools package.

Examples:

>>> from pydvl.valuation.samplers.classwise import roundrobin
>>> list(roundrobin({"A": "123", "B": "456"}))
[('A', '1'), ('B', '4'), ('A', '2'), ('B', '5'), ('A', '3'), ('B', '6')]
PARAMETER DESCRIPTION
batch_generators

dictionary mapping labels to batch generators.

TYPE: Mapping[U, Iterable[V]]

RETURNS DESCRIPTION
Generator[tuple[U, V], None, None]

Combined generators

Source code in src/pydvl/valuation/samplers/classwise.py
def roundrobin(
    batch_generators: Mapping[U, Iterable[V]]
) -> Generator[tuple[U, V], None, None]:
    """Take samples from batch generators in order until all of them are exhausted.

    The iterables are visited cyclically in the mapping's iteration order.
    Each step yields the label of the current iterable together with its next
    item. An iterable that runs out is dropped from the rotation, and the
    remaining ones keep their relative order.

    This was heavily inspired by the roundrobin recipe
    in the official Python documentation for the itertools package.

    Examples:
        >>> from pydvl.valuation.samplers.classwise import roundrobin
        >>> list(roundrobin({"A": "123", "B": "456"}))
        [('A', '1'), ('B', '4'), ('A', '2'), ('B', '5'), ('A', '3'), ('B', '6')]
        >>> list(roundrobin({"A": "1", "B": "234"}))
        [('A', '1'), ('B', '2'), ('B', '3'), ('B', '4')]

    Args:
        batch_generators: dictionary mapping labels to batch generators.

    Returns:
        Generator of `(label, item)` tuples combining all the generators.
    """
    n_active = len(batch_generators)
    # Pair every label with the bound `__next__` of its iterator so that the
    # cycle below can advance each iterable without looking it up again.
    remaining_generators = cycle(
        (label, iter(it).__next__) for label, it in batch_generators.items()
    )
    while n_active:
        try:
            for label, next_generator in remaining_generators:
                yield label, next_generator()
        except StopIteration:
            # Remove the iterator we just exhausted from the cycle: the next
            # `n_active` entries of the old cycle are exactly the iterators
            # that are still alive, starting right after the exhausted one.
            n_active -= 1
            remaining_generators = cycle(islice(remaining_generators, n_active))

get_unique_labels

get_unique_labels(array: NDArray) -> NDArray

Returns unique labels in a categorical dataset.

PARAMETER DESCRIPTION
array

The input array to find unique labels from. It should be of categorical types such as Object, String, Unicode, Unsigned integer, Signed integer, or Boolean.

TYPE: NDArray

RETURNS DESCRIPTION
NDArray

An array of unique labels.

RAISES DESCRIPTION
ValueError

If the input array is not of a categorical type.

Source code in src/pydvl/valuation/samplers/classwise.py
def get_unique_labels(array: NDArray) -> NDArray:
    """Collect the distinct labels of a categorical array.

    Args:
        array: Array holding the labels. Its dtype must be one of the
            categorical kinds: Object, String, Unicode, Unsigned integer,
            Signed integer, or Boolean.

    Returns:
        The unique values found in `array`, as returned by `np.unique`.

    Raises:
        ValueError: If the dtype of `array` is not a categorical kind.
    """
    # One-letter numpy dtype kind codes, in order: Object, String, Unicode,
    # signed integer, unsigned integer, boolean.
    categorical_kinds = "OSUiub"
    dtype = array.dtype
    if dtype.kind not in categorical_kinds:
        raise ValueError(
            f"Input array has an unsupported data type for categorical labels: {dtype}. "
            "Expected types: Object, String, Unicode, Unsigned integer, Signed integer, or Boolean."
        )
    unique_labels = np.unique(array)
    return cast(NDArray, unique_labels)