
pydvl.reporting

plot_ci_array

plot_ci_array(
    data: NDArray,
    level: float,
    type: Literal["normal", "t", "auto"] = "normal",
    abscissa: Optional[Sequence[str]] = None,
    mean_color: Optional[str] = "dodgerblue",
    shade_color: Optional[str] = "lightblue",
    ax: Optional[Axes] = None,
    **kwargs: Any,
) -> Axes

Plot values and a confidence interval from a 2D array.

Supported intervals are based on the normal and the t distributions.

PARAMETERS

data (NDArray): A 2D array with M different values for each of the N indices.

level (float): The significance level of the interval, e.g. 0.05 for a 95% confidence interval.

type (Literal["normal", "t", "auto"], default "normal"): The type of confidence interval to use.

abscissa (Optional[Sequence[str]], default None): The values for the x-axis. Leave empty to use increasing integers.

mean_color (Optional[str], default "dodgerblue"): The color of the mean line.

shade_color (Optional[str], default "lightblue"): The color of the confidence interval shading.

ax (Optional[Axes], default None): If passed, the axes into which to plot. Otherwise, a new figure is created and its axes returned.

**kwargs (Any): Additional arguments passed to the plot function.

RETURNS

Axes: The matplotlib axes.

Source code in src/pydvl/reporting/plots.py
def plot_ci_array(
    data: NDArray,
    level: float,
    type: Literal["normal", "t", "auto"] = "normal",
    abscissa: Optional[Sequence[str]] = None,
    mean_color: Optional[str] = "dodgerblue",
    shade_color: Optional[str] = "lightblue",
    ax: Optional[plt.Axes] = None,
    **kwargs: Any,
) -> plt.Axes:
    """Plot values and a confidence interval from a 2D array.

    Supported intervals are based on the normal and the t distributions.

    Args:
        data: A 2D array with M different values for each of the N indices.
        level: The significance level of the interval, e.g. 0.05 for 95%.
        type: The type of confidence interval to use.
        abscissa: The values for the x-axis. Leave empty to use increasing
            integers.
        mean_color: The color of the mean line.
        shade_color: The color of the confidence interval.
        ax: If passed, the axes into which to plot. Otherwise, a new figure
            is created and its axes returned.
        **kwargs: Additional arguments to pass to the plot function.

    Returns:
        The matplotlib axes.
    """

    m, n = data.shape

    means = np.mean(data, axis=0)
    variances = np.var(data, axis=0, ddof=1)

    dummy = ValuationResult(
        algorithm="dummy",
        values=means,
        variances=variances,
        counts=np.ones_like(means, dtype=np.int_) * m,
        indices=np.arange(n),
        data_names=(
            np.array(abscissa, dtype=str)
            if abscissa is not None
            else np.arange(n).astype(str)
        ),
    )
    dummy.sort(key="index")

    return plot_ci_values(
        dummy,
        level=level,
        type=type,
        abscissa=abscissa,
        mean_color=mean_color,
        shade_color=shade_color,
        ax=ax,
        **kwargs,
    )
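
Example usage, as a minimal sketch (the data and the abscissa labels are made up for illustration):

import matplotlib.pyplot as plt
import numpy as np

from pydvl.reporting.plots import plot_ci_array

# Hypothetical data: M=20 repetitions of a valuation of N=10 data points.
rng = np.random.default_rng(16)
data = rng.normal(loc=1.0, scale=0.5, size=(20, 10))

# level is the significance: 0.05 yields a 95% band. With only 20
# repetitions per index, the t-interval is the appropriate choice.
ax = plot_ci_array(
    data, level=0.05, type="t", abscissa=[str(i) for i in range(10)]
)
ax.set_title("Mean value with 95% t-confidence band")
plt.show()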

plot_ci_values

plot_ci_values(
    values: ValuationResult,
    level: float,
    type: Literal["normal", "t", "auto"] = "auto",
    abscissa: Optional[Sequence[Any]] = None,
    mean_color: Optional[str] = "dodgerblue",
    shade_color: Optional[str] = "lightblue",
    ax: Optional[Axes] = None,
    **kwargs: Any,
) -> Axes

Plot values and a confidence interval.

Supported intervals are based on the normal and the t distributions.
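
Concretely (see the source below), the shaded band around the mean value $\bar v_i$ at index $i$ is $[\bar v_i - q\,\widehat{se}_i,\ \bar v_i + q\,\widehat{se}_i]$, where $\widehat{se}_i$ is the standard error of the estimate and $q = F^{-1}(1 - \text{level}/2)$ is the quantile of the standard normal distribution, or of Student's t with $\text{counts}_i - 1$ degrees of freedom.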

PARAMETERS

values (ValuationResult): The valuation result. The object must be sorted by calling ValuationResult.sort().

level (float): The significance level of the interval, e.g. 0.05 for a 95% confidence interval.

type (Literal["normal", "t", "auto"], default "auto"): The type of confidence interval to use. If "auto", uses "normal" if the minimum number of updates across all indices is greater than 30, otherwise uses "t".

abscissa (Optional[Sequence[Any]], default None): The values for the x-axis. Leave empty to use increasing integers.

mean_color (Optional[str], default "dodgerblue"): The color of the mean line.

shade_color (Optional[str], default "lightblue"): The color of the confidence interval shading.

ax (Optional[Axes], default None): If passed, the axes into which to plot. Otherwise, a new figure is created and its axes returned.

**kwargs (Any): Additional arguments passed to the plot function.

RETURNS

Axes: The matplotlib axes.

Source code in src/pydvl/reporting/plots.py
def plot_ci_values(
    values: ValuationResult,
    level: float,
    type: Literal["normal", "t", "auto"] = "auto",
    abscissa: Optional[Sequence[Any]] = None,
    mean_color: Optional[str] = "dodgerblue",
    shade_color: Optional[str] = "lightblue",
    ax: Optional[plt.Axes] = None,
    **kwargs: Any,
) -> plt.Axes:
    """Plot values and a confidence interval.

    Supported intervals are based on the normal and the t distributions.

    Args:
        values: The valuation result. The object must be sorted by calling
            `ValuationResult.sort()`.
        level: The significance level of the interval, e.g. 0.05 for 95%.
        type: The type of confidence interval to use. If "auto", uses "normal"
            if the minimum number of updates for all indices is greater than
            30, otherwise uses "t".
        abscissa: The values for the x-axis. Leave empty to use increasing
            integers.
        mean_color: The color of the mean line.
        shade_color: The color of the confidence interval.
        ax: If passed, the axes into which to plot. Otherwise, a new figure
            is created and its axes returned.
        **kwargs: Additional arguments to pass to the plot function.

    Returns:
        The matplotlib axes.
    """
    assert values._sort_order is not None, "Values must be sorted first."

    ppfs = {
        "normal": norm.ppf,
        "t": partial(t.ppf, df=values.counts - 1),
        "auto": (
            norm.ppf
            if np.min(values.counts) > 30
            else partial(t.ppf, df=values.counts - 1)
        ),
    }

    try:
        score = ppfs[type](1 - level / 2)
    except KeyError:
        raise ValueError(
            f"Unknown confidence interval type requested: {type}."
        ) from None

    if abscissa is None:
        abscissa = range(len(values))

    bound = score * values.stderr

    if ax is None:
        fig, ax = plt.subplots()

    ax.fill_between(
        abscissa,
        values.values - bound,
        values.values + bound,
        alpha=0.3,
        color=shade_color,
    )
    ax.plot(abscissa, values.values, color=mean_color, **kwargs)
    ax.set_xlim(left=min(abscissa), right=max(abscissa))
    return ax
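
Example usage, as a sketch: the ValuationResult is built by hand here, mirroring the constructor call in plot_ci_array above, whereas normally it would come from a valuation method. The import path for ValuationResult is an assumption and may differ between pydvl versions.

import matplotlib.pyplot as plt
import numpy as np

from pydvl.reporting.plots import plot_ci_values
from pydvl.valuation import ValuationResult  # import path may differ by version

n = 8
result = ValuationResult(
    algorithm="example",
    values=np.linspace(-1.0, 1.0, n),
    variances=np.full(n, 0.04),
    counts=np.full(n, 50, dtype=np.int_),
    indices=np.arange(n),
    data_names=np.arange(n).astype(str),
)
result.sort(key="index")  # plot_ci_values requires a sorted result

# With all counts above 30, type="auto" selects the normal interval.
ax = plot_ci_values(result, level=0.05)
plt.show()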

spearman_correlation

spearman_correlation(vv: List[OrderedDict], num_values: int, pvalue: float)

Simple matrix plots with Spearman correlation for each pair in vv.

PARAMETERS

vv (List[OrderedDict]): List of OrderedDicts mapping index to value. The Spearman correlation is computed on the keys, i.e. on the orderings.

num_values (int): Use only this many values from the data (from the start of each OrderedDict).

pvalue (float): Significance threshold. Only correlation coefficients whose p-value is below the Bonferroni-corrected threshold pvalue/len(vv) are kept; all others are displayed as NaN.

RETURNS

The matplotlib figure with the correlation and p-value matrices.

Source code in src/pydvl/reporting/plots.py
def spearman_correlation(vv: List[OrderedDict], num_values: int, pvalue: float):
    """Simple matrix plots with spearman correlation for each pair in vv.

    Args:
        vv: list of OrderedDicts with index: value. Spearman correlation
            is computed for the keys.
        num_values: Use only these many values from the data (from the start
            of the OrderedDicts)
        pvalue: only correlation coefficients whose p-value is below the
            Bonferroni-corrected threshold `pvalue/len(vv)` are kept; the rest
            are displayed as NaN.
    """
    r: np.ndarray = np.ndarray((len(vv), len(vv)))
    p: np.ndarray = np.ndarray((len(vv), len(vv)))
    for i, a in enumerate(vv):
        for j, b in enumerate(vv):
            spearman = sp.stats.spearmanr(
                list(a.keys())[:num_values], list(b.keys())[:num_values]
            )
            r[i][j] = (
                spearman.correlation if spearman.pvalue < pvalue / len(vv) else np.nan
            )  # Bonferroni correction
            p[i][j] = spearman.pvalue
    fig, axs = plt.subplots(1, 2, figsize=(16, 7))
    plot1 = axs[0].matshow(r, vmin=-1, vmax=1)
    axs[0].set_title(f"Spearman correlation (top {num_values} values)")
    axs[0].set_xlabel("Runs")
    axs[0].set_ylabel("Runs")
    fig.colorbar(plot1, ax=axs[0])
    plot2 = axs[1].matshow(p, vmin=0, vmax=1)
    axs[1].set_title("p-value")
    axs[1].set_xlabel("Runs")
    axs[1].set_ylabel("Runs")
    fig.colorbar(plot2, ax=axs[1])

    return fig
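
For illustration, a small sketch with synthetic rankings (three hypothetical runs over 100 indices):

from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np

from pydvl.reporting.plots import spearman_correlation

rng = np.random.default_rng(0)
runs = []
for _ in range(3):
    values = rng.normal(size=100)
    # OrderedDict of index -> value, sorted by decreasing value
    order = np.argsort(-values)
    runs.append(OrderedDict((int(i), float(values[i])) for i in order))

fig = spearman_correlation(runs, num_values=50, pvalue=0.05)
plt.show()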

plot_shapley

plot_shapley(
    df: DataFrame,
    *,
    level: float = 0.05,
    ax: Optional[Axes] = None,
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    prefix: Optional[str] = "data_value",
) -> Axes

Plots the Shapley values, as returned from compute_shapley_values, with error bars corresponding to an $\alpha$-level Normal confidence interval.

PARAMETERS

df (DataFrame): Dataframe with the Shapley values.

level (float, default 0.05): Significance level for the error bars, e.g. 0.05 for 95% confidence.

ax (Optional[Axes], default None): Axes to plot on, or None to create a new figure.

title (Optional[str], default None): Title of the plot.

xlabel (Optional[str], default None): x-axis label of the plot.

ylabel (Optional[str], default None): y-axis label of the plot.

prefix (Optional[str], default "data_value"): Prefix of the dataframe columns holding the values, variances and counts.

RETURNS

Axes: The axes created or used.

Source code in src/pydvl/reporting/plots.py
def plot_shapley(
    df: pd.DataFrame,
    *,
    level: float = 0.05,
    ax: Optional[plt.Axes] = None,
    title: Optional[str] = None,
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    prefix: Optional[str] = "data_value",
) -> plt.Axes:
    r"""Plots the shapley values, as returned from
    [compute_shapley_values][pydvl.value.shapley.common.compute_shapley_values],
    with error bars corresponding to an $\alpha$-level Normal confidence
    interval.

    Args:
        df: dataframe with the Shapley values
        level: significance level for the error bars, e.g. 0.05 for 95%
        ax: axes to plot on, or None to create a new figure
        title: title of the plot
        xlabel: x-axis label of the plot
        ylabel: y-axis label of the plot
        prefix: prefix of the column names holding values, variances and counts

    Returns:
        The axes created or used
    """
    if ax is None:
        _, ax = plt.subplots()

    stderr = np.sqrt(
        df[f"{prefix}_variances"] / np.maximum(1.0, df[f"{prefix}_counts"])
    )
    yerr = norm.ppf(1 - level / 2) * stderr

    ax.errorbar(x=df.index, y=df[prefix], yerr=yerr, fmt="o", capsize=6)
    ax.set_xlabel(xlabel or "")
    ax.set_ylabel(ylabel or "")
    ax.set_title(title or "")
    plt.xticks(rotation=60)
    return ax
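
Example usage, as a sketch with a hand-built dataframe in the layout the function expects (columns "<prefix>", "<prefix>_variances" and "<prefix>_counts", one row per data point):

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pydvl.reporting.plots import plot_shapley

n = 10
df = pd.DataFrame(
    {
        "data_value": np.random.default_rng(1).normal(size=n),
        "data_value_variances": np.full(n, 0.01),
        "data_value_counts": np.full(n, 100),
    },
    index=[f"point_{i}" for i in range(n)],
)

ax = plot_shapley(df, level=0.05, title="Shapley values", ylabel="value")
plt.show()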

plot_influence_distribution

plot_influence_distribution(
    influences: NDArray[float64], index: int, title_extra: str = ""
) -> Axes

Plots a histogram of the influences that all training samples have on the test sample with the given index.

PARAMETERS

influences (NDArray[float64]): Array of influences (training samples x test samples).

index (int): Index of the test sample for which the influences will be plotted.

title_extra (str, default ''): Additional text appended to the title.

RETURNS

Axes: The matplotlib axes.

Source code in src/pydvl/reporting/plots.py
def plot_influence_distribution(
    influences: NDArray[np.float64], index: int, title_extra: str = ""
) -> plt.Axes:
    """Plots the histogram of the influence that all samples in the training set
    have over a single sample index.

    Args:
       influences: array of influences (training samples x test samples)
       index: Index of the test sample for which the influences
            will be plotted.
       title_extra: Additional text that will be appended to the title.
    """
    _, ax = plt.subplots()
    ax.hist(influences[:, index], alpha=0.7)
    ax.set_xlabel("Influence values")
    ax.set_ylabel("Number of samples")
    ax.set_title(f"Distribution of influences {title_extra}")
    return cast(plt.Axes, ax)
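
For illustration, with a made-up influence matrix:

import matplotlib.pyplot as plt
import numpy as np

from pydvl.reporting.plots import plot_influence_distribution

# Hypothetical influences: 200 training samples x 20 test samples.
influences = np.random.default_rng(2).normal(size=(200, 20))

# Histogram of the influences of all training samples on test sample 3.
ax = plot_influence_distribution(influences, index=3, title_extra="(test #3)")
plt.show()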

plot_influence_distribution_by_label

plot_influence_distribution_by_label(
    influences: NDArray[float64],
    labels: NDArray[float64],
    title_extra: str = "",
)

Plots histograms of the influences that the training samples have on a single test sample, grouped by the training labels.

PARAMETERS

influences (NDArray[float64]): Array of influences (training samples x test samples).

labels (NDArray[float64]): Labels for the training set.

title_extra (str, default ''): Additional text appended to the title.

Source code in src/pydvl/reporting/plots.py
def plot_influence_distribution_by_label(
    influences: NDArray[np.float64], labels: NDArray[np.float64], title_extra: str = ""
):
    """Plots the histogram of the influence that all samples in the training set
    have over a single sample index, separated by labels.

    Args:
       influences: array of influences (training samples x test samples)
       labels: labels for the training set.
       title_extra: Additional text that will be appended to the title.
    """
    _, ax = plt.subplots()
    unique_labels = np.unique(labels)
    for label in unique_labels:
        ax.hist(influences[labels == label], label=label, alpha=0.7)
    ax.set_xlabel("Influence values")
    ax.set_ylabel("Number of samples")
    ax.set_title(f"Distribution of influences {title_extra}")
    ax.legend()
    plt.show()
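
For illustration, with a made-up influence matrix and binary training labels (note that the function calls plt.show() itself):

import numpy as np

from pydvl.reporting.plots import plot_influence_distribution_by_label

rng = np.random.default_rng(3)
# Hypothetical influence matrix (training x test) and binary training labels.
influences = rng.normal(size=(200, 20))
labels = rng.integers(0, 2, size=200).astype(np.float64)

# Influences of all training samples on test sample 0, grouped by label.
plot_influence_distribution_by_label(influences[:, 0], labels, title_extra="(test #0)")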

run_removal_experiment

run_removal_experiment(
    data_factory: DataSplitFactory,
    valuation_factories: list[ValuationFactory],
    utility_factory: UtilityFactory,
    removal_percentages: NDArray,
    n_runs: int = 1,
    n_jobs: int = 1,
    random_state: int | None = None,
) -> tuple[DataFrame, DataFrame]

Run the sample removal experiment.

Given the factories, the removal percentages, and the number of runs, this function does the following in each run:

  1. Sample a random state.
  2. For each valuation method, compute the values and iteratively compute the scores after retraining on subsets of the data. This is parallelized. Each job requires 3 factories:
     - A factory that returns a train-test split of the data given a random state.
     - A factory returning a valuation method. The training set is passed to the factory, in case the valuation needs to train something, e.g. for Data-OOB the bagging model must be fitted before the valuation is computed.
     - A factory that returns a utility that evaluates some model on a given test set. This is used for the performance evaluation. The model need not be the same as the one used for the valuation.
  3. Return the scores in two DataFrames, one for the high-value removals and one for the low-value removals.
PARAMETERS

data_factory (DataSplitFactory): A callable that returns a tuple of Datasets (train, test) given a random state.

valuation_factories (list[ValuationFactory]): A list of callables that return Valuation objects given a model, train data, and random state. The training data is typically not needed for construction, but bagging models may require it.

utility_factory (UtilityFactory): A callable that returns a ModelUtility object given a test dataset and a random state. This object is used to evaluate the performance of the valuation method: data points are removed from the training set, the model is retrained, then scored on the test set.

removal_percentages (NDArray): The percentages of data to remove from the training set, as floats between 0 and 1.

n_runs (int, default 1): The number of repetitions of the experiment.

n_jobs (int, default 1): The number of parallel jobs to use.

random_state (int | None, default None): The initial random state.

RETURNS

tuple[DataFrame, DataFrame]: A tuple of DataFrames with the scores for the low and high value removals.

Source code in src/pydvl/reporting/point_removal.py
def run_removal_experiment(
    data_factory: DataSplitFactory,
    valuation_factories: list[ValuationFactory],
    utility_factory: UtilityFactory,
    removal_percentages: NDArray,
    n_runs: int = 1,
    n_jobs: int = 1,
    random_state: int | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Run the sample removal experiment.

    Given the factories, the removal percentages, and the number of runs, this function
    does the following in each run:

    1. Sample a random state
    2. For each valuation method, compute the values and iteratively compute the scores
       after retraining on subsets of the data. This is parallelized. Each job requires
       3 factories:

       - A factory that returns a train-test split of the data given a random state
       - A factory returning a valuation method. The training set is passed to the
         factory, in case the valuation needs to train something. E.g. for Data-OOB
         we need the bagging model to be fitted before the valuation is computed.
       - A factory that returns a utility that evaluates some model on a given test set.
         This is used for the performance evaluation. The model need not be the same
         as the one used for the valuation.
    3. It returns the scores in two DataFrames, one for the high value removals and one
       for the low value removals.

    Args:
        data_factory: A callable that returns a tuple of Datasets (train, test) given
            a random state
        valuation_factories: A list of callables that return Valuation objects given
            a model, train data, and random state. The training data is typically not
            needed for construction, but bagging models may require it
        utility_factory: A callable that returns a ModelUtility object given a test
            dataset and a random state. This object is used to evaluate the performance
            of the valuation method by removing data points from the training set and
            retraining the model, then scoring it on the test set.
        removal_percentages: The percentage of data to remove from the training set.
            This should be a list of floats between 0 and 1.
        n_runs: The number of repetitions of the experiment.
        n_jobs: The number of parallel jobs to use.
        random_state: The initial random state.
    Returns:
        A tuple of DataFrames with the scores for the low and high value removals
    """
    all_high_scores = []
    all_low_scores = []

    with parallel_config(n_jobs=n_jobs):
        seed_seq = ensure_seed_sequence(random_state).generate_state(n_runs)
        job = delayed(removal_job)

        with Parallel(return_as="generator_unordered") as parallel:
            delayed_evals = parallel(
                job(
                    data_factory=data_factory,
                    valuation_factory=valuation_factory,
                    utility_factory=utility_factory,
                    removal_percentages=removal_percentages,
                    random_state=seed_seq[i],
                )
                for valuation_factory in valuation_factories
                for i in range(n_runs)
            )
            for result in tqdm(
                delayed_evals, unit="%", total=len(valuation_factories) * n_runs
            ):
                low_scores, high_scores = result
                all_low_scores.append(low_scores)
                all_high_scores.append(high_scores)

    low_scores_df = pd.DataFrame(all_low_scores)
    high_scores_df = pd.DataFrame(all_high_scores)

    return low_scores_df, high_scores_df
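
Schematically, a call looks as follows. The factory bodies are left as stubs, since constructing the Datasets, valuations and utilities depends on the chosen models and the installed pydvl version; only the call shape is taken from the signature above.

import numpy as np

from pydvl.reporting.point_removal import run_removal_experiment

removal_percentages = np.arange(0, 0.5, 0.05)

def data_factory(random_state):
    # Return a (train, test) tuple of Datasets split with random_state.
    raise NotImplementedError

def utility_factory(test_data, random_state):
    # Return a ModelUtility scoring some model on test_data.
    raise NotImplementedError

# One callable per valuation method to compare (stubbed here).
valuation_factories = []

low_df, high_df = run_removal_experiment(
    data_factory=data_factory,
    valuation_factories=valuation_factories,
    utility_factory=utility_factory,
    removal_percentages=removal_percentages,
    n_runs=5,
    n_jobs=5,
)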

compute_removal_score

compute_removal_score(
    u: ModelUtility,
    values: ValuationResult,
    training_data: Dataset,
    percentages: NDArray[float_] | Iterable[float],
    *,
    remove_best: bool = False,
    progress: bool = False,
) -> dict[float, float]

Fits a model and computes its score on a test set after incrementally removing a percentage of data points from the training set, based on their values.

PARAMETERS

u (ModelUtility): Utility object with model, test data, and scoring function.

values (ValuationResult): Data values of data instances in the training set.

training_data (Dataset): Dataset from which to remove data points.

percentages (NDArray[float_] | Iterable[float]): Sequence of removal percentages.

remove_best (bool, default False): If True, removes data points in order of decreasing valuation.

progress (bool, default False): If True, display a progress bar.

RETURNS

dict[float, float]: Dictionary that maps the percentages to their respective scores.

Source code in src/pydvl/reporting/scores.py
def compute_removal_score(
    u: ModelUtility,
    values: ValuationResult,
    training_data: Dataset,
    percentages: NDArray[np.float_] | Iterable[float],
    *,
    remove_best: bool = False,
    progress: bool = False,
) -> dict[float, float]:
    """Fits a model and computes its score on a test set after incrementally removing
    a percentage of data points from the training set, based on their values.

    Args:
        u: Utility object with model, test data, and scoring function.
        training_data: Dataset from which to remove data points.
        values: Data values of data instances in the training set.
        percentages: Sequence of removal percentages.
        remove_best: If True, removes data points in order of decreasing valuation.
        progress: If True, display a progress bar.

    Returns:
        Dictionary that maps the percentages to their respective scores.
    """
    u = u.with_dataset(training_data)

    # Sanity checks
    if np.any([x >= 1.0 or x < 0.0 for x in percentages]):
        raise ValueError("All percentages should be in the range [0.0, 1.0)")

    if len(values) != len(training_data):
        raise ValueError(
            f"The number of values, {len(values)}, should be equal to the number of data points, {len(training_data)}"
        )

    scores = {}

    # We sort in descending order if we want to remove the best values
    values.sort(reverse=remove_best)

    for pct in tqdm(percentages, disable=not progress, desc="Removal Scores"):
        n_removal = int(pct * len(training_data))
        indices = values.indices[n_removal:]
        score = u(Sample(idx=None, subset=indices))
        scores[pct] = score
    return scores
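
A sketch of typical usage, assuming `utility` (a ModelUtility with test data and scorer), `values` (a ValuationResult for the training set) and `train` (the training Dataset) have been created beforehand with pydvl's valuation API:

import numpy as np

from pydvl.reporting.scores import compute_removal_score

# Percentages must lie in [0.0, 1.0): here up to 90% of the training data.
percentages = np.linspace(0, 0.9, 10)

# Removing the highest-valued points first: for a good valuation method
# the score should degrade quickly.
scores = compute_removal_score(
    utility, values, train, percentages, remove_best=True, progress=True
)
# scores maps each removal fraction to the test score after retraining.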