Skip to content

pydvl.value.least_core.montecarlo

montecarlo_least_core

montecarlo_least_core(
    u: Utility,
    n_iterations: int,
    *,
    n_jobs: int = 1,
    parallel_backend: Optional[ParallelBackend] = None,
    config: Optional[ParallelConfig] = None,
    non_negative_subsidy: bool = False,
    solver_options: Optional[dict] = None,
    progress: bool = False,
    seed: Optional[Seed] = None
) -> ValuationResult

Computes approximate Least Core values using a Monte Carlo approach.

\[ \begin{array}{lll} \text{minimize} & \displaystyle{e} & \\ \text{subject to} & \displaystyle\sum_{i\in N} x_{i} = v(N) & \\ & \displaystyle\sum_{i\in S} x_{i} + e \geq v(S) & , \forall S \in \{S_1, S_2, \dots, S_m \overset{\mathrm{iid}}{\sim} U(2^N) \} \end{array} \]

Where:

  • \(U(2^N)\) is the uniform distribution over the powerset of \(N\).
  • \(m\) is the number of subsets that will be sampled and whose utility will be computed and used to compute the data values.
PARAMETER DESCRIPTION
u

Utility object with model, data, and scoring function

TYPE: Utility

n_iterations

total number of iterations to use

TYPE: int

n_jobs

number of jobs across which to distribute the computation

TYPE: int DEFAULT: 1

parallel_backend

Parallel backend instance to use for parallelizing computations. If None, use JoblibParallelBackend backend. See the Parallel Backends package for available options.

TYPE: Optional[ParallelBackend] DEFAULT: None

config

(DEPRECATED) Object configuring parallel computation, with cluster address, number of cpus, etc.

TYPE: Optional[ParallelConfig] DEFAULT: None

non_negative_subsidy

If True, the least core subsidy \(e\) is constrained to be non-negative.

TYPE: bool DEFAULT: False

solver_options

Dictionary of options that will be used to select a solver and to configure it. Refer to cvxpy's documentation for all possible options.

TYPE: Optional[dict] DEFAULT: None

progress

If True, shows a tqdm progress bar

TYPE: bool DEFAULT: False

seed

Either an instance of a numpy random number generator or a seed for it.

TYPE: Optional[Seed] DEFAULT: None

RETURNS DESCRIPTION
ValuationResult

Object with the data values and the least core value.

Changed in version 0.9.0

Deprecated config argument and added a parallel_backend argument to allow users to pass the Parallel Backend instance directly.

Source code in src/pydvl/value/least_core/montecarlo.py
@deprecated(
    target=True,
    args_mapping={"config": "config"},
    deprecated_in="0.9.0",
    remove_in="0.10.0",
)
def montecarlo_least_core(
    u: Utility,
    n_iterations: int,
    *,
    n_jobs: int = 1,
    parallel_backend: Optional[ParallelBackend] = None,
    config: Optional[ParallelConfig] = None,
    non_negative_subsidy: bool = False,
    solver_options: Optional[dict] = None,
    progress: bool = False,
    seed: Optional[Seed] = None,
) -> ValuationResult:
    r"""Computes approximate Least Core values using a Monte Carlo approach.

    $$
    \begin{array}{lll}
    \text{minimize} & \displaystyle{e} & \\
    \text{subject to} & \displaystyle\sum_{i\in N} x_{i} = v(N) & \\
    & \displaystyle\sum_{i\in S} x_{i} + e \geq v(S) & ,
    \forall S \in \{S_1, S_2, \dots, S_m \overset{\mathrm{iid}}{\sim} U(2^N) \}
    \end{array}
    $$

    Where:

    * $U(2^N)$ is the uniform distribution over the powerset of $N$.
    * $m$ is the number of subsets that will be sampled and whose utility will
      be computed and used to compute the data values.

    Args:
        u: Utility object with model, data, and scoring function
        n_iterations: total number of iterations to use
        n_jobs: number of jobs across which to distribute the computation
        parallel_backend: Parallel backend instance to use
            for parallelizing computations. If `None`,
            use [JoblibParallelBackend][pydvl.parallel.backends.JoblibParallelBackend] backend.
            See the [Parallel Backends][pydvl.parallel.backends] package
            for available options.
        config: (**DEPRECATED**) Object configuring parallel computation,
            with cluster address, number of cpus, etc.
        non_negative_subsidy: If True, the least core subsidy $e$ is constrained
            to be non-negative.
        solver_options: Dictionary of options that will be used to select a solver
            and to configure it. Refer to [cvxpy's
            documentation](https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options)
            for all possible options.
        progress: If True, shows a tqdm progress bar
        seed: Either an instance of a numpy random number generator or a seed for it.

    Returns:
        Object with the data values and the least core value.

    !!! tip "Changed in version 0.9.0"
        Deprecated `config` argument and added a `parallel_backend`
        argument to allow users to pass the Parallel Backend instance
        directly.
    """
    problem = mclc_prepare_problem(
        u,
        n_iterations,
        n_jobs=n_jobs,
        parallel_backend=parallel_backend,
        config=config,
        progress=progress,
        seed=seed,
    )
    return lc_solve_problem(
        problem,
        u=u,
        algorithm="montecarlo_least_core",
        non_negative_subsidy=non_negative_subsidy,
        solver_options=solver_options,
    )

mclc_prepare_problem

mclc_prepare_problem(
    u: Utility,
    n_iterations: int,
    *,
    n_jobs: int = 1,
    parallel_backend: Optional[ParallelBackend] = None,
    config: Optional[ParallelConfig] = None,
    progress: bool = False,
    seed: Optional[Seed] = None
) -> LeastCoreProblem

Prepares a linear problem by sampling subsets of the data. Use this to separate the problem preparation from the solving with lc_solve_problem(). Useful for parallel execution of multiple experiments.

See montecarlo_least_core for argument descriptions.

Changed in version 0.9.0

Deprecated config argument and added a parallel_backend argument to allow users to pass the Parallel Backend instance directly.

Source code in src/pydvl/value/least_core/montecarlo.py
@deprecated(
    target=True,
    args_mapping={"config": "config"},
    deprecated_in="0.9.0",
    remove_in="0.10.0",
)
def mclc_prepare_problem(
    u: Utility,
    n_iterations: int,
    *,
    n_jobs: int = 1,
    parallel_backend: Optional[ParallelBackend] = None,
    config: Optional[ParallelConfig] = None,
    progress: bool = False,
    seed: Optional[Seed] = None,
) -> LeastCoreProblem:
    """Prepares a linear problem by sampling subsets of the data. Use this to
    separate the problem preparation from the solving with
    [lc_solve_problem()][pydvl.value.least_core.common.lc_solve_problem]. Useful
    for parallel execution of multiple experiments.

    See
    [montecarlo_least_core][pydvl.value.least_core.montecarlo.montecarlo_least_core]
    for argument descriptions.

    !!! note "Changed in version 0.9.0"
        Deprecated `config` argument and added a `parallel_backend`
        argument to allow users to pass the Parallel Backend instance
        directly.
    """
    n = len(u.data)

    if n_iterations < n:
        warnings.warn(
            f"Number of iterations '{n_iterations}' is smaller the size of the dataset '{n}'. "
            f"This is not optimal because in the worst case we need at least '{n}' constraints "
            "to satisfy the individual rationality condition."
        )

    if n_iterations > 2**n:
        warnings.warn(
            f"Passed n_iterations is greater than the number subsets! "
            f"Setting it to 2^{n}",
            RuntimeWarning,
        )
        n_iterations = 2**n

    parallel_backend = _maybe_init_parallel_backend(parallel_backend, config)

    iterations_per_job = max(
        1, n_iterations // parallel_backend.effective_n_jobs(n_jobs)
    )

    map_reduce_job: MapReduceJob["Utility", "LeastCoreProblem"] = MapReduceJob(
        inputs=u,
        map_func=_montecarlo_least_core,
        reduce_func=_reduce_func,
        map_kwargs=dict(n_iterations=iterations_per_job, progress=progress),
        n_jobs=n_jobs,
        parallel_backend=parallel_backend,
    )

    return map_reduce_job(seed=seed)