# Source code for nestkit.diagnostics.stability

"""Hyperparameter stability analysis across outer folds.

Provides ``HyperparameterStability``, a diagnostic class for assessing
how consistently the inner cross-validation selects hyperparameters
across outer folds.  Unstable hyperparameter selection may indicate
that the model is sensitive to the training data composition.
"""

from __future__ import annotations

from collections import Counter
from itertools import combinations

import numpy as np
import pandas as pd

from nestkit._constants import _EPS



# [docs]  (Sphinx viewcode link marker left over from HTML extraction; not Python code)
class HyperparameterStability:
    """Assess hyperparameter selection consistency across outer folds.

    Analyses the best hyperparameter configurations chosen in each
    outer fold and provides summary statistics (mode, entropy,
    agreement rate, coefficient of variation), pairwise Jaccard
    similarity, and a stability flag per parameter.

    Parameters
    ----------
    best_params_per_fold : list of dict
        Best hyperparameters selected in each outer fold.  Each dict
        maps parameter names to their selected values.

    Attributes
    ----------
    best_params_per_fold : list of dict
        The input parameter sets (stored by reference, not copied).
    n_folds : int
        Number of outer folds (``len(best_params_per_fold)``), captured
        once at construction time.

    Examples
    --------
    >>> from nestkit.diagnostics.stability import HyperparameterStability
    >>> params = [
    ...     {"C": 1.0, "kernel": "rbf"},
    ...     {"C": 1.0, "kernel": "rbf"},
    ...     {"C": 0.1, "kernel": "rbf"},
    ... ]
    >>> hs = HyperparameterStability(params)
    >>> hs.summary()  # doctest: +SKIP
        param mode  nunique   entropy  agreement_rate        cv
    0       C  1.0        2  0.918296        0.666667  0.742307
    1  kernel  rbf        1  0.000000        1.000000       NaN
    """

    # Column layouts are fixed up front so that empty results still expose
    # the documented columns instead of a column-less DataFrame.
    _SUMMARY_COLUMNS = ("param", "mode", "nunique", "entropy", "agreement_rate", "cv")
    _JACCARD_COLUMNS = ("fold_i", "fold_j", "jaccard")

    def __init__(self, best_params_per_fold: list[dict]):
        self.best_params_per_fold = best_params_per_fold
        self.n_folds = len(best_params_per_fold)

    def _coefficient_of_variation(self, vals: list) -> float:
        """Return ``std / mean`` (sample std, ``ddof=1``) of *vals*, or NaN.

        NaN is returned when any non-``None`` value cannot be converted
        with ``float()``, when at least one fold has no value for the
        parameter (``None``), or when the mean is exactly zero.  With a
        single fold the result is ``0.0`` because a sample standard
        deviation is undefined for one observation.
        """
        try:
            numeric_vals = [float(v) for v in vals if v is not None]
        except (TypeError, ValueError):
            # Not a numeric parameter (e.g. "rbf", a list, ...).
            return np.nan

        # A missing value in any fold means the CV is not comparable.
        if len(numeric_vals) != self.n_folds:
            return np.nan

        mean = np.mean(numeric_vals)
        if mean == 0:
            return np.nan
        if self.n_folds <= 1:
            return 0.0
        return float(np.std(numeric_vals, ddof=1) / mean)

    def summary(self) -> pd.DataFrame:
        """Compute per-parameter stability summary statistics.

        Returns
        -------
        pandas.DataFrame
            One row per hyperparameter (sorted by name) with columns:

            * ``param`` -- Hyperparameter name.
            * ``mode`` -- Most frequently selected value (as string).
            * ``nunique`` -- Number of distinct values across folds.
            * ``entropy`` -- Shannon entropy (base 2) of the value
              distribution.  Zero means perfect agreement.
            * ``agreement_rate`` -- Fraction of folds that selected the
              modal value.  Ranges from ``1/n_folds`` to 1.
            * ``cv`` -- Coefficient of variation (``std / mean``, sample
              standard deviation) for numeric parameters.  ``NaN`` for
              non-numeric parameters, for parameters missing from at
              least one fold, and when the mean is zero.

            When no parameters are present the frame is empty but still
            carries these columns.

        Notes
        -----
        Values are converted to strings for counting purposes, so
        ``1`` and ``1.0`` are treated as distinct.  A parameter that is
        absent from a fold is counted as the value ``"None"`` for that
        fold.  "Numeric" means convertible with ``float()``, which also
        accepts booleans and numeric strings.

        Examples
        --------
        >>> hs = HyperparameterStability([{"lr": 0.01}, {"lr": 0.01}])
        >>> hs.summary()["agreement_rate"].iloc[0]  # doctest: +SKIP
        1.0
        """
        all_params: set[str] = set()
        for bp in self.best_params_per_fold:
            all_params.update(bp)

        rows = []
        for param in sorted(all_params):
            vals = [bp.get(param) for bp in self.best_params_per_fold]
            counts = Counter(str(v) for v in vals)
            mode_val, mode_count = counts.most_common(1)[0]

            # Every count is >= 1, so every probability is strictly positive
            # and log2 needs no epsilon guard.  max() normalises the -0.0
            # produced by perfect agreement to a plain 0.0.
            probs = np.array(list(counts.values()), dtype=float) / self.n_folds
            entropy = max(0.0, float(-np.sum(probs * np.log2(probs))))

            rows.append(
                {
                    "param": param,
                    "mode": mode_val,
                    "nunique": len(counts),
                    "entropy": entropy,
                    "agreement_rate": mode_count / self.n_folds,
                    "cv": self._coefficient_of_variation(vals),
                }
            )

        return pd.DataFrame(rows, columns=list(self._SUMMARY_COLUMNS))

    def is_stable(self, threshold: float = 0.8) -> dict[str, bool]:
        """Determine whether each hyperparameter is stable.

        A parameter is considered stable if its agreement rate (fraction
        of folds selecting the modal value) meets or exceeds the given
        threshold.

        Parameters
        ----------
        threshold : float, default 0.8
            Minimum agreement rate to consider a parameter stable.

        Returns
        -------
        dict of {str: bool}
            Mapping from parameter name to stability flag.  The flags
            are builtin ``bool`` objects.  The dict is empty when there
            are no parameters.

        Examples
        --------
        >>> hs = HyperparameterStability([
        ...     {"C": 1.0}, {"C": 1.0}, {"C": 0.1}
        ... ])
        >>> hs.is_stable(threshold=0.5)  # doctest: +SKIP
        {'C': True}
        >>> hs.is_stable(threshold=0.8)  # doctest: +SKIP
        {'C': False}
        """
        df = self.summary()
        # bool() converts the NumPy comparison result into a builtin bool so
        # the return value matches the annotation.
        return {
            name: bool(rate >= threshold)
            for name, rate in zip(df["param"], df["agreement_rate"])
        }

    def pairwise_jaccard(self) -> pd.DataFrame:
        """Compute pairwise Jaccard similarity of hyperparameter configurations.

        Treats each fold's selected configuration as a set of
        ``"param=value"`` strings and computes the Jaccard index for
        every pair of folds.

        Returns
        -------
        pandas.DataFrame
            DataFrame with columns ``fold_i``, ``fold_j``, ``jaccard``.
            One row per unique pair of folds (``fold_i < fold_j``).
            With fewer than two folds the frame is empty but still
            carries these columns.

        Notes
        -----
        The Jaccard similarity index is defined as:

        .. math::

            J(A, B) = \\frac{|A \\cap B|}{|A \\cup B|}

        where *A* and *B* are the sets of ``"param=value"`` strings for
        two folds.  A Jaccard index of 1.0 means the two folds selected
        identical configurations; 0.0 means completely different
        configurations.  Two empty configurations are defined to have a
        similarity of 1.0.

        Examples
        --------
        >>> hs = HyperparameterStability([
        ...     {"C": 1.0, "kernel": "rbf"},
        ...     {"C": 1.0, "kernel": "rbf"},
        ...     {"C": 0.1, "kernel": "linear"},
        ... ])
        >>> hs.pairwise_jaccard()  # doctest: +SKIP
           fold_i  fold_j  jaccard
        0       0       1      1.0
        1       0       2      0.0
        2       1       2      0.0
        """
        # Build each fold's item set once instead of once per pair.
        fold_sets = [
            {f"{k}={v}" for k, v in bp.items()} for bp in self.best_params_per_fold
        ]

        rows = []
        for i, j in combinations(range(self.n_folds), 2):
            set_i, set_j = fold_sets[i], fold_sets[j]
            union = len(set_i | set_j)
            # Both configurations empty: treat them as identical.
            jaccard = len(set_i & set_j) / union if union else 1.0
            rows.append({"fold_i": i, "fold_j": j, "jaccard": jaccard})
        return pd.DataFrame(rows, columns=list(self._JACCARD_COLUMNS))