Source code for nestkit.diagnostics.stability

"""Hyperparameter stability analysis across outer folds.

Provides ``HyperparameterStability``, a diagnostic class for assessing
how consistently the inner cross-validation selects hyperparameters
across outer folds.  Unstable hyperparameter selection may indicate
that the model is sensitive to the training data composition.
"""

from __future__ import annotations

from collections import Counter
from itertools import combinations

import numpy as np
import pandas as pd

from nestkit._constants import _EPS


class HyperparameterStability:
    """Assess hyperparameter selection consistency across outer folds.

    Analyses the best hyperparameter configurations chosen in each outer
    fold and provides summary statistics (mode, entropy, agreement rate,
    coefficient of variation), pairwise Jaccard similarity, and a
    stability flag per parameter.

    Parameters
    ----------
    best_params_per_fold : list of dict
        Best hyperparameters selected in each outer fold.  Each dict maps
        parameter names to their selected values.

    Attributes
    ----------
    best_params_per_fold : list of dict
        The input parameter sets (stored by reference).
    n_folds : int
        Number of outer folds (``len(best_params_per_fold)`` at
        construction time).

    Examples
    --------
    >>> from nestkit.diagnostics.stability import HyperparameterStability
    >>> params = [
    ...     {"C": 1.0, "kernel": "rbf"},
    ...     {"C": 1.0, "kernel": "rbf"},
    ...     {"C": 0.1, "kernel": "rbf"},
    ... ]
    >>> hs = HyperparameterStability(params)
    >>> hs.summary()  # doctest: +SKIP
        param mode  nunique   entropy  agreement_rate   cv
    0       C  1.0        2  0.918...        0.666667  ...
    1  kernel  rbf        1  0.000000        1.000000  NaN
    """

    # Fixed column layouts so that empty results still expose the
    # documented schema instead of a column-less DataFrame.
    _SUMMARY_COLUMNS = (
        "param",
        "mode",
        "nunique",
        "entropy",
        "agreement_rate",
        "cv",
    )
    _JACCARD_COLUMNS = ("fold_i", "fold_j", "jaccard")

    def __init__(self, best_params_per_fold: list[dict]):
        self.best_params_per_fold = best_params_per_fold
        self.n_folds = len(best_params_per_fold)

    def summary(self) -> pd.DataFrame:
        """Compute per-parameter stability summary statistics.

        Returns
        -------
        pandas.DataFrame
            One row per hyperparameter (sorted by name) with columns:

            * ``param`` -- Hyperparameter name.
            * ``mode`` -- Most frequently selected value (as string).
            * ``nunique`` -- Number of distinct values across folds.
            * ``entropy`` -- Shannon entropy (base 2) of the value
              distribution.  Zero means perfect agreement.
            * ``agreement_rate`` -- Fraction of folds that selected the
              modal value.  Ranges from ``1/n_folds`` to 1.
            * ``cv`` -- Coefficient of variation (``std / mean``) for
              numeric parameters.  ``NaN`` for non-numeric parameters,
              for parameters missing (or ``None``) in some fold, and
              when the mean is zero.

            When there are no parameters at all the frame is empty but
            still carries these columns.

        Notes
        -----
        Values are converted to strings for counting purposes, so ``1``
        and ``1.0`` are treated as distinct.  A parameter absent from a
        fold is counted as the value ``"None"`` for that fold.

        No smoothing constant is added inside the logarithm: every
        counted value occurs at least once, so its probability is
        strictly positive and the entropy of a unanimous parameter is
        exactly zero.

        Examples
        --------
        >>> hs = HyperparameterStability([{"lr": 0.01}, {"lr": 0.01}])
        >>> hs.summary()["agreement_rate"].iloc[0]  # doctest: +SKIP
        1.0
        """
        param_names = sorted(
            {name for bp in self.best_params_per_fold for name in bp}
        )

        rows = []
        for param in param_names:
            vals = [bp.get(param) for bp in self.best_params_per_fold]
            counts = Counter(str(v) for v in vals)
            mode_val, mode_count = counts.most_common(1)[0]

            probs = np.array(list(counts.values()), dtype=float) / self.n_folds
            # ``+ 0.0`` turns the ``-0.0`` produced by negating an exact
            # zero sum into a plain ``0.0``.
            entropy = float(-np.sum(probs * np.log2(probs))) + 0.0

            rows.append(
                {
                    "param": param,
                    "mode": mode_val,
                    "nunique": len(counts),
                    "entropy": entropy,
                    "agreement_rate": mode_count / self.n_folds,
                    "cv": self._coefficient_of_variation(vals),
                }
            )

        return pd.DataFrame(rows, columns=list(self._SUMMARY_COLUMNS))

    def _coefficient_of_variation(self, vals: list) -> float:
        """Return ``std / mean`` (sample std) of *vals*, or ``NaN``.

        ``NaN`` is returned when any value cannot be converted to
        ``float``, when a fold has no value (``None``), or when the mean
        is zero.  With a single fold the result is ``0.0``.
        """
        try:
            numeric_vals = [float(v) for v in vals if v is not None]
        except (TypeError, ValueError):
            return np.nan

        # A ``None`` entry means some fold did not select this parameter.
        if len(numeric_vals) != self.n_folds:
            return np.nan

        mean = np.mean(numeric_vals)
        if mean == 0:
            return np.nan
        if self.n_folds > 1:
            return float(np.std(numeric_vals, ddof=1) / mean)
        return 0.0

    def is_stable(self, threshold: float = 0.8) -> dict[str, bool]:
        """Determine whether each hyperparameter is stable.

        A parameter is considered stable if its agreement rate (fraction
        of folds selecting the modal value) meets or exceeds the given
        threshold.

        Parameters
        ----------
        threshold : float, default 0.8
            Minimum agreement rate to consider a parameter stable.

        Returns
        -------
        dict of {str: bool}
            Mapping from parameter name to stability flag.  Empty when
            there are no parameters.

        Examples
        --------
        >>> hs = HyperparameterStability([
        ...     {"C": 1.0}, {"C": 1.0}, {"C": 0.1}
        ... ])
        >>> hs.is_stable(threshold=0.5)  # doctest: +SKIP
        {'C': True}
        >>> hs.is_stable(threshold=0.8)  # doctest: +SKIP
        {'C': False}
        """
        df = self.summary()
        # ``bool(...)`` keeps the values plain Python booleans rather
        # than NumPy scalars.
        return {
            param: bool(rate >= threshold)
            for param, rate in zip(df["param"], df["agreement_rate"])
        }

    def pairwise_jaccard(self) -> pd.DataFrame:
        """Compute pairwise Jaccard similarity of hyperparameter configurations.

        Treats each fold's selected configuration as a set of
        ``"param=value"`` strings and computes the Jaccard index for
        every pair of folds.

        Returns
        -------
        pandas.DataFrame
            DataFrame with columns ``fold_i``, ``fold_j``, ``jaccard``.
            One row per unique pair of folds.  With fewer than two folds
            the frame is empty but still carries these columns.

        Notes
        -----
        The Jaccard similarity index is defined as:

        .. math::

            J(A, B) = \\frac{|A \\cap B|}{|A \\cup B|}

        where *A* and *B* are the sets of ``"param=value"`` strings for
        two folds.  A Jaccard index of 1.0 means the two folds selected
        identical configurations; 0.0 means completely different
        configurations.  Two empty configurations are treated as
        identical (1.0).

        Examples
        --------
        >>> hs = HyperparameterStability([
        ...     {"C": 1.0, "kernel": "rbf"},
        ...     {"C": 1.0, "kernel": "rbf"},
        ...     {"C": 0.1, "kernel": "linear"},
        ... ])
        >>> hs.pairwise_jaccard()  # doctest: +SKIP
           fold_i  fold_j  jaccard
        0       0       1      1.0
        1       0       2      0.0
        2       1       2      0.0
        """
        # Build each fold's "param=value" set once rather than once per pair.
        fold_sets = [
            {f"{k}={v}" for k, v in bp.items()}
            for bp in self.best_params_per_fold
        ]

        rows = []
        for i, j in combinations(range(self.n_folds), 2):
            set_i, set_j = fold_sets[i], fold_sets[j]
            union = len(set_i | set_j)
            jaccard = len(set_i & set_j) / union if union else 1.0
            rows.append({"fold_i": i, "fold_j": j, "jaccard": jaccard})

        return pd.DataFrame(rows, columns=list(self._JACCARD_COLUMNS))