Quick Start#

Classification#

from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from nestkit import NestedCVClassifier

X, y = load_breast_cancer(return_X_y=True)

ncv = NestedCVClassifier(
    estimator=RandomForestClassifier(random_state=42),
    param_grid={"n_estimators": [50, 100], "max_depth": [3, 5, 10]},
    outer_cv=5,
    inner_cv=3,
    scoring="accuracy",
    random_state=42,
)
ncv.fit(X, y)

results = ncv.results_
print(results.summary_default_)
print(results.best_params_per_fold_)

With calibration and threshold optimization#

ncv = NestedCVClassifier(
    estimator=RandomForestClassifier(random_state=42),
    param_grid={"n_estimators": [50, 100], "max_depth": [3, 5]},
    outer_cv=5,
    inner_cv=3,
    calibration_method="isotonic",
    threshold_strategy="pooled",
    threshold_criterion="youden",
    random_state=42,
)
ncv.fit(X, y)
print(ncv.results_.threshold_comparison())

Regression#

from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from nestkit import NestedCVRegressor

X, y = load_diabetes(return_X_y=True)

ncv = NestedCVRegressor(
    estimator=Ridge(),
    param_grid={"alpha": [0.01, 0.1, 1.0, 10.0]},
    outer_cv=5,
    inner_cv=3,
    prediction_intervals=True,
    random_state=42,
)
ncv.fit(X, y)

results = ncv.results_
print(results.summary_default_)
print(f"PI coverage: {results.prediction_interval_coverage_['mean']:.3f}")