Source code for climb.tool.impl.data_suite.third_party.uq360.utils.misc

import abc
import sys

# Ensure compatibility with Python 2/3
if sys.version_info >= (3, 4):
    ABC = abc.ABC
else:
    ABC = abc.ABCMeta(str("ABC"), (), {})

from copy import deepcopy

import numpy as np
import numpy.random as npr



[docs]
def make_batches(n_data, batch_size):
    return [slice(i, min(i + batch_size, n_data)) for i in range(0, n_data, batch_size)]




[docs]
def generate_regression_data(seed, data_count=500):
    """
    Generate data from a noisy sine wave.
    :param seed: random number seed
    :param data_count: number of data points.
    :return:
    """
    np.random.seed(seed)
    noise_var = 0.1

    x = np.linspace(-4, 4, data_count)
    y = 1 * np.sin(x) + np.sqrt(noise_var) * npr.randn(data_count)

    train_count = int(0.2 * data_count)
    idx = npr.permutation(range(data_count))
    x_train = x[idx[:train_count], np.newaxis]
    x_test = x[idx[train_count:], np.newaxis]
    y_train = y[idx[:train_count]]
    y_test = y[idx[train_count:]]

    mu = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    x_train = (x_train - mu) / std
    x_test = (x_test - mu) / std
    mu = np.mean(y_train, 0)
    std = np.std(y_train, 0)
    y_train = (y_train - mu) / std
    train_stats = dict()
    train_stats["mu"] = mu
    train_stats["sigma"] = std

    return x_train, y_train, x_test, y_test, train_stats




[docs]
def form_D_for_auucc(yhat, zhatl, zhatu):
    # a handy routine to format data as needed by the UCC fit() method
    D = np.zeros([yhat.shape[0], 3])
    D[:, 0] = yhat.squeeze()
    D[:, 1] = zhatl.squeeze()
    D[:, 2] = zhatu.squeeze()
    return D




[docs]
def fitted_ucc_w_nullref(y_true, y_pred_mean, y_pred_lower, y_pred_upper):
    """
    Instantiates an UCC object for the target predictor plus a 'null' (constant band) reference
    :param y_pred_lower:
    :param y_pred_mean:
    :param y_pred_upper:
    :param y_true:
    :return: ucc object fitted for two systems: target  + null reference
    """
    # form matrix for ucc:
    X_for_ucc = form_D_for_auucc(
        y_pred_mean.squeeze(),
        y_pred_mean.squeeze() - y_pred_lower.squeeze(),
        y_pred_upper.squeeze() - y_pred_mean.squeeze(),
    )
    # form matrix for a 'null' system (constant band)
    X_null = deepcopy(X_for_ucc)
    X_null[:, 1:] = np.std(y_pred_mean)  # can be set to any other constant (no effect on AUUCC)
    # create an instance of ucc and fit data
    from climb.tool.impl.data_suite.third_party.uq360.metrics.uncertainty_characteristics_curve import (
        UncertaintyCharacteristicsCurve as ucc,
    )

    u = ucc()
    u.fit([X_for_ucc, X_null], y_true.squeeze())
    return u




[docs]
def make_sklearn_compatible_scorer(task_type, metric, greater_is_better=True, **kwargs):
    """

    Args:
        task_type: (str) regression or classification.
        metric: (str): choice of metric can be one of these - [aurrrc, ece, auroc, nll, brier, accuracy] for
            classification and ["rmse", "nll", "auucc_gain", "picp", "mpiw", "r2"] for regression.
        greater_is_better: is False the scores are negated before returning.
        **kwargs: additional arguments specific to some metrics.

    Returns:
        sklearn compatible scorer function.

    """

    from climb.tool.impl.data_suite.third_party.uq360.metrics.classification_metrics import compute_classification_metrics
    from climb.tool.impl.data_suite.third_party.uq360.metrics.regression_metrics import compute_regression_metrics

    def sklearn_compatible_score(model, X, y_true):
        """

        Args:
            model: The model being scored. Currently uq360 and sklearn models are supported.
            X: Input features.
            y_true: ground truth values for the target.

        Returns:
            Computed score of the model.

        """

        from climb.tool.impl.data_suite.third_party.uq360.algorithms.builtinuq import BuiltinUQ
        from climb.tool.impl.data_suite.third_party.uq360.algorithms.posthocuq import PostHocUQ

        if isinstance(model, BuiltinUQ) or isinstance(model, PostHocUQ):
            # uq360 models
            if task_type == "classification":
                score = compute_classification_metrics(
                    y_true=y_true, y_prob=model.predict(X).y_prob, option=metric, **kwargs
                )[metric]
            elif task_type == "regression":
                y_mean, y_lower, y_upper = model.predict(X)
                score = compute_regression_metrics(
                    y_true=y_true, y_mean=y_mean, y_lower=y_lower, y_upper=y_upper, option=metric, **kwargs
                )[metric]
            else:
                raise NotImplementedError

        else:
            # sklearn models
            if task_type == "classification":
                score = compute_classification_metrics(
                    y_true=y_true, y_prob=model.predict_proba(X), option=metric, **kwargs
                )[metric]
            else:
                if metric in ["rmse", "r2"]:
                    score = compute_regression_metrics(
                        y_true=y_true, y_mean=model.predict(X), y_lower=None, y_upper=None, option=metric, **kwargs
                    )[metric]
                else:
                    raise NotImplementedError("{} is not supported for sklearn regression models".format(metric))

        if not greater_is_better:
            score = -score
        return score

    return sklearn_compatible_score




[docs]
class DummySklearnEstimator(ABC):
    def __init__(self, num_classes, base_model_prediction_fn):
        self.base_model_prediction_fn = base_model_prediction_fn
        self.classes_ = [i for i in range(num_classes)]


[docs]
    def fit(self):
        pass



[docs]
    def predict_proba(self, X):
        return self.base_model_prediction_fn(X)