Source code for draugr.torch_utilities.evaluation.cross_validation

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = "Christian Heider Nielsen"
__doc__ = r"""

           Created on 29/07/2020
           """

__all__ = ["cross_validation_generator"]

from typing import Iterator, Tuple

import torch
from sklearn.model_selection import KFold
from torch.utils.data import ConcatDataset, Dataset, Subset, TensorDataset

from draugr.torch_utilities.tensors.to_tensor import to_tensor


def cross_validation_generator(
    *datasets: Dataset, n_splits: int = 10
) -> Iterator[Tuple[Subset, Subset]]:
    """
    Yield (train, validation) Subset pairs for k-fold cross-validation over the
    concatenation of the given datasets.

    Learning the parameters of a prediction function and testing it on the same
    data is a methodological mistake: a model that would just repeat the labels
    of the samples that it has just seen would have a perfect score but would
    fail to predict anything useful on yet-unseen data. This situation is called
    overfitting. To avoid it, it is common practice when performing a
    (supervised) machine learning experiment to hold out part of the available
    data as a test set.

    :param datasets: one or more datasets to concatenate and split
    :param n_splits: number of folds
    :return: generator of (train, validation) Subset pairs
    """
    cum = ConcatDataset(datasets)
    for train_index, val_index in KFold(n_splits=n_splits).split(cum):
        yield Subset(cum, train_index), Subset(cum, val_index)
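
# Illustrative sketch (not part of the original module): what KFold.split yields
# for a ConcatDataset of length 6 with n_splits=3 and the default shuffle=False.
# The example length and fold count are assumptions chosen for readability.
# Successive (train_index, val_index) pairs look like:
#   ([2, 3, 4, 5], [0, 1]), ([0, 1, 4, 5], [2, 3]), ([0, 1, 2, 3], [4, 5])
# Each pair is wrapped in torch.utils.data.Subset before being yielded above.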
if __name__ == "__main__":

    def asdasidoj() -> None:
        """
        Quick self-test: build 200 small diagonal matrices, split them into two
        TensorDatasets and iterate over the cross-validation folds.

        :rtype: None
        """
        X = to_tensor([torch.diag(torch.arange(i, i + 2)) for i in range(200)])
        x_train = TensorDataset(X[:100])
        x_val = TensorDataset(X[100:])
        for train, val in cross_validation_generator(x_train, x_val):
            print(len(train), len(val))
            print(train[0], val[0])

    asdasidoj()
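
# A minimal usage sketch (assumed names, not part of the module): one plausible
# way to consume the yielded Subset pairs with per-fold DataLoaders. The model,
# optimiser and loss are omitted; train_one_fold is a hypothetical helper.
#
# from torch.utils.data import DataLoader
#
# def train_one_fold(train_subset: Subset, val_subset: Subset, batch_size: int = 16) -> None:
#     train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
#     val_loader = DataLoader(val_subset, batch_size=batch_size)
#     for (batch,) in train_loader:  # each TensorDataset item is a 1-tuple
#         ...  # forward/backward pass with a model of your choice
#     for (batch,) in val_loader:
#         ...  # evaluation pass
#
# for fold, (train, val) in enumerate(cross_validation_generator(x_train, x_val, n_splits=5)):
#     train_one_fold(train, val)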