cnheider
/
draugr
mirror of https://github.com/cnheider/draugr


			
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Module metadata. The functions below export the standard scalar and curve
# tags found in tensorboard event files as CSV.
__author__ = "Christian Heider Nielsen"
__doc__ = r"""

           Created on 22/06/2020
           """

import os
from pathlib import Path
from typing import Optional

from apppath import AppPath
from draugr.tensorboard_utilities import TensorboardEventExporter
from draugr.visualisation.progress import progress_bar
from draugr.writers import (
    StandardTestingCurvesEnum,
    StandardTestingScalarsEnum,
    StandardTrainingCurvesEnum,
    StandardTrainingScalarsEnum,
)
from warg import ensure_existence

__all__ = ["extract_scalars_as_csv", "extract_tensors_as_csv", "extract_metrics"]

# Root of the default export locations used by the extractors below.
# NOTE: Path.cwd() is evaluated once, when this module is imported, so the
# defaults point at the working directory of the importing process at that time.
EXPORT_RESULTS_PATH = Path.cwd()


def extract_scalars_as_csv(
    train_path: Path = EXPORT_RESULTS_PATH / "csv" / "training",
    test_path: Path = EXPORT_RESULTS_PATH / "csv" / "testing",
    export_train: bool = True,
    export_test: bool = True,
    verbose: bool = False,
    only_extract_from_latest_event_file: bool = False,
    log_root: Optional[Path] = None,
) -> None:
    """
    Export the standard testing/training scalar tags of tensorboard event files as CSV.

    Every entry of ``log_root`` is treated as one run. Each directory below a run
    that holds an ``events.out.tfevents.*`` file is opened with a
    ``TensorboardEventExporter``; the values of ``StandardTestingScalarsEnum`` /
    ``StandardTrainingScalarsEnum`` that the exporter lists in
    ``available_scalars`` are passed to ``scalar_export_csv``.

    The output directory is ``<test_path | train_path> / <run name> / <relative
    event directory>``, where the first component of the relative directory is
    rewritten to ``<first>_Test_<last>``.

    :param train_path: root directory for exported training scalars
    :param test_path: root directory for exported testing scalars
    :param export_train: export the training scalar tags
    :param export_test: export the testing scalar tags
    :param verbose: print exported / skipped event directories; also forwarded to ``ensure_existence``
    :param only_extract_from_latest_event_file: only process the entry of ``log_root`` with the greatest ``os.path.getctime``
    :param log_root: directory whose entries are the runs to scan; defaults to the
        user log directory of the "Adversarial Speech" application
    """
    if log_root is None:
        # Backward-compatible default: the location that used to be hard-coded.
        log_root = AppPath("Adversarial Speech", "Christian Heider Nielsen").user_log

    run_dirs = list(Path(log_root).iterdir())
    if not run_dirs:
        # max() below would raise ValueError on an empty sequence.
        if verbose:
            print(f"{log_root}, no log entries found")
        return

    if only_extract_from_latest_event_file:
        run_dirs = [max(run_dirs, key=os.path.getctime)]

    # Map each run to the unique directories that contain event files.
    event_dirs = {
        run_dir: {ef.parent for ef in run_dir.rglob("events.out.tfevents.*")}
        for run_dir in run_dirs
    }

    # (enabled, requested tags, output root) - testing first, then training.
    targets = (
        (export_test, StandardTestingScalarsEnum, test_path),
        (export_train, StandardTrainingScalarsEnum, train_path),
    )

    for run_dir, dirs in progress_bar(event_dirs.items()):
        for event_dir in progress_bar(dirs):
            relative_path = event_dir.relative_to(run_dir)
            if relative_path.parts:
                # model_id = relative_path.parent.name can be included but is always the same
                mapping_id, *rest = relative_path.parts
                relative_path = Path(f"{mapping_id}_Test_{relative_path.name}", *rest)
            # else: the event files sit directly in the run directory, there is
            # no component to rename and the export goes to <root> / <run name>.

            with TensorboardEventExporter(event_dir, save_to_disk=True) as tee:
                for enabled, tag_enum, out_root in targets:
                    if not enabled:
                        continue

                    available = tee.available_scalars
                    out_tags = [tag.value for tag in tag_enum if tag.value in available]

                    if out_tags:
                        tee.scalar_export_csv(
                            *out_tags,
                            out_dir=ensure_existence(
                                out_root / run_dir.name / relative_path,
                                force_overwrite=True,
                                verbose=verbose,
                            ),
                        )
                        if verbose:
                            print(event_dir)
                    elif verbose:
                        print(
                            f"{event_dir}, no requested tags found {tag_enum.__members__.values()}, {available}"
                        )


def extract_tensors_as_csv(
    train_path: Path = EXPORT_RESULTS_PATH / "csv" / "training",
    test_path: Path = EXPORT_RESULTS_PATH / "csv" / "testing",
    export_train: bool = False,
    export_test: bool = True,
    verbose: bool = False,
    only_extract_from_latest_event_file: bool = False,
    log_root: Optional[Path] = None,
) -> None:
    """
    Export the standard testing/training curve tags of tensorboard event files as CSV.

    Every entry of ``log_root`` is treated as one run. Each directory below a run
    that holds an ``events.out.tfevents.*`` file is opened with a
    ``TensorboardEventExporter``; the values of ``StandardTestingCurvesEnum`` /
    ``StandardTrainingCurvesEnum`` that the exporter lists in
    ``available_tensors`` are passed to ``pr_curve_export_csv``.

    The output directory is ``<test_path | train_path> / <run name> / <relative
    event directory>``, where the first component of the relative directory is
    rewritten to ``<first>_Test_<last>``.

    :param train_path: root directory for exported training curves
    :param test_path: root directory for exported testing curves
    :param export_train: export the training curve tags (off by default, see the TODO below)
    :param export_test: export the testing curve tags
    :param verbose: print exported / skipped event directories; also forwarded to ``ensure_existence``
    :param only_extract_from_latest_event_file: only process the entry of ``log_root`` with the greatest ``os.path.getctime``
    :param log_root: directory whose entries are the runs to scan; defaults to the
        user log directory of the "Adversarial Speech" application
    :return:"""
    if log_root is None:
        # Backward-compatible default: the location that used to be hard-coded.
        log_root = AppPath("Adversarial Speech", "Christian Heider Nielsen").user_log

    run_dirs = list(Path(log_root).iterdir())
    if not run_dirs:
        # max() below would raise ValueError on an empty sequence.
        if verbose:
            print(f"{log_root}, no log entries found")
        return

    if only_extract_from_latest_event_file:
        run_dirs = [max(run_dirs, key=os.path.getctime)]

    # Map each run to the unique directories that contain event files.
    event_dirs = {
        run_dir: {ef.parent for ef in run_dir.rglob("events.out.tfevents.*")}
        for run_dir in run_dirs
    }

    # (enabled, requested tags, output root) - testing first, then training.
    # TODO: OUTPUT for all epoch steps, no support yet (training curves)
    targets = (
        (export_test, StandardTestingCurvesEnum, test_path),
        (export_train, StandardTrainingCurvesEnum, train_path),
    )

    for run_dir, dirs in progress_bar(event_dirs.items()):
        for event_dir in progress_bar(dirs):
            relative_path = event_dir.relative_to(run_dir)
            if relative_path.parts:
                # model_id = relative_path.parent.name can be included but is always the same
                mapping_id, *rest = relative_path.parts
                relative_path = Path(f"{mapping_id}_Test_{relative_path.name}", *rest)
            # else: the event files sit directly in the run directory, there is
            # no component to rename and the export goes to <root> / <run name>.

            with TensorboardEventExporter(event_dir, save_to_disk=True) as tee:
                for enabled, tag_enum, out_root in targets:
                    if not enabled:
                        continue

                    available = tee.available_tensors
                    out_tags = [tag.value for tag in tag_enum if tag.value in available]

                    if out_tags:
                        tee.pr_curve_export_csv(
                            *out_tags,
                            out_dir=ensure_existence(
                                out_root / run_dir.name / relative_path,
                                force_overwrite=True,
                                verbose=verbose,
                            ),
                        )
                        if verbose:
                            print(event_dir)
                    elif verbose:
                        print(
                            f"{event_dir}, no requested tags found {tag_enum.__members__.values()}, {available}"
                        )


def extract_metrics(only_extract_latest=False):
    """
    Run both CSV extractors with their default settings, scalars first and tensors second.

    :param only_extract_latest: forwarded to each extractor as ``only_extract_from_latest_event_file``
    """
    for extractor in (extract_scalars_as_csv, extract_tensors_as_csv):
        extractor(only_extract_from_latest_event_file=only_extract_latest)


if __name__ == "__main__":
    # Script entry point: run both extractors on the latest log entry only.
    extract_metrics(only_extract_latest=True)
    # Alternative, testing scalars only:
    # extract_scalars_as_csv(verbose=False,export_train=False)