# Source code for draugr.pandas_utilities.misc_utilities

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Module metadata.
__author__ = "Christian Heider Nielsen"
__doc__ = r"""

           Created on 26-01-2021
           """

# Names exported by a star-import of this module.
__all__ = ["duplicate_columns", "ExportMethodEnum", "ChainedAssignmentOptionEnum"]

import enum
from typing import List

import pandas
from pandas.core.dtypes.missing import array_equivalent
from sorcery import assigned_names


class ExportMethodEnum(enum.Enum):
    """Available Pandas Dataframe export methods.

    Every member's value is its own name as a string (for example
    ``ExportMethodEnum.csv.value == "csv"``). The values are written out
    explicitly so that the members are visible to static tooling and the
    class does not rely on runtime source inspection to be created.

    NOTE(review): the names look like the suffixes of ``DataFrame.to_<name>``
    exporters - confirm against the callers that consume this enum.
    """

    parquet = "parquet"
    pickle = "pickle"  # also 'dataframe'
    csv = "csv"
    hdf = "hdf"
    sql = "sql"
    dict = "dict"
    excel = "excel"
    json = "json"
    html = "html"
    feather = "feather"
    latex = "latex"
    stata = "stata"
    gbq = "gbq"
    records = "records"
    string = "string"
    clipboard = "clipboard"
    markdown = "markdown"
    xarray = "xarray"


class ChainedAssignmentOptionEnum(enum.Enum):
    """Choices for how chained assignment on a dataframe should be reported.

    Related names kept from the original note::

        from contextlib import suppress
        from pandas.core.common import SettingWithCopyWarning

    The string members carry their own name as value; ``none`` carries
    ``None``. Values are spelled out explicitly so the members are visible to
    static tooling and do not depend on runtime source inspection.

    NOTE(review): this presumably mirrors pandas' ``mode.chained_assignment``
    option, which documents the spelling ``"raise"``; the value here is
    ``"raises"`` (unchanged from the original) - confirm that callers
    translate it before handing it to pandas.
    """

    warn = "warn"  # the default, means a SettingWithCopyWarning is printed.
    raises = "raises"  # means pandas will raise a SettingWithCopyException you have to deal with.
    none = None  # will suppress the warnings entirely.


def duplicate_columns(frame: pandas.DataFrame) -> List[str]:
    """Find columns whose contents are repeated by a later column of the same dtype.

    Columns are first bucketed by dtype, so columns of different dtypes are
    never compared with each other. Within a bucket, a column is reported as
    soon as one column positioned after it holds equivalent values, as judged
    by pandas' ``array_equivalent``. The last column of a run of identical
    columns is therefore never reported, so dropping the returned labels
    keeps exactly one copy of each duplicated column.

    :param frame: The dataframe to inspect.
    :return: Labels of the duplicated columns, listed bucket by bucket.
    """
    dtype_buckets = frame.columns.to_series().groupby(frame.dtypes).groups
    duplicates = []

    for labels in dtype_buckets.values():
        same_dtype = frame[labels]
        names = same_dtype.columns
        # Extract each column's array once rather than re-slicing it for every pair.
        arrays = [same_dtype.iloc[:, position].values for position in range(len(names))]

        for position, candidate in enumerate(arrays):
            # any() stops at the first match, so a column is reported at most once.
            if any(array_equivalent(candidate, later) for later in arrays[position + 1 :]):
                duplicates.append(names[position])

    return duplicates


if __name__ == "__main__":
    # When run as a script, print the value of every export method, one per line.
    for export_method in ExportMethodEnum:
        print(export_method.value)