Testing DuBE with different numbers of classes (3-15)

In this example, we compare the duplebalance.DupleBalanceClassifier and other ensemble-based class-imbalanced learning methods on multi-class tasks (with the number of classes varying from 3 to 15).

# Echo the module docstring when the example is run as a script.
print(__doc__)

# Seed passed as 'random_state' to every ensemble constructor below.
RANDOM_STATE = 42

Preparation

Import necessary packages.

from duplebalance import DupleBalanceClassifier
from duplebalance.base import sort_dict_by_key

import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

Train All Ensemble Classifiers

Train all ensemble-based imbalanced learning (IL) classifiers (including DuBE) on multi-class datasets.

from imbalanced_ensemble.ensemble import *

# Constructor arguments shared by every ensemble in the comparison.
ensemble_init_kwargs = {
    'base_estimator': DecisionTreeClassifier(),
    'n_estimators': 10,
    'random_state': RANDOM_STATE,
}
# Extra keyword arguments passed to ``fit`` for DuBE only.
dube_fit_kwargs = {
    'resampling_target': 'hybrid',
    'resampling_strategy': 'shem',
    'perturb_alpha': .5,
}
# Keyword arguments forwarded to ``roc_auc_score`` when scoring each model.
eval_kwargs = {'average': 'macro', 'multi_class': 'ovo'}

# Display name -> classifier class. Each name appears exactly once: a
# repeated key in a dict literal is silently overwritten by the later entry,
# so the former second 'RusBoost' line was dead weight.
ensemble_clfs = {
    'DuBE': DupleBalanceClassifier,
    'RusBoost': RUSBoostClassifier,
    'OverBoost': OverBoostClassifier,
    'SmoteBoost': SMOTEBoostClassifier,
    'UnderBagging': UnderBaggingClassifier,
    'OverBagging': OverBaggingClassifier,
    'SmoteBagging': SMOTEBaggingClassifier,
    'Cascade': BalanceCascadeClassifier,
    'SelfPacedEns': SelfPacedEnsembleClassifier,
}

# Collected rows of [method name, macro AUROC, number of classes].
all_results = []

for n_class in range(3, 16):

    # Assign long-tail class weights: class i gets 0.8**i, normalised to sum to 1.
    weights = np.array([np.power(.8, i) for i in range(n_class)])
    weights /= weights.sum()

    # Report each class weight relative to the rarest class.
    min_weight = weights.min()
    ratios = '/'.join('{:.2f}'.format(weight / min_weight) for weight in weights)
    print("#Classes: {}\nImbalance Ratio: {}".format(n_class, ratios))

    # Generate synthetic multi-class imbalanced dataset
    X, y = make_classification(n_classes=n_class, class_sep=1,
        weights=weights, n_informative=4, n_redundant=1, flip_y=0,
        n_features=20, n_clusters_per_class=1, n_samples=5000, random_state=0)
    # Use the file-wide seed rather than a second hard-coded copy of it.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=RANDOM_STATE)

    for ens_name, clf_class in ensemble_clfs.items():

        # Train all ensemble classifiers; only DuBE receives extra fit arguments.
        clf = clf_class(**ensemble_init_kwargs)
        fit_kwargs = dube_fit_kwargs if ens_name == 'DuBE' else {}
        clf.fit(X_train, y_train, **fit_kwargs)

        # Score the predicted class probabilities on the held-out half.
        y_pred_proba = clf.predict_proba(X_test)
        score = roc_auc_score(y_test, y_pred_proba, **eval_kwargs)
        all_results.append([ens_name, score, n_class])
        print("{:<15s} | Balanced AUROC: {:.3f}".format(ens_name, score))

Out:

#Classes: 3
Imbalance Ratio: 1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.998
RusBoost        | Balanced AUROC: 0.989
OverBoost       | Balanced AUROC: 0.976
SmoteBoost      | Balanced AUROC: 0.973
UnderBagging    | Balanced AUROC: 0.996
OverBagging     | Balanced AUROC: 0.995
SmoteBagging    | Balanced AUROC: 0.995
Cascade         | Balanced AUROC: 0.991
SelfPacedEns    | Balanced AUROC: 0.992
#Classes: 4
Imbalance Ratio: 1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.984
RusBoost        | Balanced AUROC: 0.898
OverBoost       | Balanced AUROC: 0.894
SmoteBoost      | Balanced AUROC: 0.896
UnderBagging    | Balanced AUROC: 0.966
OverBagging     | Balanced AUROC: 0.966
SmoteBagging    | Balanced AUROC: 0.969
Cascade         | Balanced AUROC: 0.964
SelfPacedEns    | Balanced AUROC: 0.970
#Classes: 5
Imbalance Ratio: 2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.990
RusBoost        | Balanced AUROC: 0.927
OverBoost       | Balanced AUROC: 0.925
SmoteBoost      | Balanced AUROC: 0.924
UnderBagging    | Balanced AUROC: 0.974
OverBagging     | Balanced AUROC: 0.978
SmoteBagging    | Balanced AUROC: 0.978
Cascade         | Balanced AUROC: 0.977
SelfPacedEns    | Balanced AUROC: 0.981
#Classes: 6
Imbalance Ratio: 3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.982
RusBoost        | Balanced AUROC: 0.890
OverBoost       | Balanced AUROC: 0.877
SmoteBoost      | Balanced AUROC: 0.863
UnderBagging    | Balanced AUROC: 0.964
OverBagging     | Balanced AUROC: 0.963
SmoteBagging    | Balanced AUROC: 0.969
Cascade         | Balanced AUROC: 0.966
SelfPacedEns    | Balanced AUROC: 0.971
#Classes: 7
Imbalance Ratio: 3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.972
RusBoost        | Balanced AUROC: 0.901
OverBoost       | Balanced AUROC: 0.840
SmoteBoost      | Balanced AUROC: 0.855
UnderBagging    | Balanced AUROC: 0.953
OverBagging     | Balanced AUROC: 0.955
SmoteBagging    | Balanced AUROC: 0.954
Cascade         | Balanced AUROC: 0.958
SelfPacedEns    | Balanced AUROC: 0.954
#Classes: 8
Imbalance Ratio: 4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.967
RusBoost        | Balanced AUROC: 0.863
OverBoost       | Balanced AUROC: 0.819
SmoteBoost      | Balanced AUROC: 0.841
UnderBagging    | Balanced AUROC: 0.947
OverBagging     | Balanced AUROC: 0.943
SmoteBagging    | Balanced AUROC: 0.954
Cascade         | Balanced AUROC: 0.954
SelfPacedEns    | Balanced AUROC: 0.952
#Classes: 9
Imbalance Ratio: 5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.964
RusBoost        | Balanced AUROC: 0.861
OverBoost       | Balanced AUROC: 0.829
SmoteBoost      | Balanced AUROC: 0.828
UnderBagging    | Balanced AUROC: 0.939
OverBagging     | Balanced AUROC: 0.939
SmoteBagging    | Balanced AUROC: 0.944
Cascade         | Balanced AUROC: 0.943
SelfPacedEns    | Balanced AUROC: 0.942
#Classes: 10
Imbalance Ratio: 7.45/5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.968
RusBoost        | Balanced AUROC: 0.856
OverBoost       | Balanced AUROC: 0.834
SmoteBoost      | Balanced AUROC: 0.830
UnderBagging    | Balanced AUROC: 0.941
OverBagging     | Balanced AUROC: 0.944
SmoteBagging    | Balanced AUROC: 0.955
Cascade         | Balanced AUROC: 0.943
SelfPacedEns    | Balanced AUROC: 0.949
#Classes: 11
Imbalance Ratio: 9.31/7.45/5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.949
RusBoost        | Balanced AUROC: 0.830
OverBoost       | Balanced AUROC: 0.789
SmoteBoost      | Balanced AUROC: 0.795
UnderBagging    | Balanced AUROC: 0.916
OverBagging     | Balanced AUROC: 0.914
SmoteBagging    | Balanced AUROC: 0.925
Cascade         | Balanced AUROC: 0.911
SelfPacedEns    | Balanced AUROC: 0.915
#Classes: 12
Imbalance Ratio: 11.64/9.31/7.45/5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.935
RusBoost        | Balanced AUROC: 0.796
OverBoost       | Balanced AUROC: 0.770
SmoteBoost      | Balanced AUROC: 0.783
UnderBagging    | Balanced AUROC: 0.891
OverBagging     | Balanced AUROC: 0.907
SmoteBagging    | Balanced AUROC: 0.909
Cascade         | Balanced AUROC: 0.895
SelfPacedEns    | Balanced AUROC: 0.890
#Classes: 13
Imbalance Ratio: 14.55/11.64/9.31/7.45/5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.938
RusBoost        | Balanced AUROC: 0.790
OverBoost       | Balanced AUROC: 0.772
SmoteBoost      | Balanced AUROC: 0.785
UnderBagging    | Balanced AUROC: 0.895
OverBagging     | Balanced AUROC: 0.902
SmoteBagging    | Balanced AUROC: 0.924
Cascade         | Balanced AUROC: 0.902
SelfPacedEns    | Balanced AUROC: 0.903
#Classes: 14
Imbalance Ratio: 18.19/14.55/11.64/9.31/7.45/5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.934
RusBoost        | Balanced AUROC: 0.768
OverBoost       | Balanced AUROC: 0.740
SmoteBoost      | Balanced AUROC: 0.765
UnderBagging    | Balanced AUROC: 0.889
OverBagging     | Balanced AUROC: 0.889
SmoteBagging    | Balanced AUROC: 0.915
Cascade         | Balanced AUROC: 0.896
SelfPacedEns    | Balanced AUROC: 0.889
#Classes: 15
Imbalance Ratio: 22.74/18.19/14.55/11.64/9.31/7.45/5.96/4.77/3.81/3.05/2.44/1.95/1.56/1.25/1.00
DuBE            | Balanced AUROC: 0.929
RusBoost        | Balanced AUROC: 0.776
OverBoost       | Balanced AUROC: 0.739
SmoteBoost      | Balanced AUROC: 0.754
UnderBagging    | Balanced AUROC: 0.871
OverBagging     | Balanced AUROC: 0.867
SmoteBagging    | Balanced AUROC: 0.890
Cascade         | Balanced AUROC: 0.871
SelfPacedEns    | Balanced AUROC: 0.850

Results Visualization

import matplotlib.pyplot as plt
import seaborn as sns
# Switch seaborn to its 'talk' plotting context for the figure below.
sns.set_context('talk')

# Tabulate the collected rows; the column order matches the
# [name, score, n_class] lists appended to all_results.
all_results_columns = ['Method', 'AUROC (macro)', '#Classes']
data_vis = pd.DataFrame(all_results, columns=all_results_columns)


def plot_results_comp(data_vis, x, y, title, figsize=(8,6)):
    """Draw one line per method comparing scores across an x variable.

    Parameters
    ----------
    data_vis : table accepted by ``sns.lineplot``
        Must contain a 'Method' column (used for hue and style) plus the
        columns named by ``x`` and ``y``.
    x : str
        Column plotted on the x-axis.
    y : str
        Column plotted on the y-axis; also used as the y-axis label.
    title : str
        Title set on the axes.
    figsize : tuple, default=(8, 6)
        Size passed to ``plt.figure``.

    Returns
    -------
    ax
        The axes object returned by ``sns.lineplot``.
    """
    # Open a new figure of the requested size; lineplot draws on it.
    plt.figure(figsize=figsize)
    ax = sns.lineplot(
        data=data_vis, x=x, y=y, hue='Method', style='Method',
        markers=True, err_style='bars', linewidth=4, markersize=20, alpha=0.9
    )
    # Give the plot a solid black frame.
    for spine in ax.spines.values():
        spine.set_color('black')
        spine.set_linewidth(2)
    ax.grid(color='black', linestyle='-.', alpha=0.3)
    # Label the axis with the plotted column instead of a fixed string, so the
    # label stays correct when a different ``y`` is passed.
    ax.set_ylabel(y)
    ax.set_title(title)
    # Compact legend without a title.
    ax.legend(
        title='',
        borderpad=0.25,
        columnspacing=0.05,
        borderaxespad=0.15,
        handletextpad=0.05,
        labelspacing=0.05,
        handlelength=1.2,
        )
    return ax

# Draw the comparison figure: one line per method, AUROC against the number of classes.
plot_results_comp(data_vis, x='#Classes', y='AUROC (macro)',
                  title='DuBE versus Ensemble Baselines (#Classes 3-15)')
DuBE versus Ensemble Baselines (#Classes 3-15)

Out:

<AxesSubplot:title={'center':'DuBE versus Ensemble Baselines (#Classes 3-15)'}, xlabel='#Classes', ylabel='AUROC (macro)'>

Total running time of the script: ( 1 minutes 22.885 seconds)

Estimated memory usage: 37 MB

Gallery generated by Sphinx-Gallery