# Source code for pyDRESCALk.plot_results

# @author: Manish Bhattarai
import matplotlib
from matplotlib import pyplot as plt
from .data_io import *
from matplotlib import gridspec
import matplotlib.ticker as ticker
from matplotlib.lines import Line2D

def plot_err(err):
    """Plot the relative error against the iteration number.

    err: sequence of error values, one per iteration (its length fixes the x-axis)

    The figure is written to 'Error_plot.png' in the current working
    directory and then displayed with ``plt.show()``.
    """
    n_iterations = len(err)
    # 1-based iteration numbers: 1, 2, ..., n_iterations
    iteration_axis = np.linspace(1, n_iterations, n_iterations)

    plt.plot(iteration_axis, err)
    plt.title('Relative error vs Iterations')
    plt.xlabel('Iterations')
    plt.ylabel('Relative error')

    plt.savefig('Error_plot.png')
    plt.show()


def read_plot_factors(factors_path, pgrid):
    """Load the factors W and H and plot each one, saving an image per factor.

    factors_path: forwarded to ``read_factors``; also used as the prefix for
                  the saved 'W.png' and 'H.png' files (no separator is added)
    pgrid: forwarded unchanged to ``read_factors``
    """
    # NOTE(review): plot_W itself saves 'Results_W.png' and calls plt.show()
    # before returning; depending on the matplotlib backend the figure may no
    # longer be current when savefig runs here - confirm the saved images are
    # not blank.
    w_factor, h_factor = read_factors(factors_path, pgrid)

    plot_W(w_factor)
    plt.savefig(factors_path + 'W.png')

    # H is transposed so that plot_W draws it the same way it draws W.
    h_transposed = h_factor.T
    plot_W(h_transposed)
    plt.savefig(factors_path + 'H.png')


def plot_W(W):
    """Plot every column of a 2-D factor in its own vertically stacked subplot.

    W: 2-D array-like exposing ``.shape`` and ``.T``; each of its k columns is
       drawn as one line in its own subplot, all sharing the x-axis

    Side effects: updates the global ``matplotlib.rcParams`` (font sizes, tick
    visibility, mathtext settings), writes the figure to 'Results_W.png' in
    the current working directory and displays it with ``plt.show()``.
    """
    _, k = W.shape

    params = {'legend.fontsize': 60,
              'axes.labelsize': 60,
              'axes.titlesize': 60,
              'xtick.labelsize': 60,
              'mathtext.fontset': 'cm',
              'mathtext.rm': 'serif',
              "xtick.bottom": False,
              "ytick.left": False,
              }
    matplotlib.rcParams.update(params)

    # squeeze=False always yields a 2-D array of Axes; without it a single
    # component (k == 1) returns a bare Axes object and indexing it fails.
    f, axes_grid = plt.subplots(nrows=k, sharex=True, figsize=(60, 40), squeeze=False)
    axes = axes_grid[:, 0]

    plt.subplots_adjust(hspace=0.001, bottom=0.2)

    # Endless iterator over the default colour cycle, one colour per component.
    colors = plt.rcParams["axes.prop_cycle"]()
    components = W.T
    for i in range(k):
        c = next(colors)["color"]
        axes[i].plot(components[i], label="W[{}]".format(i), color=c, linewidth=5.0)
        axes[i].legend(loc=4, prop={'size': 50})
        axes[i].tick_params(axis="y", labelsize=30)

    # Applies to the current axes, i.e. the last (bottom) subplot created.
    plt.xlabel('Features')

    # Invisible full-figure axes used only to centre one y-label on all subplots.
    shadowaxes = f.add_subplot(111, xticks=[], yticks=[], frame_on=False)
    shadowaxes.set_ylabel('W Components')
    shadowaxes.yaxis.set_label_coords(-0.05, 0.5)
    plt.savefig('Results_W.png', bbox_inches='tight')
    plt.show()

def plot_results_paper(startProcess, endProcess, stepProcess,RECON, SILL_AVG, SILL_MIN, out_put, name,k=-1):
    """Plot silhouette widths and relative error against k and save the figure as a PDF.

    startProcess, endProcess, stepProcess: define the x values as
        ``range(startProcess, endProcess + 1, stepProcess)``
    RECON: relative-error values, drawn in red on the right-hand y-axis
    SILL_AVG, SILL_MIN: average / minimum silhouette values, drawn on the
        left-hand y-axis
    out_put, name: the figure is saved to ``out_put + '/' + name + '_selection_plot.pdf'``
    k: when different from -1, a gray band is drawn around x = k
    """
    k_values = range(startProcess, endProcess + 1, stepProcess)
    fig, sil_ax = plt.subplots(num=None, figsize=(10, 6), dpi=300, facecolor='w', edgecolor='k')

    # Left axis (blue): the two silhouette curves.
    sil_ax.yaxis.label.set_color('blue')
    sil_ax.tick_params(axis='y', colors='blue')
    sil_ax.grid(linestyle='dotted')
    avg_lines = sil_ax.plot(k_values, SILL_AVG, c='g', marker='o', ms=7, ls='-.', lw=2.5, label='Avg Silhouette')
    min_lines = sil_ax.plot(k_values, SILL_MIN, c='b', marker='o', ms=7, ls='--', lw=2.5, label='Min Silhouette')

    # Right axis (red): relative error on a twin axis sharing the same x-axis.
    err_ax = sil_ax.twinx()
    err_ax.yaxis.label.set_color('red')
    err_ax.tick_params(axis='y', colors='red')
    err_lines = err_ax.plot(k_values, RECON, c='r', marker='D', ms=7, ls='-', lw=2.0, label='Relative Error')

    sil_ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    err_ax.set_ylabel('Relative Error')
    sil_ax.set_ylabel('Silhouette Width')
    sil_ax.set_xlabel('$k$')

    # One combined legend for the lines of both axes, in this fixed order.
    handles = min_lines + err_lines + avg_lines
    sil_ax.legend(handles, [handle.get_label() for handle in handles], loc=3)

    if k != -1:
        sil_ax.axvspan(k - .2, k + .2, alpha=0.5, color='gray')

    plt.tight_layout()
    plt.savefig(out_put + '/' + name + '_selection_plot.pdf')
    plt.close()

def plot_results(startProcess, endProcess, stepProcess, RECON, SILL_AVG, SILL_MIN, out_put, name):
    """Plot the relative error and minimum silhouette against k and save the figure as a PDF.

    startProcess, endProcess, stepProcess: define the x values as
        ``range(startProcess, endProcess + 1, stepProcess)``
    RECON: relative-error values, drawn on the left-hand y-axis and labelled
        as percentages
    SILL_AVG: accepted for signature compatibility; not drawn by this function
    SILL_MIN: minimum silhouette values, drawn on the right-hand y-axis
    out_put, name: the figure is saved to ``out_put + '/' + name + '_selection_plot.pdf'``
    """
    t = range(startProcess, endProcess + 1, stepProcess)
    fig, ax1 = plt.subplots(num=None, figsize=(10, 6), dpi=300, facecolor='w', edgecolor='k')
    title = 'Num'
    color = 'tab:red'
    ax1.set_xlabel('Total Signatures')
    ax1.set_ylabel('Mean L2 %', color=color)
    ax1.set_title(title)
    # The original referenced an undefined name (RECON1) here, which raised
    # NameError on every call; the relative-error argument is RECON.
    lns3 = ax1.plot(t, RECON, marker='X', linestyle=':', color='tab:green', label="Relative error %")

    ax1.tick_params(axis='y', labelcolor=color)
    ax1.xaxis.set_ticks(np.arange(min(t), max(t) + 1, 1))

    # Show the y values as percentages. A formatter keeps the labels in sync
    # with the tick positions, whereas fixed labels go stale if ticks move.
    ax1.yaxis.set_major_formatter(
        ticker.FuncFormatter(lambda value, _pos: '{:,.0%}'.format(value)))

    ax2 = ax1.twinx()  # second y-axis sharing the same x-axis
    color = 'tab:blue'
    ax2.set_ylabel('Minimum Stability', color=color)  # x-label already set on ax1
    lns2 = ax2.plot(t, SILL_MIN, marker='s', linestyle="-.", color=color, label='Minimum Stability')
    ax2.tick_params(axis='y', labelcolor=color)
    fig.tight_layout()  # otherwise the right y-label is slightly clipped

    # Single legend combining the lines drawn on both axes.
    lns = lns2 + lns3
    labs = [line.get_label() for line in lns]
    ax1.legend(lns, labs, loc=0)

    plt.savefig(out_put + '/' + name + '_selection_plot.pdf')

    plt.close()


def box_plot(dat, respath):
    """Draw ``dat`` as a bar chart and save the figure as 'timing.png'.

    dat: object exposing a pandas-style ``.plot.bar()`` accessor
    respath: prefix to which 'timing.png' is appended (no separator is inserted)
    """
    dat.plot.bar()

    # Axis captions are applied to the current axes via the pyplot interface.
    plt.xlabel('operation')
    plt.ylabel('timing(sec)')

    target_file = respath + 'timing.png'
    plt.savefig(target_file)

    # plt.show()


def timing_stats(fpath):
    """Read the stored timing stats and arrange them into two breakdowns.

    fpath: path of a CSV file; only its first data row is used and its first
           column is skipped. The remaining column names are matched against
           the operation names listed in the two groupings below.

    Returns a tuple ``(breakdown_level_1_dat, breakdown_level_2_dat)`` of
    dicts. A group declared with a single operation name holds that
    operation's timing directly; a group declared with a list holds a list in
    which every matched name is replaced by its timing. Names without a
    matching column are left in place as their original strings.
    """
    data = pd.read_csv(fpath).iloc[0, 1:]
    breakdown_level_2 = {'init': ['__init__', 'init_factors'],
                         'data_io': ['read', 'create_folder_dir', 'save_factors', 'save_cluster_results'],
                         'sample': ['randM'],
                         'dist_compute': ['compute_global_dim',
                                          'global_gram', 'AH_glob', 'ATW_glob',
                                          'normalize_features', 'dist_norm', 'relative_err',
                                          'sum_axis', 'UHT_glob', 'WTU_glob'],
                         'dist_comm': ['cart_2d_collect_factors', 'gather_W_H'],
                         'clustering': ['normalize_by_W', 'greedy_lsa', 'change_order', 'dist_feature_ordering', 'mad',
                                        'dist_silhouettes', 'column_err', 'pvalueAnalysis']}
    breakdown_level_1 = {'init': 'init_factors', 'dist_io': ['read', 'save_factors', 'save_cluster_results'],
                         'sampling': 'randM',
                         'clustering': ['dist_custom_clustering', 'mad', 'dist_silhouettes', 'pvalueAnalysis'],
                         'compute': 'fit'}

    timings = data.to_dict()
    breakdown_level_1_dat = _fill_breakdown(breakdown_level_1, timings)
    breakdown_level_2_dat = _fill_breakdown(breakdown_level_2, timings)
    return breakdown_level_1_dat, breakdown_level_2_dat


def _fill_breakdown(template, timings):
    """Return a deep copy of ``template`` with known operation names replaced by their timings."""
    import copy

    filled = copy.deepcopy(template)
    for group, names in template.items():
        if isinstance(names, str):
            # Single-operation group: the name itself is replaced by the timing.
            if names in timings:
                filled[group] = timings[names]
        else:
            # Multi-operation group: replace each matched name at its position.
            for position, op_name in enumerate(names):
                if op_name in timings:
                    filled[group][position] = timings[op_name]
    return filled


def plot_timing_stats(fpath, respath):
    """Plot the total time per category of the level-1 timing breakdown.

    fpath: Stats data path (passed to ``timing_stats``)
    respath: Path prefix to save the graph (passed to ``box_plot``)
    """
    res1, res2 = timing_stats(fpath)

    totals = {}
    for category, entries in res1.items():
        # Single-operation categories hold a bare value rather than a list.
        # Checking for "not a list" also covers numpy floats and ints, which
        # an exact ``type(x) == float`` comparison would miss.
        if not isinstance(entries, list):
            entries = [entries]
        # Operations with no recorded timing are still present as their name
        # (a string); leave them out so the sum does not raise TypeError.
        totals[category] = sum(value for value in entries if not isinstance(value, str))

    box_plot(pd.DataFrame([totals]).loc[0, :], respath)