# @author: Manish Bhattarai
import matplotlib
from matplotlib import pyplot as plt
from .data_io import *
from matplotlib import gridspec
import matplotlib.ticker as ticker
from matplotlib.lines import Line2D
def plot_err(err):
    """Plot the relative error of an NMF decomposition against the iteration index.

    The x-axis runs from 1 to ``len(err)``. The figure is written to
    ``Error_plot.png`` in the current working directory and then shown.

    Parameters
    ----------
    err : sequence
        Relative error values; must support ``len()``.
    """
    n_points = len(err)
    # 1-based iteration numbers: 1, 2, ..., n_points
    iterations = np.linspace(1, n_points, n_points)
    plt.plot(iterations, err)
    plt.title('Relative error vs Iterations')
    plt.xlabel('Iterations')
    plt.ylabel('Relative error')
    plt.savefig('Error_plot.png')
    plt.show()
def read_plot_factors(factors_path, pgrid):
    """Read the factors W and H and plot each one with ``plot_W``.

    W is plotted as read; H is transposed first. After each plot the current
    figure is saved as ``factors_path + 'W.png'`` and ``factors_path + 'H.png'``
    respectively.

    Parameters
    ----------
    factors_path : str
        Passed to ``read_factors``; also used as the prefix of the output
        image file names (plain string concatenation, no separator added).
    pgrid :
        Passed through unchanged to ``read_factors``.
    """
    w_factor, h_factor = read_factors(factors_path, pgrid)
    # NOTE(review): plot_W calls plt.show() before returning, so depending on
    # the matplotlib backend the figure saved here may be empty - confirm.
    for suffix, factor, transpose in (('W.png', w_factor, False),
                                      ('H.png', h_factor, True)):
        plot_W(factor.T if transpose else factor)
        plt.savefig(factors_path + suffix)
def plot_W(W):
    """Plot each component (column) of a factor matrix in its own stacked subplot.

    One subplot row is created per column of ``W``; all rows share the x-axis.
    The figure is saved to ``Results_W.png`` in the current working directory
    and then shown.

    Side effect: the global ``matplotlib.rcParams`` are updated (large fonts,
    tick marks disabled) and are not restored afterwards.

    Parameters
    ----------
    W : 2-D array-like
        Factor with ``shape == (m, k)``; each of the ``k`` columns is drawn as
        one line.
    """
    _, k = W.shape
    params = {'legend.fontsize': 60,
              'axes.labelsize': 60,
              'axes.titlesize': 60,
              'xtick.labelsize': 60,
              'mathtext.fontset': 'cm',
              'mathtext.rm': 'serif',
              "xtick.bottom": False,
              "ytick.left": False,
              }
    matplotlib.rcParams.update(params)
    # squeeze=False always yields a 2-D array of Axes; without it a single
    # component (k == 1) returns a bare Axes object and indexing it fails.
    f, axes = plt.subplots(nrows=k, sharex=True, figsize=(60, 40), squeeze=False)
    axes = axes.ravel()
    plt.subplots_adjust(hspace=0.001, bottom=0.2)
    # Calling the cycler gives an endless iterator of style dicts.
    colors = plt.rcParams["axes.prop_cycle"]()
    components = W.T
    for i, ax in enumerate(axes):
        c = next(colors)["color"]
        ax.plot(components[i], label="W[{}]".format(i), color=c, linewidth=5.0)
        ax.legend(loc=4, prop={'size': 50})
        ax.tick_params(axis="y", labelsize=30)
    plt.xlabel('Features')
    # Invisible full-figure axes used only to centre one y-label on all rows.
    shadowaxes = f.add_subplot(111, xticks=[], yticks=[], frame_on=False)
    shadowaxes.set_ylabel('W Components')
    shadowaxes.yaxis.set_label_coords(-0.05, 0.5)
    plt.savefig('Results_W.png', bbox_inches='tight')
    plt.show()
def plot_results_paper(startProcess, endProcess, stepProcess, RECON, SILL_AVG, SILL_MIN, out_put, name, k=-1):
    """Plot silhouette widths and relative error against k and save the figure as a PDF.

    The x values are ``range(startProcess, endProcess + 1, stepProcess)``.
    ``SILL_AVG`` and ``SILL_MIN`` are drawn on the left (blue) y-axis and
    ``RECON`` on a twin right (red) y-axis. The figure is saved to
    ``out_put + '/' + name + '_selection_plot.pdf'`` and then closed.

    Parameters
    ----------
    startProcess, endProcess, stepProcess : int
        Bounds (inclusive end) and step of the x values.
    RECON : sequence
        Values plotted as 'Relative Error'; one per x value.
    SILL_AVG, SILL_MIN : sequence
        Values plotted as 'Avg Silhouette' and 'Min Silhouette'.
    out_put : str
        Directory the PDF is written to.
    name : str
        Prefix of the output file name.
    k : number, optional
        When not ``-1``, a gray band spanning ``k - 0.2`` to ``k + 0.2`` is
        drawn on the x-axis.
    """
    t = range(startProcess, endProcess + 1, stepProcess)
    _fig, ax = plt.subplots(num=None, figsize=(10, 6), dpi=300, facecolor='w', edgecolor='k')
    ax.yaxis.label.set_color('blue')
    ax.tick_params(axis='y', colors='blue')
    ax.grid(linestyle='dotted')
    lns1 = ax.plot(t, SILL_AVG, c='g', marker='o', ms=7, ls='-.', lw=2.5, label='Avg Silhouette')
    lns2 = ax.plot(t, SILL_MIN, c='b', marker='o', ms=7, ls='--', lw=2.5, label='Min Silhouette')

    # Second y-axis sharing the same x-axis, used for the relative error.
    ax2 = ax.twinx()
    ax2.yaxis.label.set_color('red')
    ax2.tick_params(axis='y', colors='red')
    lns3 = ax2.plot(t, RECON, c='r', marker='D', ms=7, ls='-', lw=2.0, label='Relative Error')

    ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
    ax2.set_ylabel('Relative Error')
    ax.set_ylabel('Silhouette Width')
    ax.set_xlabel('$k$')

    # Lines live on two different axes, so build one combined legend by hand.
    lns = lns2 + lns3 + lns1
    labels = [line.get_label() for line in lns]
    ax.legend(lns, labels, loc=3)

    if k != -1:
        ax.axvspan(k - .2, k + .2, alpha=0.5, color='gray')
    plt.tight_layout()
    plt.savefig(out_put + '/' + name + '_selection_plot.pdf')
    plt.close()
def plot_results(startProcess, endProcess, stepProcess, RECON, SILL_AVG, SILL_MIN, out_put, name):
    """Plot the relative error and the minimum silhouette used for estimating k.

    The x values are ``range(startProcess, endProcess + 1, stepProcess)``.
    ``RECON`` is drawn on the left y-axis with percentage tick labels and
    ``SILL_MIN`` on a twin right y-axis. The figure is saved to
    ``out_put + '/' + name + '_selection_plot.pdf'`` and then closed.

    Parameters
    ----------
    startProcess, endProcess, stepProcess : int
        Bounds (inclusive end) and step of the x values.
    RECON : sequence
        Values plotted as 'Relative error %'; one per x value.
    SILL_AVG : sequence
        Accepted for signature compatibility; not plotted by this function.
    SILL_MIN : sequence
        Values plotted as 'Minimum Stability'.
    out_put : str
        Directory the PDF is written to.
    name : str
        Prefix of the output file name.
    """
    t = range(startProcess, endProcess + 1, stepProcess)
    fig, ax1 = plt.subplots(num=None, figsize=(10, 6), dpi=300, facecolor='w', edgecolor='k')

    error_color = 'tab:red'
    ax1.set_xlabel('Total Signatures')
    ax1.set_ylabel('Mean L2 %', color=error_color)
    ax1.set_title('Num')
    # Plots the RECON argument; this line previously used an undefined name.
    error_lines = ax1.plot(t, RECON, marker='X', linestyle=':', color='tab:green',
                           label="Relative error %")
    ax1.tick_params(axis='y', labelcolor=error_color)
    ax1.xaxis.set_ticks(np.arange(min(t), max(t) + 1, 1))
    # Render y ticks as percentages through a formatter instead of freezing
    # the current tick labels, so labels stay attached to the right values
    # even if the tick positions are recomputed later.
    ax1.yaxis.set_major_formatter(
        ticker.FuncFormatter(lambda value, _pos: '{:,.0%}'.format(value)))

    # Second y-axis sharing the same x-axis (x-label already set on ax1).
    ax2 = ax1.twinx()
    stability_color = 'tab:blue'
    ax2.set_ylabel('Minimum Stability', color=stability_color)
    stability_lines = ax2.plot(t, SILL_MIN, marker='s', linestyle="-.",
                               color=stability_color, label='Minimum Stability')
    ax2.tick_params(axis='y', labelcolor=stability_color)

    fig.tight_layout()  # otherwise the right y-label is slightly clipped

    # Lines live on two different axes, so build one combined legend by hand.
    lns = stability_lines + error_lines
    labs = [line.get_label() for line in lns]
    ax1.legend(lns, labs, loc=0)
    plt.savefig(out_put + '/' + name + '_selection_plot.pdf')
    plt.close()
def box_plot(dat, respath):
    """Draw ``dat`` as a bar chart and save it as ``respath + 'timing.png'``.

    Despite the function name, the chart is produced with ``dat.plot.bar()``.

    Parameters
    ----------
    dat :
        Object exposing a pandas-style ``.plot.bar()`` accessor.
    respath : str
        Prefix of the output file name (plain string concatenation, no
        separator added).
    """
    dat.plot.bar()
    plt.ylabel('timing(sec)')
    plt.xlabel('operation')
    output_file = respath + 'timing.png'
    plt.savefig(output_file)
def _fill_timing_breakdown(template, filled, key, val):
    """Store ``val`` in every slot of ``filled`` whose operation name in ``template`` equals ``key``."""
    for group, names in template.items():
        if isinstance(names, str):
            # Group described by a single operation name.
            if names == key:
                filled[group] = val
        elif key in names:
            # Group described by several names: replace the matching slot.
            filled[group][names.index(key)] = val


def timing_stats(fpath):
    """Read the stored timing stats and arrange them into two grouped breakdowns.

    The first data row of the CSV at ``fpath`` is used, skipping its first
    column. Each remaining column name is treated as an operation name and
    its cell as the timing value.

    Parameters
    ----------
    fpath : str
        Path of the CSV file, read with ``pandas.read_csv``.

    Returns
    -------
    tuple of (dict, dict)
        ``(breakdown_level_1, breakdown_level_2)``. Each maps a group name to
        either a single value or a list with one slot per operation.
        Operations that do not appear in the file keep their operation name
        (a string) as a placeholder in the corresponding slot.
    """
    import copy
    data = pd.read_csv(fpath).iloc[0, 1:]
    breakdown_level_2 = {'init': ['__init__', 'init_factors'],
                         'data_io': ['read', 'create_folder_dir', 'save_factors', 'save_cluster_results'],
                         'sample': ['randM'],
                         'dist_compute': ['compute_global_dim',
                                          'global_gram', 'AH_glob', 'ATW_glob',
                                          'normalize_features', 'dist_norm', 'relative_err',
                                          'sum_axis', 'UHT_glob', 'WTU_glob'],
                         'dist_comm': ['cart_2d_collect_factors', 'gather_W_H'],
                         'clustering': ['normalize_by_W', 'greedy_lsa', 'change_order', 'dist_feature_ordering', 'mad',
                                        'dist_silhouettes', 'column_err', 'pvalueAnalysis']}
    breakdown_level_1 = {'init': 'init_factors', 'dist_io': ['read', 'save_factors', 'save_cluster_results'],
                         'sampling': 'randM',
                         'clustering': ['dist_custom_clustering', 'mad', 'dist_silhouettes', 'pvalueAnalysis'],
                         'compute': 'fit'}

    # Data parsing: the templates stay untouched so names can still be looked
    # up after slots in the copies have been overwritten with values.
    breakdown_level_1_dat = copy.deepcopy(breakdown_level_1)
    breakdown_level_2_dat = copy.deepcopy(breakdown_level_2)
    for key, val in data.to_dict().items():
        _fill_timing_breakdown(breakdown_level_1, breakdown_level_1_dat, key, val)
        _fill_timing_breakdown(breakdown_level_2, breakdown_level_2_dat, key, val)
    return breakdown_level_1_dat, breakdown_level_2_dat
def plot_timing_stats(fpath, respath):
    """Plot the per-group timing totals for the MPI operation.

    The level-1 breakdown returned by ``timing_stats`` is reduced to one
    total per group and handed to ``box_plot``.

    fpath: Stats data path
    respath: Path to save graph
    """
    level_1, _ = timing_stats(fpath)
    totals = {}
    for group, entry in level_1.items():
        # A group holds either one value or a list of values; treat both alike.
        values = entry if isinstance(entry, list) else [entry]
        # Slots still holding a string are operation names for which no
        # timing was found; they contribute nothing to the total.
        totals[group] = sum(v for v in values if not isinstance(v, str))
    box_plot(pd.DataFrame([totals]).loc[0, :], respath)