Source code for TELF.factorization.decompositions.utilities.nnsvd
from .generic_utils import get_np, get_scipy
from .math_utils import fro_norm
[docs]
def nnsvd(X, k, use_gpu=False):
"""
Nonnegative SVD algorithm for NMF initialization based off of Gillis et al. in https://arxiv.org/pdf/1807.04020.pdf.
Args:
X (ndarray): Nonnegative m by n matrix to approximate with nnsvd.
k (int): The desired rank of the nonnegative approximation.
Returns:
W (ndarray): Nonnegative m by k left factor of X.
H (ndarray): Nonnegative k by n right factor of X.
"""
np = get_np(X, use_gpu=use_gpu)
scipy = get_scipy(X, use_gpu=use_gpu)
dtype = X.dtype
if np.issubdtype(dtype, np.integer):
eps = np.finfo(float).eps
elif np.issubdtype(dtype, np.floating):
eps = np.finfo(dtype).eps
else:
raise Exception("Unknown data type!")
#
# Truncated SVD
#
U, S, V = scipy.sparse.linalg.svds(X, k=k)
V = V.T
#
# NN-SVD
#
UP = np.where(U > 0, U, 0)
UN = np.where(U < 0, -U, 0)
VP = np.where(V > 0, V, 0)
VN = np.where(V < 0, -V, 0)
UP_norm = np.sqrt(np.sum(np.square(UP), 0), dtype=dtype)
UN_norm = np.sqrt(np.sum(np.square(UN), 0), dtype=dtype)
VP_norm = np.sqrt(np.sum(np.square(VP), 0), dtype=dtype)
VN_norm = np.sqrt(np.sum(np.square(VN), 0), dtype=dtype)
mp = np.sqrt(UP_norm * VP_norm * S, dtype=dtype)
mn = np.sqrt(UN_norm * VN_norm * S, dtype=dtype)
W = np.where(mp > mn, mp * UP / (UP_norm + eps), mn * UN / (UN_norm + eps))
H = np.where(mp > mn, mp * VP / (VP_norm + eps), mn * VN / (VN_norm + eps)).T
Wsum = np.sum(W, 0, keepdims=True)
H = H * Wsum.T
W = W / np.maximum(Wsum, eps)
return W, H