Source code for TELF.factorization.decompositions.utilities.nnsvd

from .generic_utils import get_np, get_scipy
from .math_utils import fro_norm



[docs]
def nnsvd(X, k, use_gpu=False):
    """
    Nonnegative SVD algorithm for NMF initialization based off of Gillis et al. in https://arxiv.org/pdf/1807.04020.pdf.

    Args:
        X (ndarray): Nonnegative m by n matrix to approximate with nnsvd.
        k (int): The desired rank of the nonnegative approximation.

    Returns:
        W (ndarray): Nonnegative m by k left factor of X.
        H (ndarray): Nonnegative k by n right factor of X.
    """
    np = get_np(X, use_gpu=use_gpu)
    scipy = get_scipy(X, use_gpu=use_gpu)
    dtype = X.dtype
    
    if np.issubdtype(dtype, np.integer):
        eps = np.finfo(float).eps
    elif np.issubdtype(dtype, np.floating):
        eps = np.finfo(dtype).eps
    else:
        raise Exception("Unknown data type!")
    
    #
    # Truncated SVD
    #
    U, S, V = scipy.sparse.linalg.svds(X, k=k)
    V = V.T
    
    #
    # NN-SVD
    #
    UP = np.where(U > 0, U, 0)
    UN = np.where(U < 0, -U, 0)
    VP = np.where(V > 0, V, 0)
    VN = np.where(V < 0, -V, 0)

    UP_norm = np.sqrt(np.sum(np.square(UP), 0), dtype=dtype)
    UN_norm = np.sqrt(np.sum(np.square(UN), 0), dtype=dtype)
    VP_norm = np.sqrt(np.sum(np.square(VP), 0), dtype=dtype)
    VN_norm = np.sqrt(np.sum(np.square(VN), 0), dtype=dtype)

    mp = np.sqrt(UP_norm * VP_norm * S, dtype=dtype)
    mn = np.sqrt(UN_norm * VN_norm * S, dtype=dtype)

    W = np.where(mp > mn, mp * UP / (UP_norm + eps), mn * UN / (UN_norm + eps))
    H = np.where(mp > mn, mp * VP / (VP_norm + eps), mn * VN / (VN_norm + eps)).T
    Wsum = np.sum(W, 0, keepdims=True)
    H = H * Wsum.T
    W = W / np.maximum(Wsum, eps)
    return W, H