#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Python implementation of the CP-APR algorithm [1-4] with Numpy backend.\n
This backend can be used to factorize sparse tensorsin COO format and dense Numpy tensors.
References
========================================
[1] General software, latest release: Brett W. Bader, Tamara G. Kolda and others, Tensor Toolbox for MATLAB, Version 3.2.1, www.tensortoolbox.org, April 5, 2021.\n
[2] Dense tensors: B. W. Bader and T. G. Kolda, Algorithm 862: MATLAB Tensor Classes for Fast Algorithm Prototyping, ACM Trans. Mathematical Software, 32(4):635-653, 2006, http://dx.doi.org/10.1145/1186785.1186794.\n
[3] Sparse, Kruskal, and Tucker tensors: B. W. Bader and T. G. Kolda, Efficient MATLAB Computations with Sparse and Factored Tensors, SIAM J. Scientific Computing, 30(1):205-231, 2007, http://dx.doi.org/10.1137/060676489.\n
[4] Chi, E.C. and Kolda, T.G., 2012. On tensors, sparsity, and nonnegative factorizations. SIAM Journal on Matrix Analysis and Applications, 33(4), pp.1272-1299.
@author: Maksim Ekin Eren
"""
import copy
import time
from math import sqrt
from cmath import sqrt as sqrtc
import numpy as np
from tqdm import tqdm
from . ktensor import K_TENSOR
from . sptensor import SP_TENSOR
from . tensor import TENSOR
from . redistribute_ktensor import redistribute
from . normalize_ktensor import normalize
from . norm_ktensor import norm
from . innerprod_ktensor import innerprod
from . tenmat_tensor import tenmat as tenmat_tensor
from . tenmat_ktensor import tenmat as tenmat_ktensor
from . khatrirao_ktensor import khatrirao
[docs]class CP_APR_MU:
def __init__(self, epsilon=1e-10, kappa=1e-2, kappa_tol=1e-10, max_inner_iters=10,
n_iters=1000, print_inner_itn=0, verbose=10, simple_verbose=False,
stoptime=1e6, tol=1e-4, random_state=42, return_type='numpy'):
"""
Initilize the CP_APR_MU class.
Parameters
----------
epsilon : float, optional
Prevents zero division. Default is 1e-10.
kappa : float, optional
Fix slackness level. Default is 1e-2.
kappa_tol : float, optional
Tolerance on complementary slackness. The default is 1e-10.
max_inner_iters : int, optional
Number of inner iterations per epoch. Default is 10.
n_iters : int, optional
Number of iterations during optimization or epoch. Default is 1000.
print_inner_itn : int, optional
Print every *n* inner iterations. Does not print if 0. Default is 0.
verbose : int, optional
Print every n epoch, or ``n_iters``. Does not print if 0. Default is 10.
simple_verbose : bool, optional
Turns off details for verbose, such as fit, but instead shows a progress bar.
stoptime : float, optional
Number of seconds before early stopping. Default is 1e6.
tol : float, optional
KKT violations tolerance. Default is 1e-4.
random_state : int, optional
Random seed for initial M.
The default is 42.
"""
self.epsilon = epsilon
self.tol = tol
self.stoptime = stoptime
self.maxOuterIters = n_iters
self.kappa = kappa
self.kappaTol = kappa_tol
self.maxInnerIters = max_inner_iters
self.verbose = verbose
self.simple_verbose = simple_verbose
self.printInnerItn = print_inner_itn
self.random_state = random_state
self.kktViolations = -np.ones(n_iters)
self.nInnerIters = np.zeros(n_iters)
self.times = np.zeros(n_iters)
self.logLikelihoods = np.ones(n_iters)
self.obj = 0
self.M = None
self.X = None
self.start_time = -1
self.exec_time = -1
self.return_type = return_type
[docs] def train(self, tensor=[], coords=[], values=[], rank=2, Minit='random', Type='sptensor'):
"""
Factorize the tensor X (i.e. compute the KRUSKAL tensor M).
Parameters
----------
tensor : array
Original dense tensor X.\n
Use with Type = 'tensor' and pass the tensor parameter as a dense Numpy array.\n
coords : Numpy array (i.e. array that is a list of list)
Array of non-zero coordinates for sparse tensor X. COO format.\n
Each entry in this array is a coordinate of a non-zero value in the tensor.\n
Used when Type = 'sptensor' and tensor parameter is not passed.\n
len(Coords) is number of total entiries in X, and len(coords[0]) should give the number of dimensions.
values : Numpy array (i.e. list of non-zero values corresponding to each list of non-zero coordinates)
Array of non-zero tensor entries. COO format.\n
Used when Type = 'sptensor' and tensor parameter is not passed.\n
Length of values must match the length of coords.
rank : int
Tensor rank, i.e. number of components to extract.
The default is 2.
Minit : string or dictionary of latent factors
Initial value of latent factors.\n
If Minit = 'random', initial factors are chosen randomly from uniform distribution between 0 and 1.\n
Else, pass dictionary where the key is the mode number and value is array size d x r
where d is the number of elements on the dimension and r is the rank.\n
The default is "random".\n
Type : string
Type of tensor (i.e. sparse or dense).\n
Use 'sptensor' for sparse, and 'tensor' for dense tensors.\n
If 'sptensor' used, pass the list of non-zero coordinates using the Coords parameter
and the corresponding list of non-zero elements with values parameter.\n
The default is 'sptensor'.
Returns
-------
result : dict
KRUSKAL tensor M is returned.
The latent factors can be found with the key 'Factors'.\n
The weight of each component can be found with the key 'Weights'.
"""
if rank <= 0:
raise Exception('Number of components requested must be positive')
# Setup for iterations
X, M = self.__setup(tensor, coords, values, Minit, rank, Type)
Phi = dict()
kktModeViolations = np.zeros(X.Dimensions)
nViolations = np.zeros(self.maxOuterIters)
self.start_time = time.time()
# Iterate until convergence or early stop
for outer_iter in tqdm(range(self.maxOuterIters), disable=not(self.simple_verbose)):
isConverged = True
for d in range(X.Dimensions):
# Adjust latent factors that are violating the slackness.
if outer_iter > 1:
V = (Phi[str(d)] > 1) & (M.Factors[str(d)] < self.kappaTol)
if np.any(V.flatten('F')):
nViolations[outer_iter] += 1
M.Factors[str(d)][V > 0] += self.kappa
# Absorb the component weight to dimension d
M = redistribute(M, d)
# Product of all matrices but the d-th
Pi = self.__calculatePi(M, X, d)
# Multiplicative updates
for inner_iter in range(self.maxInnerIters):
self.nInnerIters[outer_iter] += 1
# Matrix for multiplicative update
Phi[str(d)] = self.__calculatePhi(M, X, d, Pi)
# Check for convergence
x = np.minimum(M.Factors[str(d)], 1 - Phi[str(d)])
kktModeViolations[d] = np.max(np.abs(self.__vectorizeForMu(x)))
if kktModeViolations[d] < self.tol:
break
else:
isConverged = False
# Do the multiplicative update
M.Factors[str(d)] = np.multiply(M.Factors[str(d)], Phi[str(d)])
# Print status
if self.printInnerItn != 0 and (inner_iter % self.printInnerItn == 0) and (self.simple_verbose == False):
print("Mode = %d, Inner Iter = %d, KKT Violation = %.6f" % \
(d, inner_iter + 1, kktModeViolations[d]))
M = normalize(M, mode=d)
self.kktViolations[outer_iter] = np.max(kktModeViolations)
# calculate the log likelihood
M_ = normalize(copy.deepcopy(M), N=-2)
obj_ = self.__tt_loglikelihood(M_, X)
self.logLikelihoods[outer_iter] = obj_
# Print update
if self.verbose != 0 and (outer_iter % self.verbose == 0) and (self.simple_verbose == False):
print("Iter=%d, Inner Iter=%d, KKT Violation=%.6f, obj=%.6f, nViolations=%d" % \
(outer_iter + 1, self.nInnerIters[outer_iter], self.kktViolations[outer_iter], \
self.logLikelihoods[outer_iter], nViolations[outer_iter]))
# Check for convergence
if isConverged and (self.simple_verbose == False):
if self.verbose != 0:
print("Exiting because all subproblems reached KKT tol.")
break
self.times[outer_iter] = time.time() - self.start_time
if self.times[-1] > self.stoptime:
if self.verbose != 0 (self.simple_verbose == False):
print("Exiting because time limit exceeded.")
break
# Done
result = self.__finalize(M, X, outer_iter)
return result
def __finalize(self, M, X, outer_iter):
"""
Helper functions to finalize the results. Calculates the final fit, performs the final
tensor format conversions, shows the results if verbose, and returns the
KRUSKAL tensor M.
Parameters
----------
M : class
KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
X : class
Original tensor. sptensor_Torch.SP_TENSOR
outer_iter : int
Current epoch.
Returns
-------
result : dict
KRUSKAL tensor M is returned.
The latent factors can be found with the key 'Factors'.\n
The weight of each component can be found with the key 'Weights'.
"""
# Clean up final result
M = normalize(M, N=-2)
self.obj = self.__tt_loglikelihood(copy.deepcopy(M), X)
self.M = M
self.X = X
self.exec_time = time.time() - self.start_time
if self.verbose != 0 and (self.simple_verbose == False):
if X.Type == 'sptensor':
normX = np.linalg.norm(X.data)
elif X.Type == 'tensor':
normX = np.linalg.norm(X.data.flatten())
nrm_sqr = norm(M) ** 2
rem = innerprod(M, X)
try:
normresidual = sqrt(normX ** 2 + nrm_sqr - 2 * rem)
# if negative in sqrt due to overflow
except Exception as e:
normresidual = sqrtc(normX ** 2 + nrm_sqr - 2 * rem)
fit = 1 - (normresidual / normX)
print("===========================================")
print(" Final log-likelihood = %f" % self.obj)
print(" Final least squares fit = %f" % fit)
print(" Final KKT violation = %f" % self.kktViolations[outer_iter])
print(" Total inner iterations = %d" % np.sum(self.nInnerIters))
print(" Total execution time = %.4f seconds" % self.exec_time)
result = dict()
if self.return_type == 'numpy':
if self.verbose != 0 and (self.simple_verbose == False):
print("Converting the latent factors to Numpy arrays.")
if M.Rank == 1:
for dim in range(M.Dimensions):
M.Factors[str(dim)] = [item for sublist in M.Factors[str(dim)] for item in sublist]
M.Factors[str(dim)] = np.array(M.Factors[str(dim)])
result['Factors'] = M.Factors
else:
for dim in range(M.Dimensions):
M.Factors[str(dim)] = np.array(M.Factors[str(dim)])
result['Factors'] = M.Factors
result['Weights'] = np.array(M.Weights)
else:
result['Factors'] = M.Factors
result['Weights'] = M.Weights
return result
def __setup(self, Tensor, Coords, Values, Minit, Rank, Type):
"""
Parameters
----------
Tensor : array
Original dense tensor X.\n
Use with Type = 'tensor' and pass the tensor parameter as a dense Numpy array.\n
Coords : Numpy array (i.e. array that is a list of list)
Array of non-zero coordinates for sparse tensor X. COO format.\n
Each entry in this array is a coordinate of a non-zero value in the tensor.\n
Used when Type = 'sptensor' and tensor parameter is not passed.\n
len(Coords) is number of total entiries in X, and len(coords[0]) should give the number of dimensions.
Values : Numpy array (i.e. list of non-zero values corresponding to each list of non-zero coordinates)
Array of non-zero tensor entries. COO format.\n
Used when Type = 'sptensor' and tensor parameter is not passed.\n
Length of values must match the length of coords.
Rank : int
Tensor rank, i.e. number of components to extract.
The default is 2.
Minit : string or dictionary of latent factors
Initial value of latent factors.\n
If Minit = 'random', initial factors are chosen randomly from uniform distribution between 0 and 1.\n
Else, pass dictionary where the key is the mode number and value is array size d x r
where d is the number of elements on the dimension and r is the rank.\n
The default is "random".\n
Type : string
Type of tensor (i.e. sparse or dense).\n
Use 'sptensor' for sparse, and 'tensor' for dense tensors.\n
If 'sptensor' used, pass the list of non-zero coordinates using the Coords parameter
and the corresponding list of non-zero elements with values parameter.\n
The default is 'sptensor'.
Returns
-------
M : class
KRUSKAL tensor M class. ktensor.K_TENSOR
X : class
Original tensor. sptensor.SP_TENSOR or tensor.TENSOR
"""
# Sparse tensor
if Type == 'sptensor':
if len(Coords) == 0:
raise Exception('Coordinates of the non-zero elements is not passed for sptensor.\
Use the Coords parameter.')
if len(Values) == 0:
raise Exception('Non-zero values are not passed for sptensor.\
Use the Values parameter')
if not isinstance(Coords, np.ndarray):
Coords = np.array(Coords)
if not isinstance(Values, np.ndarray):
Values = np.array(Values)
X = SP_TENSOR(Coords, Values)
# Dense tensor
elif Type == 'tensor':
if len(Tensor) == 0:
raise Exception('Tensor is not passed for dense tensor type.\
Use the Tensor parameter.')
X = TENSOR(Tensor)
M = K_TENSOR(Rank, X.Size, Minit, self.random_state)
M = normalize(M)
if self.verbose != 0 and (self.simple_verbose == False):
print("CP-APR (MU):")
return X, M
def __tt_loglikelihood(self, M, X):
"""
This function computes log-likelihood of tensor X with model M.
Parameters
----------
M : class
KRUSKAL tensor M class. ktensor.K_TENSOR
X : class
Original tensor. sptensor.SP_TENSOR or tensor.TENSOR
Returns
-------
f : float
log-likelihood.
"""
M = normalize(M, N=1)
f = 0
if X.Type == 'sptensor':
A = M.Factors[str(0)][X.Coords[:, 0], :]
for d in range(1, X.Dimensions):
A = np.multiply(A, M.Factors[str(d)][X.Coords[:, d], :])
f = np.sum(np.multiply(X.data, np.log(np.sum(A, 1)))) - \
np.sum(np.sum(M.Factors[str(0)], axis=0))
elif X.Type == 'tensor':
dX = tenmat_tensor(X, 0)
dM = tenmat_ktensor(M, 0)
for ii in range(dX.shape[0]):
for jj in range(dM.shape[1]):
if dX[ii, jj] == 0.0:
pass
else:
f += dX[ii, jj] * np.log(dM[ii, jj])
f -= np.sum(np.sum(M.Factors['0']))
return f
def __vectorizeForMu(self, x):
"""
Turn x into a vector.
Parameters
----------
x : array
tensor x.
Returns
-------
y : array
flattenned x.
"""
y = x.flatten('F')
return y
def __calculatePhi(self, M, X, mode, Pi):
"""
This function calculates the matrix for MU
Parameters
----------
M : class
KRUSKAL tensor M class. ktensor.K_TENSOR
X : class
Original tensor. sptensor.SP_TENSOR or tensor.TENSOR
mode : int
dimension.
Pi : array
product of all matrices but nth.
Returns
-------
Phi : array
multiplicative update.
"""
Phi = 0
if X.Type == 'sptensor':
Phi = -np.ones((X.Size[mode], M.Rank))
xsubs = X.Coords[:, mode]
# not sure about the one below
v = np.sum(np.multiply(M.Factors[str(mode)][xsubs, :], Pi), 1)
wvals = np.divide(X.data, np.maximum(v, self.epsilon))
for r in range(M.Rank):
Yr = np.bincount(xsubs, np.multiply(wvals, Pi[:, r]), X.Size[mode])
Phi[:, r] = Yr
elif X.Type == 'tensor':
Xn = tenmat_tensor(X, mode)
V = np.dot(M.Factors[str(mode)], Pi.T)
W = np.divide(Xn, np.maximum(V, self.epsilon))
Y = np.dot(W, Pi)
Phi = Y
return Phi
def __calculatePi(self, M, X, mode):
"""
Calculate product of all matrices but the n-th.
Parameters
----------
M : class
KRUSKAL tensor M class. ktensor.K_TENSOR
X : class
Original tensor. sptensor.SP_TENSOR or tensor.TENSOR
mode : int
dimension.
Returns
-------
Pi : array
product of all matrices but nth..
"""
Pi = 0
if X.Type == 'sptensor':
Pi = np.ones((len(X.Coords), M.Rank))
for nn in range(X.Dimensions):
if nn != mode:
Pi = np.multiply(M.Factors[str(nn)][X.Coords[:, nn], :], Pi)
elif X.Type == 'tensor':
nn = [str(x) for x in range(0, X.Dimensions) if x != mode]
Pi = khatrirao(M, nn)
return Pi