Source code for pyCP_APR.torch_backend.CP_APR_Torch

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Python implementation of the CP-APR algorithm [1-4] with PyTorch backend.\n
This backend can be used to factorize sparse tensorsin COO format on GPU or CPU.

References
========================================
[1] General software, latest release: Brett W. Bader, Tamara G. Kolda and others, Tensor Toolbox for MATLAB, Version 3.2.1, www.tensortoolbox.org, April 5, 2021.\n
[2] Dense tensors: B. W. Bader and T. G. Kolda, Algorithm 862: MATLAB Tensor Classes for Fast Algorithm Prototyping, ACM Trans. Mathematical Software, 32(4):635-653, 2006, http://dx.doi.org/10.1145/1186785.1186794.\n
[3] Sparse, Kruskal, and Tucker tensors: B. W. Bader and T. G. Kolda, Efficient MATLAB Computations with Sparse and Factored Tensors, SIAM J. Scientific Computing, 30(1):205-231, 2007, http://dx.doi.org/10.1137/060676489.\n
[4] Chi, E.C. and Kolda, T.G., 2012. On tensors, sparsity, and nonnegative factorizations. SIAM Journal on Matrix Analysis and Applications, 33(4), pp.1272-1299.

@author: Maksim Ekin Eren
"""
import copy
import time
from math import sqrt
from cmath import sqrt as sqrtc
from tqdm import tqdm

import numpy as np
import torch as tr

from . ktensor_Torch import K_TENSOR
from . sptensor_Torch import SP_TENSOR

from . redistribute_ktensor import redistribute
from . normalize_ktensor import normalize
from . innerprod_ktensor import innerprod
from . norm_ktensor import norm

[docs]class CP_APR_MU:

    def __init__(self, epsilon=1e-10, kappa=1e-2, kappa_tol=1e-10, max_inner_iters=10,
                 n_iters=1000, print_inner_itn=0, verbose=10, simple_verbose=False,
                 stoptime=1e6, tol=1e-4, random_state=42, device='cpu',
                 device_num='0', return_type='numpy', dtype='torch.DoubleTensor',
                 follow_M=False):
        """
        Initilize the CP_APR_MU class. Sets up the class variables and the CUDA for
        tensors.

        Parameters
        ----------
        epsilon : float, optional
            Prevents zero division. Default is 1e-10.
        kappa : float, optional
            Fix slackness level. Default is 1e-2.
        kappa_tol : float, optional
            Tolerance on slackness level. Default is 1e-10.
        max_inner_iters : int, optional
            Number of inner iterations per epoch. Default is 10.
        n_iters : int, optional
            Number of iterations during optimization or epoch. Default is 1000.
        print_inner_itn : int, optional
            Print every *n* inner iterations. Does not print if 0. Default is 0.
        verbose : int, optional
            Print every n epoch, or ``n_iters``. Does not print if 0. Default is 10.
        simple_verbose : bool, optional
            Turns off details for verbose, such as fit, but instead shows a progress bar.
        stoptime : float, optional
            Number of seconds before early stopping. Default is 1e6.
        tol : float, optional
            KKT violations tolerance. Default is 1e-4.
        random_state : int, optional
            Random seed for initial M.
            The default is 42.
        device : string, optional
            Torch device to be used.
            'cpu' to use PyTorch with CPU.
            'gpu' to use cuda:0
            The default is cpu.
        device_num : string, optional
            Which device to to store the tensors.
        return_type : string, optional
            Type for the latent factors.
            'torch' keep as torch tensors.
            'numpy' convert to numpy arrays.
        dtype : string, optional
            Type to be used in torch tensors.
            Default is torch.cuda.DoubleTensor.
        follow_M : bool, optional
            Saves M on each iteration.
            The default is False.

        """
        # Parameter for printing
        self.verbose = verbose
        self.simple_verbose = simple_verbose
        self.print_inner_itn = print_inner_itn

        # Keep track of the runtime and the iteration stoptime
        self.start_time = -1
        self.final_iter = -1
        
        # Keep track of Ms
        self.follow_M = follow_M
        self.saved_Ms = list()

        # Set the default tensor type
        if device == 'gpu' and dtype == 'torch.DoubleTensor':
            dtype = 'torch.cuda.DoubleTensor'

        self.dtype = dtype
        tr.set_default_tensor_type(self.dtype)

        # GPU or CPU device parameters
        self.device = device
        self.device_num = str(device_num)

        if device == 'gpu':
            if tr.cuda.is_available():
                self.device = tr.device('cuda:' + self.device_num)
                if self.verbose != 0:
                    print('Using', tr.cuda.get_device_name(int(self.device_num)))
            else:
                raise Exception('No CUDA device found')

        # Return Format
        if return_type in ['torch', 'numpy']:
            self.return_type = return_type
        else:
            raise Exception('Invalid return type!')

        # Original X tensor, and KRUSKAL tensor M
        self.X = None
        self.M = None

        # Optimization Parameters
        self.tol = tol
        self.stoptime = stoptime
        self.exec_time = -1
        self.n_iters = n_iters
        self.max_inner_iters = max_inner_iters
        self.random_state = random_state
        self.kappa = kappa
        self.kappa_tol = kappa_tol

        self.kktViolations = -tr.ones(n_iters).to(self.device)
        self.nInnerIters = tr.zeros(n_iters).to(self.device)
        self.times = tr.zeros(n_iters).to(self.device)
        self.logLikelihoods = tr.ones(n_iters).to(self.device)
        self.epsilon = tr.tensor(epsilon).to(self.device)
        self.obj = 0


[docs]    def train(self, tensor=[], coords=[], values=[], rank=2, Minit='random', Type='sptensor'):
        """
        Factorize the tensor X (i.e. compute the KRUSKAL tensor M).

        Parameters
        ----------
        tensor : PyTorch Sparse Tensor or dense Numpy array as tensor
            Original dense or sparse tensor X.\n
            Can be used when Type = 'sptensor'. Then tensor parameter needs to be a PyTorch Sparse tensor.\n
            Or use with Type = 'tensor' and pass the tensor parameter as a dense Numpy array.\n
            Note that PyTorch only supports Type = 'sptensor'.
        coords : Numpy array (i.e. array that is a list of list)
            Array of non-zero coordinates for sparse tensor X. COO format.\n
            Each entry in this array is a coordinate of a non-zero value in the tensor.\n
            Used when Type = 'sptensor' and tensor parameter is not passed.\n
            len(Coords) is number of total entiries in X, and len(coords[0]) should give the number of dimensions.
        values : Numpy array (i.e. list of non-zero values corresponding to each list of non-zero coordinates)
            Array of non-zero tensor entries. COO format.\n
            Used when Type = 'sptensor' and tensor parameter is not passed.\n
            Length of values must match the length of coords.
        rank : int
            Tensor rank, i.e. number of components to extract.
            The default is 2.
        Minit : string or dictionary of latent factors
            Initial value of latent factors.\n
            If Minit = 'random', initial factors are chosen randomly from uniform distribution between 0 and 1.\n
            Else, pass dictionary where the key is the mode number and value is array size d x r
            where d is the number of elements on the dimension and r is the rank.\n
            The default is "random".\n
        Type : string
            Type of tensor (i.e. sparse or dense).\n
            Use 'sptensor' for sparse, and 'tensor' for dense tensors.\n
            'sptensor' can be used with method = 'torch', method = 'numpy'.\n
            If 'sptensor' used, pass the list of non-zero coordinates using the Coords parameter
            and the corresponding list of non-zero elements with values.\n
            'sptensor' can also be used with the PyTorch Sparse format. Pass the torch.sparse format in the tensor parameter.\n
            'tensor' can be used with method = 'numpy' only. Pass the tensor using tensor parameter in that case.\n
            The default is 'sptensor'.

        Returns
        -------
        result : dict
            KRUSKAL tensor M is returned.
            The latent factors can be found with the key 'Factors'.\n
            The weight of each component can be found with the key 'Weights'.

        """

        if rank <= 0:
            raise Exception('Number of components requested must be positive!')

        # Setup for iterations
        X, M = self.__setup(tensor, coords, values, Minit, rank, Type)

        Phi = dict()
        kktModeViolations = tr.zeros(X.Dimensions)
        nViolations = tr.zeros(self.n_iters)

        self.start_time = time.time()

        # Iterate until convergence or early stop
        for outer_iter in tqdm(range(self.n_iters), disable=not(self.simple_verbose)):

            isConverged = True
            for d in range(X.Dimensions):

                # Adjust latent factors that are violating the slackness.
                if outer_iter > 1:
                    V = (Phi[str(d)] > 1) & (M.Factors[str(d)] < self.kappa_tol)

                    if tr.any(V.flatten().transpose(-1, 0).flatten()):
                        nViolations[outer_iter] += 1
                        M.Factors[str(d)][V > 0] += self.kappa

                # Absorb the component weight to dimension d
                M = redistribute(M, d)
                
                # Product of all matrices but the d-th
                Pi = self.__calculatePi(M, X, d)

                # Multiplicative updates
                for inner_iter in range(self.max_inner_iters):

                    self.nInnerIters[outer_iter] += 1

                    # Matrix for multiplicative update
                    Phi[str(d)] = self.__calculatePhi(M, X, d, Pi)

                    # Check for convergence
                    x = tr.min(M.Factors[str(d)], 1 - Phi[str(d)])
                    kktModeViolations[d] = tr.max(tr.abs(self.__vectorizeForMu(x)))

                    if kktModeViolations[d] < self.tol:
                        break
                    else:
                        isConverged = False

                    # Do the multiplicative update
                    M.Factors[str(d)] = tr.mul(M.Factors[str(d)], Phi[str(d)])

                    # Print status
                    if self.print_inner_itn != 0 and (inner_iter % self.print_inner_itn == 0) and (self.simple_verbose == False):
                        print("Mode = %d, Inner Iter = %d, KKT Violation = %.6f" % \
                              (d, inner_iter + 1, kktModeViolations[d]))

                M = normalize(M, mode=d)

            self.kktViolations[outer_iter] = tr.max(kktModeViolations)

            # calculate the log likelihood
            M_ = normalize(copy.deepcopy(M), N=-2)
            obj_ = self.__tt_loglikelihood(M_, X)
            self.logLikelihoods[outer_iter] = obj_
            
            # if we want to save the results from current iteration
            if self.follow_M:
                save_M = dict()
                if self.return_type == 'numpy':
                    save_M = result = self.__transfer_M_cpu(M_)
                else:
                    save_M['Factors'] = M_.Factors
                    save_M['Weights'] = M_.Weights
                self.saved_Ms.append(save_M)
            

            # Print update
            if self.verbose != 0 and (outer_iter % self.verbose == 0) and (self.simple_verbose == False):
                print("Iter=%d, Inner Iter=%d, KKT Violation=%.6f, obj=%.6f, nViolations=%d" % \
                      (outer_iter + 1, self.nInnerIters[outer_iter], self.kktViolations[outer_iter], \
                       self.logLikelihoods[outer_iter], nViolations[outer_iter]))

            # Check for convergence
            if isConverged:
                if self.verbose != 0 and (self.simple_verbose == False):
                    print("Exiting because all subproblems reached KKT tol.")
                break

            self.times[outer_iter] = time.time() - self.start_time
            if self.times[-1] > self.stoptime:
                if self.verbose != 0 and (self.simple_verbose == False):
                    print("Exiting because time limit exceeded.")
                break

        # Done
        result = self.__finalize(M, X, outer_iter)

        return result

    def __finalize(self, M, X, outer_iter):
        """
        Helper functions to finalize the results. Calculates the final fit, performs the final
        tensor format conversions, shows the results if verbose, and returns the
        KRUSKAL tensor M.

        Parameters
        ----------
        M : class
            KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
        X : class
            Original tensor. sptensor_Torch.SP_TENSOR
        outer_iter : int
            Current epoch.

        Returns
        -------
        result : dict
            KRUSKAL tensor M is returned.
            The latent factors can be found with the key 'Factors'.\n
            The weight of each component can be found with the key 'Weights'.

        """

        # Clean up final result
        M = normalize(M, N=-2)
        self.obj = self.__tt_loglikelihood(copy.deepcopy(M), X)
        self.final_iter = outer_iter + 1

        result = dict()
        self.exec_time = time.time() - self.start_time

        if self.verbose != 0 and (self.simple_verbose == False):
            normX = tr.norm(X.data)
            nrm_sqr = norm(M) ** 2
            rem = innerprod(M, X)

            try:
                normresidual = sqrt(normX ** 2 + nrm_sqr - 2 * rem)
            # if negative in sqrt
            except Exception as e:
                normresidual = sqrtc(normX ** 2 + nrm_sqr - 2 * rem)

            fit = 1 - (normresidual / normX)

            print("===========================================")
            print(" Final log-likelihood = %f" % self.obj)
            print(" Final least squares fit = %f" % fit)
            print(" Final KKT violation = %f" % self.kktViolations[outer_iter])
            print(" Total inner iterations = %d" % tr.sum(self.nInnerIters))
            print(" Total execution time = %.4f seconds" % self.exec_time)

        if self.return_type == 'numpy':
            if self.verbose != 0 and (self.simple_verbose == False):
                print("Converting the latent factors to Numpy arrays.")

            # Convert KTENSOR to Numpy arrays
            result = self.__transfer_M_cpu(M)

            # convert the optimization variables
            self.kktViolations = self.kktViolations.cpu().numpy().astype('float64')
            self.nInnerIters = self.nInnerIters.cpu().numpy().astype('float64')
            self.times = self.times.cpu().numpy().astype('float64')
            self.logLikelihoods = self.logLikelihoods.cpu().numpy().astype('float64')
            self.epsilon = self.epsilon.cpu().numpy().astype('float64')
            self.obj = float(self.obj.cpu().numpy().astype('float64') )

        else:
            result['Factors'] = M.Factors
            result['Weights'] = M.Weights

        # if GPU used, free up the space
        if not isinstance(self.device, str) and self.device.type == 'torch':
            tr.cuda.empty_cache()

        # Save the KRUSKAL tensor M and the original tensor X
        self.M = M
        self.X = X

        return result
    
    def __transfer_M_cpu(self, M):
        """
        Transfers M to CPU if requested.
        
        Parameters
        ----------
        M : class
            KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
        
        Returns
        -------
        M : dict
            M factors and weight that is transferred to CPU.
        
        """
        result = dict()
        if M.Rank == 1:
            for dim in range(M.Dimensions):
                M.Factors[str(dim)] = M.Factors[str(dim)].cpu().numpy().astype('float64')
                M.Factors[str(dim)] = [item for sublist in M.Factors[str(dim)] for item in sublist]
                M.Factors[str(dim)] = np.array(M.Factors[str(dim)])

        else:
            for dim in range(M.Dimensions):
                M.Factors[str(dim)] = M.Factors[str(dim)].cpu().numpy().astype('float64')
                    
        result['Factors'] = M.Factors
        result['Weights'] = M.Weights.cpu().numpy().astype('float64')
            
        return result

    def __setup(self, Tensor, Coords, Values, Minit, Rank, Type):
        """
        Sets up the classes for KRUSKAL tensor and the original tensor X.

        Parameters
        ----------
        Tensor : PyTorch Sparse Tensor or dense Numpy array as tensor
            Original dense or sparse tensor X.\n
            Can be used when Type = 'sptensor'. Then Tensor needs to be a PyTorch Sparse tensor.\n
            Or use with Type = 'tensor' and pass Tensor as a dense Numpy array.\n
            Note that PyTorch only supports Type = 'sptensor'.
        Coords : Numpy array (i.e. array that is a list of list)
            Array of non-zero coordinates for sparse tensor X. COO format.\n
            Each entry in this array is a coordinate of a non-zero value in the tensor.\n
            Used when Type = 'sptensor' and tensor parameter is not passed.\n
            len(Coords) is number of total entiries in X, and len(coords[0]) should give the number of dimensions.
        Values : Numpy array (i.e. list of non-zero values corresponding to each list of non-zero coordinates)
            Array of non-zero tensor entries. COO format.\n
            Used when Type = 'sptensor' and tensor parameter is not passed.\n
            Length of values must match the length of coords.
        Rank : int or list
            Tensor rank, or list of ranks for two tensors.\n
            List of ranks will allow using weighted prediction between the two latent factors.\n
            Pass a single integer or list of length two.\n
            The default is 2.
        Minit : string or dictionary of latent factors
            Initial value of latent factors.\n
            If Minit = 'random', initial factors are chosen randomly from uniform distribution between 0 and 1.\n
            Else, pass dictionary where the key is the mode number and value is array size d x r
            where d is the number of elements on the dimension and r is the rank.\n
            The default is "random".\n
        Type : string
            Type of tensor (i.e. sparse or dense).\n
            Use 'sptensor' for sparse, and 'tensor' for dense tensors.\n
            'sptensor' can be used with method = 'torch', method = 'numpy'.\n
            If 'sptensor' used, pass the list of non-zero coordinates usingvCoords parameter
            and the corresponding list of non-zero elements with values.\n
            'sptensor' can also be used with the PyTorch Sparse format. Pass the torch.sparse format in the tensor parameter.\n
            'tensor' can be used with method = 'numpy'. Pass the tensor using tensor parameter in that case.\n
            The default is 'sptensor'.

        Returns
        -------
        M : class
            KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
        X : class
            Original tensor. sptensor_Torch.SP_TENSOR

        """

        # Setup the optimization variables
        self.kktViolations = -tr.ones(self.n_iters).to(self.device)
        self.nInnerIters = tr.zeros(self.n_iters).to(self.device)
        self.times = tr.zeros(self.n_iters).to(self.device)
        self.logLikelihoods = tr.ones(self.n_iters).to(self.device)
        self.epsilon = tr.tensor(self.epsilon).to(self.device)
        self.obj = 0

        # Setup the tensors
        if Type == 'sptensor':
            if tr.is_tensor(Tensor):
                if Tensor._nnz() == 0:
                    raise Exception('Non-zero values must be more than 0.')

            else:
                if len(Coords) == 0:
                    raise Exception('Coordinates of the non-zero elements is not passed for sptensor.\
                             Use the Coords parameter.')
                if len(Values) == 0:
                    raise Exception('Non-zero values are not passed for sptensor.\
                             Use the Values parameter')
                if (Coords < 0).all():
                    raise Exception('Coords tensor must be nonnegative for factorization')

            # Convert the initial latent factors to pyTorch tensors
            if Minit != 'random':
                flag = False
                if "Factors" in Minit and isinstance(Minit["Factors"]['0'], (list, np.ndarray)):
                    flag=True
                elif isinstance(Minit['0'], (list, np.ndarray)):
                    flag=True
                
                if flag:
                    if "Factors" in Minit:
                        for d in range(len(Minit["Factors"].keys())):
                            Minit["Factors"][str(d)] = tr.from_numpy(Minit["Factors"][str(d)]).type(self.dtype)
                            
                    else:
                        comp = len(Minit.keys())
                        if "Weights" in Minit:
                            comp -= 1
                        
                        for d in range(comp):
                            Minit[str(d)] = tr.from_numpy(Minit[str(d)]).type(self.dtype)
                    
            X = SP_TENSOR(Tensor, Coords, Values, self.dtype, self.device)


        elif Type == 'tensor':
            raise Exception("PyTorch backend only support sparse tensor implementation currently.")

        M = K_TENSOR(Rank, X.Size, Minit, self.random_state, self.device, self.dtype)
        M = normalize(M)
        
        if self.verbose != 0 and (self.simple_verbose == False):
            print("CP-APR (MU):")

        return X, M

    def __tt_loglikelihood(self, M, X):
        """
        This function computes log-likelihood of tensor X with model M.

        Parameters
        ----------
        M : class
            KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
        X : class
            Original tensor. sptensor_Torch.SP_TENSOR

        Returns
        -------
        f : float
            log-likelihood.

        """

        M = normalize(M, N=1)

        A = M.Factors[str(0)][X.Coords[:, 0], :]

        for d in range(1, X.Dimensions):
            A = tr.mul(A, M.Factors[str(d)][X.Coords[:, d], :])

        f = tr.sum(tr.mul(X.data, tr.log(tr.sum(A, 1)))) - \
            tr.sum(tr.sum(M.Factors[str(0)], axis=0))

        return f

    def __vectorizeForMu(self, x):
        """
        Turn x into a vector.

        Parameters
        ----------
        x : array
            tensor x.

        Returns
        -------
        y : array
            flattenned x.

        """

        y = x.flatten().transpose(-1, 0).flatten()
        return y

    def __calculatePhi(self, M, X, mode, Pi):
        """
        Calculates the matrix for MU

        Parameters
        ----------
        M : class
            KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
        X : class
            Original tensor. sptensor_Torch.SP_TENSOR
        mode : int
            dimension.
        Pi : array
            Product of all matrices but nth.

        Returns
        -------
        Phi : array
            multiplicative update.

        """

        Phi = -tr.ones((X.Size[mode], M.Rank)).to(self.device)
        xsubs = X.Coords[:, mode]

        v = tr.sum(tr.mul(M.Factors[str(mode)][xsubs, :], Pi), 1)
        wvals = tr.div(X.data, tr.max(v, self.epsilon))

        for r in range(M.Rank):
            Yr = tr.bincount(xsubs, tr.mul(wvals, Pi[:, r]), X.Size[mode])
            Phi[:, r] = Yr

        return Phi

    def __calculatePi(self, M, X, mode):
        """
        Calculates the product of all matrices without the "mode" dimension.

        Parameters
        ----------
        M : class
            KRUSKAL tensor M class. ktensor_Torch.K_TENSOR
        X : class
            Original tensor. sptensor_Torch.SP_TENSOR
        mode : int
            dimension.

        Returns
        -------
        Pi : array
            product of all matrices but nth.

        """

        Pi = tr.ones((X.nnz, M.Rank)).to(self.device)

        for nn in range(X.Dimensions):
            if nn != mode:
                Pi = tr.mul(M.Factors[str(nn)][X.Coords[:, nn], :], Pi)

        return Pi