# Source code for pyCP_APR.torch_cp.CP_APR_Torch

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Python implementation of the CP-APR algorithm [1-4] with PyTorch backend.\n
This backend can be used to factorize sparse tensors in COO format on GPU or CPU.

References
========================================
[1] General software, latest release: Brett W. Bader, Tamara G. Kolda and others, Tensor Toolbox for MATLAB, Version 3.2.1, www.tensortoolbox.org, April 5, 2021.\n
[2] Dense tensors: B. W. Bader and T. G. Kolda, Algorithm 862: MATLAB Tensor Classes for Fast Algorithm Prototyping, ACM Trans. Mathematical Software, 32(4):635-653, 2006, http://dx.doi.org/10.1145/1186785.1186794.\n
[3] Sparse, Kruskal, and Tucker tensors: B. W. Bader and T. G. Kolda, Efficient MATLAB Computations with Sparse and Factored Tensors, SIAM J. Scientific Computing, 30(1):205-231, 2007, http://dx.doi.org/10.1137/060676489.\n
[4] Chi, E.C. and Kolda, T.G., 2012. On tensors, sparsity, and nonnegative factorizations. SIAM Journal on Matrix Analysis and Applications, 33(4), pp.1272-1299.

@author: Maksim Ekin Eren
"""
import copy
import sys
import time
from math import sqrt
from cmath import sqrt as sqrtc

import numpy as np
import torch as tr

from . ktensor_Torch import K_TENSOR
from . sptensor_Torch import SP_TENSOR


class CP_APR_MU:
    """
    Multiplicative-update CP-APR (Poisson tensor factorization) with a
    PyTorch backend. Factorizes sparse tensors in COO format on CPU or GPU.
    """

    def __init__(self, epsilon=1e-10, kappa=1e-2, kappa_tol=1e-10,
                 max_inner_iters=10, n_iters=1000, print_inner_itn=0,
                 verbose=10, stoptime=1e6, tol=1e-4, random_state=42,
                 device='cpu', device_num='0', return_type='numpy',
                 dtype='torch.DoubleTensor', follow_M=False):
        """
        Initialize the CP_APR_MU class. Sets up the class variables and
        CUDA for the tensors.

        Parameters
        ----------
        epsilon : float, optional
            Prevents zero division. Default is 1e-10.
        kappa : float, optional
            Fix slackness level. Default is 1e-2.
        kappa_tol : float, optional
            Tolerance on slackness level. Default is 1e-10.
        max_inner_iters : int, optional
            Number of inner iterations per epoch. Default is 10.
        n_iters : int, optional
            Number of iterations during optimization (epochs). Default is 1000.
        print_inner_itn : int, optional
            Print every *n* inner iterations. Does not print if 0. Default is 0.
        verbose : int, optional
            Print every *n* epochs. Does not print if 0. Default is 10.
        stoptime : float, optional
            Number of seconds before early stopping. Default is 1e6.
        tol : float, optional
            KKT violations tolerance. Default is 1e-4.
        random_state : int, optional
            Random seed for the initial M. Default is 42.
        device : string, optional
            Torch device to be used. 'cpu' to use PyTorch with CPU,
            'gpu' to use cuda. Default is 'cpu'.
        device_num : string, optional
            Which CUDA device to store the tensors on. Default is '0'.
        return_type : string, optional
            Type for the latent factors. 'torch' keeps torch tensors,
            'numpy' converts them to numpy arrays. Default is 'numpy'.
        dtype : string, optional
            Type used in torch tensors. Default is 'torch.DoubleTensor',
            promoted to 'torch.cuda.DoubleTensor' when device='gpu'.
        follow_M : bool, optional
            Saves M on each iteration. Default is False.
        """
        # Parameters for printing
        self.verbose = verbose
        self.print_inner_itn = print_inner_itn

        # Keep track of the runtime and the iteration the optimization stopped at
        self.start_time = -1
        self.final_iter = -1

        # Keep track of intermediate Ms when follow_M is requested
        self.follow_M = follow_M
        self.saved_Ms = list()

        # Promote the default tensor type to CUDA when running on GPU
        if device == 'gpu' and dtype == 'torch.DoubleTensor':
            dtype = 'torch.cuda.DoubleTensor'
        self.dtype = dtype
        # NOTE(review): torch.set_default_tensor_type is deprecated in recent
        # PyTorch releases; consider set_default_dtype/set_default_device.
        tr.set_default_tensor_type(self.dtype)

        # GPU or CPU device parameters. On the CPU path self.device stays the
        # string 'cpu'; on GPU it becomes a torch.device (checked downstream).
        self.device = device
        self.device_num = str(device_num)
        if device == 'gpu':
            if tr.cuda.is_available():
                self.device = tr.device('cuda:' + self.device_num)
                if self.verbose != 0:
                    print('Using', tr.cuda.get_device_name(int(self.device_num)))
            else:
                sys.exit('No CUDA device found')

        # Return format
        if return_type in ['torch', 'numpy']:
            self.return_type = return_type
        else:
            sys.exit('Invalid return type!')

        # Original X tensor, and KRUSKAL tensor M
        self.X = None
        self.M = None

        # Optimization parameters
        self.tol = tol
        self.stoptime = stoptime
        self.exec_time = -1
        self.n_iters = n_iters
        self.max_inner_iters = max_inner_iters
        self.random_state = random_state
        self.kappa = kappa
        self.kappa_tol = kappa_tol
        self.kktViolations = -tr.ones(n_iters).to(self.device)
        self.nInnerIters = tr.zeros(n_iters).to(self.device)
        self.times = tr.zeros(n_iters).to(self.device)
        self.logLikelihoods = tr.ones(n_iters).to(self.device)
        self.epsilon = tr.tensor(epsilon).to(self.device)
        self.obj = 0
[docs] def train(self, tensor=[], coords=[], values=[], rank=2, Minit='random', Type='sptensor'): """ Factorize the tensor X (i.e. compute the KRUSKAL tensor M). Parameters ---------- tensor : PyTorch Sparse Tensor or dense Numpy array as tensor Original dense or sparse tensor X.\n Can be used when Type = 'sptensor'. Then tensor parameter needs to be a PyTorch Sparse tensor.\n Or use with Type = 'tensor' and pass the tensor parameter as a dense Numpy array.\n Note that PyTorch only supports Type = 'sptensor'. coords : Numpy array (i.e. array that is a list of list) Array of non-zero coordinates for sparse tensor X. COO format.\n Each entry in this array is a coordinate of a non-zero value in the tensor.\n Used when Type = 'sptensor' and tensor parameter is not passed.\n len(Coords) is number of total entiries in X, and len(coords[0]) should give the number of dimensions. values : Numpy array (i.e. list of non-zero values corresponding to each list of non-zero coordinates) Array of non-zero tensor entries. COO format.\n Used when Type = 'sptensor' and tensor parameter is not passed.\n Length of values must match the length of coords. rank : int Tensor rank, i.e. number of components to extract. The default is 2. Minit : string or dictionary of latent factors Initial value of latent factors.\n If Minit = 'random', initial factors are chosen randomly from uniform distribution between 0 and 1.\n Else, pass dictionary where the key is the mode number and value is array size d x r where d is the number of elements on the dimension and r is the rank.\n The default is "random".\n Type : string Type of tensor (i.e. sparse or dense).\n Use 'sptensor' for sparse, and 'tensor' for dense tensors.\n 'sptensor' can be used with method = 'torch', method = 'numpy'.\n If 'sptensor' used, pass the list of non-zero coordinates using the Coords parameter and the corresponding list of non-zero elements with values.\n 'sptensor' can also be used with the PyTorch Sparse format. 
Pass the torch.sparse format in the tensor parameter.\n 'tensor' can be used with method = 'numpy' only. Pass the tensor using tensor parameter in that case.\n The default is 'sptensor'. Returns ------- result : dict KRUSKAL tensor M is returned. The latent factors can be found with the key 'Factors'.\n The weight of each component can be found with the key 'Weights'. """ if rank <= 0: sys.exit('Number of components requested must be positive!') # Setup for iterations X, M = self.__setup(tensor, coords, values, Minit, rank, Type) Phi = dict() kktModeViolations = tr.zeros(X.Dimensions) nViolations = tr.zeros(self.n_iters) self.start_time = time.time() # Iterate until convergence or early stop for outer_iter in range(self.n_iters): isConverged = True for d in range(X.Dimensions): # Adjust latent factors that are violating the slackness. if outer_iter > 1: V = (Phi[str(d)] > 1) & (M.Factors[str(d)] < self.kappa_tol) if tr.any(V.flatten().transpose(-1, 0).flatten()): nViolations[outer_iter] += 1 M.Factors[str(d)][V > 0] += self.kappa # Absorb the component weight to dimension d M.redistribute(d) # Product of all matrices but the d-th Pi = self.__calculatePi(M, X, d) # Multiplicative updates for inner_iter in range(self.max_inner_iters): self.nInnerIters[outer_iter] += 1 # Matrix for multiplicative update Phi[str(d)] = self.__calculatePhi(M, X, d, Pi) # Check for convergence x = tr.min(M.Factors[str(d)], 1 - Phi[str(d)]) kktModeViolations[d] = tr.max(tr.abs(self.__vectorizeForMu(x))) if kktModeViolations[d] < self.tol: break else: isConverged = False # Do the multiplicative update M.Factors[str(d)] = tr.mul(M.Factors[str(d)], Phi[str(d)]) # Print status if self.print_inner_itn != 0 and (inner_iter % self.print_inner_itn == 0): print("Mode = %d, Inner Iter = %d, KKT Violation = %.6f" % \ (d, inner_iter + 1, kktModeViolations[d])) M = M.normalize(M, mode=d) self.kktViolations[outer_iter] = tr.max(kktModeViolations) # calculate the log likelihood M_ = 
M.normalize(copy.deepcopy(M), N=-2) obj_ = self.__tt_loglikelihood(M_, X) self.logLikelihoods[outer_iter] = obj_ # if we want to save the results from current iteration if self.follow_M: save_M = dict() if self.return_type == 'numpy': save_M = result = self.__transfer_M_cpu(M_) else: save_M['Factors'] = M_.Factors save_M['Weights'] = M_.Weights self.saved_Ms.append(save_M) # Print update if self.verbose != 0 and (outer_iter % self.verbose == 0): print("Iter=%d, Inner Iter=%d, KKT Violation=%.6f, obj=%.6f, nViolations=%d" % \ (outer_iter + 1, self.nInnerIters[outer_iter], self.kktViolations[outer_iter], \ self.logLikelihoods[outer_iter], nViolations[outer_iter])) # Check for convergence if isConverged: if self.verbose != 0: print("Exiting because all subproblems reached KKT tol.") break self.times[outer_iter] = time.time() - self.start_time if self.times[-1] > self.stoptime: if self.verbose != 0: print("Exiting because time limit exceeded.") break # Done result = self.__finalize(M, X, outer_iter) return result
def __finalize(self, M, X, outer_iter): """ Helper functions to finalize the results. Calculates the final fit, performs the final tensor format conversions, shows the results if verbose, and returns the KRUSKAL tensor M. Parameters ---------- M : class KRUSKAL tensor M class. ktensor_Torch.K_TENSOR X : class Original tensor. sptensor_Torch.SP_TENSOR outer_iter : int Current epoch. Returns ------- result : dict KRUSKAL tensor M is returned. The latent factors can be found with the key 'Factors'.\n The weight of each component can be found with the key 'Weights'. """ # Clean up final result M = M.normalize(M, N=-2) self.obj = self.__tt_loglikelihood(copy.deepcopy(M), X) self.final_iter = outer_iter + 1 result = dict() self.exec_time = time.time() - self.start_time if self.verbose != 0: normX = tr.norm(X.Values) nrm_sqr = M.norm() ** 2 rem = M.innerprod(X) try: normresidual = sqrt(normX ** 2 + nrm_sqr - 2 * rem) # if negative in sqrt except Exception as e: normresidual = sqrtc(normX ** 2 + nrm_sqr - 2 * rem) fit = 1 - (normresidual / normX) print("===========================================") print(" Final log-likelihood = %f" % self.obj) print(" Final least squares fit = %f" % fit) print(" Final KKT violation = %f" % self.kktViolations[outer_iter]) print(" Total inner iterations = %d" % tr.sum(self.nInnerIters)) print(" Total execution time = %.4f seconds" % self.exec_time) if self.return_type == 'numpy': if self.verbose != 0: print("Converting the latent factors to Numpy arrays.") # Convert KTENSOR to Numpy arrays result = self.__transfer_M_cpu(M) # convert the optimization variables self.kktViolations = self.kktViolations.cpu().numpy().astype('float64') self.nInnerIters = self.nInnerIters.cpu().numpy().astype('float64') self.times = self.times.cpu().numpy().astype('float64') self.logLikelihoods = self.logLikelihoods.cpu().numpy().astype('float64') self.epsilon = self.epsilon.cpu().numpy().astype('float64') self.obj = float(self.obj.cpu().numpy().astype('float64') 
) else: result['Factors'] = M.Factors result['Weights'] = M.Weights # if GPU used, free up the space if not isinstance(self.device, str) and self.device.type == 'torch': tr.cuda.empty_cache() # Save the KRUSKAL tensor M and the original tensor X self.M = M self.X = X return result def __transfer_M_cpu(self, M): """ Transfers M to CPU if requested. Parameters ---------- M : class KRUSKAL tensor M class. ktensor_Torch.K_TENSOR Returns ------- M : dict M factors and weight that is transferred to CPU. """ result = dict() if M.Rank == 1: for dim in range(M.Dimensions): M.Factors[str(dim)] = M.Factors[str(dim)].cpu().numpy().astype('float64') M.Factors[str(dim)] = [item for sublist in M.Factors[str(dim)] for item in sublist] M.Factors[str(dim)] = np.array(M.Factors[str(dim)]) else: for dim in range(M.Dimensions): M.Factors[str(dim)] = M.Factors[str(dim)].cpu().numpy().astype('float64') result['Factors'] = M.Factors result['Weights'] = M.Weights.cpu().numpy().astype('float64') return result def __setup(self, Tensor, Coords, Values, Minit, Rank, Type): """ Sets up the classes for KRUSKAL tensor and the original tensor X. Parameters ---------- Tensor : PyTorch Sparse Tensor or dense Numpy array as tensor Original dense or sparse tensor X.\n Can be used when Type = 'sptensor'. Then Tensor needs to be a PyTorch Sparse tensor.\n Or use with Type = 'tensor' and pass Tensor as a dense Numpy array.\n Note that PyTorch only supports Type = 'sptensor'. Coords : Numpy array (i.e. array that is a list of list) Array of non-zero coordinates for sparse tensor X. COO format.\n Each entry in this array is a coordinate of a non-zero value in the tensor.\n Used when Type = 'sptensor' and tensor parameter is not passed.\n len(Coords) is number of total entiries in X, and len(coords[0]) should give the number of dimensions. Values : Numpy array (i.e. list of non-zero values corresponding to each list of non-zero coordinates) Array of non-zero tensor entries. 
COO format.\n Used when Type = 'sptensor' and tensor parameter is not passed.\n Length of values must match the length of coords. Rank : int or list Tensor rank, or list of ranks for two tensors.\n List of ranks will allow using weighted prediction between the two latent factors.\n Pass a single integer or list of length two.\n The default is 2. Minit : string or dictionary of latent factors Initial value of latent factors.\n If Minit = 'random', initial factors are chosen randomly from uniform distribution between 0 and 1.\n Else, pass dictionary where the key is the mode number and value is array size d x r where d is the number of elements on the dimension and r is the rank.\n The default is "random".\n Type : string Type of tensor (i.e. sparse or dense).\n Use 'sptensor' for sparse, and 'tensor' for dense tensors.\n 'sptensor' can be used with method = 'torch', method = 'numpy'.\n If 'sptensor' used, pass the list of non-zero coordinates usingvCoords parameter and the corresponding list of non-zero elements with values.\n 'sptensor' can also be used with the PyTorch Sparse format. Pass the torch.sparse format in the tensor parameter.\n 'tensor' can be used with method = 'numpy'. Pass the tensor using tensor parameter in that case.\n The default is 'sptensor'. Returns ------- M : class KRUSKAL tensor M class. ktensor_Torch.K_TENSOR X : class Original tensor. 
sptensor_Torch.SP_TENSOR """ # Setup the optimization variables self.kktViolations = -tr.ones(self.n_iters).to(self.device) self.nInnerIters = tr.zeros(self.n_iters).to(self.device) self.times = tr.zeros(self.n_iters).to(self.device) self.logLikelihoods = tr.ones(self.n_iters).to(self.device) self.epsilon = tr.tensor(self.epsilon).to(self.device) self.obj = 0 # Setup the tensors if Type == 'sptensor': if tr.is_tensor(Tensor): if len((Tensor._values() < 0).nonzero()) > 0: sys.exit('Data tensor must be nonnegative for Poisson-based factorization.') if Tensor._nnz() == 0: sys.exit('Non-zero values must be more than 0.') else: if len(Coords) == 0: sys.exit('Coordinates of the non-zero elements is not passed for sptensor.\ Use the Coords parameter.') if len(Values) == 0: sys.exit('Non-zero values are not passed for sptensor.\ Use the Values parameter') if (Coords < 0).all(): sys.exit('Data tensor must be nonnegative for Poisson-based factorization') # Convert the initial latent factors to pyTorch tensors if Minit != 'random' and isinstance(Minit['0'], (list, np.ndarray)): for d in range(len(Minit.keys())): Minit[str(d)] = tr.from_numpy(Minit[str(d)]).type(self.dtype) X = SP_TENSOR(Tensor, Coords, Values, self.dtype, self.device) elif Type == 'tensor': sys.exit("PyTorch backend only support sparse tensor implementation currently.") M = K_TENSOR(Rank, X.Size, Minit, self.random_state, self.device, self.dtype) M = M.normalize(M) if self.verbose != 0: print("CP-APR (MU):") return X, M def __tt_loglikelihood(self, M, X): """ This function computes log-likelihood of tensor X with model M. Parameters ---------- M : class KRUSKAL tensor M class. ktensor_Torch.K_TENSOR X : class Original tensor. sptensor_Torch.SP_TENSOR Returns ------- f : float log-likelihood. 
""" M = M.normalize(M, N=1) A = M.Factors[str(0)][X.Coords[:, 0], :] for d in range(1, X.Dimensions): A = tr.mul(A, M.Factors[str(d)][X.Coords[:, d], :]) f = tr.sum(tr.mul(X.Values, tr.log(tr.sum(A, 1)))) - \ tr.sum(tr.sum(M.Factors[str(0)], axis=0)) return f def __vectorizeForMu(self, x): """ Turn x into a vector. Parameters ---------- x : array tensor x. Returns ------- y : array flattenned x. """ y = x.flatten().transpose(-1, 0).flatten() return y def __calculatePhi(self, M, X, mode, Pi): """ Calculates the matrix for MU Parameters ---------- M : class KRUSKAL tensor M class. ktensor_Torch.K_TENSOR X : class Original tensor. sptensor_Torch.SP_TENSOR mode : int dimension. Pi : array Product of all matrices but nth. Returns ------- Phi : array multiplicative update. """ Phi = -tr.ones((X.Size[mode], M.Rank)).to(self.device) xsubs = X.Coords[:, mode] v = tr.sum(tr.mul(M.Factors[str(mode)][xsubs, :], Pi), 1) wvals = tr.div(X.Values, tr.max(v, self.epsilon)) for r in range(M.Rank): Yr = tr.bincount(xsubs, tr.mul(wvals, Pi[:, r]), X.Size[mode]) Phi[:, r] = Yr return Phi def __calculatePi(self, M, X, mode): """ Calculates the product of all matrices without the "mode" dimension. Parameters ---------- M : class KRUSKAL tensor M class. ktensor_Torch.K_TENSOR X : class Original tensor. sptensor_Torch.SP_TENSOR mode : int dimension. Returns ------- Pi : array product of all matrices but nth. """ Pi = tr.ones((X.nnz, M.Rank)).to(self.device) for nn in range(X.Dimensions): if nn != mode: Pi = tr.mul(M.Factors[str(nn)][X.Coords[:, nn], :], Pi) return Pi