-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathcommonFunctions.py
84 lines (75 loc) · 2.77 KB
/
commonFunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from numpy import dot, loadtxt, ones
from numpy.random import randint
from numpy.random import random_integers
from scipy.sparse import csr_matrix
import numpy as np
import fnmatch
import os
def squareFrobeniusNormOfSparseBoolean(M):
    """
    Returns the number of non-zero entries of M.

    For a matrix whose entries are all 0 or 1 this count equals the square of
    the Frobenius norm, because every non-zero entry contributes exactly 1.
    """
    nonzeroRowIndices = M.nonzero()[0]
    return len(nonzeroRowIndices)
def squareFrobeniusNormOfSparse(M):
    """
    Computes the square of the Frobenius norm, i.e. the sum of the squares of
    the non-zero entries of M.

    M must provide ``nonzero()`` and index-array lookup ``M[rows, cols]``;
    scipy sparse matrices and 2-D numpy arrays both do.
    Returns 0 when M has no non-zero entries.
    """
    rows, cols = M.nonzero()
    if len(rows) == 0:
        # Nothing to sum. Handled up front because a lookup with empty index
        # arrays is not guaranteed to come back as a plain array of values.
        return 0
    # A single vectorised lookup replaces one Python-level M[i, j] access per
    # non-zero entry. Sparse matrices hand the values back as a 1 x nnz
    # matrix, so flatten to a 1-D array before squaring.
    values = np.asarray(M[rows, cols]).ravel()
    return (values * values).sum()
def trace(M):
    """ Compute the trace of a sparse matrix,
    i.e. add up the entries returned by M.diagonal()
    """
    total = 0
    for diagonalEntry in M.diagonal():
        total += diagonalEntry
    return total
def fitNorm(X, A, R):
    """
    Returns the squared Frobenius norm of the residual X - A*R*A^T,
    where X is a sparse matrix.

    The value is assembled from two parts: the squared norm of X itself and
    the remaining terms, which are computed without forming A*R*A^T.
    """
    normOfX = squareFrobeniusNormOfSparse(X)
    remainingTerms = fitNormWithoutNormX(X, A, R)
    return normOfX + remainingTerms
def fitNormWithoutNormX(X, A, R):
    """
    Computes the part of || X - A*R*A^T ||^2 that is left once || X ||^2 is
    taken out:

        trace(A^T*A*R * A^T*A*R^T) - 2 * trace(A^T * X*A*R^T)

    X must provide a ``dot`` method (X.dot(A) is the only operation applied
    to it); A and R are combined through numpy's ``dot``.
    """
    gram = dot(A.T, A)
    # Cross term: A^T * (X*A*R^T); its trace is the inner product <X, A*R*A^T>.
    xTimesA = X.dot(A)
    crossTerm = dot(A.T, dot(xTimesA, R.T))
    # Model term: (A^T*A*R) * (A^T*A*R^T); its trace is || A*R*A^T ||^2.
    gramR = dot(gram, R)
    gramRt = dot(gram, R.T)
    modelTerm = dot(gramR, gramRt)
    return np.trace(modelTerm) - 2 * trace(crossTerm)
def reservoir(it, k):
    """
    Draws a uniform random sample of up to k items from `it` in a single pass
    (reservoir sampling).

    `it` may be any iterable, not only an iterator. If it yields fewer than k
    items, all of them are returned instead of raising StopIteration.
    Returns a list; the first k items fill it in order, later items randomly
    overwrite earlier ones.
    """
    from itertools import islice

    source = iter(it)
    # Fill the reservoir with the first k items (or fewer if the input is short).
    sample = list(islice(source, k))
    # `seen` is the 1-based position of `item` in the input. randint's upper
    # bound is exclusive, so `slot` is uniform over [0, seen) and the item is
    # kept with probability k / seen.
    for seen, item in enumerate(source, k + 1):
        slot = randint(0, seen)
        if slot < k:
            sample[slot] = item
    return sample
def checkingIndices(M, ratio = 1):
    """
    Returns the indices for computing fit values
    based on non-zero values as well as sample indices
    (the sample size is proportional to the given ratio ([0,1]) and number of matrix columns)

    The result is a list of unique (row, col) pairs in no particular order:
    every non-zero position of M plus round(ratio * number of columns)
    uniformly drawn positions (draws that collide are kept once).
    """
    rowSize, colSize = M.shape
    nonzeroRows, nonzeroCols = M.nonzero()
    # round() gives a float on Python 2; numpy needs an integer sample size.
    sampleSize = int(round(ratio * colSize))
    # randint's upper bound is exclusive, so these cover rows 0..rowSize-1 and
    # columns 0..colSize-1, the same range the deprecated random_integers
    # call covered with its inclusive upper bound.
    sampledRows = randint(0, rowSize, sampleSize)
    sampledCols = randint(0, colSize, sampleSize)
    # Build the union in a set: zip() is not a list on Python 3, so the two
    # index sequences cannot simply be concatenated with `+`.
    indices = set(zip(nonzeroRows, nonzeroCols))
    indices.update(zip(sampledRows, sampledCols))
    return list(indices)
def loadX(inputDir, dim):
    """
    Loads the tensor slices stored under ./<inputDir> as sparse matrices.

    Every file whose name matches '[0-9]*-rows' is one slice; its row indices
    are read from that file and its column indices from the file of the same
    name with "rows" replaced by "cols". Each (row, col) pair becomes an
    entry of value 1 in a dim x dim csr_matrix.

    Prints one progress line per slice and two summary lines, and returns the
    list of slices in the order the files were encountered.
    """
    X = []
    numSlices = 0
    numNonzeroTensorEntries = 0
    # NOTE(review): os.listdir returns names in arbitrary order, so the slice
    # order in X is whatever the file system reports - confirm callers do not
    # rely on a particular order.
    for inputFile in os.listdir('./%s' % inputDir):
        if not fnmatch.fnmatch(inputFile, '[0-9]*-rows'):
            continue
        numSlices += 1
        rowFile = './%s/%s' % (inputDir, inputFile)
        colFile = './%s/%s' % (inputDir, inputFile.replace("rows", "cols"))
        # loadtxt yields a 0-d array for a single-value file; atleast_1d makes
        # it a 1-element vector and leaves longer arrays untouched.
        row = np.atleast_1d(loadtxt(rowFile, dtype=np.uint32))
        col = np.atleast_1d(loadtxt(colFile, dtype=np.uint32))
        Xi = csr_matrix((ones(row.size), (row, col)), shape=(dim, dim))
        numNonzeroTensorEntries += row.size
        X.append(Xi)
        # Parenthesised single-argument print emits the same text on
        # Python 2 and Python 3.
        print('loaded %d: %s' % (numSlices, inputFile))
    print('The number of tensor slices: %d' % numSlices)
    print('The number of non-zero values in the tensor: %d' % numNonzeroTensorEntries)
    return X