Matrix Completion¶
In [4]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
def svt(A, alpha):
U, S, V = np.linalg.svd(A, full_matrices=False)
S = np.sign(S) * np.maximum(np.abs(S) - alpha, 0)
return (U * S) @ V
def update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=False):
''' Update arrays that store performance statistics.
'''
viol.append(max(0, (np.linalg.norm(X.flat[Omega] - M_obs.flat[Omega]) - eps)) / eps)
obj.append(np.linalg.norm(X, ord='nuc'))
res.append(np.linalg.norm(X - X_p, ord='fro') / np.linalg.norm(M, ord='fro'))
err.append(np.linalg.norm(X - M, ord='fro') / np.linalg.norm(M, ord='fro'))
if len(viol) % 5 == 0 and verbose:
print("[{:3d}] obj = {:0.5e}, res = {:0.3e}, viol = {:0.3e}".format(len(viol), obj[-1], res[-1], viol[-1]))
return viol, obj, res, err
def create_mat(n, r, s):
''' Create Gaussian matrix
'''
d_r = r * (2 * n - r)
s *= d_r
MR = np.random.normal(0, 1.0, size=(n,r))
ML = np.random.normal(0, 1.0, size=(n,r))
M = ML @ MR.T
N = np.random.normal(0, 0.25, size=(s))
Omega = np.random.choice(np.arange(n * n), size=s, replace=False)
M_obs = np.zeros(M.shape)
M_obs.flat[Omega] = M.flat[Omega] + N
return M, M_obs, N, Omega
def ball_proj(X, eps, Omega):
''' Project onto ball w.r.t. Frobenius norm
'''
P = X.copy()
norm = np.linalg.norm(X.flat[Omega])
if norm > eps:
P.flat[Omega] = X.flat[Omega] * eps / norm
return P
import numpy as np
import matplotlib.pyplot as plt
def svt(A, alpha):
U, S, V = np.linalg.svd(A, full_matrices=False)
S = np.sign(S) * np.maximum(np.abs(S) - alpha, 0)
return (U * S) @ V
def update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=False):
''' Update arrays that store performance statistics.
'''
viol.append(max(0, (np.linalg.norm(X.flat[Omega] - M_obs.flat[Omega]) - eps)) / eps)
obj.append(np.linalg.norm(X, ord='nuc'))
res.append(np.linalg.norm(X - X_p, ord='fro') / np.linalg.norm(M, ord='fro'))
err.append(np.linalg.norm(X - M, ord='fro') / np.linalg.norm(M, ord='fro'))
if len(viol) % 5 == 0 and verbose:
print("[{:3d}] obj = {:0.5e}, res = {:0.3e}, viol = {:0.3e}".format(len(viol), obj[-1], res[-1], viol[-1]))
return viol, obj, res, err
def create_mat(n, r, s):
''' Create Gaussian matrix
'''
d_r = r * (2 * n - r)
s *= d_r
MR = np.random.normal(0, 1.0, size=(n,r))
ML = np.random.normal(0, 1.0, size=(n,r))
M = ML @ MR.T
N = np.random.normal(0, 0.25, size=(s))
Omega = np.random.choice(np.arange(n * n), size=s, replace=False)
M_obs = np.zeros(M.shape)
M_obs.flat[Omega] = M.flat[Omega] + N
return M, M_obs, N, Omega
def ball_proj(X, eps, Omega):
''' Project onto ball w.r.t. Frobenius norm
'''
P = X.copy()
norm = np.linalg.norm(X.flat[Omega])
if norm > eps:
P.flat[Omega] = X.flat[Omega] * eps / norm
return P
In [5]:
Copied!
def PG(M_obs, M, Omega, eps, tau=1.0e-1, alpha=1.0, tol=1e-3, max_iters=1000, verbose=False):
''' Execute ???
1.5e-5*len(Omega)
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
converge = False
while not converge:
X_p = X.copy()
X.flat[Omega] -= alpha * (X - M_obs).flat[Omega]
X = svt(X, tau * alpha)
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def VASALM(M_obs, M, Omega, eps, alpha=1e-2, eta=3.0, tol=1.0e-3, max_iters=1000, verbose=False):
''' alpha =1e-2
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
Lambd = np.zeros(M_obs.shape)
converge = False
while not converge:
X_p = X.copy()
N = ball_proj(Lambd / alpha + M_obs - X, eps, Omega)
Lambd = Lambd - alpha * (X + N - M_obs)
X = svt(X + Lambd / (alpha * eta), 1.0 / (alpha * eta))
Lambd = Lambd + alpha * (X_p - X)
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def SPG(M_obs, M, Omega, eps, mu=1.0e1, tol=1e-3, max_iters=1000, verbose=False):
''' Execute ???
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
OmegaC = list(set(range(X.size)) - set(Omega))
converge = False
while not converge:
X_p = X.copy()
Y = svt(X, mu)
norm = np.linalg.norm((M_obs - Y).flat[Omega])
theta = (norm - eps) / (mu * eps)
X.flat[Omega] = (mu * theta * M_obs.flat[Omega] + Y.flat[Omega]) / (1.0 + mu * theta)
X.flat[OmegaC] = Y.flat[OmegaC]
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def PP(M_obs, M, Omega, eps, alpha=5.0e1, tol=1e-3, max_iters=1000, verbose=False):
''' Execute ???
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
Z = M_obs.copy()
OmegaC = list(set(range(X.size)) - set(Omega))
converge = False
while not converge:
X_p = X.copy()
Z = Z + svt(2.0 * X - Z, alpha) - X
norm = np.linalg.norm(Z.flat[Omega] - M_obs.flat[Omega])
if norm > eps:
X.flat[OmegaC] = Z.flat[OmegaC]
X.flat[Omega] = ((norm - eps) * M_obs.flat[Omega] + eps * Z.flat[Omega] ) / norm
else:
X = Z
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def PG(M_obs, M, Omega, eps, tau=1.0e-1, alpha=1.0, tol=1e-3, max_iters=1000, verbose=False):
''' Execute ???
1.5e-5*len(Omega)
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
converge = False
while not converge:
X_p = X.copy()
X.flat[Omega] -= alpha * (X - M_obs).flat[Omega]
X = svt(X, tau * alpha)
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def VASALM(M_obs, M, Omega, eps, alpha=1e-2, eta=3.0, tol=1.0e-3, max_iters=1000, verbose=False):
''' alpha =1e-2
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
Lambd = np.zeros(M_obs.shape)
converge = False
while not converge:
X_p = X.copy()
N = ball_proj(Lambd / alpha + M_obs - X, eps, Omega)
Lambd = Lambd - alpha * (X + N - M_obs)
X = svt(X + Lambd / (alpha * eta), 1.0 / (alpha * eta))
Lambd = Lambd + alpha * (X_p - X)
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def SPG(M_obs, M, Omega, eps, mu=1.0e1, tol=1e-3, max_iters=1000, verbose=False):
''' Execute ???
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
OmegaC = list(set(range(X.size)) - set(Omega))
converge = False
while not converge:
X_p = X.copy()
Y = svt(X, mu)
norm = np.linalg.norm((M_obs - Y).flat[Omega])
theta = (norm - eps) / (mu * eps)
X.flat[Omega] = (mu * theta * M_obs.flat[Omega] + Y.flat[Omega]) / (1.0 + mu * theta)
X.flat[OmegaC] = Y.flat[OmegaC]
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
def PP(M_obs, M, Omega, eps, alpha=5.0e1, tol=1e-3, max_iters=1000, verbose=False):
''' Execute ???
'''
viol = []
obj = []
res = []
err = []
X = M_obs.copy()
Z = M_obs.copy()
OmegaC = list(set(range(X.size)) - set(Omega))
converge = False
while not converge:
X_p = X.copy()
Z = Z + svt(2.0 * X - Z, alpha) - X
norm = np.linalg.norm(Z.flat[Omega] - M_obs.flat[Omega])
if norm > eps:
X.flat[OmegaC] = Z.flat[OmegaC]
X.flat[Omega] = ((norm - eps) * M_obs.flat[Omega] + eps * Z.flat[Omega] ) / norm
else:
X = Z
viol, obj, res, err = update_stats(X, X_p, M_obs, M, Omega, viol, obj, res, err, verbose=verbose)
converge = res[-1] < tol or len(res) == max_iters
return X, viol, obj, res, err
Numerical Examples to generate SMC Plots¶
In [ ]:
Copied!
trials = 10
r = [10, 50]
s = [5, 4]
n = 1000
tol = 1.0e-20
iters = 200
for i in range(len(r)):
print('Rank ', r[i], ' plots')
viol = np.zeros((3, trials, iters))
obj = np.zeros((3, trials, iters))
res = np.zeros((3, trials, iters))
err = np.zeros((3, trials, iters))
for t in range(trials):
print('Trial: %2d of %2d' % (t + 1, trials))
np.random.seed(t)
M, M_obs, N, Omega = create_mat(n, r[i], s[i])
eps = np.linalg.norm(N)
_, viol[0, t, :], obj[0, t, :], res[0, t, :], err[0, t, :] = PP(M_obs, M, Omega, eps, tol=tol, max_iters=iters, verbose=True)
_, viol[1, t, :], obj[1, t, :], res[1, t, :], err[1, t, :] = VASALM(M_obs, M, Omega, eps, tol=tol, max_iters=iters, verbose=True)
_, viol[2, t, :], obj[2, t, :], res[2, t, :], err[2, t, :] = SPG(M_obs, M, Omega, eps, tol=tol, max_iters=iters, verbose=True)
viol_pp = np.median(viol[0, :, :], axis=0)
viol_vasalm = np.median(viol[1, :, :], axis=0)
viol_spg = np.median(viol[2, :, :], axis=0)
obj_pp = np.median(obj[0, :, :], axis=0)
obj_vasalm = np.median(obj[1, :, :], axis=0)
obj_spg = np.median(obj[2, :, :], axis=0)
res_pp = np.median(res[0, :, :], axis=0)
res_vasalm = np.median(res[1, :, :], axis=0)
res_spg = np.median(res[2, :, :], axis=0)
err_pp = np.median(err[0, :, :], axis=0)
err_vasalm = np.median(err[1, :, :], axis=0)
err_spg = np.median(err[2, :, :], axis=0)
filename = '../results/smc-obj-plots-' + str(r[i]) + '.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
csv_file.write('%0.5e,%0.5e,%0.5e\n' % (obj_pp[k], obj_vasalm[k], obj_spg[k]))
filename = '../results/smc-viol-plots-' + str(r[i]) + '.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
csv_file.write('%0.5e,%0.5e,%0.5e\n' % (viol_pp[k], viol_vasalm[k], viol_spg[k]))
filename = '../results/smc-res-plots-' + str(r[i]) + '.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
csv_file.write('%0.5e,%0.5e,%0.5e\n' % (res_pp[k], res_vasalm[k], res_spg[k]))
# Violation
fig, ax = plt.subplots()
plt.title('Violation $\\|\\mathcal{P}_{\\Omega}(X^k - M^k)\\|_F/\\|X^k\\|_F$')
plt.plot(viol_pp, color='b')
plt.plot(viol_spg, color='k')
plt.plot(viol_vasalm, color='r')
plt.yscale('log')
plt.show()
# Objective
fig, ax = plt.subplots()
plt.title('Objective $\\|X^k\\|_\\star$')
plt.plot(obj_pp, color='b')
plt.plot(obj_spg, color='k')
plt.plot(obj_vasalm, color='r')
plt.yscale('log')
plt.show()
# Residual
fig, ax = plt.subplots()
plt.title('Residual $\\|X^{k+1}-X^k\\|_F$')
plt.plot(res_pp, color='b')
plt.plot(res_spg, color='k')
plt.plot(res_vasalm, color='r')
plt.yscale('log')
plt.show()
# Error
fig, ax = plt.subplots()
plt.title('Error $\\|X^k - M\\|_F / \\|M\\|_F$')
plt.plot(err_pp, color='b')
plt.plot(err_spg, color='k')
plt.plot(err_vasalm, color='r')
plt.yscale('log')
plt.show()
print('All methods have completed.')
trials = 10
r = [10, 50]
s = [5, 4]
n = 1000
tol = 1.0e-20
iters = 200
for i in range(len(r)):
print('Rank ', r[i], ' plots')
viol = np.zeros((3, trials, iters))
obj = np.zeros((3, trials, iters))
res = np.zeros((3, trials, iters))
err = np.zeros((3, trials, iters))
for t in range(trials):
print('Trial: %2d of %2d' % (t + 1, trials))
np.random.seed(t)
M, M_obs, N, Omega = create_mat(n, r[i], s[i])
eps = np.linalg.norm(N)
_, viol[0, t, :], obj[0, t, :], res[0, t, :], err[0, t, :] = PP(M_obs, M, Omega, eps, tol=tol, max_iters=iters, verbose=True)
_, viol[1, t, :], obj[1, t, :], res[1, t, :], err[1, t, :] = VASALM(M_obs, M, Omega, eps, tol=tol, max_iters=iters, verbose=True)
_, viol[2, t, :], obj[2, t, :], res[2, t, :], err[2, t, :] = SPG(M_obs, M, Omega, eps, tol=tol, max_iters=iters, verbose=True)
viol_pp = np.median(viol[0, :, :], axis=0)
viol_vasalm = np.median(viol[1, :, :], axis=0)
viol_spg = np.median(viol[2, :, :], axis=0)
obj_pp = np.median(obj[0, :, :], axis=0)
obj_vasalm = np.median(obj[1, :, :], axis=0)
obj_spg = np.median(obj[2, :, :], axis=0)
res_pp = np.median(res[0, :, :], axis=0)
res_vasalm = np.median(res[1, :, :], axis=0)
res_spg = np.median(res[2, :, :], axis=0)
err_pp = np.median(err[0, :, :], axis=0)
err_vasalm = np.median(err[1, :, :], axis=0)
err_spg = np.median(err[2, :, :], axis=0)
filename = '../results/smc-obj-plots-' + str(r[i]) + '.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
csv_file.write('%0.5e,%0.5e,%0.5e\n' % (obj_pp[k], obj_vasalm[k], obj_spg[k]))
filename = '../results/smc-viol-plots-' + str(r[i]) + '.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
csv_file.write('%0.5e,%0.5e,%0.5e\n' % (viol_pp[k], viol_vasalm[k], viol_spg[k]))
filename = '../results/smc-res-plots-' + str(r[i]) + '.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
csv_file.write('%0.5e,%0.5e,%0.5e\n' % (res_pp[k], res_vasalm[k], res_spg[k]))
# Violation
fig, ax = plt.subplots()
plt.title('Violation $\\|\\mathcal{P}_{\\Omega}(X^k - M^k)\\|_F/\\|X^k\\|_F$')
plt.plot(viol_pp, color='b')
plt.plot(viol_spg, color='k')
plt.plot(viol_vasalm, color='r')
plt.yscale('log')
plt.show()
# Objective
fig, ax = plt.subplots()
plt.title('Objective $\\|X^k\\|_\\star$')
plt.plot(obj_pp, color='b')
plt.plot(obj_spg, color='k')
plt.plot(obj_vasalm, color='r')
plt.yscale('log')
plt.show()
# Residual
fig, ax = plt.subplots()
plt.title('Residual $\\|X^{k+1}-X^k\\|_F$')
plt.plot(res_pp, color='b')
plt.plot(res_spg, color='k')
plt.plot(res_vasalm, color='r')
plt.yscale('log')
plt.show()
# Error
fig, ax = plt.subplots()
plt.title('Error $\\|X^k - M\\|_F / \\|M\\|_F$')
plt.plot(err_pp, color='b')
plt.plot(err_spg, color='k')
plt.plot(err_vasalm, color='r')
plt.yscale('log')
plt.show()
print('All methods have completed.')
Numerical Examples to Generate SMC Table¶
In [ ]:
Copied!
methods = ['PP', 'VASALM', 'SPG']
r = [10, 50, 100]
labels = ['a', 'b', 'c'] # use letters for each rank when saving to file
s = [5, 4, 3]
tol = 1.0e-5
trials = 10
table_iters = np.zeros((3, len(r), trials))
table_viol = np.zeros((3, len(r), trials))
table_obj = np.zeros((3, len(r), trials))
for i in range(len(r)):
print('Rank: %3d' % r[i])
for t in range(trials):
print('Trial: %2d of %2d' % (t + 1, trials))
np.random.seed(t)
M, M_obs, N, Omega = create_mat(n, r[i], s[i])
eps = np.linalg.norm(N)
_, viol_pp, obj_pp, res_pp, err_pp = PP(M_obs, M, Omega, eps, tol=tol, verbose=False)
_, viol_vasalm, obj_vasalm, res_vasalm, err_vasalm = VASALM(M_obs, M, Omega, eps, tol=tol, verbose=False)
_, viol_spg, obj_spg, res_spg, err_spg = SPG(M_obs, M, Omega, eps, tol=tol, verbose=False)
table_iters[0, i, t] = len(viol_pp)
table_iters[1, i, t] = len(viol_vasalm)
table_iters[2, i, t] = len(viol_spg)
table_viol[0, i, t] = viol_pp[-1]
table_viol[1, i, t] = viol_vasalm[-1]
table_viol[2, i, t] = viol_spg[-1]
table_obj[0, i, t] = obj_pp[-1]
table_obj[1, i, t] = obj_vasalm[-1]
table_obj[2, i, t] = obj_spg[-1]
filename = '../results/smc-table.tex'
with open(filename, 'w') as table_file:
for i, m in enumerate(methods):
for k, label in enumerate(labels):
table_file.write('\\def\\smcIters' + m + label + '{%0.0d}\n'% (np.mean(table_iters[i, k, :])))
table_file.write('\\def\\smcViol' + m + label + '{%0.2e}\n'% (np.mean(table_viol[i, k, :])))
table_file.write('\\def\\smcObj' + m + label + '{%0.2e}\n'% (np.mean(table_obj[i, k, :])))
print('Data saved to file')
methods = ['PP', 'VASALM', 'SPG']
r = [10, 50, 100]
labels = ['a', 'b', 'c'] # use letters for each rank when saving to file
s = [5, 4, 3]
tol = 1.0e-5
trials = 10
table_iters = np.zeros((3, len(r), trials))
table_viol = np.zeros((3, len(r), trials))
table_obj = np.zeros((3, len(r), trials))
for i in range(len(r)):
print('Rank: %3d' % r[i])
for t in range(trials):
print('Trial: %2d of %2d' % (t + 1, trials))
np.random.seed(t)
M, M_obs, N, Omega = create_mat(n, r[i], s[i])
eps = np.linalg.norm(N)
_, viol_pp, obj_pp, res_pp, err_pp = PP(M_obs, M, Omega, eps, tol=tol, verbose=False)
_, viol_vasalm, obj_vasalm, res_vasalm, err_vasalm = VASALM(M_obs, M, Omega, eps, tol=tol, verbose=False)
_, viol_spg, obj_spg, res_spg, err_spg = SPG(M_obs, M, Omega, eps, tol=tol, verbose=False)
table_iters[0, i, t] = len(viol_pp)
table_iters[1, i, t] = len(viol_vasalm)
table_iters[2, i, t] = len(viol_spg)
table_viol[0, i, t] = viol_pp[-1]
table_viol[1, i, t] = viol_vasalm[-1]
table_viol[2, i, t] = viol_spg[-1]
table_obj[0, i, t] = obj_pp[-1]
table_obj[1, i, t] = obj_vasalm[-1]
table_obj[2, i, t] = obj_spg[-1]
filename = '../results/smc-table.tex'
with open(filename, 'w') as table_file:
for i, m in enumerate(methods):
for k, label in enumerate(labels):
table_file.write('\\def\\smcIters' + m + label + '{%0.0d}\n'% (np.mean(table_iters[i, k, :])))
table_file.write('\\def\\smcViol' + m + label + '{%0.2e}\n'% (np.mean(table_viol[i, k, :])))
table_file.write('\\def\\smcObj' + m + label + '{%0.2e}\n'% (np.mean(table_obj[i, k, :])))
print('Data saved to file')