Basis Pursuit¶
This notebook serves to benchmark proximal projection (PP) against existing alternatives on an instance the basis pursuit problem
$$ \min_{x\in\mathbb{R}^n} \|x\|_1 \ \ \mbox{s.t.}\ \ Ax = b.$$
The first cell imports the needed libraries and defines the experiment setup: $m = 1250$ and $n=5000$ along with a function for logging perfomance of each algorithm.
In [7]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
import time
m = 1250
n = 5000
iters = 2000
seeds = 20
def shrink(xi, alpha):
''' Execute element-wise soft-threshold
'''
return np.sign(xi) * np.maximum(np.abs(xi) - alpha, 0)
def update_stats(x, x_p, viol, obj, res, k):
''' Update arrays that store performance statistics.
'''
viol[k] = np.linalg.norm(A @ x - b)
obj[k] = np.linalg.norm(x, ord=1)
res[k] = np.linalg.norm(x - x_p)
return viol, obj, res
import numpy as np
import matplotlib.pyplot as plt
import time
m = 1250
n = 5000
iters = 2000
seeds = 20
def shrink(xi, alpha):
''' Execute element-wise soft-threshold
'''
return np.sign(xi) * np.maximum(np.abs(xi) - alpha, 0)
def update_stats(x, x_p, viol, obj, res, k):
''' Update arrays that store performance statistics.
'''
viol[k] = np.linalg.norm(A @ x - b)
obj[k] = np.linalg.norm(x, ord=1)
res[k] = np.linalg.norm(x - x_p)
return viol, obj, res
Define Algorithms¶
As defined in Appendix B.1 of the paper, here we implement the following:
- Proximal Projection (PP)
- Linearized Bregman (LB)
- Linearized Method of Multipliers (LMM)
- Primal Dual Hybrid Gradient (PDHG)
In [8]:
Copied!
def PP(A, b, alpha=1.0e-1, num_iters=2000):
''' Execute proximal projection algorithm
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
z = np.zeros((n, 1))
x = np.zeros((n, 1))
start = time.time()
AAt = np.linalg.inv(A @ A.T)
for k in range(num_iters):
x_p = x.copy()
x = z - A.T @ (AAt @ (A @ z - b))
z = z + shrink(2.0 * x - z, alpha) - x
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def LB(A, b, mu=2.0, num_iters=2000):
''' Execute linearized bregman algorithm
Note: This solves the problem
min mu * ||x||_1 + ||x||_2^2 / 2 * alpha,
x
subject to the linear constraint A @ x = b.
So, mu must be chosen "large enough" to ensure
the minimizer of the L1 norm is obtained.
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
x = np.zeros((n, 1))
v = np.zeros((n, 1))
start = time.time()
A_norm = np.linalg.norm(A @ A.T)
alpha = 2.0 / A_norm
mu *= A_norm
for k in range(num_iters):
x_p = x.copy()
v = v - A.T @ (A @ x - b)
x = shrink(alpha * v, alpha * mu)
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def LMM(A, b, lambd=100.0, num_iters=2000):
''' Execute linearized method of multipliers algorithm
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
x = np.zeros((n, 1))
v = np.zeros((m, 1))
start = time.time()
lambd *= np.linalg.norm(A.T @ A)
alpha = 1.0 / (lambd * np.linalg.norm(A.T @ A))
for k in range(num_iters):
x_p = x.copy()
x = shrink(x - alpha * A.T @ (v + lambd * (A @ x - b)), alpha)
v = v + lambd * (A @ x - b)
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def PDHG(A, b, lambd=100.0, num_iters=2000):
''' Execute primal dual hybrid gradient algorithm
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
x = np.zeros((n, 1))
v = np.zeros((m, 1))
alpha = 1.0 / (lambd * np.linalg.norm(A.T @ A))
start = time.time()
for k in range(num_iters):
x_p = x.copy()
x = shrink(x - alpha * A.T @ v, alpha)
v = v + lambd * (A @ (2 * x - x_p) - b)
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def PP(A, b, alpha=1.0e-1, num_iters=2000):
''' Execute proximal projection algorithm
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
z = np.zeros((n, 1))
x = np.zeros((n, 1))
start = time.time()
AAt = np.linalg.inv(A @ A.T)
for k in range(num_iters):
x_p = x.copy()
x = z - A.T @ (AAt @ (A @ z - b))
z = z + shrink(2.0 * x - z, alpha) - x
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def LB(A, b, mu=2.0, num_iters=2000):
''' Execute linearized bregman algorithm
Note: This solves the problem
min mu * ||x||_1 + ||x||_2^2 / 2 * alpha,
x
subject to the linear constraint A @ x = b.
So, mu must be chosen "large enough" to ensure
the minimizer of the L1 norm is obtained.
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
x = np.zeros((n, 1))
v = np.zeros((n, 1))
start = time.time()
A_norm = np.linalg.norm(A @ A.T)
alpha = 2.0 / A_norm
mu *= A_norm
for k in range(num_iters):
x_p = x.copy()
v = v - A.T @ (A @ x - b)
x = shrink(alpha * v, alpha * mu)
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def LMM(A, b, lambd=100.0, num_iters=2000):
''' Execute linearized method of multipliers algorithm
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
x = np.zeros((n, 1))
v = np.zeros((m, 1))
start = time.time()
lambd *= np.linalg.norm(A.T @ A)
alpha = 1.0 / (lambd * np.linalg.norm(A.T @ A))
for k in range(num_iters):
x_p = x.copy()
x = shrink(x - alpha * A.T @ (v + lambd * (A @ x - b)), alpha)
v = v + lambd * (A @ x - b)
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
def PDHG(A, b, lambd=100.0, num_iters=2000):
''' Execute primal dual hybrid gradient algorithm
'''
viol = np.zeros(num_iters)
obj = np.zeros(num_iters)
res = np.zeros(num_iters)
x = np.zeros((n, 1))
v = np.zeros((m, 1))
alpha = 1.0 / (lambd * np.linalg.norm(A.T @ A))
start = time.time()
for k in range(num_iters):
x_p = x.copy()
x = shrink(x - alpha * A.T @ v, alpha)
v = v + lambd * (A @ (2 * x - x_p) - b)
viol, obj, res = update_stats(x, x_p, viol, obj, res, k)
stop = time.time()
return x, viol, obj, res, (stop - start)
Benchmark Algorithms¶
We repeat 20 trials with different random seeds and report the median execution time of each algorithm.
In [9]:
Copied!
viol = np.zeros((4, seeds, iters))
obj = np.zeros((4, seeds, iters))
res = np.zeros((4, seeds, iters))
times = np.zeros((4, seeds))
for seed in range(seeds):
print('seed = ', str(seed + 1), ' of ', seeds)
np.random.seed(seed)
A = np.random.normal(0, 1.0/m, size=(m,n))
x = np.random.normal(0, 1.0, size=(n,1)) * np.random.binomial(n=1, p=0.05, size=(n,1))
b = A @ x
_, viol[0, seed, :], obj[0, seed, :], res[0, seed, :], times[0, seed] = PP(A, b, num_iters=iters)
_, viol[1, seed, :], obj[1, seed, :], res[1, seed, :], times[1, seed] = LMM(A, b, num_iters=iters)
_, viol[2, seed, :], obj[2, seed, :], res[2, seed, :], times[2, seed] = LB(A, b, num_iters=iters)
_, viol[3, seed, :], obj[3, seed, :], res[3, seed, :], times[3, seed] = PDHG(A, b, num_iters=iters)
viol_pp = np.median(viol[0, :, :], axis=0)
viol_lmm = np.median(viol[1, :, :], axis=0)
viol_lb = np.median(viol[2, :, :], axis=0)
viol_pdhg = np.median(viol[3, :, :], axis=0)
obj_pp = np.median(obj[0, :, :], axis=0)
obj_lmm = np.median(obj[1, :, :], axis=0)
obj_lb = np.median(obj[2, :, :], axis=0)
obj_pdhg = np.median(obj[3, :, :], axis=0)
res_pp = np.median(res[0, :, :], axis=0)
res_lmm = np.median(res[1, :, :], axis=0)
res_lb = np.median(res[2, :, :], axis=0)
res_pdhg = np.median(res[3, :, :], axis=0)
times_pp = np.mean(times[0, :])
times_lmm = np.mean(times[1, :])
times_lb = np.mean(times[2, :])
times_pdhg = np.mean(times[3, :])
print('PP median time = %0.2f s' % times_pp)
print('LMM median time = %0.2f s' % times_lmm)
print('LB median time = %0.2f s' % times_lb)
print('PDHG median time = %0.2f s' % times_pdhg)
viol = np.zeros((4, seeds, iters))
obj = np.zeros((4, seeds, iters))
res = np.zeros((4, seeds, iters))
times = np.zeros((4, seeds))
for seed in range(seeds):
print('seed = ', str(seed + 1), ' of ', seeds)
np.random.seed(seed)
A = np.random.normal(0, 1.0/m, size=(m,n))
x = np.random.normal(0, 1.0, size=(n,1)) * np.random.binomial(n=1, p=0.05, size=(n,1))
b = A @ x
_, viol[0, seed, :], obj[0, seed, :], res[0, seed, :], times[0, seed] = PP(A, b, num_iters=iters)
_, viol[1, seed, :], obj[1, seed, :], res[1, seed, :], times[1, seed] = LMM(A, b, num_iters=iters)
_, viol[2, seed, :], obj[2, seed, :], res[2, seed, :], times[2, seed] = LB(A, b, num_iters=iters)
_, viol[3, seed, :], obj[3, seed, :], res[3, seed, :], times[3, seed] = PDHG(A, b, num_iters=iters)
viol_pp = np.median(viol[0, :, :], axis=0)
viol_lmm = np.median(viol[1, :, :], axis=0)
viol_lb = np.median(viol[2, :, :], axis=0)
viol_pdhg = np.median(viol[3, :, :], axis=0)
obj_pp = np.median(obj[0, :, :], axis=0)
obj_lmm = np.median(obj[1, :, :], axis=0)
obj_lb = np.median(obj[2, :, :], axis=0)
obj_pdhg = np.median(obj[3, :, :], axis=0)
res_pp = np.median(res[0, :, :], axis=0)
res_lmm = np.median(res[1, :, :], axis=0)
res_lb = np.median(res[2, :, :], axis=0)
res_pdhg = np.median(res[3, :, :], axis=0)
times_pp = np.mean(times[0, :])
times_lmm = np.mean(times[1, :])
times_lb = np.mean(times[2, :])
times_pdhg = np.mean(times[3, :])
print('PP median time = %0.2f s' % times_pp)
print('LMM median time = %0.2f s' % times_lmm)
print('LB median time = %0.2f s' % times_lb)
print('PDHG median time = %0.2f s' % times_pdhg)
seed = 1 of 20 seed = 2 of 20 seed = 3 of 20 seed = 4 of 20 seed = 5 of 20 seed = 6 of 20 seed = 7 of 20 seed = 8 of 20 seed = 9 of 20 seed = 10 of 20 seed = 11 of 20 seed = 12 of 20 seed = 13 of 20 seed = 14 of 20 seed = 15 of 20 seed = 16 of 20 seed = 17 of 20 seed = 18 of 20 seed = 19 of 20 seed = 20 of 20 PP median time = 8.06 s LMM median time = 22.85 s LB median time = 7.40 s PDHG median time = 20.60 s
Plot Median Convergence Results¶
In [10]:
Copied!
fig, ax = plt.subplots()
plt.title('Violation $|Ax-b|$')
plt.plot(viol_pp, color='b')
plt.plot(viol_lb, color='g')
plt.plot(viol_lmm, color='k')
plt.plot(viol_pdhg, color='r')
plt.yscale('log')
plt.show()
fig, ax = plt.subplots()
plt.title('Objective |x^k|_1')
plt.plot(obj_pp, color='b')
plt.plot(obj_lb, color='g')
plt.plot(obj_lmm, color='k')
plt.plot(obj_pdhg, color='r')
plt.show()
fig, ax = plt.subplots()
plt.title('Residual $|x^{k+1}-x^k|$')
plt.plot(res_pp, color='b')
plt.plot(res_lb, color='g')
plt.plot(res_lmm, color='k')
plt.plot(res_pdhg, color='r')
plt.yscale('log')
plt.show()
fig, ax = plt.subplots()
plt.title('Violation $|Ax-b|$')
plt.plot(viol_pp, color='b')
plt.plot(viol_lb, color='g')
plt.plot(viol_lmm, color='k')
plt.plot(viol_pdhg, color='r')
plt.yscale('log')
plt.show()
fig, ax = plt.subplots()
plt.title('Objective |x^k|_1')
plt.plot(obj_pp, color='b')
plt.plot(obj_lb, color='g')
plt.plot(obj_lmm, color='k')
plt.plot(obj_pdhg, color='r')
plt.show()
fig, ax = plt.subplots()
plt.title('Residual $|x^{k+1}-x^k|$')
plt.plot(res_pp, color='b')
plt.plot(res_lb, color='g')
plt.plot(res_lmm, color='k')
plt.plot(res_pdhg, color='r')
plt.yscale('log')
plt.show()
Save Plots and Times¶
In [11]:
Copied!
filename = '../results/bp-obj-plots.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
if k % 1 == 0:
csv_file.write('%0.5e,%0.5e,%0.5e,%0.5e\n' % (obj_pp[k],
obj_lmm[k],
obj_lb[k],
obj_pdhg[k]))
filename = '../results/bp-viol-plots.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
if k % 1 == 0:
csv_file.write('%0.5e,%0.5e,%0.5e,%0.5e\n' % (viol_pp[k],
viol_lmm[k],
viol_lb[k],
viol_pdhg[k]))
filename = '../results/bp-res-plots.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
if k % 1 == 0:
csv_file.write('%0.5e,%0.5e,%0.5e,%0.5e\n' % (res_pp[k],
res_lmm[k],
res_lb[k],
res_pdhg[k]))
filename = '../results/bp-times.tex'
with open(filename, 'w') as csv_file:
csv_file.write('\\def\\bpTimePP{%0.2f}\n' % (times_pp))
csv_file.write('\\def\\bpTimeLB{%0.2f}\n' % (times_lb))
csv_file.write('\\def\\bpTimePDHG{%0.2f}\n' % (times_pdhg))
csv_file.write('\\def\\bpTimeLMM{%0.2f}' % (times_lmm))
filename = '../results/bp-obj-plots.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
if k % 1 == 0:
csv_file.write('%0.5e,%0.5e,%0.5e,%0.5e\n' % (obj_pp[k],
obj_lmm[k],
obj_lb[k],
obj_pdhg[k]))
filename = '../results/bp-viol-plots.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
if k % 1 == 0:
csv_file.write('%0.5e,%0.5e,%0.5e,%0.5e\n' % (viol_pp[k],
viol_lmm[k],
viol_lb[k],
viol_pdhg[k]))
filename = '../results/bp-res-plots.csv'
with open(filename, 'w') as csv_file:
for k in range(iters):
if k % 1 == 0:
csv_file.write('%0.5e,%0.5e,%0.5e,%0.5e\n' % (res_pp[k],
res_lmm[k],
res_lb[k],
res_pdhg[k]))
filename = '../results/bp-times.tex'
with open(filename, 'w') as csv_file:
csv_file.write('\\def\\bpTimePP{%0.2f}\n' % (times_pp))
csv_file.write('\\def\\bpTimeLB{%0.2f}\n' % (times_lb))
csv_file.write('\\def\\bpTimePDHG{%0.2f}\n' % (times_pdhg))
csv_file.write('\\def\\bpTimeLMM{%0.2f}' % (times_lmm))
CVX Comparison¶
As a sanity check, we include a CVX implementation to compute the solution to the basis pursuit problem using the final seed from the above setup.
In [12]:
Copied!
import cvxpy as cp
x = cp.Variable((n,1))
objective = cp.Minimize(cp.sum(cp.abs(x)))
constraints = [A @ x == b]
prob = cp.Problem(objective, constraints)
result = prob.solve(verbose=True)
print('|x| = ', np.sum(np.abs(x.value)))
print('|Ax - b| = ', np.sum(np.abs(A @ x.value - b)))
import cvxpy as cp
x = cp.Variable((n,1))
objective = cp.Minimize(cp.sum(cp.abs(x)))
constraints = [A @ x == b]
prob = cp.Problem(objective, constraints)
result = prob.solve(verbose=True)
print('|x| = ', np.sum(np.abs(x.value)))
print('|Ax - b| = ', np.sum(np.abs(A @ x.value - b)))
=============================================================================== CVXPY v1.5.2 =============================================================================== (CVXPY) Jul 23 05:36:56 PM: Your problem has 5000 variables, 1250 constraints, and 0 parameters. (CVXPY) Jul 23 05:36:56 PM: It is compliant with the following grammars: DCP, DQCP (CVXPY) Jul 23 05:36:56 PM: (If you need to solve this problem multiple times, but with different data, consider using parameters.) (CVXPY) Jul 23 05:36:56 PM: CVXPY will first compile your problem; then, it will invoke a numerical solver to obtain a solution. (CVXPY) Jul 23 05:36:56 PM: Your problem is compiled with the CPP canonicalization backend. ------------------------------------------------------------------------------- Compilation ------------------------------------------------------------------------------- (CVXPY) Jul 23 05:36:56 PM: Compiling problem (target solver=CLARABEL). (CVXPY) Jul 23 05:36:56 PM: Reduction chain: Dcp2Cone -> CvxAttr2Constr -> ConeMatrixStuffing -> CLARABEL (CVXPY) Jul 23 05:36:56 PM: Applying reduction Dcp2Cone (CVXPY) Jul 23 05:36:56 PM: Applying reduction CvxAttr2Constr (CVXPY) Jul 23 05:36:56 PM: Applying reduction ConeMatrixStuffing (CVXPY) Jul 23 05:36:57 PM: Applying reduction CLARABEL (CVXPY) Jul 23 05:36:57 PM: Finished problem compilation (took 1.518e+00 seconds). ------------------------------------------------------------------------------- Numerical solver ------------------------------------------------------------------------------- (CVXPY) Jul 23 05:36:57 PM: Invoking solver CLARABEL to obtain a solution. ------------------------------------------------------------- Clarabel.rs v0.9.0 - Clever Acronym (c) Paul Goulart University of Oxford, 2022 ------------------------------------------------------------- problem: variables = 10000 constraints = 11250 nnz(P) = 0 nnz(A) = 6270000 cones (total) = 2 : Zero = 1, numel = 1250 : Nonnegative = 1, numel = 10000 settings: linear algebra: direct / qdldl, precision: 64 bit max iter = 200, time limit = Inf, max step = 0.990 tol_feas = 1.0e-8, tol_gap_abs = 1.0e-8, tol_gap_rel = 1.0e-8, static reg : on, ϵ1 = 1.0e-8, ϵ2 = 4.9e-32 dynamic reg: on, ϵ = 1.0e-13, δ = 2.0e-7 iter refine: on, reltol = 1.0e-13, abstol = 1.0e-12, max iter = 10, stop ratio = 5.0 equilibrate: on, min_scale = 1.0e-4, max_scale = 1.0e4 max iter = 10 iter pcost dcost gap pres dres k/t μ step --------------------------------------------------------------------------------------------- 0 +1.4716e-23 -7.3128e-17 7.31e-17 9.57e-01 6.50e-01 1.00e+00 1.80e+00 ------ 1 +8.8651e+01 +8.8760e+01 1.22e-03 7.21e-01 1.76e-01 2.78e-01 5.19e-01 8.12e-01 2 +1.5012e+02 +1.5018e+02 4.20e-04 4.86e-01 1.08e-01 1.71e-01 3.46e-01 5.20e-01 3 +1.7209e+02 +1.7213e+02 2.08e-04 2.91e-01 6.41e-02 1.01e-01 2.17e-01 4.47e-01 4 +1.8641e+02 +1.8643e+02 8.64e-05 1.51e-01 3.47e-02 5.24e-02 1.24e-01 5.62e-01 5 +1.9359e+02 +1.9359e+02 3.37e-05 8.59e-02 2.05e-02 2.85e-02 7.58e-02 5.93e-01 6 +1.9651e+02 +1.9651e+02 1.39e-05 5.24e-02 1.29e-02 1.68e-02 4.89e-02 5.56e-01 7 +1.9878e+02 +1.9878e+02 3.65e-06 2.90e-02 7.33e-03 8.78e-03 2.82e-02 6.71e-01 8 +1.9978e+02 +1.9978e+02 1.24e-06 1.86e-02 4.79e-03 5.55e-03 1.86e-02 5.27e-01 9 +2.0050e+02 +2.0050e+02 9.32e-08 1.09e-02 2.84e-03 3.14e-03 1.11e-02 6.77e-01 10 +2.0080e+02 +2.0080e+02 2.69e-07 7.58e-03 1.98e-03 2.16e-03 7.82e-03 4.94e-01 11 +2.0103e+02 +2.0103e+02 4.24e-07 5.14e-03 1.35e-03 1.43e-03 5.35e-03 7.58e-01 12 +2.0121e+02 +2.0121e+02 2.81e-07 2.54e-03 6.72e-04 7.01e-04 2.67e-03 9.90e-01 13 +2.0125e+02 +2.0125e+02 1.65e-07 1.45e-03 3.86e-04 4.02e-04 1.54e-03 9.90e-01 14 +2.0131e+02 +2.0131e+02 2.77e-08 2.41e-04 6.43e-05 6.72e-05 2.57e-04 9.01e-01 15 +2.0132e+02 +2.0132e+02 1.21e-08 1.04e-04 2.77e-05 2.90e-05 1.11e-04 9.90e-01 16 +2.0132e+02 +2.0132e+02 7.32e-10 6.30e-06 1.68e-06 1.76e-06 6.72e-06 9.42e-01 17 +2.0132e+02 +2.0132e+02 7.33e-12 6.30e-08 1.68e-08 1.76e-08 6.72e-08 9.90e-01 18 +2.0132e+02 +2.0132e+02 6.88e-14 6.41e-10 1.71e-10 1.79e-10 6.83e-10 9.90e-01 --------------------------------------------------------------------------------------------- Terminated with status = Solved solve time = 86.646018501s ------------------------------------------------------------------------------- Summary ------------------------------------------------------------------------------- (CVXPY) Jul 23 05:38:25 PM: Problem status: optimal (CVXPY) Jul 23 05:38:25 PM: Optimal value: 2.013e+02 (CVXPY) Jul 23 05:38:25 PM: Compilation took 1.518e+00 seconds (CVXPY) Jul 23 05:38:25 PM: Solver (including time spent in interface) took 8.732e+01 seconds |x| = 201.3212978720586 |Ax - b| = 1.1977110861254026e-10