Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 57 additions & 30 deletions copt/penalty.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,68 +55,95 @@ def _prox_L1(x, i, indices, indptr, d, step_size):

class GroupL1:
"""
Group Lasso penalty
Group Lasso penalty for non-overlapping groups

Args:
alpha: float
Constant multiplying this loss

blocks: list of lists

weights: (optional)
- If not passed, each group gets the same penalty (=1). (default)
- If 'nf', each groups gets a penalty equal to the square root of
the number of features indexed in it.
- If 'nfi', each group gets a penalty equal to the inverse of the
square root of the number of features indexed in it.
- If iterable, the i-th group gets a penalty equal to the i-th
entry of the passed iterable.

"""

def __init__(self, alpha, groups):
def __init__(self, alpha, groups, weights=None):
self.alpha = alpha
# groups need to be increasing
for i, g in enumerate(groups):
if not np.all(np.diff(g) == 1):
raise ValueError("Groups must be contiguous")
if i > 0 and groups[i - 1][-1] >= g[0]:
raise ValueError("Groups must be increasing")
self.groups = groups

sum_groups = np.sum([len(g) for g in groups])
all_indices = list(groups[0])
for g in groups[1:]:
all_indices.extend(list(g))
n_unique = np.unique(all_indices).size
if sum_groups != n_unique:
raise ValueError('Groups must not overlap.')
self.groups = [list(g) for g in groups]

if weights is None:
self.weights = np.ones(len(self.groups), dtype=np.float32)
elif isinstance(weights, str):
if weights == 'nf':
self.weights = np.asarray([np.sqrt(len(g)) for g in
self.groups])
elif weights == 'nfi':
self.weights = np.asarray([1 / np.sqrt(len(g)) for g in
self.groups])
else:
if len(weights) != len(self.groups):
raise ValueError('Length of weights must be equal to number '
'of groups.')
self.weights = np.asarray(weights)

def __call__(self, x):
return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
return self.alpha * np.sum([w * np.linalg.norm(x[g]) for w, g in
zip(self.weights, self.groups)])

def prox(self, x, step_size):
out = x.copy()
for g in self.groups:

for w, g in zip(self.weights, self.groups):
norm = np.linalg.norm(x[g])
if norm > self.alpha * step_size:
out[g] -= step_size * self.alpha * out[g] / norm
r = w * self.alpha * step_size
if norm > r:
out[g] -= r * out[g] / norm
else:
out[g] = 0
return out

def prox_factory(self, n_features):
B_data = np.zeros(n_features)
B_indices = np.arange(n_features, dtype=np.int32)
B_indices = np.zeros(n_features, dtype=np.int32)
B_indptr = np.zeros(n_features + 1, dtype=np.int32)

feature_pointer = 0
block_pointer = 0
for g in self.groups:
while feature_pointer < g[0]:
# non-penalized feature
B_data[feature_pointer] = -1.0
B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
feature_pointer += 1
block_pointer += 1
B_indptr[block_pointer + 1] = B_indptr[block_pointer]
for _ in g:
B_data[feature_pointer] = 1.0
B_indptr[block_pointer + 1] += 1
for atom in g:
B_data[feature_pointer] = 1.
B_indices[feature_pointer] = atom
feature_pointer += 1
B_indptr[block_pointer + 1] = B_indptr[block_pointer] + len(g)
block_pointer += 1
for _ in range(feature_pointer, n_features):
B_data[feature_pointer] = -1.0
B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1

excluded_indices = np.ones(n_features, dtype=np.int32)
excluded_indices[B_indices[: feature_pointer + 1]] = 0.
for i in np.where(excluded_indices)[0]:
B_data[feature_pointer] = -1.
B_indices[feature_pointer] = i
feature_pointer += 1

B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
block_pointer += 1

B_indptr = B_indptr[: block_pointer + 1]
B = sparse.csr_matrix((B_data, B_indices, B_indptr))

alpha = self.alpha

@njit
Expand All @@ -131,7 +158,7 @@ def _prox_gl(x, i, indices, indptr, d, step_size):
j_idx = B_indices[j]
norm += x[j_idx] ** 2
norm = np.sqrt(norm)
if norm > alpha * ss:
if norm > alpha * ss * self.weights[h]:
for j in range(B_indptr[h], B_indptr[h + 1]):
j_idx = B_indices[j]
x[j_idx] *= 1 - alpha * ss / norm
Expand Down Expand Up @@ -304,4 +331,4 @@ def prox(self, x, step_size):
self.n_cols,
max_iter=self.max_iter,
tol=self.tol,
)
)
124 changes: 83 additions & 41 deletions tests/test_penalties.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numpy as np
import copt as cp
import pytest
from numpy import testing

import copt.constraint
import copt.penalty
from copt import tv_prox
from numpy import testing
import pytest

proximal_penalties = [
copt.penalty.L1Norm(1.0),
Expand All @@ -17,46 +17,87 @@


def test_GroupL1():
# non-overlapping
groups = [(0, 1), (1, 2)]
with np.testing.assert_raises(ValueError):
copt.penalty.GroupL1(1.0, groups)

# converts group type from tuple to list
groups = [(0, 1), (2, 3)]
g1 = copt.penalty.GroupL1(1.0, groups)
pen = copt.penalty.GroupL1(1.0, groups)
for g in pen.groups:
np.testing.assert_(isinstance(g, list))

# same number of groups and weights
groups = [[0, 1], [2, 3]]
weights = [1, 2, 3]
with np.testing.assert_raises(ValueError):
copt.penalty.GroupL1(1.0, groups, weights)

# evaluation of penalty and prox
x = np.array([0.01, 0.5, 3, 4])
weights = np.array([10, .2])
g0 = copt.penalty.GroupL1(1, groups, weights)
# eval
result = g0(x)
gt = (weights[0] * np.linalg.norm(x[groups[0]], 2) +
weights[1] * np.linalg.norm(x[groups[1]], 2))
np.testing.assert_almost_equal(result, gt)
# prox
gt = x.copy()
# the first group has norm lower than the corresponding weight
gt[groups[0]] = 0
# the second group has norm higher than the corresponding weight
gt[groups[1]] -= (x[groups[1]] * weights[1] /
np.linalg.norm(x[groups[1]]))
prox = g0.prox(x, 1)
np.testing.assert_almost_equal(prox, gt)

# default weights
g1 = copt.penalty.GroupL1(1, groups)
gt = np.array([1., 1])
np.testing.assert_almost_equal(g1.weights, gt)

# weights: sqrt(|g|)
g2 = copt.penalty.GroupL1(1.0, groups, 'nf')
gt = np.array([np.sqrt(2), np.sqrt(2)])
np.testing.assert_almost_equal(g2.weights, gt)

# weights: sqrt(|g|) ** -1
g3 = copt.penalty.GroupL1(1.0, groups, 'nfi')
gt = 1. / gt
np.testing.assert_almost_equal(g3.weights, gt)

# custom weights
gt = np.random.rand(len(groups))
g4 = copt.penalty.GroupL1(1.0, groups, gt)
np.testing.assert_almost_equal(g4.weights, gt)
expected = (np.linalg.norm(x[[0, 1]]) * gt[0] +
np.linalg.norm(x[[2, 3]]) * gt[1])
np.testing.assert_almost_equal(g4(x), expected)

# sparse proximal
_, B = g1.prox_factory(5)
assert np.all(
B.toarray()
== np.array(
[
[1.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 0.0, -1.0],
]
)
gt = np.array(
[
[1.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 0.0, -1.0],
]
)
np.testing.assert_almost_equal(B.toarray(), gt)

groups = [(0, 1), (3, 4)]
g2 = copt.penalty.GroupL1(1.0, groups)
_, B = g2.prox_factory(5)
assert np.all(
B.toarray()
== np.array(
[
[1.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, -1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0, 1.0],
]
)
g5 = copt.penalty.GroupL1(1.0, groups)
_, B = g5.prox_factory(5)
gt = np.array(
[
[1.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0, 1.0],
[0.0, 0.0, -1.0, 0.0, 0.0],
]
)


#
# for blocks in [[(0, 1), (2, 3)], ]:
# pen = cp.utils.GroupL1(1., blocks)
# counter = 0
# for g in pen.groups:
# for j in g:
# counter += 1
# assert counter == blocks.size
# assert pen.groups
# for g in pen.groups:
# assert np.unique(blocks[g]).size == 1
np.testing.assert_almost_equal(B.toarray(), gt)


def test_tv1_prox():
Expand Down Expand Up @@ -99,7 +140,8 @@ def tv_norm(x, n_rows, n_cols):

for nrun in range(20):
x = np.random.randn(n_features)
x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000)
x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10,
max_iter=10000)
diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols)
testing.assert_array_less(
((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj
Expand Down Expand Up @@ -137,8 +179,8 @@ def test_three_inequality(pen):

lhs = 2 * (pen(xi) - pen(u))
rhs = (
np.linalg.norm(u - z) ** 2
- np.linalg.norm(u - xi) ** 2
- np.linalg.norm(xi - z) ** 2
np.linalg.norm(u - z) ** 2
- np.linalg.norm(u - xi) ** 2
- np.linalg.norm(xi - z) ** 2
)
assert lhs <= rhs, pen
66 changes: 33 additions & 33 deletions tests/test_randomized.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numpy as np
import pytest
from scipy import sparse

import copt as cp
import copt.loss
import copt.penalty
from copt import randomized
import pytest

np.random.seed(0)
n_samples, n_features = 20, 10
Expand Down Expand Up @@ -188,37 +188,37 @@ def test_gl(groups):
assert np.linalg.norm(grad_map) < 1e-6


def test_vrtos_ogl():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why removing the test? This is testing an overlapping group lasso model by adding to proximal functions (for solvers that allow this)

"""Test on overlapping group lasso"""
alpha = 1.0 / n_samples
groups_1 = [np.arange(8)]
groups_2 = [np.arange(5, 10)]
f = copt.loss.LogLoss(A, b, alpha)
for beta in np.logspace(-3, 3, 3):
p_1 = copt.penalty.GroupL1(beta, groups_1)
p_2 = copt.penalty.GroupL1(beta, groups_2)
L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density

opt_vrtos = cp.minimize_vrtos(
f.partial_deriv,
A,
b,
np.zeros(n_features),
1 / (3 * L),
alpha=alpha,
max_iter=200,
prox_1=p_1.prox_factory(n_features),
prox_2=p_2.prox_factory(n_features),
)

opt_tos = cp.minimize_three_split(
f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
)

norm = np.linalg.norm(opt_tos.x)
if norm < 1e-10:
norm = 1
assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4
# def test_vrtos_ogl():
# """Test on overlapping group lasso"""
# alpha = 1.0 / n_samples
# groups_1 = [np.arange(8)]
# groups_2 = [np.arange(5, 10)]
# f = copt.loss.LogLoss(A, b, alpha)
# for beta in np.logspace(-3, 3, 3):
# p_1 = copt.penalty.OGroupL1(beta, groups_1)
# p_2 = copt.penalty.OGroupL1(beta, groups_2)
# L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
#
# opt_vrtos = cp.minimize_vrtos(
# f.partial_deriv,
# A,
# b,
# np.zeros(n_features),
# 1 / (3 * L),
# alpha=alpha,
# max_iter=200,
# prox_1=p_1.prox_factory(n_features),
# prox_2=p_2.prox_factory(n_features),
# )
#
# opt_tos = cp.minimize_three_split(
# f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
# )
#
# norm = np.linalg.norm(opt_tos.x)
# if norm < 1e-10:
# norm = 1
# assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4


@pytest.mark.parametrize("A_data", [A, A2])
Expand Down