diff --git a/copt/penalty.py b/copt/penalty.py index b9774f39..34cd84c4 100644 --- a/copt/penalty.py +++ b/copt/penalty.py @@ -55,7 +55,7 @@ def _prox_L1(x, i, indices, indptr, d, step_size): class GroupL1: """ - Group Lasso penalty + Group Lasso penalty for non-overlapping groups Args: alpha: float @@ -63,60 +63,87 @@ class GroupL1: blocks: list of lists + weights: (optional) + - If not passed, each group gets the same penalty (=1). (default) + - If 'nf', each groups gets a penalty equal to the square root of + the number of features indexed in it. + - If 'nfi', each group gets a penalty equal to the inverse of the + square root of the number of features indexed in it. + - If iterable, the i-th group gets a penalty equal to the i-th + entry of the passed iterable. + """ - def __init__(self, alpha, groups): + def __init__(self, alpha, groups, weights=None): self.alpha = alpha - # groups need to be increasing - for i, g in enumerate(groups): - if not np.all(np.diff(g) == 1): - raise ValueError("Groups must be contiguous") - if i > 0 and groups[i - 1][-1] >= g[0]: - raise ValueError("Groups must be increasing") - self.groups = groups + + sum_groups = np.sum([len(g) for g in groups]) + all_indices = list(groups[0]) + for g in groups[1:]: + all_indices.extend(list(g)) + n_unique = np.unique(all_indices).size + if sum_groups != n_unique: + raise ValueError('Groups must not overlap.') + self.groups = [list(g) for g in groups] + + if weights is None: + self.weights = np.ones(len(self.groups), dtype=np.float32) + elif isinstance(weights, str): + if weights == 'nf': + self.weights = np.asarray([np.sqrt(len(g)) for g in + self.groups]) + elif weights == 'nfi': + self.weights = np.asarray([1 / np.sqrt(len(g)) for g in + self.groups]) + else: + if len(weights) != len(self.groups): + raise ValueError('Length of weights must be equal to number ' + 'of groups.') + self.weights = np.asarray(weights) def __call__(self, x): - return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups]) + return self.alpha * np.sum([w * np.linalg.norm(x[g]) for w, g in + zip(self.weights, self.groups)]) def prox(self, x, step_size): out = x.copy() - for g in self.groups: - + for w, g in zip(self.weights, self.groups): norm = np.linalg.norm(x[g]) - if norm > self.alpha * step_size: - out[g] -= step_size * self.alpha * out[g] / norm + r = w * self.alpha * step_size + if norm > r: + out[g] -= r * out[g] / norm else: out[g] = 0 return out def prox_factory(self, n_features): B_data = np.zeros(n_features) - B_indices = np.arange(n_features, dtype=np.int32) + B_indices = np.zeros(n_features, dtype=np.int32) B_indptr = np.zeros(n_features + 1, dtype=np.int32) feature_pointer = 0 block_pointer = 0 for g in self.groups: - while feature_pointer < g[0]: - # non-penalized feature - B_data[feature_pointer] = -1.0 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 - feature_pointer += 1 - block_pointer += 1 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] - for _ in g: - B_data[feature_pointer] = 1.0 - B_indptr[block_pointer + 1] += 1 + for atom in g: + B_data[feature_pointer] = 1. + B_indices[feature_pointer] = atom feature_pointer += 1 + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + len(g) block_pointer += 1 - for _ in range(feature_pointer, n_features): - B_data[feature_pointer] = -1.0 - B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 + + excluded_indices = np.ones(n_features, dtype=np.int32) + excluded_indices[B_indices[: feature_pointer + 1]] = 0. + for i in np.where(excluded_indices)[0]: + B_data[feature_pointer] = -1. + B_indices[feature_pointer] = i feature_pointer += 1 + + B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1 block_pointer += 1 B_indptr = B_indptr[: block_pointer + 1] B = sparse.csr_matrix((B_data, B_indices, B_indptr)) + alpha = self.alpha @njit @@ -131,7 +158,7 @@ def _prox_gl(x, i, indices, indptr, d, step_size): j_idx = B_indices[j] norm += x[j_idx] ** 2 norm = np.sqrt(norm) - if norm > alpha * ss: + if norm > alpha * ss * self.weights[h]: for j in range(B_indptr[h], B_indptr[h + 1]): j_idx = B_indices[j] x[j_idx] *= 1 - alpha * ss / norm @@ -304,4 +331,4 @@ def prox(self, x, step_size): self.n_cols, max_iter=self.max_iter, tol=self.tol, - ) \ No newline at end of file + ) diff --git a/tests/test_penalties.py b/tests/test_penalties.py index a67de838..214f82a3 100644 --- a/tests/test_penalties.py +++ b/tests/test_penalties.py @@ -1,10 +1,10 @@ import numpy as np -import copt as cp +import pytest +from numpy import testing + import copt.constraint import copt.penalty from copt import tv_prox -from numpy import testing -import pytest proximal_penalties = [ copt.penalty.L1Norm(1.0), @@ -17,46 +17,87 @@ def test_GroupL1(): + # non-overlapping + groups = [(0, 1), (1, 2)] + with np.testing.assert_raises(ValueError): + copt.penalty.GroupL1(1.0, groups) + + # converts group type from tuple to list groups = [(0, 1), (2, 3)] - g1 = copt.penalty.GroupL1(1.0, groups) + pen = copt.penalty.GroupL1(1.0, groups) + for g in pen.groups: + np.testing.assert_(isinstance(g, list)) + + # same number of groups and weights + groups = [[0, 1], [2, 3]] + weights = [1, 2, 3] + with np.testing.assert_raises(ValueError): + copt.penalty.GroupL1(1.0, groups, weights) + + # evaluation of penalty and prox + x = np.array([0.01, 0.5, 3, 4]) + weights = np.array([10, .2]) + g0 = copt.penalty.GroupL1(1, groups, weights) + # eval + result = g0(x) + gt = (weights[0] * np.linalg.norm(x[groups[0]], 2) + + weights[1] * np.linalg.norm(x[groups[1]], 2)) + np.testing.assert_almost_equal(result, gt) + # prox + gt = x.copy() + # the first group has norm lower than the corresponding weight + gt[groups[0]] = 0 + # the second group has norm higher than the corresponding weight + gt[groups[1]] -= (x[groups[1]] * weights[1] / + np.linalg.norm(x[groups[1]])) + prox = g0.prox(x, 1) + np.testing.assert_almost_equal(prox, gt) + + # default weights + g1 = copt.penalty.GroupL1(1, groups) + gt = np.array([1., 1]) + np.testing.assert_almost_equal(g1.weights, gt) + + # weights: sqrt(|g|) + g2 = copt.penalty.GroupL1(1.0, groups, 'nf') + gt = np.array([np.sqrt(2), np.sqrt(2)]) + np.testing.assert_almost_equal(g2.weights, gt) + + # weights: sqrt(|g|) ** -1 + g3 = copt.penalty.GroupL1(1.0, groups, 'nfi') + gt = 1. / gt + np.testing.assert_almost_equal(g3.weights, gt) + + # custom weights + gt = np.random.rand(len(groups)) + g4 = copt.penalty.GroupL1(1.0, groups, gt) + np.testing.assert_almost_equal(g4.weights, gt) + expected = (np.linalg.norm(x[[0, 1]]) * gt[0] + + np.linalg.norm(x[[2, 3]]) * gt[1]) + np.testing.assert_almost_equal(g4(x), expected) + + # sparse proximal _, B = g1.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 0.0, -1.0], - ] - ) + gt = np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, -1.0], + ] ) + np.testing.assert_almost_equal(B.toarray(), gt) groups = [(0, 1), (3, 4)] - g2 = copt.penalty.GroupL1(1.0, groups) - _, B = g2.prox_factory(5) - assert np.all( - B.toarray() - == np.array( - [ - [1.0, 1.0, 0.0, 0.0, 0.0], - [0.0, 0.0, -1.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 1.0, 1.0], - ] - ) + g5 = copt.penalty.GroupL1(1.0, groups) + _, B = g5.prox_factory(5) + gt = np.array( + [ + [1.0, 1.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, -1.0, 0.0, 0.0], + ] ) - - -# -# for blocks in [[(0, 1), (2, 3)], ]: -# pen = cp.utils.GroupL1(1., blocks) -# counter = 0 -# for g in pen.groups: -# for j in g: -# counter += 1 -# assert counter == blocks.size -# assert pen.groups -# for g in pen.groups: -# assert np.unique(blocks[g]).size == 1 + np.testing.assert_almost_equal(B.toarray(), gt) def test_tv1_prox(): @@ -99,7 +140,8 @@ def tv_norm(x, n_rows, n_cols): for nrun in range(20): x = np.random.randn(n_features) - x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000) + x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, + max_iter=10000) diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols) testing.assert_array_less( ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj @@ -137,8 +179,8 @@ def test_three_inequality(pen): lhs = 2 * (pen(xi) - pen(u)) rhs = ( - np.linalg.norm(u - z) ** 2 - - np.linalg.norm(u - xi) ** 2 - - np.linalg.norm(xi - z) ** 2 + np.linalg.norm(u - z) ** 2 + - np.linalg.norm(u - xi) ** 2 + - np.linalg.norm(xi - z) ** 2 ) assert lhs <= rhs, pen diff --git a/tests/test_randomized.py b/tests/test_randomized.py index 34d720e2..0e7828c5 100644 --- a/tests/test_randomized.py +++ b/tests/test_randomized.py @@ -1,10 +1,10 @@ import numpy as np +import pytest from scipy import sparse + import copt as cp import copt.loss import copt.penalty -from copt import randomized -import pytest np.random.seed(0) n_samples, n_features = 20, 10 @@ -188,37 +188,37 @@ def test_gl(groups): assert np.linalg.norm(grad_map) < 1e-6 -def test_vrtos_ogl(): - """Test on overlapping group lasso""" - alpha = 1.0 / n_samples - groups_1 = [np.arange(8)] - groups_2 = [np.arange(5, 10)] - f = copt.loss.LogLoss(A, b, alpha) - for beta in np.logspace(-3, 3, 3): - p_1 = copt.penalty.GroupL1(beta, groups_1) - p_2 = copt.penalty.GroupL1(beta, groups_2) - L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density - - opt_vrtos = cp.minimize_vrtos( - f.partial_deriv, - A, - b, - np.zeros(n_features), - 1 / (3 * L), - alpha=alpha, - max_iter=200, - prox_1=p_1.prox_factory(n_features), - prox_2=p_2.prox_factory(n_features), - ) - - opt_tos = cp.minimize_three_split( - f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox - ) - - norm = np.linalg.norm(opt_tos.x) - if norm < 1e-10: - norm = 1 - assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4 +# def test_vrtos_ogl(): +# """Test on overlapping group lasso""" +# alpha = 1.0 / n_samples +# groups_1 = [np.arange(8)] +# groups_2 = [np.arange(5, 10)] +# f = copt.loss.LogLoss(A, b, alpha) +# for beta in np.logspace(-3, 3, 3): +# p_1 = copt.penalty.OGroupL1(beta, groups_1) +# p_2 = copt.penalty.OGroupL1(beta, groups_2) +# L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density +# +# opt_vrtos = cp.minimize_vrtos( +# f.partial_deriv, +# A, +# b, +# np.zeros(n_features), +# 1 / (3 * L), +# alpha=alpha, +# max_iter=200, +# prox_1=p_1.prox_factory(n_features), +# prox_2=p_2.prox_factory(n_features), +# ) +# +# opt_tos = cp.minimize_three_split( +# f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox +# ) +# +# norm = np.linalg.norm(opt_tos.x) +# if norm < 1e-10: +# norm = 1 +# assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4 @pytest.mark.parametrize("A_data", [A, A2])