diff --git a/copt/penalty.py b/copt/penalty.py
index b9774f39..34cd84c4 100644
--- a/copt/penalty.py
+++ b/copt/penalty.py
@@ -55,7 +55,7 @@ def _prox_L1(x, i, indices, indptr, d, step_size):
 
 class GroupL1:
     """
-    Group Lasso penalty
+    Group Lasso penalty for non-overlapping groups
 
     Args:
         alpha: float
@@ -63,60 +63,87 @@ class GroupL1:
 
         blocks: list of lists
 
+        weights: (optional)
+            - If not passed, each group gets the same penalty (=1). (default)
+            - If 'nf', each groups gets a penalty equal to the square root of
+              the number of features indexed in it.
+            - If 'nfi', each group gets a penalty equal to the inverse of the
+              square root of the number of features indexed in it.
+            - If iterable, the i-th group gets a penalty equal to the i-th
+              entry of the passed iterable.
+
     """
 
-    def __init__(self, alpha, groups):
+    def __init__(self, alpha, groups, weights=None):
         self.alpha = alpha
-        # groups need to be increasing
-        for i, g in enumerate(groups):
-            if not np.all(np.diff(g) == 1):
-                raise ValueError("Groups must be contiguous")
-            if i > 0 and groups[i - 1][-1] >= g[0]:
-                raise ValueError("Groups must be increasing")
-        self.groups = groups
+
+        sum_groups = np.sum([len(g) for g in groups])
+        all_indices = list(groups[0])
+        for g in groups[1:]:
+            all_indices.extend(list(g))
+        n_unique = np.unique(all_indices).size
+        if sum_groups != n_unique:
+            raise ValueError('Groups must not overlap.')
+        self.groups = [list(g) for g in groups]
+
+        if weights is None:
+            self.weights = np.ones(len(self.groups), dtype=np.float32)
+        elif isinstance(weights, str):
+            if weights == 'nf':
+                self.weights = np.asarray([np.sqrt(len(g)) for g in
+                                           self.groups])
+            elif weights == 'nfi':
+                self.weights = np.asarray([1 / np.sqrt(len(g)) for g in
+                                           self.groups])
+        else:
+            if len(weights) != len(self.groups):
+                raise ValueError('Length of weights must be equal to number '
+                                 'of groups.')
+            self.weights = np.asarray(weights)
 
     def __call__(self, x):
-        return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
+        return self.alpha * np.sum([w * np.linalg.norm(x[g]) for w, g in
+                                    zip(self.weights, self.groups)])
 
     def prox(self, x, step_size):
         out = x.copy()
-        for g in self.groups:
-
+        for w, g in zip(self.weights, self.groups):
             norm = np.linalg.norm(x[g])
-            if norm > self.alpha * step_size:
-                out[g] -= step_size * self.alpha * out[g] / norm
+            r = w * self.alpha * step_size
+            if norm > r:
+                out[g] -= r * out[g] / norm
             else:
                 out[g] = 0
         return out
 
     def prox_factory(self, n_features):
         B_data = np.zeros(n_features)
-        B_indices = np.arange(n_features, dtype=np.int32)
+        B_indices = np.zeros(n_features, dtype=np.int32)
         B_indptr = np.zeros(n_features + 1, dtype=np.int32)
 
         feature_pointer = 0
         block_pointer = 0
         for g in self.groups:
-            while feature_pointer < g[0]:
-                # non-penalized feature
-                B_data[feature_pointer] = -1.0
-                B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
-                feature_pointer += 1
-                block_pointer += 1
-            B_indptr[block_pointer + 1] = B_indptr[block_pointer]
-            for _ in g:
-                B_data[feature_pointer] = 1.0
-                B_indptr[block_pointer + 1] += 1
+            for atom in g:
+                B_data[feature_pointer] = 1.
+                B_indices[feature_pointer] = atom
                 feature_pointer += 1
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + len(g)
             block_pointer += 1
-        for _ in range(feature_pointer, n_features):
-            B_data[feature_pointer] = -1.0
-            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
+
+        excluded_indices = np.ones(n_features, dtype=np.int32)
+        excluded_indices[B_indices[: feature_pointer + 1]] = 0.
+        for i in np.where(excluded_indices)[0]:
+            B_data[feature_pointer] = -1.
+            B_indices[feature_pointer] = i
             feature_pointer += 1
+
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
             block_pointer += 1
 
         B_indptr = B_indptr[: block_pointer + 1]
         B = sparse.csr_matrix((B_data, B_indices, B_indptr))
+
         alpha = self.alpha
 
         @njit
@@ -131,7 +158,7 @@ def _prox_gl(x, i, indices, indptr, d, step_size):
                     j_idx = B_indices[j]
                     norm += x[j_idx] ** 2
                 norm = np.sqrt(norm)
-                if norm > alpha * ss:
+                if norm > alpha * ss * self.weights[h]:
                     for j in range(B_indptr[h], B_indptr[h + 1]):
                         j_idx = B_indices[j]
                         x[j_idx] *= 1 - alpha * ss / norm
@@ -304,4 +331,4 @@ def prox(self, x, step_size):
             self.n_cols,
             max_iter=self.max_iter,
             tol=self.tol,
-        )
\ No newline at end of file
+        )
diff --git a/tests/test_penalties.py b/tests/test_penalties.py
index a67de838..214f82a3 100644
--- a/tests/test_penalties.py
+++ b/tests/test_penalties.py
@@ -1,10 +1,10 @@
 import numpy as np
-import copt as cp
+import pytest
+from numpy import testing
+
 import copt.constraint
 import copt.penalty
 from copt import tv_prox
-from numpy import testing
-import pytest
 
 proximal_penalties = [
     copt.penalty.L1Norm(1.0),
@@ -17,46 +17,87 @@
 
 
 def test_GroupL1():
+    # non-overlapping
+    groups = [(0, 1), (1, 2)]
+    with np.testing.assert_raises(ValueError):
+        copt.penalty.GroupL1(1.0, groups)
+
+    # converts group type from tuple to list
     groups = [(0, 1), (2, 3)]
-    g1 = copt.penalty.GroupL1(1.0, groups)
+    pen = copt.penalty.GroupL1(1.0, groups)
+    for g in pen.groups:
+        np.testing.assert_(isinstance(g, list))
+
+    # same number of groups and weights
+    groups = [[0, 1], [2, 3]]
+    weights = [1, 2, 3]
+    with np.testing.assert_raises(ValueError):
+        copt.penalty.GroupL1(1.0, groups, weights)
+
+    # evaluation of penalty and prox
+    x = np.array([0.01, 0.5, 3, 4])
+    weights = np.array([10, .2])
+    g0 = copt.penalty.GroupL1(1, groups, weights)
+    # eval
+    result = g0(x)
+    gt = (weights[0] * np.linalg.norm(x[groups[0]], 2) +
+          weights[1] * np.linalg.norm(x[groups[1]], 2))
+    np.testing.assert_almost_equal(result, gt)
+    # prox
+    gt = x.copy()
+    # the first group has norm lower than the corresponding weight
+    gt[groups[0]] = 0
+    # the second group has norm higher than the corresponding weight
+    gt[groups[1]] -= (x[groups[1]] * weights[1] /
+                      np.linalg.norm(x[groups[1]]))
+    prox = g0.prox(x, 1)
+    np.testing.assert_almost_equal(prox, gt)
+
+    # default weights
+    g1 = copt.penalty.GroupL1(1, groups)
+    gt = np.array([1., 1])
+    np.testing.assert_almost_equal(g1.weights, gt)
+
+    # weights: sqrt(|g|)
+    g2 = copt.penalty.GroupL1(1.0, groups, 'nf')
+    gt = np.array([np.sqrt(2), np.sqrt(2)])
+    np.testing.assert_almost_equal(g2.weights, gt)
+
+    # weights: sqrt(|g|) ** -1
+    g3 = copt.penalty.GroupL1(1.0, groups, 'nfi')
+    gt = 1. / gt
+    np.testing.assert_almost_equal(g3.weights, gt)
+
+    # custom weights
+    gt = np.random.rand(len(groups))
+    g4 = copt.penalty.GroupL1(1.0, groups, gt)
+    np.testing.assert_almost_equal(g4.weights, gt)
+    expected = (np.linalg.norm(x[[0, 1]]) * gt[0] +
+                np.linalg.norm(x[[2, 3]]) * gt[1])
+    np.testing.assert_almost_equal(g4(x), expected)
+
+    # sparse proximal
     _, B = g1.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, 1.0, 1.0, 0.0],
-                [0.0, 0.0, 0.0, 0.0, -1.0],
-            ]
-        )
+    gt = np.array(
+        [
+            [1.0, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 0.0, -1.0],
+        ]
     )
+    np.testing.assert_almost_equal(B.toarray(), gt)
 
     groups = [(0, 1), (3, 4)]
-    g2 = copt.penalty.GroupL1(1.0, groups)
-    _, B = g2.prox_factory(5)
-    assert np.all(
-        B.toarray()
-        == np.array(
-            [
-                [1.0, 1.0, 0.0, 0.0, 0.0],
-                [0.0, 0.0, -1.0, 0.0, 0.0],
-                [0.0, 0.0, 0.0, 1.0, 1.0],
-            ]
-        )
+    g5 = copt.penalty.GroupL1(1.0, groups)
+    _, B = g5.prox_factory(5)
+    gt = np.array(
+        [
+            [1.0, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 0.0, 1.0, 1.0],
+            [0.0, 0.0, -1.0, 0.0, 0.0],
+        ]
     )
-
-
-#
-#     for blocks in [[(0, 1), (2, 3)], ]:
-#         pen = cp.utils.GroupL1(1., blocks)
-#         counter = 0
-#         for g in pen.groups:
-#             for j in g:
-#                 counter += 1
-#         assert counter == blocks.size
-#         assert pen.groups
-#         for g in pen.groups:
-#             assert np.unique(blocks[g]).size == 1
+    np.testing.assert_almost_equal(B.toarray(), gt)
 
 
 def test_tv1_prox():
@@ -99,7 +140,8 @@ def tv_norm(x, n_rows, n_cols):
 
     for nrun in range(20):
         x = np.random.randn(n_features)
-        x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000)
+        x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10,
+                                   max_iter=10000)
         diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols)
         testing.assert_array_less(
             ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj
@@ -137,8 +179,8 @@ def test_three_inequality(pen):
 
         lhs = 2 * (pen(xi) - pen(u))
         rhs = (
-            np.linalg.norm(u - z) ** 2
-            - np.linalg.norm(u - xi) ** 2
-            - np.linalg.norm(xi - z) ** 2
+                np.linalg.norm(u - z) ** 2
+                - np.linalg.norm(u - xi) ** 2
+                - np.linalg.norm(xi - z) ** 2
         )
         assert lhs <= rhs, pen
diff --git a/tests/test_randomized.py b/tests/test_randomized.py
index 34d720e2..0e7828c5 100644
--- a/tests/test_randomized.py
+++ b/tests/test_randomized.py
@@ -1,10 +1,10 @@
 import numpy as np
+import pytest
 from scipy import sparse
+
 import copt as cp
 import copt.loss
 import copt.penalty
-from copt import randomized
-import pytest
 
 np.random.seed(0)
 n_samples, n_features = 20, 10
@@ -188,37 +188,37 @@ def test_gl(groups):
             assert np.linalg.norm(grad_map) < 1e-6
 
 
-def test_vrtos_ogl():
-    """Test on overlapping group lasso"""
-    alpha = 1.0 / n_samples
-    groups_1 = [np.arange(8)]
-    groups_2 = [np.arange(5, 10)]
-    f = copt.loss.LogLoss(A, b, alpha)
-    for beta in np.logspace(-3, 3, 3):
-        p_1 = copt.penalty.GroupL1(beta, groups_1)
-        p_2 = copt.penalty.GroupL1(beta, groups_2)
-        L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
-
-        opt_vrtos = cp.minimize_vrtos(
-            f.partial_deriv,
-            A,
-            b,
-            np.zeros(n_features),
-            1 / (3 * L),
-            alpha=alpha,
-            max_iter=200,
-            prox_1=p_1.prox_factory(n_features),
-            prox_2=p_2.prox_factory(n_features),
-        )
-
-        opt_tos = cp.minimize_three_split(
-            f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
-        )
-
-        norm = np.linalg.norm(opt_tos.x)
-        if norm < 1e-10:
-            norm = 1
-        assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4
+# def test_vrtos_ogl():
+#     """Test on overlapping group lasso"""
+#     alpha = 1.0 / n_samples
+#     groups_1 = [np.arange(8)]
+#     groups_2 = [np.arange(5, 10)]
+#     f = copt.loss.LogLoss(A, b, alpha)
+#     for beta in np.logspace(-3, 3, 3):
+#         p_1 = copt.penalty.OGroupL1(beta, groups_1)
+#         p_2 = copt.penalty.OGroupL1(beta, groups_2)
+#         L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
+#
+#         opt_vrtos = cp.minimize_vrtos(
+#             f.partial_deriv,
+#             A,
+#             b,
+#             np.zeros(n_features),
+#             1 / (3 * L),
+#             alpha=alpha,
+#             max_iter=200,
+#             prox_1=p_1.prox_factory(n_features),
+#             prox_2=p_2.prox_factory(n_features),
+#         )
+#
+#         opt_tos = cp.minimize_three_split(
+#             f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
+#         )
+#
+#         norm = np.linalg.norm(opt_tos.x)
+#         if norm < 1e-10:
+#             norm = 1
+#         assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4
 
 
 @pytest.mark.parametrize("A_data", [A, A2])