helmholtz-analytics · ClaudiaComito · Feb 19, 2021 · Mar 1, 2021 · Mar 5, 2021 · Mar 5, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,10 +10,11 @@
 - [#846](https://github.com/helmholtz-analytics/heat/pull/846) Fixed an issue in `_reduce_op` when axis and keepdim were set.
 - [#846](https://github.com/helmholtz-analytics/heat/pull/846) Fixed an issue in `min`, `max` where DNDarrays with empty processes can't be computed.
 - [#868](https://github.com/helmholtz-analytics/heat/pull/868) Fixed an issue in `__binary_op` where data was falsely distributed if a DNDarray has single element.
+- [#876](https://github.com/helmholtz-analytics/heat/pull/876) Make examples work (Lasso and kNN)
 
 ## Feature Additions
-### Linear Algebra
-- [#842](https://github.com/helmholtz-analytics/heat/pull/842) New feature: `vdot`
+- [#867](https://github.com/helmholtz-analytics/heat/pull/867) Support PyTorch 1.9.0
+- [#884](https://github.com/helmholtz-analytics/heat/pull/884) Support PyTorch 1.10.0; this is now the recommended version to use.
 
 ## Feature additions
 ### Communication
@@ -22,16 +23,20 @@
 - [#856](https://github.com/helmholtz-analytics/heat/pull/856) New `DNDarray` method `__torch_proxy__`
 - [#885](https://github.com/helmholtz-analytics/heat/pull/885) New `DNDarray` method `conj`
 
+### Factories
+- [#749](https://github.com/helmholtz-analytics/heat/pull/749) `ht.array(copy=False)` behaviour now more in line with `np.array(copy=False)`, reduced memory footprint
 # Feature additions
 ### Linear Algebra
 - [#840](https://github.com/helmholtz-analytics/heat/pull/840) New feature: `vecdot()`
+- [#842](https://github.com/helmholtz-analytics/heat/pull/842) New feature: `vdot`
 - [#846](https://github.com/helmholtz-analytics/heat/pull/846) New features `norm`, `vector_norm`, `matrix_norm`
 - [#850](https://github.com/helmholtz-analytics/heat/pull/850) New Feature `cross`
 - [#877](https://github.com/helmholtz-analytics/heat/pull/877) New feature `det`
 
 ### Logical
 - [#862](https://github.com/helmholtz-analytics/heat/pull/862) New feature `signbit`
 ### Manipulations
+- [#749](https://github.com/helmholtz-analytics/heat/pull/749) Distributed sorted `ht.unique`
 - [#829](https://github.com/helmholtz-analytics/heat/pull/829) New feature: `roll`
 - [#853](https://github.com/helmholtz-analytics/heat/pull/853) New Feature: `swapaxes`
 - [#854](https://github.com/helmholtz-analytics/heat/pull/854) New Feature: `moveaxis`
@@ -43,6 +48,7 @@
 ### Rounding
 - [#827](https://github.com/helmholtz-analytics/heat/pull/827) New feature: `sign`, `sgn`
 
+
 # v1.1.1
 - [#864](https://github.com/helmholtz-analytics/heat/pull/864) Dependencies: constrain `torchvision` version range to match supported `pytorch` version range.
 
@@ -104,6 +110,9 @@ Example on 2 processes:
 ### Linear Algebra
 - [#718](https://github.com/helmholtz-analytics/heat/pull/718) New feature: `trace()`
 - [#768](https://github.com/helmholtz-analytics/heat/pull/768) New feature: unary positive and negative operations
+
+### Manipulations
+- [#820](https://github.com/helmholtz-analytics/heat/pull/820) `dot` can handle matrix-vector operation now
 - [#820](https://github.com/helmholtz-analytics/heat/pull/820) `dot` can handle matrix-vector operation now
 
 ### Manipulations
@@ -199,6 +208,7 @@ Example on 2 processes:
 ### Manipulations
 - [#690](https://github.com/helmholtz-analytics/heat/pull/690) Enhancement: reshape accepts shape arguments with one unknown dimension.
 - [#706](https://github.com/helmholtz-analytics/heat/pull/706) Bug fix: prevent `__setitem__`, `__getitem__` from modifying key in place
+- [#744](https://github.com/helmholtz-analytics/heat/pull/744) Fix split semantics for reduction operations
 ### Unit testing / CI
 - [#717](https://github.com/helmholtz-analytics/heat/pull/717) Switch CPU CI over to Jenkins and pre-commit to GitHub action.
 - [#720](https://github.com/helmholtz-analytics/heat/pull/720) Ignore test files in codecov report and allow drops in code coverage.

diff --git a/heat/core/communication.py b/heat/core/communication.py
@@ -170,21 +170,21 @@ def chunk(
         Parameters
         ----------
         shape : Tuple[int,...]
-            The global shape of the data to be split
+            The global shape of the data to be split.
         split : int
-            The axis along which to chunk the data
+            The axis along which to chunk the data. Must be within the range of ``shape``.
         rank : int, optional
             Process for which the chunking is calculated for, defaults to ``self.rank``.
-            Intended for creating chunk maps without communication
+            Intended for creating chunk maps without communication.
         w_size : int, optional
             The MPI world size, defaults to ``self.size``.
-            Intended for creating chunk maps without communication
-
+            Intended for creating chunk maps without communication.
         """
-        # ensure the split axis is valid, we actually do not need it
-        split = sanitize_axis(shape, split)
         if split is None:
             return 0, shape, tuple(slice(0, end) for end in shape)
+        if split < 0:
+            split = len(shape) + split
+
         rank = self.rank if rank is None else rank
         w_size = self.size if w_size is None else w_size
         if not isinstance(rank, int) or not isinstance(w_size, int):
@@ -212,7 +212,7 @@ def counts_displs_shape(
         self, shape: Tuple[int], axis: int
     ) -> Tuple[Tuple[int], Tuple[int], Tuple[int]]:
         """
-        Calculates the item counts, displacements and output shape for a variable sized all-to-all MPI-call (e.g.
+        Calculates the item counts, displacements and output shape for a variable-sized all-to-all MPI-call (e.g.
         ``MPI_Alltoallv``). The passed shape is regularly chunk along the given axis and for all nodes.
 
         Parameters

diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
@@ -368,10 +368,6 @@ def get_halo(self, halo_size) -> torch.Tensor:
         halo_size : int
             Size of the halo.
         """
-        if not self.is_balanced():
-            raise RuntimeError(
-                "halo cannot be created for unbalanced tensors, running the .balance_() function is recommended"
-            )
         if not isinstance(halo_size, int):
             raise TypeError(
                 "halo_size needs to be of Python type integer, {} given".format(type(halo_size))
@@ -381,30 +377,43 @@ def get_halo(self, halo_size) -> torch.Tensor:
                 "halo_size needs to be a positive Python integer, {} given".format(type(halo_size))
             )
 
-        if self.comm.is_distributed() and self.split is not None:
+        if self.is_distributed():
             # gather lshapes
             lshape_map = self.create_lshape_map()
             rank = self.comm.rank
             size = self.comm.size
+
+            first_rank = 0
             next_rank = rank + 1
             prev_rank = rank - 1
             last_rank = size - 1
 
-            # if local shape is zero and it's the last process
+            if not self.balanced:
+                populated_ranks = torch.nonzero(lshape_map[:, 0]).squeeze().tolist()
+                if rank in populated_ranks:
+                    first_rank = populated_ranks[0]
+                    last_rank = populated_ranks[-1]
+                    next_rank = rank + 1
+                    prev_rank = rank - 1
+                    if rank != last_rank:
+                        next_rank = populated_ranks[populated_ranks.index(rank) + 1]
+                    if rank != first_rank:
+                        prev_rank = populated_ranks[populated_ranks.index(rank) - 1]
+
+            # if local shape is zero
             if self.lshape[self.split] == 0:
                 return  # if process has no data we ignore it
 
             if halo_size > self.lshape[self.split]:
                 # if on at least one process the halo_size is larger than the local size throw ValueError
                 raise ValueError(
-                    "halo_size {} needs to be smaller than chunck-size {} )".format(
+                    "halo_size {} needs to be smaller than chunk-size {} )".format(
                         halo_size, self.lshape[self.split]
                     )
                 )
 
             a_prev = self.__prephalo(0, halo_size)
             a_next = self.__prephalo(-halo_size, None)
-
             res_prev = None
             res_next = None
 
@@ -418,7 +427,7 @@ def get_halo(self, halo_size) -> torch.Tensor:
                 )
                 req_list.append(self.comm.Irecv(res_prev, source=next_rank))
 
-            if rank != 0:
+            if rank != first_rank:
                 self.comm.Isend(a_prev, prev_rank)
                 res_next = torch.zeros(
                     a_next.size(), dtype=a_next.dtype, device=self.device.torch_device

diff --git a/heat/core/linalg/tests/test_qr.py b/heat/core/linalg/tests/test_qr.py
@@ -80,7 +80,6 @@ def test_qr(self):
                 self.assertTrue(
                     ht.allclose(ht.eye(m, dtype=ht.double), qr2.Q @ qr2.Q.T, rtol=1e-5, atol=1e-5)
                 )
-
         # test if calc R alone works
         a2_0 = ht.array(st2, split=0)
         a2_1 = ht.array(st2, split=1)