Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
68e6e31
WIP: Add failing test.
bgrant May 27, 2014
ac301b9
Add a comment.
bgrant May 27, 2014
992fad5
Already works for a setitem that doesn't span procs.
bgrant May 27, 2014
c03b560
Add a new failing test.
bgrant May 27, 2014
b38afa3
Add `__setitem__` slicing.
bgrant May 27, 2014
d59e31e
Make work for 2d slices.
bgrant May 27, 2014
23afae0
Add more setitem slice tests.
bgrant May 27, 2014
ec659ad
Add more tests.
bgrant May 27, 2014
5ddd354
Make setUp and tearDown classmethods.
bgrant May 27, 2014
858ac21
Remove completed TODO comment.
bgrant May 27, 2014
342f19f
Convert a non-array rvalue to array.
bgrant May 27, 2014
6c7e84a
Add failing ValueError test.
bgrant May 27, 2014
df4f699
Remove an obsolete comment.
bgrant May 27, 2014
29a33a7
Raise an IndexError instead of a TypeError...
bgrant May 27, 2014
c78e8d2
Raise a ValueError if rvalue shape is incorrect...
bgrant May 27, 2014
572b1a4
Merge branch 'feature/add-ellipsis-support-to-slicing' into feature/s…
bgrant May 29, 2014
8e2c109
Merge branch 'feature/add-ellipsis-support-to-slicing' into feature/s…
bgrant Jun 5, 2014
0d6cb59
Merge branch 'feature/add-ellipsis-support-to-slicing' into feature/s…
bgrant Jun 12, 2014
2d75aa4
Add test for @kwmsmith's "strange behavior".
bgrant Jun 12, 2014
581ae6f
Fix typo in comment.
bgrant Jun 12, 2014
fef6142
Add failing test from @kwmsmith.
bgrant Jun 12, 2014
085cc31
Improve failing test.
bgrant Jun 12, 2014
bd01d1f
Fix DistArrays as rvalues in slicing setitem.
bgrant Jun 12, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 53 additions & 13 deletions distarray/dist/distarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,14 +193,7 @@ def get_slice(arr, index, ddpr, comm):
result = self.context.apply(local_fn, args=args, targets=targets)
return self._process_return_value(result, return_proxy, index, targets)


def __setitem__(self, index, value):
#TODO: FIXME: major performance improvements possible here.
# Especially when `index == slice(None)` and value is an
# ndarray, since for block and cyclic, we can generate slices of
# `value` and assign to local arrays. This would dramatically
# improve the fromndarray method's performance.

# to be run locally
def checked_setitem(arr, index, value):
    """Forward the assignment to ``arr.global_index.checked_setitem``.

    Whatever that call returns is passed back unchanged so the caller can
    inspect the per-target results.
    """
    global_view = arr.global_index
    outcome = global_view.checked_setitem(index, value)
    return outcome
Expand All @@ -209,15 +202,62 @@ def checked_setitem(arr, index, value):
def raw_setitem(arr, index, value):
    """Assign ``value`` at ``index`` directly through ``arr.global_index``.

    Nothing is returned.
    """
    global_view = arr.global_index
    global_view[index] = value

_, index = sanitize_indices(index, ndim=self.ndim, shape=self.shape)
# to be run locally
def set_slice(arr, index, value, value_slices):
    """Assign this rank's portion of ``value`` into ``arr`` at ``index``.

    ``value_slices`` is indexed by ``arr.comm_rank`` to find the piece of
    ``value`` meant for this rank.  When ``value`` is already a
    ``LocalArray``, its ``ndarray`` is assigned whole and the looked-up
    slice is not applied.
    """
    # Imported here rather than at module level, as in the original helper.
    from distarray.local.localarray import LocalArray

    rank_slice = value_slices[arr.comm_rank]
    payload = (value.ndarray if isinstance(value, LocalArray)
               else value[rank_slice])
    arr.global_index[index] = payload

set_type, index = sanitize_indices(index, ndim=self.ndim,
shape=self.shape)

targets = self.distribution.owning_targets(index)
args = (self.key, index, value)
args = [self.key, index, value]
if self.distribution.has_precise_index:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This block could use some refactoring. What about adding the relevant polymorphic methods to the client-side map classes and to the client-side Distribution to help with this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.

self.context.apply(raw_setitem, args=args, targets=targets)
else:
result = self.context.apply(checked_setitem, args=args,
targets=targets)
if set_type == 'value':
local_fn = raw_setitem
elif set_type == 'view':
new_distribution = self.distribution.slice(index)
# this could be made more efficient
# we only need the bounds computed by distribution.slice
if isinstance(args[-1], DistArray):
if not args[-1].distribution.is_compatible(
new_distribution):
msg = "rvalue Distribution not compatible."
raise ValueError(msg)
args[-1] = args[-1].key
else:
args[-1] = np.asarray(args[-1]) # convert to array
if args[-1].shape != new_distribution.shape:
msg = "Slice shape does not equal rvalue shape."
raise ValueError(msg)
ddpr = new_distribution.get_dim_data_per_rank()
def bounds_slice(dd):
    """Build a slice from one dimension dictionary.

    Parameters
    ----------
    dd : mapping
        Must contain ``'dist_type'``.  For ``'b'`` the ``'start'`` and
        ``'stop'`` entries are used; for ``'n'`` the ``'size'`` entry.

    Returns
    -------
    slice
        ``slice(start, stop)`` for ``'b'``, ``slice(0, size)`` for ``'n'``.

    Raises
    ------
    TypeError
        For any other ``'dist_type'``.
    """
    kind = dd['dist_type']
    if kind == 'b':
        return slice(dd['start'], dd['stop'])
    if kind == 'n':
        return slice(0, dd['size'])
    msg = "Function only works for 'n' and 'b' 'dist_type's"
    raise TypeError(msg)
value_slices = [tuple(bounds_slice(dd) for dd in dim_data)
for dim_data in ddpr]
# but we need a data structure indexable by a target's rank
# assume contiguous range of targets here
value_slices_per_target = [None] * len(self.targets)
value_slices_per_target[targets[0]:targets[-1]] = value_slices
args.append(value_slices_per_target)
local_fn = set_slice
else:
assert False
self.context.apply(local_fn, args=args, targets=targets)

else: # setting unstructured elements
local_fn = checked_setitem
result = self.context.apply(local_fn, args=args, targets=targets)
result = [i for i in result if i is not None]
if len(result) > 1:
raise IndexError("Setting more than one result (%s) is "
Expand Down
107 changes: 107 additions & 0 deletions distarray/dist/tests/test_distarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,113 @@ def test_0d_ellipsis(self):
assert_array_equal(arr[...].toarray(),
expected[...])

def test_resulting_slice(self):
    """A slice of a DistArray can itself be used in arithmetic."""
    distribution = Distribution.from_shape(self.context, (10, 20))
    ones = self.context.ones(distribution)
    # Slice first, then operate on the slice.
    sliced = ones[:5, :10]
    doubled = sliced * 2
    expected = numpy.ones(doubled.shape) * 2
    assert_array_equal(doubled.toarray(), expected)


class TestSetItemSlicing(ContextTestCase):
    """Slice assignment on a DistArray, checked against NumPy.

    Most tests perform the same assignment on a NumPy array and on a
    DistArray built from it, then require the two to be equal.
    """

    def _check_setitem(self, source, new_data, slc):
        """Assign ``new_data`` at ``slc`` on both arrays and compare.

        The DistArray is created from ``source`` before ``source`` is
        modified, so the NumPy result serves as the expected value.
        """
        arr = self.context.fromarray(source)
        source[slc] = new_data
        arr[slc] = new_data
        assert_array_equal(arr.toarray(), source)

    def test_small_1d_slice(self):
        source = numpy.random.randint(10, size=20)
        new_data = numpy.random.randint(10, size=3)
        self._check_setitem(source, new_data, slice(1, 4))

    def test_large_1d_slice(self):
        source = numpy.random.randint(10, size=20)
        new_data = numpy.random.randint(10, size=10)
        self._check_setitem(source, new_data, slice(5, 15))

    def test_2d_slice_0(self):
        # on process boundaries
        source = numpy.random.randint(10, size=(10, 20))
        new_data = numpy.random.randint(10, size=(5, 10))
        self._check_setitem(source, new_data, (slice(5, 10), slice(5, 15)))

    def test_2d_slice_1(self):
        # off process boundaries
        source = numpy.random.randint(10, size=(10, 20))
        new_data = numpy.random.randint(10, size=(5, 10))
        self._check_setitem(source, new_data, (slice(3, 8), slice(9, 19)))

    def test_full_3d_slice(self):
        source = numpy.random.randint(10, size=(3, 4, 5))
        new_data = numpy.random.randint(10, size=(3, 4, 5))
        slc = (slice(None), slice(None), slice(None))
        self._check_setitem(source, new_data, slc)

    def test_full_3d_slice_ellipsis(self):
        source = numpy.random.randint(10, size=(3, 4, 5))
        new_data = numpy.random.randint(10, size=(3, 4, 5))
        self._check_setitem(source, new_data, Ellipsis)

    def test_partial_indexing_0(self):
        source = numpy.random.randint(10, size=(3, 4, 5))
        new_data = numpy.random.randint(10, size=(4, 5))
        self._check_setitem(source, new_data, (1,))

    def test_partial_indexing_1(self):
        source = numpy.random.randint(10, size=(3, 4, 5))
        new_data = numpy.random.randint(10, size=(3, 5))
        self._check_setitem(source, new_data, (slice(None), 2))

    def test_non_array_data(self):
        # The rvalue is a plain list rather than an ndarray.
        source = numpy.random.randint(10, size=(3, 4))
        new_data = [42, 42, 42, 42]
        self._check_setitem(source, new_data, (2,))

    def test_valueerror(self):
        # The slice selects 6 elements but the rvalue has 10.
        source = numpy.random.randint(10, size=21)
        new_data = numpy.random.randint(10, size=10)
        slc = slice(15, None)
        arr = self.context.fromarray(source)
        with self.assertRaises(ValueError):
            arr[slc] = new_data

    def test_set_DistArray_slice(self):
        dist = Distribution.from_shape(self.context, (10, 20))
        da = self.context.ones(dist)
        db = self.context.zeros(dist)
        da[...] = db
        # Previously this test only checked that the assignment did not
        # raise; also verify that the values were actually copied.
        assert_array_equal(da.toarray(), numpy.zeros((10, 20)))


class TestDistArrayCreationFromGlobalDimData(ContextTestCase):

Expand Down
4 changes: 1 addition & 3 deletions distarray/metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,6 @@ def positivify(index, size):
------
IndexError
for out-of-bounds indices
NotImplementedError
for negative steps
"""
if isinstance(index, Integral):
index = _positivify(index, size)
Expand Down Expand Up @@ -366,7 +364,7 @@ def sanitize_indices(indices, ndim=None, shape=None):
else:
msg = ("Index must be an Integral, a slice, or a sequence of "
"Integrals and slices.")
raise TypeError(msg)
raise IndexError(msg)

if Ellipsis in sanitized:
if ndim is None:
Expand Down