From 0142593c7482f36c4889e07d7251705da09e8c13 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 17:21:41 -0500 Subject: [PATCH 01/14] Refactor `from_localarrays` to take a distribution ... optionally, instead of grabbing distribution information from the engines. Also add an optional `dtype` kwarg so that can be passed in instead of pulling it from the engines. --- distarray/client.py | 30 +++++++++++++++++++++++++----- distarray/context.py | 19 +++++++++++++------ 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/distarray/client.py b/distarray/client.py index f02f0fe8..8f50c8d0 100644 --- a/distarray/client.py +++ b/distarray/client.py @@ -109,17 +109,37 @@ def __init__(self, distribution, dtype=float): self._dtype = dtype @classmethod - def from_localarrays(cls, key, context): - """ The caller has already created the LocalArray objects. `key` is + def from_localarrays(cls, key, context=None, distribution=None, + dtype=None): + """The caller has already created the LocalArray objects. `key` is their name on the engines. This classmethod creates a DistArray that refers to these LocalArrays. + Either a `context` or a `distribution` must also be provided. If + `context` is provided, a ``dim_data_per_rank`` will be pulled from + the existing ``LocalArray``s and a ``Distribution`` will be created + from it. If `distribution` is provided, it should accurately + reflect the distribution of the existing ``LocalArray``s. + + If `dtype` is not provided, it will be fetched from the engines. """ da = cls.__new__(cls) da.key = key - da.distribution = _make_distribution_from_dim_data_per_rank(key, - context) - da._dtype = _get_attribute(context, key, 'dtype') + + if (context is None) == (distribution is None): + errmsg = "Must provide `context` or `distribution` but not both." 
+ raise RuntimeError(errmsg) + elif (distribution is not None): + da.distribution = distribution + elif (context is not None): + da.distribution = _make_distribution_from_dim_data_per_rank(key, + context) + + if dtype is None: + da._dtype = _get_attribute(context, key, 'dtype') + else: + da._dtype = dtype + return da def __del__(self): diff --git a/distarray/context.py b/distarray/context.py index 5d5e046e..2bdec85a 100644 --- a/distarray/context.py +++ b/distarray/context.py @@ -198,14 +198,21 @@ def _pull0(self, k): return self.view.pull(k, targets=self.targets[0], block=True) def _create_local(self, local_call, shape, dist, grid_shape, dtype): - """ Creates a local array, according to the method named in `local_call`.""" - shape_name, dist_name, grid_shape_name, dtype_name = \ - self._key_and_push(shape, dist, grid_shape, dtype) + """Creates LocalArrays with the method named in `local_call`.""" da_key = self._generate_key() - comm_key = self._comm_key - cmd = '{da_key} = {local_call}(distarray.local.maps.Distribution.from_shape({shape_name}, {dist_name}, {grid_shape_name}, {comm_key}), {dtype_name})' + comm_name = self._comm_key + distribution = Distribution.from_shape(context=self, + shape=shape, + dist=dist, + grid_shape=grid_shape) + ddpr = distribution.get_dim_data_per_rank() + ddpr_name, dtype_name = self._key_and_push(ddpr, dtype) + cmd = ('{da_key} = {local_call}(distarray.local.maps.Distribution(' + '{ddpr_name}[{comm_name}.Get_rank()], comm={comm_name}), ' + 'dtype={dtype_name})') self._execute(cmd.format(**locals())) - return DistArray.from_localarrays(da_key, self) + return DistArray.from_localarrays(da_key, distribution=distribution, + dtype=dtype) def zeros(self, shape, dtype=float, dist=None, grid_shape=None): if dist is None: From c73df57c99184cd4e1a2b82bcc69899f5fbdd566 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 17:55:30 -0500 Subject: [PATCH 02/14] Be more explicit about new from_localarrays call. 
--- distarray/client.py | 2 +- distarray/context.py | 8 ++++---- distarray/decorators.py | 2 +- distarray/functions.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/distarray/client.py b/distarray/client.py index 8f50c8d0..cb63128a 100644 --- a/distarray/client.py +++ b/distarray/client.py @@ -57,7 +57,7 @@ def is_LocalArray(typestring): return typestring == "" if all(is_LocalArray(r) for r in result_type_str): - result = DistArray.from_localarrays(result_key, subcontext) + result = DistArray.from_localarrays(result_key, context=subcontext) elif all(is_NoneType(r) for r in result_type_str): result = None else: diff --git a/distarray/context.py b/distarray/context.py index 2bdec85a..03ee965e 100644 --- a/distarray/context.py +++ b/distarray/context.py @@ -345,7 +345,7 @@ def load_dnpy(self, name): errmsg = "`name` must be a string or a list." raise TypeError(errmsg) - return DistArray.from_localarrays(da_key, self) + return DistArray.from_localarrays(da_key, context=self) def save_hdf5(self, filename, da, key='buffer', mode='a'): """ @@ -414,7 +414,7 @@ def load_npy(self, filename, dim_data_per_rank): '%s = distarray.local.load_npy(%s, %s[%s.Get_rank()], %s)' % subs ) - return DistArray.from_localarrays(da_key, self) + return DistArray.from_localarrays(da_key, context=self) def load_hdf5(self, filename, dim_data_per_rank, key='buffer'): """ @@ -455,7 +455,7 @@ def load_hdf5(self, filename, dim_data_per_rank, key='buffer'): '%s = distarray.local.load_hdf5(%s, %s[%s.Get_rank()], %s, %s)' % subs ) - return DistArray.from_localarrays(da_key, self) + return DistArray.from_localarrays(da_key, context=self) def fromndarray(self, arr, dist=None, grid_shape=None): """Convert an ndarray to a distarray.""" @@ -477,4 +477,4 @@ def fromfunction(self, function, shape, **kwargs): new_key = self._generate_key() subs = (new_key, func_key) + keys self._execute('%s = distarray.local.fromfunction(%s,%s,**%s)' % subs) - return DistArray.from_localarrays(new_key, 
self) + return DistArray.from_localarrays(new_key, context=self) diff --git a/distarray/decorators.py b/distarray/decorators.py index 2c3c53bf..3dd60150 100644 --- a/distarray/decorators.py +++ b/distarray/decorators.py @@ -142,7 +142,7 @@ def is_LocalArray(typestring): "LocalArray'>") if all(is_LocalArray(r) for r in result_type_str): - result = DistArray.from_localarrays(result_key, context) + result = DistArray.from_localarrays(result_key, context=context) elif all(is_NoneType(r) for r in result_type_str): result = None else: diff --git a/distarray/functions.py b/distarray/functions.py index 99f8aef3..4c625d0d 100644 --- a/distarray/functions.py +++ b/distarray/functions.py @@ -45,7 +45,7 @@ def proxy_func(a, *args, **kwargs): exec_str %= (new_key, name, a.key) context._execute(exec_str) - return DistArray.from_localarrays(new_key, context) + return DistArray.from_localarrays(new_key, context=context) return proxy_func @@ -75,7 +75,7 @@ def proxy_func(a, b, *args, **kwargs): exec_str %= (new_key, name, a_key, b_key) context._execute(exec_str) - return DistArray.from_localarrays(new_key, context) + return DistArray.from_localarrays(new_key, context=context) return proxy_func From db1d4a1539280f82db082cef9285f0ec65daadf1 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 18:53:08 -0500 Subject: [PATCH 03/14] Fix an API irregularity and a bug in client_map... 1. `dist` should be optional, with a default value of `None`, as discussed. 2. `make_grid_shape` takes a dist_tuple, not an arbitrary dist. The normalized dist (that is, a tuple), self.dist, wasn't being passed in here, the raw `dist` was. 
--- distarray/client_map.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 83a53075..69409bcf 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -422,16 +422,19 @@ def from_dim_data_per_rank(cls, context, dim_data_per_rank): return self @classmethod - def from_shape(cls, context, shape, dist, grid_shape=None): + def from_shape(cls, context, shape, dist=None, grid_shape=None): self = cls.__new__(cls) self.context = context self.shape = shape self.ndim = len(shape) + + if dist is None: + dist = {0: 'b'} self.dist = normalize_dist(dist, self.ndim) if grid_shape is None: # Make a new grid_shape if not provided. - self.grid_shape = make_grid_shape(self.shape, dist, + self.grid_shape = make_grid_shape(self.shape, self.dist, len(context.targets)) else: # Otherwise normalize the one passed in. self.grid_shape = normalize_grid_shape(grid_shape, self.ndim) From 70b028872102b9c06dfa9c01fcfd2b37f62e071b Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 18:57:51 -0500 Subject: [PATCH 04/14] Add a type check to `make_grid_shape`. Accidentally passing a dictionary into here has bitten me too many times. Apparently enumerate works on a dict, so an error won't be thrown here without the explicit check. --- distarray/metadata_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/distarray/metadata_utils.py b/distarray/metadata_utils.py index 7668cafa..13c6064f 100644 --- a/distarray/metadata_utils.py +++ b/distarray/metadata_utils.py @@ -69,6 +69,8 @@ def make_grid_shape(shape, dist, comm_size): if not possible to distribute `comm_size` processes over number of dimensions. 
""" + if not isinstance(dist, Sequence): + raise TypeError("`dist` argument should be a Sequence.") distdims = tuple(i for (i, v) in enumerate(dist) if v != 'n') ndistdim = len(distdims) From daa925de8b92b28ba467d60660f07bc508f6655c Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 19:03:18 -0500 Subject: [PATCH 05/14] Fix a bug in Distarray.from_localarrays. It didn't actually work when dtype was left None. --- distarray/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/distarray/client.py b/distarray/client.py index cb63128a..0276285a 100644 --- a/distarray/client.py +++ b/distarray/client.py @@ -131,6 +131,7 @@ def from_localarrays(cls, key, context=None, distribution=None, raise RuntimeError(errmsg) elif (distribution is not None): da.distribution = distribution + context = distribution.context elif (context is not None): da.distribution = _make_distribution_from_dim_data_per_rank(key, context) From dc4dc347c12d68c11f5936d7adb9a494e97f2cd6 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 19:04:18 -0500 Subject: [PATCH 06/14] Modify Random.rand to use the new from_localarrays ... API. 
--- distarray/random.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/distarray/random.py b/distarray/random.py index dc937b59..218e3feb 100644 --- a/distarray/random.py +++ b/distarray/random.py @@ -9,6 +9,7 @@ from distarray.client import DistArray +from distarray.client_map import Distribution class Random(object): @@ -68,17 +69,22 @@ def rand(self, size=None, dist=None, grid_shape=None): if dist is None: dist = {0: 'b'} da_key = self.context._generate_key() - size_key, dist_key, grid_shape_key = \ - self.context._key_and_push(size, dist, grid_shape) - comm_key = self.context._comm_key + + distribution = Distribution.from_shape(context=self.context, + shape=size, + dist=dist, + grid_shape=grid_shape) + ddpr = distribution.get_dim_data_per_rank() + + ddpr_name = self.context._key_and_push(ddpr)[0] + comm_name = self.context._comm_key + self.context._execute( '{da_key} = distarray.local.random.rand(' - 'distribution=distarray.local.maps.Distribution.from_shape(' - 'shape={size_key}, dist={dist_key},' - 'grid_shape={grid_shape_key}, comm={comm_key}' - '))'.format(**locals()) - ) - return DistArray.from_localarrays(da_key, self.context) + 'distribution=distarray.local.maps.Distribution(' + 'dim_data={ddpr_name}[{comm_name}.Get_rank()], ' + 'comm={comm_name}))'.format(**locals())) + return DistArray.from_localarrays(da_key, distribution=distribution) def normal(self, loc=0.0, scale=1.0, size=None, dist=None, grid_shape=None): From 56b7e79cb72a725e0256170cc77a72ac213a923b Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 19:12:10 -0500 Subject: [PATCH 07/14] Whitespace. 
--- distarray/random.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/distarray/random.py b/distarray/random.py index 218e3feb..862cfa92 100644 --- a/distarray/random.py +++ b/distarray/random.py @@ -75,10 +75,8 @@ def rand(self, size=None, dist=None, grid_shape=None): dist=dist, grid_shape=grid_shape) ddpr = distribution.get_dim_data_per_rank() - ddpr_name = self.context._key_and_push(ddpr)[0] comm_name = self.context._comm_key - self.context._execute( '{da_key} = distarray.local.random.rand(' 'distribution=distarray.local.maps.Distribution(' From 5b2776b02e049004e8c54ddac177d3138aab136e Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 19:12:34 -0500 Subject: [PATCH 08/14] Modify Random.normal to use new from_localarrays --- distarray/random.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/distarray/random.py b/distarray/random.py index 862cfa92..5030e869 100644 --- a/distarray/random.py +++ b/distarray/random.py @@ -144,18 +144,22 @@ def normal(self, loc=0.0, scale=1.0, size=None, dist=None, if dist is None: dist = {0: 'b'} da_key = self.context._generate_key() - loc_key, scale_key, size_key, dist_key, grid_shape_key = \ - self.context._key_and_push(loc, scale, size, dist, grid_shape) - comm_key = self.context._comm_key + + distribution = Distribution.from_shape(context=self.context, + shape=size, + dist=dist, + grid_shape=grid_shape) + ddpr = distribution.get_dim_data_per_rank() + loc_name, scale_name, ddpr_name = \ + self.context._key_and_push(loc, scale, ddpr) + comm_name = self.context._comm_key self.context._execute( '{da_key} = distarray.local.random.normal(' - 'loc={loc_key}, scale={scale_key},' - 'distribution=distarray.local.maps.Distribution.from_shape(' - 'shape={size_key}, dist={dist_key},' - 'grid_shape={grid_shape_key}, comm={comm_key}' - '))'.format(**locals()) - ) - return DistArray.from_localarrays(da_key, self.context) + 'loc={loc_name}, scale={scale_name},' + 
'distribution=distarray.local.maps.Distribution(' + 'dim_data={ddpr_name}[{comm_name}.Get_rank()], ' + 'comm={comm_name}))'.format(**locals())) + return DistArray.from_localarrays(da_key, distribution=distribution) def randint(self, low, high=None, size=None, dist=None, grid_shape=None): """ From e9b3432ba82f2ff6322d76ea28a3dfd041d83bfb Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 19:23:04 -0500 Subject: [PATCH 09/14] Modify Random.randint to use new from_localarrays --- distarray/random.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/distarray/random.py b/distarray/random.py index 5030e869..41711699 100644 --- a/distarray/random.py +++ b/distarray/random.py @@ -198,18 +198,22 @@ def randint(self, low, high=None, size=None, dist=None, grid_shape=None): if dist is None: dist = {0: 'b'} da_key = self.context._generate_key() - low_key, high_key, size_key, dist_key, grid_shape_key = \ - self.context._key_and_push(low, high, size, dist, grid_shape) - comm_key = self.context._comm_key + + distribution = Distribution.from_shape(context=self.context, + shape=size, + dist=dist, + grid_shape=grid_shape) + ddpr = distribution.get_dim_data_per_rank() + low_name, high_name, ddpr_name = \ + self.context._key_and_push(low, high, ddpr) + comm_name = self.context._comm_key self.context._execute( '{da_key} = distarray.local.random.randint(' - 'low={low_key}, high={high_key},' - 'distribution=distarray.local.maps.Distribution.from_shape(' - 'shape={size_key}, dist={dist_key},' - 'grid_shape={grid_shape_key}, comm={comm_key}' - '))'.format(**locals()) - ) - return DistArray.from_localarrays(da_key, self.context) + 'low={low_name}, high={high_name},' + 'distribution=distarray.local.maps.Distribution(' + 'dim_data={ddpr_name}[{comm_name}.Get_rank()], ' + 'comm={comm_name}))'.format(**locals())) + return DistArray.from_localarrays(da_key, distribution=distribution) def randn(self, size=None, dist=None, 
grid_shape=None): """ From 7272d21b47e81659f2c58b19eb5a8c13b2ea5d15 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Wed, 30 Apr 2014 19:25:43 -0500 Subject: [PATCH 10/14] Modify Random.randn to use new from_localarrays --- distarray/random.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/distarray/random.py b/distarray/random.py index 41711699..2eb101f8 100644 --- a/distarray/random.py +++ b/distarray/random.py @@ -241,14 +241,17 @@ def randn(self, size=None, dist=None, grid_shape=None): if dist is None: dist = {0: 'b'} da_key = self.context._generate_key() - size_key, dist_key, grid_shape_key = \ - self.context._key_and_push(size, dist, grid_shape) - comm_key = self.context._comm_key + + distribution = Distribution.from_shape(context=self.context, + shape=size, + dist=dist, + grid_shape=grid_shape) + ddpr = distribution.get_dim_data_per_rank() + ddpr_name = self.context._key_and_push(ddpr)[0] + comm_name = self.context._comm_key self.context._execute( '{da_key} = distarray.local.random.randn(' - 'distribution=distarray.local.maps.Distribution.from_shape(' - 'shape={size_key}, dist={dist_key},' - 'grid_shape={grid_shape_key}, comm={comm_key}' - '))'.format(**locals()) - ) - return DistArray.from_localarrays(da_key, self.context) + 'distribution=distarray.local.maps.Distribution(' + 'dim_data={ddpr_name}[{comm_name}.Get_rank()], ' + 'comm={comm_name}))'.format(**locals())) + return DistArray.from_localarrays(da_key, distribution=distribution) From ae7c65d1da096f2d795b4cdf2b6b30dd80aa0c3d Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Thu, 1 May 2014 11:26:10 -0500 Subject: [PATCH 11/14] Add a function to fill in missing dim_data keys ... for the 'n' dist_type. 
--- distarray/client_map.py | 6 +++++- distarray/metadata_utils.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 69409bcf..1bc5a188 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -34,7 +34,8 @@ normalize_grid_shape, make_grid_shape, validate_grid_shape, - _start_stop_block) + _start_stop_block, + normalize_dim_dict) def _dedup_dim_dicts(dim_dicts): @@ -394,6 +395,9 @@ def from_dim_data_per_rank(cls, context, dim_data_per_rank): self = cls.__new__(cls) dd0 = dim_data_per_rank[0] self.context = context + for dim_data in dim_data_per_rank: + for dim_dict in dim_data: + normalize_dim_dict(dim_dict) self.shape = tuple(dd['size'] for dd in dd0) self.ndim = len(dd0) self.dist = tuple(dd['dist_type'] for dd in dd0) diff --git a/distarray/metadata_utils.py b/distarray/metadata_utils.py index 13c6064f..475445e1 100644 --- a/distarray/metadata_utils.py +++ b/distarray/metadata_utils.py @@ -194,3 +194,13 @@ def distribute_indices(dd): except KeyError: msg = "dist_type %r not supported." raise TypeError(msg % dist_type) + + +def normalize_dim_dict(dd): + """Fill out some degenerate dim_dicts.""" + + # TODO: Fill out empty dim_dict alias here? + + if dd['dist_type'] == 'n': + dd['proc_grid_size'] = 1 + dd['proc_grid_rank'] = 0 From 11dd03071687a213037f674deabe56cf81426b69 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Thu, 1 May 2014 11:28:25 -0500 Subject: [PATCH 12/14] Use Distribution objs with from_localarrays... Now for functions that take a dim_data_per_rank. 
--- distarray/context.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/distarray/context.py b/distarray/context.py index 03ee965e..2f3c27e8 100644 --- a/distarray/context.py +++ b/distarray/context.py @@ -414,7 +414,9 @@ def load_npy(self, filename, dim_data_per_rank): '%s = distarray.local.load_npy(%s, %s[%s.Get_rank()], %s)' % subs ) - return DistArray.from_localarrays(da_key, context=self) + distribution = Distribution.from_dim_data_per_rank(self, + dim_data_per_rank) + return DistArray.from_localarrays(da_key, distribution=distribution) def load_hdf5(self, filename, dim_data_per_rank, key='buffer'): """ @@ -455,7 +457,10 @@ def load_hdf5(self, filename, dim_data_per_rank, key='buffer'): '%s = distarray.local.load_hdf5(%s, %s[%s.Get_rank()], %s, %s)' % subs ) - return DistArray.from_localarrays(da_key, context=self) + distribution = Distribution.from_dim_data_per_rank(self, + dim_data_per_rank) + + return DistArray.from_localarrays(da_key, distribution=distribution) def fromndarray(self, arr, dist=None, grid_shape=None): """Convert an ndarray to a distarray.""" From 5351dc98142cbf7544d71f1eb26ed622ea2b06a9 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Thu, 1 May 2014 13:17:24 -0500 Subject: [PATCH 13/14] Use new version of from_localarrays in unary_proxy --- distarray/functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/distarray/functions.py b/distarray/functions.py index 4c625d0d..209338b5 100644 --- a/distarray/functions.py +++ b/distarray/functions.py @@ -45,7 +45,8 @@ def proxy_func(a, *args, **kwargs): exec_str %= (new_key, name, a.key) context._execute(exec_str) - return DistArray.from_localarrays(new_key, context=context) + return DistArray.from_localarrays(new_key, + distribution=a.distribution) return proxy_func From 92582afced03f8328c73af33229b863f1516e109 Mon Sep 17 00:00:00 2001 From: Robert David Grant Date: Thu, 1 May 2014 13:17:34 -0500 Subject: [PATCH 14/14] Use new version of 
from_localarrays in binary_proxy --- distarray/functions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/distarray/functions.py b/distarray/functions.py index 209338b5..a4fe8568 100644 --- a/distarray/functions.py +++ b/distarray/functions.py @@ -58,12 +58,15 @@ def proxy_func(a, b, *args, **kwargs): if is_a_dap and is_b_dap: a_key = a.key b_key = b.key + distribution = a.distribution elif is_a_dap and numpy.isscalar(b): a_key = a.key b_key = context._key_and_push(b)[0] + distribution = a.distribution elif is_b_dap and numpy.isscalar(a): a_key = context._key_and_push(a)[0] b_key = b.key + distribution = b.distribution else: raise TypeError('only DistArray or scalars are accepted') new_key = context._generate_key() @@ -76,7 +79,7 @@ def proxy_func(a, b, *args, **kwargs): exec_str %= (new_key, name, a_key, b_key) context._execute(exec_str) - return DistArray.from_localarrays(new_key, context=context) + return DistArray.from_localarrays(new_key, distribution=distribution) return proxy_func