From fde6ee127a95a7500f46a59dc9d66624f117e9f4 Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 10:44:12 -0500 Subject: [PATCH 01/10] ClientMDMap -> Distribution --- distarray/client.py | 4 ++-- distarray/client_map.py | 14 +++++++------- distarray/context.py | 5 ++--- distarray/tests/test_client.py | 6 +++--- distarray/tests/test_client_map.py | 6 +++--- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/distarray/client.py b/distarray/client.py index f1936758..c1a79d9c 100644 --- a/distarray/client.py +++ b/distarray/client.py @@ -18,7 +18,7 @@ import numpy as np import distarray -from distarray.client_map import ClientMDMap +from distarray.client_map import Distribution from distarray.externals.six import next from distarray.utils import has_exactly_one, _raise_nie @@ -75,7 +75,7 @@ def _make_mdmap_from_local_dimdata(local_name, context): dim_data_name = context._generate_key() context._execute(_DIMDATAS.format(local_name=local_name, dim_data_name=dim_data_name)) dim_datas = context._pull(dim_data_name) - return ClientMDMap.from_dim_data(context, dim_datas) + return Distribution.from_dim_data(context, dim_datas) def _get_attribute(context, key, name): local_key = context._generate_key() diff --git a/distarray/client_map.py b/distarray/client_map.py index d4280a7e..a2919585 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -5,15 +5,15 @@ # --------------------------------------------------------------------------- """ -ClientMDMap class and auxiliary ClientMap classes. +Distribution class and auxiliary ClientMap classes. -The ClientMDMap is a multi-dimensional map class that manages the -one-dimensional maps for each DistArray dimension. The ClientMDMap class +The Distribution is a multi-dimensional map class that manages the +one-dimensional maps for each DistArray dimension. The Distribution class represents the *distribution* information for a distributed array, independent -of the distributed array's *data*. 
ClientMDMaps allow DistArrays to reduce +of the distributed array's *data*. Distributions allow DistArrays to reduce overall communication when indexing and slicing by determining which processes own (or may possibly own) the indices in question. Two DistArray objects can -share the same ClientMDMap if they have the exact same distribution. +share the same Distribution if they have the exact same distribution. The one-dimensional ClientMap classes keep track of which process owns which index in that dimension. This class has several subclasses for specific @@ -360,7 +360,7 @@ def map_from_global_dim_dict(global_dim_dict): return selector[dist_type](global_dim_dict) -class ClientMDMap(object): +class Distribution(object): """ Governs the mapping between global indices and process ranks for multi-dimensional objects. @@ -385,7 +385,7 @@ def from_global_dim_data(cls, context, glb_dim_data): @classmethod def from_dim_data(cls, context, dim_datas): - """ Creates a ClientMDMap from a sequence of `dim_data` dictionary + """ Creates a Distribution from a sequence of `dim_data` dictionary tuples from each LocalArray. """ diff --git a/distarray/context.py b/distarray/context.py index 679278a4..1434f341 100644 --- a/distarray/context.py +++ b/distarray/context.py @@ -18,8 +18,7 @@ from distarray import cleanup from distarray.externals import six from distarray.client import DistArray -from distarray.client_map import ClientMDMap - +from distarray.client_map import Distribution from distarray.ipython_utils import IPythonClient DISTARRAY_BASE_NAME = '__distarray__' @@ -310,7 +309,7 @@ def from_global_dim_data(self, global_dim_data, dtype=float): """ # global_dim_data is a sequence of dictionaries, one per dimension. 
- mdmap = ClientMDMap.from_global_dim_data(self, global_dim_data) + mdmap = Distribution.from_global_dim_data(self, global_dim_data) dim_data_per_rank = mdmap.get_local_dim_datas() if len(self.targets) != len(dim_data_per_rank): diff --git a/distarray/tests/test_client.py b/distarray/tests/test_client.py index 98877701..11d19d5c 100644 --- a/distarray/tests/test_client.py +++ b/distarray/tests/test_client.py @@ -19,7 +19,7 @@ from distarray.externals.six.moves import range from distarray.client import DistArray -from distarray.client_map import ClientMDMap +from distarray.client_map import Distribution from distarray.context import Context @@ -233,7 +233,7 @@ def test_global_dim_data_local_dim_data_equivalence(self): 'size': cols, }, ) - mdmap = ClientMDMap.from_global_dim_data(self.context, glb_dim_data) + mdmap = Distribution.from_global_dim_data(self.context, glb_dim_data) actual = mdmap.get_local_dim_datas() expected = [ @@ -318,7 +318,7 @@ def tearDown(self): def test___init__(self): shape = (100, 100) - mdmap = ClientMDMap(self.context, shape, ('b', 'c')) + mdmap = Distribution(self.context, shape, ('b', 'c')) da = DistArray(mdmap, dtype=int) da.fill(42) nda = numpy.empty(shape, dtype=int) diff --git a/distarray/tests/test_client_map.py b/distarray/tests/test_client_map.py index 84d482cc..8137ac0a 100644 --- a/distarray/tests/test_client_map.py +++ b/distarray/tests/test_client_map.py @@ -24,7 +24,7 @@ def tearDown(self): def test_2D_bn(self): nrows, ncols = 31, 53 - cm = client_map.ClientMDMap(self.ctx, (nrows, ncols), {0:'b'}, (4,1)) + cm = client_map.Distribution(self.ctx, (nrows, ncols), {0:'b'}, (4,1)) chunksize = (nrows // 4) + 1 for _ in range(100): r, c = randrange(nrows), randrange(ncols) @@ -34,7 +34,7 @@ def test_2D_bn(self): def test_2D_bb(self): nrows, ncols = 3, 5 nprocs_per_dim = 2 - cm = client_map.ClientMDMap(self.ctx, (nrows, ncols), ('b', 'b'), + cm = client_map.Distribution(self.ctx, (nrows, ncols), ('b', 'b'), (nprocs_per_dim, 
nprocs_per_dim)) row_chunks = nrows // nprocs_per_dim + 1 col_chunks = ncols // nprocs_per_dim + 1 @@ -47,7 +47,7 @@ def test_2D_bb(self): def test_2D_cc(self): nrows, ncols = 3, 5 nprocs_per_dim = 2 - cm = client_map.ClientMDMap(self.ctx, (nrows, ncols), ('c', 'c'), + cm = client_map.Distribution(self.ctx, (nrows, ncols), ('c', 'c'), (nprocs_per_dim, nprocs_per_dim)) for r in range(nrows): for c in range(ncols): From 312ab0e99534fb4b902c098c94fc09fbada5d1dc Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:06:47 -0500 Subject: [PATCH 02/10] ClientMapBase -> MapBase --- distarray/client_map.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index a2919585..0c6f5579 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -38,7 +38,7 @@ def client_map_factory(size, dist, grid_size): - """ Returns an instance of the appropriate subclass of ClientMapBase. + """ Returns an instance of the appropriate subclass of MapBase. """ cls_from_dist = { 'b': ClientBlockMap, @@ -52,14 +52,14 @@ def client_map_factory(size, dist, grid_size): @add_metaclass(ABCMeta) -class ClientMapBase(object): +class MapBase(object): """ Base class for one-dimensional client-side maps. Maps keep track of the relevant distribution information for a single dimension of a distributed array. Maps allow distributed arrays to keep track of which process to talk to when indexing and slicing. - Classes that inherit from `ClientMapBase` must implement the `owners()` + Classes that inherit from `MapBase` must implement the `owners()` abstractmethod. 
""" @@ -75,7 +75,7 @@ def owners(self, idx): raise IndexError() -class ClientNoDistMap(ClientMapBase): +class ClientNoDistMap(MapBase): dist = 'n' @@ -118,7 +118,7 @@ def get_dimdicts(self): },) -class ClientBlockMap(ClientMapBase): +class ClientBlockMap(MapBase): dist = 'b' @@ -190,7 +190,7 @@ def get_dimdicts(self): }) for grid_rank, padding, (start, stop) in data_tuples) -class ClientBlockCyclicMap(ClientMapBase): +class ClientBlockCyclicMap(MapBase): dist = 'c' @@ -238,7 +238,7 @@ def get_dimdicts(self): }) for grid_rank in range(self.grid_size)) -class ClientUnstructuredMap(ClientMapBase): +class ClientUnstructuredMap(MapBase): dist = 'u' @@ -327,7 +327,7 @@ def map_from_dim_datas(dim_datas): Returns ------- - An instance of a subclass of ClientMapBase. + An instance of a subclass of MapBase. """ # check that all proccesses / ranks are accounted for. From bb4ca807a97685a5ef6ed2c15d62f422a84e1cab Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:08:21 -0500 Subject: [PATCH 03/10] ClientNoDistMap -> NoDistMap --- distarray/client_map.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 0c6f5579..4ac9f0e9 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -43,7 +43,7 @@ def client_map_factory(size, dist, grid_size): cls_from_dist = { 'b': ClientBlockMap, 'c': ClientBlockCyclicMap, - 'n': ClientNoDistMap, + 'n': NoDistMap, 'u': ClientUnstructuredMap, } if dist not in cls_from_dist: @@ -75,7 +75,7 @@ def owners(self, idx): raise IndexError() -class ClientNoDistMap(MapBase): +class NoDistMap(MapBase): dist = 'n' @@ -103,7 +103,7 @@ def from_dim_data(cls, dim_data_seq): def __init__(self, size, grid_size): if grid_size != 1: - msg = "grid_size for ClientNoDistMap must be 1 (given %s)" + msg = "grid_size for NoDistMap must be 1 (given %s)" raise ValueError(msg % grid_size) self.size = size @@ -339,7 +339,7 @@ def map_from_dim_datas(dim_datas): 
dim_datas = sorted(dim_datas, key=lambda d: d['proc_grid_rank']) dist_type = dim_datas[0]['dist_type'] - selector = {'n': ClientNoDistMap.from_dim_data, + selector = {'n': NoDistMap.from_dim_data, 'b': ClientBlockMap.from_dim_data, 'c': ClientBlockCyclicMap.from_dim_data, 'u': ClientUnstructuredMap.from_dim_data} @@ -350,7 +350,7 @@ def map_from_dim_datas(dim_datas): def map_from_global_dim_dict(global_dim_dict): dist_type = global_dim_dict['dist_type'] - selector = {'n': ClientNoDistMap.from_global_dim_dict, + selector = {'n': NoDistMap.from_global_dim_dict, 'b': ClientBlockMap.from_global_dim_dict, 'c': ClientBlockCyclicMap.from_global_dim_dict, 'u': ClientUnstructuredMap.from_global_dim_dict, From e5ca47a4b081cc7677f488f03a89ce34c1bf7cba Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:09:29 -0500 Subject: [PATCH 04/10] ClientBlockMap -> BlockMap --- distarray/client_map.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 4ac9f0e9..19b63158 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -41,7 +41,7 @@ def client_map_factory(size, dist, grid_size): """ Returns an instance of the appropriate subclass of MapBase. 
""" cls_from_dist = { - 'b': ClientBlockMap, + 'b': BlockMap, 'c': ClientBlockCyclicMap, 'n': NoDistMap, 'u': ClientUnstructuredMap, @@ -118,7 +118,7 @@ def get_dimdicts(self): },) -class ClientBlockMap(MapBase): +class BlockMap(MapBase): dist = 'b' @@ -340,7 +340,7 @@ def map_from_dim_datas(dim_datas): dist_type = dim_datas[0]['dist_type'] selector = {'n': NoDistMap.from_dim_data, - 'b': ClientBlockMap.from_dim_data, + 'b': BlockMap.from_dim_data, 'c': ClientBlockCyclicMap.from_dim_data, 'u': ClientUnstructuredMap.from_dim_data} if dist_type not in selector: @@ -351,7 +351,7 @@ def map_from_global_dim_dict(global_dim_dict): dist_type = global_dim_dict['dist_type'] selector = {'n': NoDistMap.from_global_dim_dict, - 'b': ClientBlockMap.from_global_dim_dict, + 'b': BlockMap.from_global_dim_dict, 'c': ClientBlockCyclicMap.from_global_dim_dict, 'u': ClientUnstructuredMap.from_global_dim_dict, } From 6178e4db2d8bf60af91c591dc9e9d8e68acc21f3 Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:10:30 -0500 Subject: [PATCH 05/10] ClientBlockCyclicMap -> BlockCyclicMap --- distarray/client_map.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 19b63158..1de7ae5e 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -42,7 +42,7 @@ def client_map_factory(size, dist, grid_size): """ cls_from_dist = { 'b': BlockMap, - 'c': ClientBlockCyclicMap, + 'c': BlockCyclicMap, 'n': NoDistMap, 'u': ClientUnstructuredMap, } @@ -190,7 +190,7 @@ def get_dimdicts(self): }) for grid_rank, padding, (start, stop) in data_tuples) -class ClientBlockCyclicMap(MapBase): +class BlockCyclicMap(MapBase): dist = 'c' @@ -341,7 +341,7 @@ def map_from_dim_datas(dim_datas): dist_type = dim_datas[0]['dist_type'] selector = {'n': NoDistMap.from_dim_data, 'b': BlockMap.from_dim_data, - 'c': ClientBlockCyclicMap.from_dim_data, + 'c': BlockCyclicMap.from_dim_data, 'u': 
ClientUnstructuredMap.from_dim_data} if dist_type not in selector: raise ValueError("Unknown dist_type %r" % dist_type) @@ -352,7 +352,7 @@ def map_from_global_dim_dict(global_dim_dict): dist_type = global_dim_dict['dist_type'] selector = {'n': NoDistMap.from_global_dim_dict, 'b': BlockMap.from_global_dim_dict, - 'c': ClientBlockCyclicMap.from_global_dim_dict, + 'c': BlockCyclicMap.from_global_dim_dict, 'u': ClientUnstructuredMap.from_global_dim_dict, } if dist_type not in selector: From 925b02a9cf3403f25bdf2de89668ea5eeae66e33 Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:13:42 -0500 Subject: [PATCH 06/10] ClientUnstructuredMap -> UnstructuredMap --- distarray/client_map.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 1de7ae5e..f6e19839 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -44,7 +44,7 @@ def client_map_factory(size, dist, grid_size): 'b': BlockMap, 'c': BlockCyclicMap, 'n': NoDistMap, - 'u': ClientUnstructuredMap, + 'u': UnstructuredMap, } if dist not in cls_from_dist: raise ValueError("unknown distribution type for %r" % dist) @@ -238,7 +238,7 @@ def get_dimdicts(self): }) for grid_rank in range(self.grid_size)) -class ClientUnstructuredMap(MapBase): +class UnstructuredMap(MapBase): dist = 'u' @@ -342,7 +342,7 @@ def map_from_dim_datas(dim_datas): selector = {'n': NoDistMap.from_dim_data, 'b': BlockMap.from_dim_data, 'c': BlockCyclicMap.from_dim_data, - 'u': ClientUnstructuredMap.from_dim_data} + 'u': UnstructuredMap.from_dim_data} if dist_type not in selector: raise ValueError("Unknown dist_type %r" % dist_type) return selector[dist_type](dim_datas) @@ -353,7 +353,7 @@ def map_from_global_dim_dict(global_dim_dict): selector = {'n': NoDistMap.from_global_dim_dict, 'b': BlockMap.from_global_dim_dict, 'c': BlockCyclicMap.from_global_dim_dict, - 'u': ClientUnstructuredMap.from_global_dim_dict, + 'u': 
UnstructuredMap.from_global_dim_dict, } if dist_type not in selector: raise ValueError("Unknown dist_type %r" % dist_type) From edeeb537e37f648dbeef8e89e7f41a7c5e5b536f Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:48:53 -0500 Subject: [PATCH 07/10] Move module level functions to top. Refactor them. There is now a single function for getting a `Map` from a `dist_type`. This pattern was duplicated a lot and is now consolidated in `choose_map`. Renamed `dist` -> `dist_type` in some places for consistency. Renamed `client_map_factory` -> `map_from_sizes` to be consistent with `map_from_dim_datas` and `map_from_global_dim_dict`. --- distarray/client_map.py | 145 ++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index f6e19839..cbf51026 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -36,19 +36,85 @@ make_grid_shape, validate_grid_shape) +def _compactify_dicts(dicts): + """ Internal helper function to take a list of dimension dictionaries with + duplicates and remove the dupes. -def client_map_factory(size, dist, grid_size): - """ Returns an instance of the appropriate subclass of MapBase. """ - cls_from_dist = { + # Workaround to make the dictionary's contents hashable. 
+ for d in dicts: + if 'indices' in d: + d['indices'] = tuple(d['indices']) + try: + return [dict(u) for u in set(tuple(sorted(d.items())) for d in dicts)] + except TypeError: + result = [] + for i, d in enumerate(dicts): + if d not in dicts[i+1:]: + result.append(d) + return result + +# --------------------------------------------------------------------------- +# Functions for creating Map objects +# --------------------------------------------------------------------------- + +def choose_map(dist_type): + """Choose a map classe given one of the distribution types.""" + cls_from_dist_type = { 'b': BlockMap, 'c': BlockCyclicMap, 'n': NoDistMap, 'u': UnstructuredMap, } - if dist not in cls_from_dist: - raise ValueError("unknown distribution type for %r" % dist) - return cls_from_dist[dist](size, grid_size) + if dist_type not in cls_from_dist_type: + raise ValueError("unknown distribution type for %r" % dist_type) + return cls_from_dist_type[dist_type] + + +def map_from_dim_datas(dim_datas): + """ Generates a ClientMap instance from a santized sequence of dim_data + dictionaries. + + Parameters + ---------- + dim_datas : sequence of dictionaries + Each dictionary is a "dimension dictionary" from the distributed array + protocol, one per process in this dimension of the process grid. The + dimension dictionaries shall all have the same keys and values for + global attributes: `dist_type`, `size`, `proc_grid_size`, and perhaps + others. + + Returns + ------- + An instance of a subclass of MapBase. + + """ + # check that all proccesses / ranks are accounted for. + proc_ranks = sorted(dd['proc_grid_rank'] for dd in dim_datas) + if proc_ranks != list(range(len(dim_datas))): + msg = "Ranks of processes (%r) not consistent." + raise ValueError(msg % proc_ranks) + # Sort dim_datas according to proc_grid_rank. 
+ dim_datas = sorted(dim_datas, key=lambda d: d['proc_grid_rank']) + + dist_type = dim_datas[0]['dist_type'] + map_class = choose_map(dist_type) + return map_class.from_dim_data(dim_datas) + + +def map_from_global_dim_dict(global_dim_dict): + """Given a global_dim_dict return map.""" + + dist_type = global_dim_dict['dist_type'] + map_class = choose_map(dist_type) + return map_class.from_global_dim_dict(global_dim_dict) + + +def map_from_sizes(size, dist_type, grid_size): + """ Returns an instance of the appropriate subclass of MapBase. + """ + map_class = choose_map(dist_type) + return map_class(size, grid_size) @add_metaclass(ABCMeta) @@ -293,71 +359,6 @@ def get_dimdicts(self): }) for grid_rank, ii in enumerate(self.indices)) -def _compactify_dicts(dicts): - """ Internal helper function to take a list of dimension dictionaries with - duplicates and remove the dupes. - - """ - # Workaround to make the dictionary's contents hashable. - for d in dicts: - if 'indices' in d: - d['indices'] = tuple(d['indices']) - try: - return [dict(u) for u in set(tuple(sorted(d.items())) for d in dicts)] - except TypeError: - result = [] - for i, d in enumerate(dicts): - if d not in dicts[i+1:]: - result.append(d) - return result - - -def map_from_dim_datas(dim_datas): - """ Generates a ClientMap instance from a santized sequence of dim_data - dictionaries. - - Parameters - ---------- - dim_datas : sequence of dictionaries - Each dictionary is a "dimension dictionary" from the distributed array - protocol, one per process in this dimension of the process grid. The - dimension dictionaries shall all have the same keys and values for - global attributes: `dist_type`, `size`, `proc_grid_size`, and perhaps - others. - - Returns - ------- - An instance of a subclass of MapBase. - - """ - # check that all proccesses / ranks are accounted for. 
- proc_ranks = sorted(dd['proc_grid_rank'] for dd in dim_datas) - if proc_ranks != list(range(len(dim_datas))): - msg = "Ranks of processes (%r) not consistent." - raise ValueError(msg % proc_ranks) - # Sort dim_datas according to proc_grid_rank. - dim_datas = sorted(dim_datas, key=lambda d: d['proc_grid_rank']) - - dist_type = dim_datas[0]['dist_type'] - selector = {'n': NoDistMap.from_dim_data, - 'b': BlockMap.from_dim_data, - 'c': BlockCyclicMap.from_dim_data, - 'u': UnstructuredMap.from_dim_data} - if dist_type not in selector: - raise ValueError("Unknown dist_type %r" % dist_type) - return selector[dist_type](dim_datas) - -def map_from_global_dim_dict(global_dim_dict): - - dist_type = global_dim_dict['dist_type'] - selector = {'n': NoDistMap.from_global_dim_dict, - 'b': BlockMap.from_global_dim_dict, - 'c': BlockCyclicMap.from_global_dim_dict, - 'u': UnstructuredMap.from_global_dim_dict, - } - if dist_type not in selector: - raise ValueError("Unknown dist_type %r" % dist_type) - return selector[dist_type](global_dim_dict) class Distribution(object): @@ -433,7 +434,7 @@ def __init__(self, context, shape, dist, grid_shape=None): self.rank_from_coords = np.arange(nelts).reshape(*self.grid_shape) # List of `ClientMap` objects, one per dimension. - self.maps = [client_map_factory(*args) + self.maps = [map_from_sizes(*args) for args in zip(self.shape, self.dist, self.grid_shape)] def owning_ranks(self, idxs): From 1307f750065d77846d41bc75816009b3f20bccf2 Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 11:54:24 -0500 Subject: [PATCH 08/10] Module organization comments. 
--- distarray/client_map.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index cbf51026..09b1c6d8 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -117,6 +117,10 @@ def map_from_sizes(size, dist_type, grid_size): return map_class(size, grid_size) +# --------------------------------------------------------------------------- +# Map classes +# --------------------------------------------------------------------------- + @add_metaclass(ABCMeta) class MapBase(object): """ Base class for one-dimensional client-side maps. @@ -141,6 +145,10 @@ def owners(self, idx): raise IndexError() +# --------------------------------------------------------------------------- +# 1-D Map classes +# --------------------------------------------------------------------------- + class NoDistMap(MapBase): dist = 'n' @@ -359,7 +367,9 @@ def get_dimdicts(self): }) for grid_rank, ii in enumerate(self.indices)) - +# --------------------------------------------------------------------------- +# N-Dimensional map. +# --------------------------------------------------------------------------- class Distribution(object): """ Governs the mapping between global indices and process ranks for From 751ca0e13efad6c64e9d4deca5d18bfa32b73eea Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Wed, 16 Apr 2014 13:23:30 -0500 Subject: [PATCH 09/10] pep8/pyflakes --- distarray/client_map.py | 81 +++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 09b1c6d8..1f512fa3 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -36,6 +36,7 @@ make_grid_shape, validate_grid_shape) + def _compactify_dicts(dicts): """ Internal helper function to take a list of dimension dictionaries with duplicates and remove the dupes. 
@@ -54,6 +55,7 @@ def _compactify_dicts(dicts): result.append(d) return result + # --------------------------------------------------------------------------- # Functions for creating Map objects # --------------------------------------------------------------------------- @@ -61,11 +63,11 @@ def _compactify_dicts(dicts): def choose_map(dist_type): """Choose a map classe given one of the distribution types.""" cls_from_dist_type = { - 'b': BlockMap, - 'c': BlockCyclicMap, - 'n': NoDistMap, - 'u': UnstructuredMap, - } + 'b': BlockMap, + 'c': BlockCyclicMap, + 'n': NoDistMap, + 'u': UnstructuredMap, + } if dist_type not in cls_from_dist_type: raise ValueError("unknown distribution type for %r" % dist_type) return cls_from_dist_type[dist_type] @@ -185,11 +187,12 @@ def owners(self, idx): return [0] if idx >= 0 and idx < self.size else [] def get_dimdicts(self): - return ({'dist_type' : 'n', - 'size' : self.size, - 'proc_grid_size' : 1, - 'proc_grid_rank' : 0, - },) + return ({ + 'dist_type': 'n', + 'size': self.size, + 'proc_grid_size': 1, + 'proc_grid_rank': 0, + },) class BlockMap(MapBase): @@ -229,8 +232,8 @@ def from_dim_data(cls, dim_data_seq): "inconsistent with proc_grid_size (%r).") raise ValueError(msg % (len(dim_data_seq), self.grid_size)) self.bounds = [(d['start'], d['stop']) for d in dim_data_seq] - self.boundary_padding, self.comm_padding = dd.get('padding', (0,0)) - + self.boundary_padding, self.comm_padding = dd.get('padding', (0, 0)) + return self def __init__(self, size, grid_size): @@ -254,14 +257,19 @@ def get_dimdicts(self): padding[0][0] = self.boundary_padding padding[-1][-1] = self.boundary_padding data_tuples = zip(grid_ranks, padding, self.bounds) - return tuple(({'dist_type' : 'b', - 'size' : self.size, - 'proc_grid_size' : self.grid_size, - 'proc_grid_rank' : grid_rank, - 'start' : start, - 'stop' : stop, - 'padding': padding, - }) for grid_rank, padding, (start, stop) in data_tuples) + # Build the result + out = [] + for grid_rank, 
padding, (start, stop) in data_tuples: + out.append({ + 'dist_type': 'b', + 'size': self.size, + 'proc_grid_size': self.grid_size, + 'proc_grid_rank': grid_rank, + 'start': start, + 'stop': stop, + 'padding': padding, + }) + return tuple(out) class BlockCyclicMap(MapBase): @@ -303,11 +311,11 @@ def owners(self, idx): return [idx_block % self.grid_size] def get_dimdicts(self): - return tuple(({'dist_type' : 'c', - 'size' : self.size, - 'proc_grid_size' : self.grid_size, - 'proc_grid_rank' : grid_rank, - 'start' : grid_rank * self.block_size, + return tuple(({'dist_type': 'c', + 'size': self.size, + 'proc_grid_size': self.grid_size, + 'proc_grid_rank': grid_rank, + 'start': grid_rank * self.block_size, 'block_size': self.block_size, }) for grid_rank in range(self.grid_size)) @@ -321,10 +329,10 @@ def from_global_dim_dict(cls, glb_dim_dict): if glb_dim_dict['dist_type'] != 'u': msg = "Wrong dist_type (%r) for unstructured map." raise ValueError(msg % glb_dim_dict['dist_type']) - indices_sequence = tuple(np.asarray(ind) for ind in glb_dim_dict['indices']) - size = sum(len(ii) for ii in indices_sequence) - grid_size = len(indices_sequence) - return cls(size, grid_size, indices=indices_sequence) + indices = tuple(np.asarray(i) for i in glb_dim_dict['indices']) + size = sum(len(i) for i in indices) + grid_size = len(indices) + return cls(size, grid_size, indices=indices) @classmethod def from_dim_data(cls, dim_data_seq): @@ -359,12 +367,13 @@ def owners(self, idx): def get_dimdicts(self): if self.indices is None: raise ValueError() - return tuple(({'dist_type' : 'u', - 'size' : self.size, - 'proc_grid_size' : self.grid_size, - 'proc_grid_rank' : grid_rank, - 'indices' : ii, - }) for grid_rank, ii in enumerate(self.indices)) + return tuple(({ + 'dist_type': 'u', + 'size': self.size, + 'proc_grid_size': self.grid_size, + 'proc_grid_rank': grid_rank, + 'indices': ii, + }) for grid_rank, ii in enumerate(self.indices)) # 
--------------------------------------------------------------------------- @@ -411,7 +420,7 @@ def from_dim_data(cls, context, dim_datas): validate_grid_shape(self.grid_shape, self.dist, len(context.targets)) coords = [tuple(d['proc_grid_rank'] for d in dd) for dd in dim_datas] - self.rank_from_coords = { c: r for (r, c) in enumerate(coords)} + self.rank_from_coords = {c: r for (r, c) in enumerate(coords)} dim_data_per_dim = [_compactify_dicts(dict_tuple) for dict_tuple in zip(*dim_datas)] From 661be4aa191ae51a7eaa7d40b3ae72fd677fe11d Mon Sep 17 00:00:00 2001 From: Blake Griffith Date: Mon, 21 Apr 2014 17:03:20 -0500 Subject: [PATCH 10/10] Refactoring. Distribution creation methods. Distribution.__init__ takes a global dim_data The old Distribution.__init__ is now the Distribution.from_shape classmethod. --- distarray/client_map.py | 35 +++++++++++++++--------------- distarray/context.py | 2 +- distarray/tests/test_client.py | 4 ++-- distarray/tests/test_client_map.py | 9 +++++--- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/distarray/client_map.py b/distarray/client_map.py index 1f512fa3..68dec244 100644 --- a/distarray/client_map.py +++ b/distarray/client_map.py @@ -386,23 +386,6 @@ class Distribution(object): """ - @classmethod - def from_global_dim_data(cls, context, glb_dim_data): - self = cls.__new__(cls) - self.context = context - self.maps = [map_from_global_dim_dict(gdd) for gdd in glb_dim_data] - self.shape = tuple(m.size for m in self.maps) - self.ndim = len(self.maps) - self.dist = tuple(m.dist for m in self.maps) - self.grid_shape = tuple(m.grid_size for m in self.maps) - - validate_grid_shape(self.grid_shape, self.dist, len(context.targets)) - - nelts = reduce(operator.mul, self.grid_shape) - self.rank_from_coords = np.arange(nelts).reshape(*self.grid_shape) - - return self - @classmethod def from_dim_data(cls, context, dim_datas): """ Creates a Distribution from a sequence of `dim_data` dictionary @@ -432,8 +415,10 @@ def 
from_dim_data(cls, context, dim_datas): return self - def __init__(self, context, shape, dist, grid_shape=None): + @classmethod + def from_shape(cls, context, shape, dist, grid_shape=None): + self = cls.__new__(cls) self.context = context self.shape = shape self.ndim = len(shape) @@ -455,6 +440,20 @@ def __init__(self, context, shape, dist, grid_shape=None): # List of `ClientMap` objects, one per dimension. self.maps = [map_from_sizes(*args) for args in zip(self.shape, self.dist, self.grid_shape)] + return self + + def __init__(self, context, glb_dim_data): + self.context = context + self.maps = [map_from_global_dim_dict(gdd) for gdd in glb_dim_data] + self.shape = tuple(m.size for m in self.maps) + self.ndim = len(self.maps) + self.dist = tuple(m.dist for m in self.maps) + self.grid_shape = tuple(m.grid_size for m in self.maps) + + validate_grid_shape(self.grid_shape, self.dist, len(context.targets)) + + nelts = reduce(operator.mul, self.grid_shape) + self.rank_from_coords = np.arange(nelts).reshape(*self.grid_shape) def owning_ranks(self, idxs): """ Returns a list of ranks that may *possibly* own the location in the diff --git a/distarray/context.py b/distarray/context.py index 1434f341..b2fd2365 100644 --- a/distarray/context.py +++ b/distarray/context.py @@ -309,7 +309,7 @@ def from_global_dim_data(self, global_dim_data, dtype=float): """ # global_dim_data is a sequence of dictionaries, one per dimension. 
- mdmap = Distribution.from_global_dim_data(self, global_dim_data) + mdmap = Distribution(self, global_dim_data) dim_data_per_rank = mdmap.get_local_dim_datas() if len(self.targets) != len(dim_data_per_rank): diff --git a/distarray/tests/test_client.py b/distarray/tests/test_client.py index 11d19d5c..6cb1785f 100644 --- a/distarray/tests/test_client.py +++ b/distarray/tests/test_client.py @@ -233,7 +233,7 @@ def test_global_dim_data_local_dim_data_equivalence(self): 'size': cols, }, ) - mdmap = Distribution.from_global_dim_data(self.context, glb_dim_data) + mdmap = Distribution(self.context, glb_dim_data) actual = mdmap.get_local_dim_datas() expected = [ @@ -318,7 +318,7 @@ def tearDown(self): def test___init__(self): shape = (100, 100) - mdmap = Distribution(self.context, shape, ('b', 'c')) + mdmap = Distribution.from_shape(self.context, shape, ('b', 'c')) da = DistArray(mdmap, dtype=int) da.fill(42) nda = numpy.empty(shape, dtype=int) diff --git a/distarray/tests/test_client_map.py b/distarray/tests/test_client_map.py index 8137ac0a..f8214b09 100644 --- a/distarray/tests/test_client_map.py +++ b/distarray/tests/test_client_map.py @@ -24,7 +24,8 @@ def tearDown(self): def test_2D_bn(self): nrows, ncols = 31, 53 - cm = client_map.Distribution(self.ctx, (nrows, ncols), {0:'b'}, (4,1)) + cm = client_map.Distribution.from_shape(self.ctx, (nrows, ncols), + {0: 'b'}, (4, 1)) chunksize = (nrows // 4) + 1 for _ in range(100): r, c = randrange(nrows), randrange(ncols) @@ -34,7 +35,8 @@ def test_2D_bn(self): def test_2D_bb(self): nrows, ncols = 3, 5 nprocs_per_dim = 2 - cm = client_map.Distribution(self.ctx, (nrows, ncols), ('b', 'b'), + cm = client_map.Distribution.from_shape( + self.ctx, (nrows, ncols), ('b', 'b'), (nprocs_per_dim, nprocs_per_dim)) row_chunks = nrows // nprocs_per_dim + 1 col_chunks = ncols // nprocs_per_dim + 1 @@ -47,7 +49,8 @@ def test_2D_bb(self): def test_2D_cc(self): nrows, ncols = 3, 5 nprocs_per_dim = 2 - cm = client_map.Distribution(self.ctx, 
(nrows, ncols), ('c', 'c'), + cm = client_map.Distribution.from_shape( + self.ctx, (nrows, ncols), ('c', 'c'), (nprocs_per_dim, nprocs_per_dim)) for r in range(nrows): for c in range(ncols):