diff --git a/distarray/globalapi/tests/test_distributed_io.py b/distarray/globalapi/tests/test_distributed_io.py index b4a117e5..c95abdcf 100644 --- a/distarray/globalapi/tests/test_distributed_io.py +++ b/distarray/globalapi/tests/test_distributed_io.py @@ -45,7 +45,6 @@ def setUpClass(cls): cls.da = cls.context.empty(cls.distribution) cls.output_paths = cls.context.apply(engine_temp_path) - @unittest.skip("FIXME") def test_save_load_with_filenames(self): try: @@ -57,7 +56,6 @@ def test_save_load_with_filenames(self): for filepath, target in zip(self.output_paths, self.context.targets): self.context.apply(cleanup_file, (filepath,), targets=(target,)) - @unittest.skip("FIXME") def test_save_load_with_prefix(self): output_path = self.output_paths[0] diff --git a/distarray/localapi/format.py b/distarray/localapi/format.py index c08c31a5..387ededd 100644 --- a/distarray/localapi/format.py +++ b/distarray/localapi/format.py @@ -56,6 +56,9 @@ the magic number for ``.npy`` files, followed by the ``.npy`` header and array data. +Notes +----- + The ``.npy`` format, including reasons for creating it and a comparison of alternatives, is described fully in the "npy-format" NEP and in the module docstring for ``numpy.lib.format``. 
@@ -66,12 +69,9 @@ from distarray.externals import six import numpy as np -from numpy.lib.format import write_array_header_1_0 from numpy.lib.utils import safe_eval from numpy.compat import asbytes -from distarray.utils import _raise_nie - MAGIC_PREFIX = asbytes('\x93DARRY') MAGIC_LEN = len(MAGIC_PREFIX) + 2 @@ -86,10 +86,12 @@ def magic(major, minor, prefix=MAGIC_PREFIX): ---------- major : int in [0, 255] minor : int in [0, 255] + prefix : bytes + The magic prefix to concatenate with version number Returns ------- - magic : str + magic : bytes Raises ------ @@ -101,15 +103,91 @@ def magic(major, minor, prefix=MAGIC_PREFIX): raise ValueError("Major version must be 0 <= major < 256.") if minor < 0 or minor > 255: raise ValueError("Minor version must be 0 <= minor < 256.") + + return prefix + six.int2byte(major) + six.int2byte(minor) + + +def read_magic(fp, prefix=MAGIC_PREFIX, prefix_len=MAGIC_LEN): + """Read the magic string to get the version of the file format. + + Parameters + ---------- + fp : filelike object + prefix : bytes + Magic prefix to look for + prefix_len : int + Number of bytes to read: `prefix` plus the two version bytes + + Returns + ------- + major : int + minor : int + """ + magic_str = _read_bytes(fp, prefix_len, "magic string") + if magic_str[:-2] != prefix: + msg = "the magic string is not correct; expected %r, got %r" + raise ValueError(msg % (prefix, magic_str[:-2])) + if six.PY2: - return prefix + chr(major) + chr(minor) - elif six.PY3: - return prefix + bytes([major, minor]) + major, minor = map(ord, magic_str[-2:]) + if six.PY3: + major, minor = magic_str[-2:] + return major, minor + + +# mostly copied from numpy/lib/format +# dependence on _filter_header removed, since we don't care about npz-style +# headers +def write_localarray_header(fp, d, version=None): + """Write the header for a localarray and return the version used + + Parameters + ---------- + fp : filelike object + d : dict + This has the appropriate entries for writing its string representation + to the 
header of the file. + version: tuple or None + None means use oldest that works + explicit version will raise a ValueError if the format does not + allow saving this data. Default: None + + Returns + ------- + version : tuple of int + the file version which needs to be used to store the data + """ + import struct + header = ["{"] + for key, value in sorted(d.items()): + # Need to use repr here, since we eval these when reading + header.append("'%s': %s, " % (key, repr(value))) + header.append("}") + header = "".join(header) + # Pad the header with spaces and a final newline such that the magic + # string, the header-length short and the header are aligned on a + # 16-byte boundary. Hopefully, some system, possibly memory-mapping, + # can take advantage of our premature optimization. + current_header_len = MAGIC_LEN + 2 + len(header) + 1 # 1 for the newline + topad = 16 - (current_header_len % 16) + header = header + ' '*topad + '\n' + header = asbytes(header) + + hlen = len(header) + if hlen < 256*256 and version in (None, (1, 0)): + version = (1, 0) + header_prefix = magic(1, 0) + struct.pack('