Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions distarray/globalapi/tests/test_distributed_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ def setUpClass(cls):
cls.da = cls.context.empty(cls.distribution)
cls.output_paths = cls.context.apply(engine_temp_path)

@unittest.skip("FIXME")
def test_save_load_with_filenames(self):

try:
Expand All @@ -57,7 +56,6 @@ def test_save_load_with_filenames(self):
for filepath, target in zip(self.output_paths, self.context.targets):
self.context.apply(cleanup_file, (filepath,), targets=(target,))

@unittest.skip("FIXME")
def test_save_load_with_prefix(self):

output_path = self.output_paths[0]
Expand Down
143 changes: 99 additions & 44 deletions distarray/localapi/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
the magic number for ``.npy`` files, followed by the ``.npy`` header and
array data.

Notes
-----

The ``.npy`` format, including reasons for creating it and a comparison
of alternatives, is described fully in the "npy-format" NEP and in the
module docstring for ``numpy.lib.format``.
Expand All @@ -66,12 +69,9 @@
from distarray.externals import six

import numpy as np
from numpy.lib.format import write_array_header_1_0
from numpy.lib.utils import safe_eval
from numpy.compat import asbytes

from distarray.utils import _raise_nie


MAGIC_PREFIX = asbytes('\x93DARRY')
MAGIC_LEN = len(MAGIC_PREFIX) + 2
Expand All @@ -86,10 +86,12 @@ def magic(major, minor, prefix=MAGIC_PREFIX):
----------
major : int in [0, 255]
minor : int in [0, 255]
prefix : bytes
The magic prefix to concatenate with version number

Returns
-------
magic : str
magic : bytes

Raises
------
Expand All @@ -101,15 +103,91 @@ def magic(major, minor, prefix=MAGIC_PREFIX):
raise ValueError("Major version must be 0 <= major < 256.")
if minor < 0 or minor > 255:
raise ValueError("Minor version must be 0 <= minor < 256.")

return prefix + six.int2byte(major) + six.int2byte(minor)


def read_magic(fp, prefix=MAGIC_PREFIX, prefix_len=MAGIC_LEN):
"""Read the magic string to get the version of the file format.

Parameters
----------
fp : filelike object
prefix : bytes
Magic prefix to look for
prefix_len : int
Number of bytes in `prefix`

Returns
-------
major : int
minor : int
"""
magic_str = _read_bytes(fp, prefix_len, "magic string")
if magic_str[:-2] != prefix:
msg = "the magic string is not correct; expected %r, got %r"
raise ValueError(msg % (prefix, magic_str[:-2]))

if six.PY2:
return prefix + chr(major) + chr(minor)
elif six.PY3:
return prefix + bytes([major, minor])
major, minor = map(ord, magic_str[-2:])
if six.PY3:
major, minor = magic_str[-2:]
return major, minor


# mostly copied from numpy/lib/format
# dependance on _filter_header removed, since we don't care about npz-style
# headers
def write_localarray_header(fp, d, version=None):
"""Write the header for a localarray and return the version used

Parameters
----------
fp : filelike object
d : dict
This has the appropriate entries for writing its string representation
to the header of the file.
version: tuple or None
None means use oldest that works
explicit version will raise a ValueError if the format does not
allow saving this data. Default: None

Returns
-------
version : tuple of int
the file version which needs to be used to store the data
"""
import struct
header = ["{"]
for key, value in sorted(d.items()):
# Need to use repr here, since we eval these when reading
header.append("'%s': %s, " % (key, repr(value)))
header.append("}")
header = "".join(header)
# Pad the header with spaces and a final newline such that the magic
# string, the header-length short and the header are aligned on a
# 16-byte boundary. Hopefully, some system, possibly memory-mapping,
# can take advantage of our premature optimization.
current_header_len = MAGIC_LEN + 2 + len(header) + 1 # 1 for the newline
topad = 16 - (current_header_len % 16)
header = header + ' '*topad + '\n'
header = asbytes(header)

hlen = len(header)
if hlen < 256*256 and version in (None, (1, 0)):
version = (1, 0)
header_prefix = magic(1, 0) + struct.pack('<H', hlen)
else:
raise _raise_nie()
msg = "Header length %s too big for version=%s"
msg %= (hlen, version)
raise ValueError(msg)

fp.write(header_prefix)
fp.write(header)
return version


def write_localarray(fp, arr, version=(1, 0)):
def write_localarray(fp, larr, version=(1, 0)):
"""
Write a LocalArray to a .dnpy file, including a header.

Expand All @@ -122,7 +200,7 @@ def write_localarray(fp, arr, version=(1, 0)):
fp : file_like object
An open, writable file object, or similar object with a ``.write()``
method.
arr : LocalArray
larr : LocalArray
The array to write to disk.
version : (int, int), optional
The version number of the file format. Default: (1, 0)
Expand All @@ -141,43 +219,16 @@ def write_localarray(fp, arr, version=(1, 0)):
msg = "Only version (1, 0) is supported, not %s."
raise ValueError(msg % (version,))

fp.write(magic(*version))

distbuffer = arr.__distarray__()
distbuffer = larr.__distarray__()
metadata = {'__version__': distbuffer['__version__'],
'dim_data': distbuffer['dim_data'],
}

write_array_header_1_0(fp, metadata)
write_localarray_header(fp, metadata, version=(1, 0))
np.save(fp, distbuffer['buffer'])


def read_magic(fp):
"""Read the magic string to get the version of the file format.

Parameters
----------
fp : filelike object

Returns
-------
major : int
minor : int
"""
magic_str = _read_bytes(fp, MAGIC_LEN, "magic string")
if magic_str[:-2] != MAGIC_PREFIX:
msg = "the magic string is not correct; expected %r, got %r"
raise ValueError(msg % (MAGIC_PREFIX, magic_str[:-2]))
if six.PY2:
major, minor = map(ord, magic_str[-2:])
elif six.PY3:
major, minor = magic_str[-2:]
else:
raise _raise_nie()
return major, minor


def read_array_header_1_0(fp):
def read_localarray_header(fp, version):
"""
Read an array header from a filelike object using the 1.0 file format
version.
Expand All @@ -188,6 +239,7 @@ def read_array_header_1_0(fp):
----------
fp : filelike object
A file object or something with a `.read()` method like a file.
version : tuple of int

Returns
-------
Expand All @@ -206,9 +258,12 @@ def read_array_header_1_0(fp):
# Read an unsigned, little-endian short int which has the length of the
# header.
import struct
hlength_str = _read_bytes(fp, 2, "Array header length")
header_length = struct.unpack('<H', hlength_str)[0]
header = _read_bytes(fp, header_length, "Array header")
if version == (1, 0):
hlength_str = _read_bytes(fp, 2, "Array header length")
header_length = struct.unpack('<H', hlength_str)[0]
header = _read_bytes(fp, header_length, "Array header")
else:
raise ValueError("Invalid version %r" % version)

# The header is a pretty-printed string representation of a literal Python
# dictionary with trailing newlines padded to a 16-byte boundary. The keys
Expand Down Expand Up @@ -257,7 +312,7 @@ def read_localarray(fp):
msg = "only support version (1,0) of file format, not %r"
raise ValueError(msg % (version,))

__version__, dim_data = read_array_header_1_0(fp)
__version__, dim_data = read_localarray_header(fp, version=(1, 0))

buf = np.load(fp)

Expand Down
7 changes: 3 additions & 4 deletions distarray/localapi/tests/paralleltest_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@

import os
import numpy
import unittest

from numpy.testing import assert_allclose, assert_equal
from distarray.testing import ParallelTestCase, import_or_skip, temp_filepath
from distarray.localapi import LocalArray, ndenumerate
from distarray.localapi import (save_dnpy, load_dnpy, save_hdf5, load_hdf5,
load_npy)
load_npy)
from distarray.localapi.maps import Distribution


class TestDnpyFileIO(ParallelTestCase):

comm_size = 1

def setUp(self):
d = Distribution.from_shape(comm=self.comm, shape=(7,))
self.larr0 = LocalArray(d)
Expand Down Expand Up @@ -46,14 +47,12 @@ def test_flat_file_save_with_file_object(self):

self.assertTrue(magic == b'\x93DARRY')

@unittest.skip("FIXME")
def test_flat_file_save_load_with_filename(self):
save_dnpy(self.output_path, self.larr0)
larr1 = load_dnpy(comm=self.comm, file=self.output_path)
self.assertTrue(isinstance(larr1, LocalArray))
assert_allclose(self.larr0, larr1)

@unittest.skip("FIXME")
def test_flat_file_save_load_with_file_object(self):
save_dnpy(self.output_path, self.larr0)
with open(self.output_path, 'rb') as fp:
Expand Down
57 changes: 57 additions & 0 deletions distarray/localapi/tests/test_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# encoding: utf-8
# ---------------------------------------------------------------------------
# Copyright (C) 2008-2014, IPython Development Team and Enthought, Inc.
# Distributed under the terms of the BSD License. See COPYING.rst.
# ---------------------------------------------------------------------------


import unittest
import six

from distarray.localapi import format as fmt


class TestMagic(unittest.TestCase):

def test_magic_0(self):
expected = six.b('\x93DARRY\x03\x02')

prefix = six.b('\x93DARRY')
major = 3
minor = 2

result = fmt.magic(major=major, minor=minor, prefix=prefix)
self.assertEqual(result, expected)

def test_magic_1(self):
expected = six.b('\x93NUMPY\x01\x00')

prefix = six.b('\x93NUMPY')
major = 1
minor = 0

result = fmt.magic(major=major, minor=minor, prefix=prefix)
self.assertEqual(result, expected)


class TestReadMagic(unittest.TestCase):

def test_read_magic_0(self):
prefix = six.b('\x93DARRY')
prefix_len = 8
fp = six.BytesIO(six.b('\x93DARRY\x03\x02'))

major, minor = fmt.read_magic(fp, prefix=prefix, prefix_len=prefix_len)

expected = (3, 2)
self.assertEqual((major, minor), expected)

def test_read_magic_1(self):
prefix = six.b('\x93NUMPY')
prefix_len = 8
fp = six.BytesIO(six.b('\x93NUMPY\x01\x01'))

major, minor = fmt.read_magic(fp, prefix=prefix, prefix_len=prefix_len)

expected = (1, 1)
self.assertEqual((major, minor), expected)
1 change: 0 additions & 1 deletion distarray/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from distarray.externals import six
from distarray.externals import protocol_validator
from distarray.globalapi.context import Context, ContextCreationError
from distarray.globalapi.ipython_utils import IPythonClient
from distarray.error import InvalidCommSizeError
from distarray.localapi.mpiutils import MPI, create_comm_of_size

Expand Down
4 changes: 2 additions & 2 deletions quickstart
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ resolve_conda() {
}

install_osx() {
conda create -n $CONDA_ENV python=$PY_VER numpy ipyparallel notebook cython sphinx mock matplotlib
conda create -n $CONDA_ENV python=$PY_VER numpy ipyparallel notebook cython sphinx=1.3 mock matplotlib
source activate $CONDA_ENV
pip install mpi4py
pip install sphinxcontrib-programoutput
Expand All @@ -98,7 +98,7 @@ install_osx() {
}

install_linux() {
conda create -n $CONDA_ENV python=$PY_VER mpi4py numpy ipyparallel notebook cython sphinx mock matplotlib
conda create -n $CONDA_ENV python=$PY_VER mpi4py numpy ipyparallel notebook cython sphinx=1.3 mock matplotlib
source activate $CONDA_ENV
pip install sphinxcontrib-programoutput
python setup.py $INSTALL_MODE
Expand Down