import gzip

# NOTE: stor/__init__.py exposes this method at module level through
# ``xopen = _delegate_to_path('xopen')`` (next to the existing ``open``
# delegate); stor/base.py gains the ``import gzip`` used below.


class _GzipFileClosingRaw(gzip.GzipFile):
    """A ``GzipFile`` that also closes the raw file object it wraps.

    ``gzip.GzipFile.close()`` deliberately leaves a caller-supplied
    ``fileobj`` open. ``xopen`` never hands the raw file object to the
    caller, so the wrapper has to close it itself or the handle leaks.
    """

    # Class-level default so ``close()`` stays safe even if ``__init__``
    # raised before the instance attribute was assigned.
    _raw = None

    def __init__(self, raw, mode):
        gzip.GzipFile.__init__(self, fileobj=raw, mode=mode)
        self._raw = raw

    def close(self):
        try:
            # Flushes pending data and (in write mode) the gzip trailer
            # into the raw file object.
            gzip.GzipFile.close(self)
        finally:
            # Detach first so repeated close() calls are harmless.
            raw, self._raw = self._raw, None
            if raw is not None:
                raw.close()


def xopen(self, mode='r', *args, **kwargs):
    """Open a file-like object, transparently handling gzip compression.

    Paths that do not end in ``.gz`` are opened with :func:`open` using
    the arguments exactly as given. Paths ending in ``.gz`` are opened
    in binary mode and wrapped in a gzip stream, so reads yield the
    decompressed bytes and writes are compressed.

    Args:
        mode (str): File mode. For ``.gz`` paths, ``'r'`` and ``'w'``
            are promoted to ``'rb'`` and ``'wb'``; the returned object
            always works with bytes.
        *args: Passed through to :func:`open`.
        **kwargs: Passed through to :func:`open`.

    Returns:
        The object returned by :func:`open`, or for ``.gz`` paths a
        ``gzip.GzipFile`` whose ``close()`` also closes the underlying
        file object.

    Raises:
        ValueError: If a text mode (containing ``'t'``) is requested for
            a ``.gz`` path, or if ``gzip`` rejects the mode.

    See: :func:`open`
    """
    if not self.endswith('.gz'):
        return self.open(mode, *args, **kwargs)

    # Reject text modes before touching the file: a gzip stream is binary,
    # and opening with 'wt' first could truncate an existing file.
    if 't' in mode:
        raise ValueError(
            'xopen only supports binary modes for .gz paths, got %r' % (mode,))
    if mode in ('r', 'w'):
        mode += 'b'

    raw = self.open(mode, *args, **kwargs)
    try:
        # Forward the mode explicitly; otherwise GzipFile guesses from
        # ``raw.mode`` and falls back to reading when it is absent.
        return _GzipFileClosingRaw(raw, mode)
    except Exception:
        # Do not leak the raw handle if the gzip wrapper cannot be built.
        raw.close()
        raise
# Fixture added alongside these tests:
# stor/tests/file_data/utf8_file_with_unicode.txt, a two-line UTF-8 file
# containing "a ≥ b" and "c 😄".
#
# Both tests are added to stor/tests/shared_obs.py (hunk @@ -65,6 +65,39 @@),
# after the end of test_works_with_gzip, whose trailing lines are unchanged
# context in that hunk:
#     gzip_fp.seek(3)
#     assert_same_data(fp, gzip_fp)


@mock_read_object
def test_xopen_gzip(self, mock_read_object):
    """xopen on a ``.gz`` OBS path yields the same data as ``gzip.open``."""
    gzip_path = stor.join(stor.dirname(__file__), 'file_data', 's_3_2126.bcl.gz')
    obs_path = stor.join(self.drive, 'A/C/s_3_2126.bcl.gz')

    # Read the fixture inside a context manager so the handle is closed
    # instead of being left for the garbage collector.
    with stor.open(gzip_path, 'rb') as raw_fp:
        mock_read_object.return_value = raw_fp.read()

    with stor.xopen(obs_path, 'r') as fp, gzip.open(gzip_path) as gzip_fp:
        assert_same_data(fp, gzip_fp)

    with stor.xopen(obs_path, 'r') as fp, gzip.open(gzip_path) as gzip_fp:
        # after seeking, the rest of the data should still be the same
        fp.seek(3)
        gzip_fp.seek(3)
        assert_same_data(fp, gzip_fp)


@mock_read_object
def test_xopen_regular(self, mock_read_object):
    """xopen on a non-gzip path yields the same data as ``open``."""
    fpath = stor.join(stor.dirname(__file__), 'file_data', 'utf8_file_with_unicode.txt')
    obs_path = stor.join(self.drive, 'A/C/utf8_file_with_unicode.txt')

    # Close the fixture handle deterministically after reading it.
    with stor.open(fpath, 'rb') as raw_fp:
        mock_read_object.return_value = raw_fp.read()

    # Local path: text and binary modes must both match plain open().
    for mode in 'r', 'rb':
        with stor.xopen(fpath, mode) as xfp, stor.open(fpath, mode) as fp:
            assert_same_data(xfp, fp)

    # OBS path: served from the mocked object read above.
    with stor.xopen(obs_path, 'rb') as xfp, stor.open(fpath, 'rb') as fp:
        assert_same_data(xfp, fp)


# Unchanged context that follows the new tests in the same hunk (the
# rest of this method lies outside the hunk):
#     def test_makedirs_p_does_nothing(self):
#         # dumb test... but why not?
#         self.normal_path.makedirs_p()