From f95c1300e4765e66822b2bd6d81b0ac95db0d445 Mon Sep 17 00:00:00 2001
From: kristjaneerik
Date: Thu, 19 Apr 2018 17:57:19 -0700
Subject: [PATCH 1/3] Sem-Ver: feature - xopen: transparently handle gzipped-files

---
Reviewer note: the stor/base.py and stor/tests/shared_obs.py hunks below were
revised by hand during review (xopen now closes the wrapped file object and
passes the mode to GzipFile; the tests close their fixture handles). The
commit ids and the post-image blob ids on the "index" lines predate those
edits.

 stor/__init__.py         |  1 +
 stor/base.py             | 34 ++++++++++++++++++++++++++++++++++
 stor/tests/shared_obs.py | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/stor/__init__.py b/stor/__init__.py
index 589885d5..797109ee 100644
--- a/stor/__init__.py
+++ b/stor/__init__.py
@@ -53,6 +53,7 @@ def wrapper(path, *args, **kwargs):
 
 # extra compat!
 open = _delegate_to_path('open')
+xopen = _delegate_to_path('xopen')
 abspath = _delegate_to_path('abspath')
 normcase = _delegate_to_path('normcase')
 normpath = _delegate_to_path('normpath')
diff --git a/stor/base.py b/stor/base.py
index 3d0cd290..f878eb9e 100644
--- a/stor/base.py
+++ b/stor/base.py
@@ -7,6 +7,7 @@ import shutil
 import sys
 import warnings
+import gzip
 
 from six.moves import builtins
 from six import text_type
@@ -272,6 +273,39 @@ def open(self, *args, **kwargs):
 
         raise NotImplementedError
 
+    def xopen(self, mode='r', *args, **kwargs):
+        """Open a file-like object, transparently handling gzip compression.
+
+        Paths ending in ``.gz`` are opened in binary mode (a bare ``'r'`` or
+        ``'w'`` is promoted to ``'rb'`` / ``'wb'``) and wrapped in a
+        :class:`gzip.GzipFile`; every other path is handed to :func:`open`
+        unchanged. Closing the returned gzip stream also closes the
+        underlying file object.
+
+        See: :func:`open`
+        """
+        if not self.endswith('.gz'):
+            return self.open(mode, *args, **kwargs)
+
+        # gzip streams are binary, so promote the bare text modes
+        mode = {'r': 'rb', 'w': 'wb'}.get(mode, mode)
+        fp = self.open(mode, *args, **kwargs)
+
+        class _ClosingGzipFile(gzip.GzipFile):
+            def close(self):
+                # GzipFile.close() leaves a caller-supplied fileobj open
+                try:
+                    gzip.GzipFile.close(self)
+                finally:
+                    fp.close()
+
+        try:
+            return _ClosingGzipFile(fileobj=fp, mode=mode)
+        except Exception:
+            # do not leak the raw handle if the wrapper cannot be built
+            fp.close()
+            raise
+
     def list(self, *args, **kwargs):
         """List all contents using the path as a prefix.
 
diff --git a/stor/tests/shared_obs.py b/stor/tests/shared_obs.py
index af066ca0..1061642b 100644
--- a/stor/tests/shared_obs.py
+++ b/stor/tests/shared_obs.py
@@ -65,6 +65,39 @@ def test_works_with_gzip(self, mock_read_object):
                 gzip_fp.seek(3)
                 assert_same_data(fp, gzip_fp)
 
+    @mock_read_object
+    def test_xopen_gzip(self, mock_read_object):
+        gzip_path = stor.join(stor.dirname(__file__), 'file_data', 's_3_2126.bcl.gz')
+
+        with stor.open(gzip_path, 'rb') as raw_fp:
+            mock_read_object.return_value = raw_fp.read()
+
+        with stor.xopen(stor.join(self.drive, 'A/C/s_3_2126.bcl.gz'), 'r') as fp:
+            with gzip.open(gzip_path) as gzip_fp:
+                assert_same_data(fp, gzip_fp)
+
+        with stor.xopen(stor.join(self.drive, 'A/C/s_3_2126.bcl.gz'), 'r') as fp:
+            with gzip.open(gzip_path) as gzip_fp:
+                # after seeking, the rest of the data should still be the same
+                fp.seek(3)
+                gzip_fp.seek(3)
+                assert_same_data(fp, gzip_fp)
+
+    @mock_read_object
+    def test_xopen_regular(self, mock_read_object):
+        fpath = stor.join(stor.dirname(__file__), 'file_data', 'utf8_file_with_unicode.txt')
+        with stor.open(fpath, 'rb') as raw_fp:
+            mock_read_object.return_value = raw_fp.read()
+
+        for mode in 'r', 'rb':
+            with stor.xopen(fpath, mode) as xfp:
+                with stor.open(fpath, mode) as fp:
+                    assert_same_data(xfp, fp)
+
+        with stor.xopen(stor.join(self.drive, 'A/C/utf8_file_with_unicode.txt'), 'rb') as xfp:
+            with stor.open(fpath, 'rb') as fp:
+                assert_same_data(xfp, fp)
+
     def test_makedirs_p_does_nothing(self):
         # dumb test... but why not?
         self.normal_path.makedirs_p()

From 9c4dbd882ec30cafdab7d4d514fd69a6c642e0d5 Mon Sep 17 00:00:00 2001
From: kristjaneerik
Date: Thu, 19 Apr 2018 18:19:40 -0700
Subject: [PATCH 2/3] add file with some unicode

---
 stor/tests/file_data/utf8_file_with_unicode.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 stor/tests/file_data/utf8_file_with_unicode.txt

diff --git a/stor/tests/file_data/utf8_file_with_unicode.txt b/stor/tests/file_data/utf8_file_with_unicode.txt
new file mode 100644
index 00000000..d4fc16ce
--- /dev/null
+++ b/stor/tests/file_data/utf8_file_with_unicode.txt
@@ -0,0 +1,2 @@
+a ≥ b
+c
\ No newline at end of file

From 08e3b3e90183f5346a3773de4ea5c38a8d6b81a5 Mon Sep 17 00:00:00 2001
From: kristjaneerik
Date: Fri, 20 Apr 2018 14:04:29 -0700
Subject: [PATCH 3/3] add emoji

---
 stor/tests/file_data/utf8_file_with_unicode.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stor/tests/file_data/utf8_file_with_unicode.txt b/stor/tests/file_data/utf8_file_with_unicode.txt
index d4fc16ce..58f12a7a 100644
--- a/stor/tests/file_data/utf8_file_with_unicode.txt
+++ b/stor/tests/file_data/utf8_file_with_unicode.txt
@@ -1,2 +1,2 @@
 a ≥ b
-c
\ No newline at end of file
+c 😄