From 20877bc0ec16b5d38ccb9edef9d679305f2c85d5 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Wed, 15 Aug 2018 15:39:58 -0700 Subject: [PATCH 01/16] initial test commit --- stor/dx.py | 9 +++++ stor/tests/test_dx_path_compat.py | 56 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 stor/dx.py create mode 100644 stor/tests/test_dx_path_compat.py diff --git a/stor/dx.py b/stor/dx.py new file mode 100644 index 00000000..bf708df5 --- /dev/null +++ b/stor/dx.py @@ -0,0 +1,9 @@ +from stor.obs import OBSPath + + +class DXPath(OBSPath): + """ + Provides the ability to manipulate and access resources on swift + with a similar interface to the path library. + """ + drive = 'dx://' diff --git a/stor/tests/test_dx_path_compat.py b/stor/tests/test_dx_path_compat.py new file mode 100644 index 00000000..e12a5def --- /dev/null +++ b/stor/tests/test_dx_path_compat.py @@ -0,0 +1,56 @@ +import unittest + +from stor.dx import DXPath + + +class TestBasics(unittest.TestCase): + def test_relpath(self): + with self.assertRaises(AttributeError): + DXPath('dx://project').relpathto() + with self.assertRaises(AttributeError): + DXPath('dx://project').relpath() + + def test_construction_from_none(self): + with self.assertRaises(ValueError): + DXPath(None) + + def test_string_compatibility(self): + """ Test compatibility with ordinary strings. """ + x = DXPath('dx://xyzzy') + assert x == 'dx://xyzzy' + assert x == str('dx://xyzzy') + assert 'xyz' in x + assert 'analysis' not in x + + # sorting + items = [DXPath('dx://fhj'), + DXPath('dx://fgh'), + 'dx://E', + DXPath('dx://d'), + 'dx://A', + DXPath('dx://B'), + 'dx://c'] + items.sort() + self.assertEqual(items, + ['dx://A', 'dx://B', 'dx://E', 'dx://c', + 'dx://d', 'dx://fgh', 'dx://fhj']) + + # Test p1/p1. + p1 = DXPath("dx://foo") + p2 = "bar" + self.assertEqual(p1 / p2, DXPath("dx://foo/bar")) + + def test_properties(self): + # Create sample path object. + f = DXPath('dx://project/prefix/whatever.csv') + + self.assertEqual(f.parent, DXPath('dx://project/prefix')) + + # .name + self.assertEqual(f.name, 'whatever.csv') + self.assertEqual(f.parent.name, 'prefix') + self.assertEqual(f.parent.parent.name, 'project') + + # .ext + self.assertEqual(f.ext, '.csv') + self.assertEqual(f.parent.ext, '') From d2b24d0296d210927596919ae17e2f3fbd722876 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Fri, 17 Aug 2018 07:11:55 -0700 Subject: [PATCH 02/16] partial unit tests for DX - committed for feedback --- requirements-dev.txt | 2 + stor/dx.py | 92 ++++++++++++++- stor/test.py | 63 +++++++++- stor/tests/test_dx.py | 261 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 416 insertions(+), 2 deletions(-) create mode 100644 stor/tests/test_dx.py diff --git a/requirements-dev.txt b/requirements-dev.txt index 3218b4d7..a2a73724 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,3 +8,5 @@ testfixtures==4.7.0 coverage==4.4.1 pytest==3.2.3 pytest-cov==2.5.1 +vcrpy-unittest +dxpy \ No newline at end of file diff --git a/stor/dx.py b/stor/dx.py index bf708df5..49735758 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -1,5 +1,5 @@ from stor.obs import OBSPath - +import dxpy class DXPath(OBSPath): """ @@ -7,3 +7,93 @@ class DXPath(OBSPath): with a similar interface to the path library. """ drive = 'dx://' + + @property + def project(self): + """Returns the project name from the path or None""" + parts = self._get_parts() + return parts[0] if len(parts) > 0 and parts[0] else None + + #TODO(akumar) + def _get_dx_connection_vars(self): + raise NotImplementedError + + #TODO(akumar) + def _dx_api_call(self): + raise NotImplementedError + + def canonicalize(self): + raise NotImplementedError + + def humanize(self): + raise NotImplementedError + + def temp_url(self): + raise NotImplementedError + + def first(self): # may not need it + raise NotImplementedError + + def download_objects(self): # may not need it + raise NotImplementedError + + def remove(self): + raise NotImplementedError + + def rmtree(self): + raise NotImplementedError + + def isdir(self): + raise NotImplementedError + + def isfile(self): + raise NotImplementedError + + def stat(self): + raise NotImplementedError + + def getsize(self): + raise NotImplementedError + + def walkfiles(self, pattern=None): + raise NotImplementedError + + def download_object(self, dest): + """Download a single path or object to file.""" + raise NotImplementedError + + def download(self, dest): + """Download a directory.""" + raise NotImplementedError + + def upload(self, source): + """Upload a list of files and directories to a directory.""" + raise NotImplementedError + + def open(self, mode='r', encoding=None): + """ + Opens a OBSFile that can be read or written to and is uploaded to + the remote service. + """ + raise NotImplementedError + + def list(self): + """List contents using the resource of the path as a prefix.""" + raise NotImplementedError + + def listdir(self): + """list the path as a dir, returning top-level directories and files.""" + raise NotImplementedError + + def glob(self, pattern): + """ Glob for pattern relative to this directory. + + Note that Swift currently only supports a single trailing *""" + raise NotImplementedError + + def exists(self): + """Checks whether path exists on local filesystem or on swift. + + For directories on swift, checks whether directory sentinel exists or + at least one subdirectory exists""" + raise NotImplementedError \ No newline at end of file diff --git a/stor/test.py b/stor/test.py index b5c31dd5..d4bbd715 100644 --- a/stor/test.py +++ b/stor/test.py @@ -1,9 +1,12 @@ import mock +import unittest +from vcr_unittest import VCRMixin +import dxpy + from stor import s3 from stor.s3 import S3Path from stor.swift import SwiftPath from stor import settings -import unittest class SwiftTestMixin(object): @@ -181,6 +184,20 @@ def setup_s3_mocks(self): self.mock_get_s3_transfer_config = s3_transfer_config_patcher.start() +# TODO(akumar) +class DXTestMixin(VCRMixin): + """A mixin with helpers for mocking out swift. + + DXTestMixin should be used to create base test classes for anything + that accesses swift. + """ + def setup_dx_auth(self): + pass + + def setup_dx_mocks(self): + self.mock_dx_conn = mock.Mock() # TODO + + class SwiftTestCase(unittest.TestCase, SwiftTestMixin): """A TestCase class that sets up swift mocks and provides additional assertions""" def setUp(self): @@ -207,3 +224,47 @@ def setUp(self): del s3._thread_local.s3_transfer_config except AttributeError: pass + + +class DXTestCase(unittest.TestCase, DXTestMixin): + """A TestCase class that sets up DNAnexus vars and provides additional assertions""" + @classmethod + def setUpClass(cls): + #TODO(akumar) set up project in DX for testing + super(DXTestCase, cls).setup_dx_auth() + cls.proj_handler = cls.setup_project() + cls.project = cls.proj_handler.name + cls.proj_id = cls.proj_handler.get_id() + + #TODO(akumar) Is there a way around using login_token for testing purposes? + # settings.update({ + # 'dx': { + # 'dx_login_token': '__dummy__' + # } + # }) + + + @classmethod + def setup_project(cls): + test_proj = dxpy.bindings.dxproject.DXProject() + test_proj.new('Temp_Test') + return test_proj + + def setUp(self): + with dxpy.bindings.dxfile_functions.new_dxfile() as d: + d.write('data') + # subtle bug in dnanexus: on executing the above code, + # the state of file is left in 'open' state. However, when you + # do d.describe() or d.read(), it works and the file's state + # magically changes to closed. Hence, first force close below + d.state = 'closed' + self.file_handler = d + + def tearDown(self): + self.file_handler.remove() + self.file_handler = None + + def tearDownClass(cls): + #TODO(akumar) remove the project created + cls.proj_handler.destroy() + cls.proj_handler = None diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py new file mode 100644 index 00000000..a05f1208 --- /dev/null +++ b/stor/tests/test_dx.py @@ -0,0 +1,261 @@ +import logging +import ntpath +import os +from tempfile import NamedTemporaryFile +import unittest + +import dxpy +import freezegun +import mock +from testfixtures import LogCapture + +import stor +from stor import exceptions +from stor import NamedTemporaryDirectory +from stor import Path +from stor import settings +from stor import swift +from stor import utils +from stor.dx import DXPath +from stor.test import DXTestCase +from stor.tests.shared_obs import SharedOBSFileCases + + +class TestBasicPathMethods(unittest.TestCase): + def test_name(self): + p = Path('dx://project/path/to/resource') + self.assertEquals(p.name, 'resource') + + def test_parent(self): + p = Path('dx://project/path/to/resource') + self.assertEquals(p.parent, 'dx://project/path/to') + + def test_dirname(self): + p = Path('dx://project/path/to/resource') + self.assertEquals(p.dirname(), 'dx://project/path/to') + + def test_dirname_top_level(self): + p1 = Path('dx://') + self.assertEquals(p1.dirname(), 'dx://') + + p2 = Path('dx://a') + self.assertEquals(p2.dirname(), 'dx://') + + def test_basename(self): + p = Path('dx://project/path/to/resource') + self.assertEquals(p.basename(), 'resource') + + def test_to_url(self): + p = Path('dx://project/path/to/resource') + + #TODO(akumar) to_url may not make sense for dx paths since DNAnexus uses S3 in backend. + #TODO(akumar) Remove this test if so... + + # self.assertEquals(p.to_url(), + # 'https://mybucket.s3.amazonaws.com/path/to/resource') + + def test_failed_new(self): + with self.assertRaises(ValueError): + DXPath('/bad/dx/path') + + def test_successful_new(self): + dx_p = DXPath('dx://project/path') + self.assertEquals(dx_p, 'dx://project/path') + + +class TestRepr(unittest.TestCase): + def test_repr(self): + dx_p = DXPath('dx://t/c/p') + self.assertEquals(eval(repr(dx_p)), dx_p) + + +class TestPathManipulations(unittest.TestCase): + def test_add(self): + dx_p = DXPath('dx://a') + dx_p = dx_p + 'b' + Path('c') + self.assertTrue(isinstance(dx_p, DXPath)) + self.assertEquals(dx_p, 'dx://abc') + + def test_div(self): + dx_p = DXPath('dx://t') + dx_p = dx_p / 'c' / Path('p') + self.assertTrue(isinstance(dx_p, DXPath)) + self.assertEquals(dx_p, 'dx://t/c/p') + + +class TestProject(unittest.TestCase): + def test_project_none(self): + dx_p = DXPath('dx://') + self.assertIsNone(dx_p.project) + + def test_project_exists(self): + dx_p = DXPath('dx://project') + self.assertEquals(dx_p.project, 'project') + + +class TestResource(unittest.TestCase): + def test_resource_none_no_project(self): + dx_p = DXPath('dx://') + self.assertIsNone(dx_p.resource) + + def test_resource_none_w_project(self): + dx_p = DXPath('dx://project/') + self.assertIsNone(dx_p.resource) + + def test_resource_object(self): + dx_p = DXPath('dx://bucket/obj') + self.assertEquals(dx_p.resource, 'obj') + + def test_resource_single_dir(self): + dx_p = DXPath('dx://project/dir/') + self.assertEquals(dx_p.resource, 'dir/') + + def test_resource_nested_obj(self): + dx_p = DXPath('dx://project/nested/obj') + self.assertEquals(dx_p.resource, 'nested/obj') + + def test_resource_nested_dir(self): + dx_p = DXPath('dx://project/nested/dir/') + self.assertEquals(dx_p.resource, 'nested/dir/') + + +class TestGetDXConnectionCreds(unittest.TestCase): + def test_login_token(self): + dx_p = DXPath('dx://tenant/') + with settings.use({'dx': {'dx_login_token': ''}}): + #TODO(akumar) move errors to dx class + with self.assertRaises(swift.ConfigurationError): + dx_p._get_dx_connection_vars() + + +class TestDXFile(DXTestCase): + + def test_read_on_open_file(self): + d = dxpy.bindings.dxfile_functions.new_dxfile() + self.assertEqual(d.describe()['state'], 'open') + + dx_p = DXPath('dx://{}/{}'.format(self.project, d.name)) + with self.assertRaisesRegexp(ValueError, 'not in closed state'): + dx_p.read_object() + + d.remove() + + def test_read_success_on_closed_file(self): + dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name)) + self.assertEquals(dx_p.read_object(), b'data') + self.assertEquals(dx_p.open().read(), 'data') + + def test_iterating_over_files(self): + data = b'''\ +line1 +line2 +line3 +line4 +''' + with dxpy.bindings.dxfile_functions.new_dxfile() as d: + d.write(data) + d.state = 'closed' + dx_p = DXPath('dx://{}/{}'.format(self.project, d.name)) + # open().read() should return str for r + self.assertEquals(dx_p.open('r').read(), data.decode('ascii')) + # open().read() should return bytes for rb + self.assertEquals(dx_p.open('rb').read(), data) + self.assertEquals(dx_p.open().readlines(), + [l + '\n' for l in data.decode('ascii').split('\n')][:-1]) + for i, line in enumerate(dx_p.open(), 1): + self.assertEqual(line, 'line%d\n' % i) + + self.assertEqual(next(dx_p.open()), 'line1\n') + self.assertEqual(next(iter(dx_p.open())), 'line1\n') + + def test_write_multiple_w_context_manager(self, mock_upload): + dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name)) + with dx_p.open(mode='wb') as obj: + obj.write(b'hello') + obj.write(b' world') + self.assertIn(b'hello world', dx_p.read_object()) + + @mock.patch('time.sleep', autospec=True) + @mock.patch.object(DXPath, 'upload', autospec=True) + def test_write_multiple_flush_multiple_upload(self, mock_upload): + dx_p = DXPath('dx://project/obj') + with NamedTemporaryFile(delete=False) as ntf1,\ + NamedTemporaryFile(delete=False) as ntf2,\ + NamedTemporaryFile(delete=False) as ntf3: + with mock.patch('tempfile.NamedTemporaryFile', autospec=True) as ntf: + ntf.side_effect = [ntf1, ntf2, ntf3] + with dx_p.open(mode='wb') as obj: + obj.write(b'hello') + obj.flush() + obj.write(b' world') + obj.flush() + u1, u2, u3 = mock_upload.call_args_list + u1[0][1][0].source == ntf1.name + u2[0][1][0].source == ntf2.name + u3[0][1][0].source == ntf3.name + u1[0][1][0].object_name == dx_p.resource + u2[0][1][0].object_name == dx_p.resource + u3[0][1][0].object_name == dx_p.resource + self.assertEqual(open(ntf1.name).read(), 'hello') + self.assertEqual(open(ntf2.name).read(), 'hello world') + # third call happens because we don't care about checking for + # additional file change + self.assertEqual(open(ntf3.name).read(), 'hello world') + + +class TestSwiftShared(SharedOBSFileCases, DXTestCase): + drive = 'dx://' + path_class = DXPath + normal_path = DXPath('dx://project/obj') + + +class TestTempURL(DXTestCase): + def test_success(self): + dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name)) + temp_url = dx_p.temp_url() + #TODO (akumar) what is the expected_url? DXFile.get_download_url doesn't seem to work + # expected = 'https://swift.com/v1/tenant/container/obj?temp_url_sig=3b1adff9452165103716d308da692e6ec9c2d55f&temp_url_expires=1456229100&inline' # nopep8 + self.assertIn(str(self.file_handler.name / self.project), temp_url) + + +class TestList(DXTestCase): + + def test_no_project_error(self): + dx_p = DXPath('dx://') + with self.assertRaises(ValueError): + list(dx_p.list()) + + def test_list_project(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'path/to/resource1' + }, { + 'name': 'path/to/resource2' + }]) + dx_p = DXPath('dx://test-project') + results = dx_p.list() + + self.assertEquals(results, [ + 'dx://test-project/path/to/resource1', + 'dx://test-project/path/to/resourc2' + ]) + + def test_listdir(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'subdir': 'path/to/resource1/' + }, { + 'name': 'path/to/resource1' + }, { + 'name': 'path/to/resource2' + }, { + 'name': 'path/to/resource3' + }]) + + dx_p = DXPath('dx://project/path/to') + results = list(dx_p.listdir()) + self.assertListEqual(results, [ + 'dx://project/path/to/resource1', + 'dx://project/path/to/resource2', + 'dx://project/path/to/resource3' + ]) From f35311eb14d23eb73816937b587d91fa290b2226 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Fri, 17 Aug 2018 08:03:24 -0700 Subject: [PATCH 03/16] adding few more unit tests for dxpath.list --- stor/test.py | 8 +++ stor/tests/test_dx.py | 112 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 2 deletions(-) diff --git a/stor/test.py b/stor/test.py index d4bbd715..273514c1 100644 --- a/stor/test.py +++ b/stor/test.py @@ -1,5 +1,7 @@ import mock import unittest +import six + from vcr_unittest import VCRMixin import dxpy @@ -197,6 +199,12 @@ def setup_dx_auth(self): def setup_dx_mocks(self): self.mock_dx_conn = mock.Mock() # TODO + def assertDXListsEqual(self, r1, r2): + if six.PY2: + return self.assertItemsEqual(r1, r2) + else: + return self.assertCountEqual(r1, r2) + class SwiftTestCase(unittest.TestCase, SwiftTestMixin): """A TestCase class that sets up swift mocks and provides additional assertions""" diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index a05f1208..b9f27a65 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -235,11 +235,48 @@ def test_list_project(self): dx_p = DXPath('dx://test-project') results = dx_p.list() - self.assertEquals(results, [ + self.assertDXListsEqual(results, [ 'dx://test-project/path/to/resource1', 'dx://test-project/path/to/resourc2' ]) + @mock.patch('time.sleep', autospec=True) + def test_list_unavailable(self, mock_sleep): + mock_list = self.mock_dx_conn.get_iterator + mock_list.side_effect = [ + DXException('unavaiable', http_status=503), + ({}, [{ + 'name': 'path/to/resource1' + }, { + 'name': 'path/to/resource2' + }]) + ] + + dx_p = DXPath('dx://project/path') + results = dx_p.list() + self.assertDXListsEqual(results, [ + 'dx://project/path/to/resource1', + 'dx://project/path/to/resource2' + ]) + + # Verify that list was retried one time + # self.assertEquals(len(mock_list.call_args_list), 2) + + @mock.patch('time.sleep', autospec=True) + def test_list_unauthorized(self, mock_sleep): + mock_list = self.mock_swift_conn.get_container + mock_list.side_effect = DXException( + 'unauthorized', http_status=403, http_response_headers={'X-Trans-Id': 'transactionid'}) + + dx_p = DXPath('dx://project/path') + with self.assertRaises(swift.UnauthorizedError): + try: + dx_p.list() + except swift.UnauthorizedError as exc: + self.assertIn('X-Trans-Id: transactionid', str(exc)) + self.assertIn('X-Trans-Id: transactionid', repr(exc)) + raise + def test_listdir(self): mock_list = self.mock_dx_conn.get_iterator mock_list.return_value = ({}, [{ @@ -254,8 +291,79 @@ def test_listdir(self): dx_p = DXPath('dx://project/path/to') results = list(dx_p.listdir()) - self.assertListEqual(results, [ + self.assertDXListsEqual(results, [ 'dx://project/path/to/resource1', 'dx://project/path/to/resource2', 'dx://project/path/to/resource3' ]) + + + @mock.patch('os.path', ntpath) + def test_list_windows(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'path/to/resource1' + }, { + 'name': 'path/to/resource2' + }, { + 'name': 'path/to/resource3' + }, { + 'name': 'path/to/resource4' + }]) + + dx_p = DXPath('dx://project/path') + results = list(dx_p.list()) + self.assertDXListsEqual(results, [ + 'dx://project/path/to/resource1', + 'dx://project/path/to/resource2', + 'dx://project/path/to/resource3', + 'dx://project/path/to/resource4' + ]) + + def test_list_limit(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'path/to/resource1' + }]) + + dx_p = DXPath('dx://project/path') + results = list(dx_p.list(limit=1)) + self.assertEquals(results, [ + 'dx://project/path/to/resource1' + ]) + + def test_list_starts_with(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'r1' + }, { + 'name': 'r2' + }]) + dx_p = DXPath('dx://project/r') + results = list(dx_p.list(starts_with='prefix')) + self.assertDXListsEqual(results, [ + 'dx://project/r1', + 'dx://project/r2' + ]) + mock_list.assert_called_once_with('project', + prefix='r/prefix', + limit=None, + full_listing=True) + + def test_list_starts_with_no_resource(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'r1' + }, { + 'name': 'r2' + }]) + dx_p = DXPath('dx://project') + results = list(dx_p.list(starts_with='prefix')) + self.assertDXListsEqual(results, [ + 'dx://project/r1', + 'dx://project/r2' + ]) + mock_list.assert_called_once_with('container', + prefix='prefix', + limit=None, + full_listing=True) From fbfe959c872d2beb1c61549a8e2d73dd60ff5c16 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Fri, 17 Aug 2018 08:56:24 -0700 Subject: [PATCH 04/16] adding tests for dxpath.walkfiles --- stor/tests/test_dx.py | 57 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index b9f27a65..e8038348 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -203,7 +203,7 @@ def test_write_multiple_flush_multiple_upload(self, mock_upload): self.assertEqual(open(ntf3.name).read(), 'hello world') -class TestSwiftShared(SharedOBSFileCases, DXTestCase): +class TestDXShared(SharedOBSFileCases, DXTestCase): drive = 'dx://' path_class = DXPath normal_path = DXPath('dx://project/obj') @@ -264,11 +264,12 @@ def test_list_unavailable(self, mock_sleep): @mock.patch('time.sleep', autospec=True) def test_list_unauthorized(self, mock_sleep): - mock_list = self.mock_swift_conn.get_container + mock_list = self.mock_dx_conn.get_iterator mock_list.side_effect = DXException( 'unauthorized', http_status=403, http_response_headers={'X-Trans-Id': 'transactionid'}) dx_p = DXPath('dx://project/path') + # TODO(akumar) move errors to dx class with self.assertRaises(swift.UnauthorizedError): try: dx_p.list() @@ -367,3 +368,55 @@ def test_list_starts_with_no_resource(self): prefix='prefix', limit=None, full_listing=True) + + +class TestWalkFiles(DXTestCase): + def test_no_pattern_w_dir_markers(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'my/obj1', + 'content_type': 'application/directory' + }, { + 'name': 'my/obj2', + 'content_type': 'text/directory' + }, { + 'name': 'my/obj3', + 'content_type': 'application/octet-stream' + }, { + 'name': 'my/obj4', + 'content_type': 'application/octet-stream' + }]) + + f = list(DXPath('dx://project/').walkfiles()) + self.assertEquals(set(f), set([ + DXPath('dx://project/my/obj3'), + DXPath('dx://project/my/obj4') + ])) + + def test_w_pattern_w_dir_markers(self): + mock_list = self.mock_dx_conn.get_iterator + mock_list.return_value = ({}, [{ + 'name': 'my/obj1', + 'content_type': 'application/directory' + }, { + 'name': 'my/obj2', + 'content_type': 'text/directory' + }, { + 'name': 'my/obj3', + 'content_type': 'application/octet-stream' + }, { + 'name': 'my/obj4.sh', + 'content_type': 'application/octet-stream' + }, { + 'name': 'my/other/obj5.sh', + 'content_type': 'application/octet-stream' + }, { + 'name': 'my/dirwithpattern.sh/obj6', + 'content_type': 'application/octet-stream' + }]) + + f = list(DXPath('dx://project').walkfiles('*.sh')) + self.assertEquals(set(f), set([ + DXPath('dx://project/my/obj4.sh'), + DXPath('dx://project/my/other/obj5.sh') + ])) From 91440f18affc9918e0a331417a332b2ad0c97f1b Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Fri, 17 Aug 2018 08:57:12 -0700 Subject: [PATCH 05/16] adding encoding env variables to tox for pycharm bug --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 7191dc5e..05ac1983 100644 --- a/tox.ini +++ b/tox.ini @@ -18,4 +18,4 @@ whitelist_externals = make nosetests passenv = SWIFT_TEST_USERNAME SWIFT_TEST_PASSWORD OS_TEMP_URL_KEY AWS_TEST_ACCESS_KEY_ID AWS_DEFAULT_REGION - AWS_TEST_SECRET_ACCESS_KEY OS_AUTH_URL + AWS_TEST_SECRET_ACCESS_KEY OS_AUTH_URL LC_LANG LC_ALL PYTHONIOENCODING PYTEST_ADDOPTS From 9f81fcedfdaa1d0002984f7a3ee098c2110c812e Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Fri, 17 Aug 2018 09:45:58 -0700 Subject: [PATCH 06/16] adding tests for dxIDs and TestGlob --- stor/tests/test_dx.py | 45 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index e8038348..ca06ce9e 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -59,8 +59,24 @@ def test_failed_new(self): DXPath('/bad/dx/path') def test_successful_new(self): - dx_p = DXPath('dx://project/path') - self.assertEquals(dx_p, 'dx://project/path') + p = DXPath('dx://project/path') + self.assertEquals(p, 'dx://project/path') + + def test_success_dx_id(self): + # dnanexus always has 24-chr key + # TODO (akumar) does initializing with project id actually initialize with abs dx path? + p = DXPath('dx://project-123456789012345678901234') + self.assertEqual(len(p.basename()), 32) # 'project-' and 24-chr key + + p = DXPath('dx://project-123456789012345678901234/file-123456789012345678901234') + self.assertEqual(len(p.dirname()), 32) # 'project-' and 24-chr key + self.assertEqual(len(p.basename()), 29) # 'file-' and 24-chr key + + def test_failed_dx_id(self): + with self.assertRaises(ValueError): + DXPath('dx://project-1234') + with self.assertRaises(ValueError): + DXPath('dx://project-123456789012345678901234/file-1234') class TestRepr(unittest.TestCase): @@ -420,3 +436,28 @@ def test_w_pattern_w_dir_markers(self): DXPath('dx://project/my/obj4.sh'), DXPath('dx://project/my/other/obj5.sh') ])) + + +@mock.patch.object(DXPath, 'list', autospec=True) +class TestGlob(DXTestCase): + def test_valid_pattern(self, mock_list): + dx_p = DXPath('dx://project') + dx_p.glob('pattern*') + mock_list.assert_called_once_with(mock.ANY, starts_with='pattern') + + def test_valid_pattern_wo_wildcard(self, mock_list): + dx_p = DXPath('dx://project') + dx_p.glob('pattern') + mock_list.assert_called_once_with(mock.ANY, starts_with='pattern') + + def test_multi_glob_pattern(self, mock_list): + dx_p = DXPath('dx://project') + with self.assertRaises(ValueError): + dx_p.glob('*invalid_pattern*', condition=None) + + def test_invalid_glob_pattern(self, mock_list): + dx_p = DXPath('dx://project') + with self.assertRaises(ValueError): + dx_p.glob('invalid_*pattern', condition=None) + + # TODO(akumar) add tests for retries From 39bcb9f344af96beae7d23e63a0f4dd13f496b95 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Mon, 20 Aug 2018 09:37:08 -0700 Subject: [PATCH 07/16] adding DXVirtualPath, DXCanonicalPath and stat methods --- requirements-dev.txt | 5 +-- stor/base.py | 4 ++- stor/dx.py | 81 +++++++++++++++++++++++++++++++++++++------- stor/utils.py | 43 +++++++++++++++++++++++ 4 files changed, 118 insertions(+), 15 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index a2a73724..35f52bd4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,7 @@ # Requirements needed for running the test suite. +cached-property +dxpy flake8==2.4.1 freezegun==0.3.6 mock==1.3.0 @@ -8,5 +10,4 @@ testfixtures==4.7.0 coverage==4.4.1 pytest==3.2.3 pytest-cov==2.5.1 -vcrpy-unittest -dxpy \ No newline at end of file +vcrpy-unittest \ No newline at end of file diff --git a/stor/base.py b/stor/base.py index 3d0cd290..14a78e08 100644 --- a/stor/base.py +++ b/stor/base.py @@ -45,7 +45,9 @@ def __new__(cls, path): if cls is Path: if not hasattr(path, 'startswith'): raise TypeError('must be a string like') - if utils.is_swift_path(path): + if utils.is_dx_path(path): + cls = utils.find_dx_class(path) + elif utils.is_swift_path(path): from stor.swift import SwiftPath cls = SwiftPath diff --git a/stor/dx.py b/stor/dx.py index 49735758..6f989f94 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -1,5 +1,9 @@ +from cached_property import cached_property + +import dxpy.bindings as db + from stor.obs import OBSPath -import dxpy + class DXPath(OBSPath): """ @@ -12,7 +16,7 @@ class DXPath(OBSPath): def project(self): """Returns the project name from the path or None""" parts = self._get_parts() - return parts[0] if len(parts) > 0 and parts[0] else None + return parts[0][:-1] if len(parts) > 0 and parts[0] else None #TODO(akumar) def _get_dx_connection_vars(self): @@ -22,12 +26,6 @@ def _get_dx_connection_vars(self): def _dx_api_call(self): raise NotImplementedError - def canonicalize(self): - raise NotImplementedError - - def humanize(self): - raise NotImplementedError - def temp_url(self): raise NotImplementedError @@ -49,9 +47,6 @@ def isdir(self): def isfile(self): raise NotImplementedError - def stat(self): - raise NotImplementedError - def getsize(self): raise NotImplementedError @@ -96,4 +91,66 @@ def exists(self): For directories on swift, checks whether directory sentinel exists or at least one subdirectory exists""" - raise NotImplementedError \ No newline at end of file + raise NotImplementedError + + +class DXVirtualPath(DXPath): + + @cached_property + def canonical_project(self): + """Returns the first project that matches the name that user has view access to. + If no match is found, returns None + """ + # we need the DX project-ID to get the file_ID and full path + proj_dict = db.search.find_one_project( + name=self.project, level='VIEW', zero_ok=True, more_ok=False) + if proj_dict: + return proj_dict['id'] + + @cached_property + def canonical_resource(self): + """Returns the dx file-ID of the first matched filename""" + if not self.resource: + return None + objects = [{ + 'name': self.name, + 'folder': '/' + self.resource.parent, + 'project': self.canonical_project + }] + object_d = next(iter(db.search.resolve_data_objects(objects=objects)[0]), None) + if object_d: + return object_d['id'] + + def canonical_path(self): + """returns the first file that matches the given path""" + return DXCanonicalPath(self.drive + self.canonical_project + ':') / (self.canonical_resource or '') + + @cached_property + def stat(self): + return self.canonical_path().stat() + + +class DXCanonicalPath(DXPath): + + def virtual_project(self): + return self.virtual_path.project + + def virtual_resource(self): + return self.virtual_path.resource + + @cached_property + def virtual_path(self): + proj = db.dxproject.DXProject(dxid=self.project) + virtual_p = DXVirtualPath(self.drive + proj.name + ':/') + if self.resource: + file_h = db.DXFile(dxid=self.resource) + virtual_p = virtual_p / file_h.folder[1:] / file_h.name + print(virtual_p) + return virtual_p + + @cached_property + def stat(self): + if self.resource: + return db.dxdataobject_functions.describe(self.resource) + elif self.project: + return db.dxdataobject_functions.describe(self.project) diff --git a/stor/utils.py b/stor/utils.py index a475e119..2e457c90 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -8,6 +8,9 @@ from subprocess import check_call import tempfile +from dxpy.bindings import verify_string_dxid +from dxpy.exceptions import DXError + from stor import exceptions logger = logging.getLogger(__name__) @@ -230,6 +233,46 @@ def is_obs_path(p): return is_s3_path(p) or is_swift_path(p) +def is_dx_path(p): + """Determines if the path is a DX path. + + All DX paths start with ``dx://`` + + Args: + p (str): The path string + + Returns + bool: True if p is a DX path, False otherwise. + """ + from stor.dx import DXPath + return p.startswith(DXPath.drive) + + +def find_dx_class(p): + """Finds the class of the DX path : DXVirtualPath or DXCanonicalPath + + Args: + p (str): The path string + + Returns + cls: DXVirtualPath or DXCanonicalPath + """ + from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath + parts = p[len(DXPath.drive):].split('/') + if not parts or not parts[0]: + assert False, 'Only project level paths are supported for DX currently' + try: + # verify_string_dxid returns None if success, raises error if failed + if not verify_string_dxid(parts[0][:-1], 'project'): + if len(parts) > 1 and parts[1] and not verify_string_dxid(parts[1], 'file'): + if len(parts) > 2: + assert False, 'Invalid DX path: {}'.format(p) + else: + return DXCanonicalPath + except DXError: # DXPath of form 'dx://project-{ID}:/a/b/c' or 'dx://a/b/c' + return DXVirtualPath + + def is_writeable(path, swift_retry_options=None): """ Determine whether we have permission to write to path. From 26e0c7b83221591a5ca20afb9a4e79d7d65e60c0 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Tue, 21 Aug 2018 09:35:45 -0700 Subject: [PATCH 08/16] Cleaned up and added tests for initializing DXPaths --- stor/dx.py | 83 ++++++++++++++++--- stor/tests/test_dx_path_compat.py | 133 +++++++++++++++++++++++++----- stor/utils.py | 19 ++++- 3 files changed, 197 insertions(+), 38 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index 6f989f94..199ae2af 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -1,7 +1,9 @@ from cached_property import cached_property -import dxpy.bindings as db +import dxpy.bindings as dxb +from dxpy.exceptions import DXError, DXSearchError +from stor import Path from stor.obs import OBSPath @@ -10,13 +12,28 @@ class DXPath(OBSPath): Provides the ability to manipulate and access resources on swift with a similar interface to the path library. """ + + def __new__(cls, path): + return super(DXPath, cls).__new__(Path, path) + drive = 'dx://' + def _get_parts(self): + """Returns the path parts (excluding the drive) as a list of strings.""" + parts = super(DXPath, self)._get_parts() + if len(parts) > 0 and parts[0]: + # first part can be 'proj:file' or 'proj:' or 'proj' + parts_first = parts[0].split(':') + parts[0] = parts_first[0] + if len(parts_first) > 1 and parts_first[1]: + parts.insert(1, parts_first[1]) + return parts + @property def project(self): """Returns the project name from the path or None""" parts = self._get_parts() - return parts[0][:-1] if len(parts) > 0 and parts[0] else None + return parts[0] if len(parts) > 0 and parts[0] else None #TODO(akumar) def _get_dx_connection_vars(self): @@ -29,6 +46,11 @@ def _dx_api_call(self): def temp_url(self): raise NotImplementedError + @property + def resource(self): + res = super(DXPath, self).resource + return self.parts_class('/'+res) if res else None + def first(self): # may not need it raise NotImplementedError @@ -95,21 +117,42 @@ def exists(self): class DXVirtualPath(DXPath): + """Class Handler for DXPath of form 'dx://project-{ID}:/a/b/c' or 'dx://a/b/c'""" + + @property + def virtual_project(self): + return self.project + + @property + def virtual_resource(self): + return self.resource + + @property + def virtual_path(self): + return self @cached_property def canonical_project(self): """Returns the first project that matches the name that user has view access to. If no match is found, returns None """ - # we need the DX project-ID to get the file_ID and full path - proj_dict = db.search.find_one_project( - name=self.project, level='VIEW', zero_ok=True, more_ok=False) - if proj_dict: - return proj_dict['id'] + try: + if not dxb.verify_string_dxid(self.project, 'project'): + return self.project + except DXError: + try: + proj_dict = dxb.search.find_one_project( + name=self.project, level='VIEW', zero_ok=True, more_ok=False) + if proj_dict: + return proj_dict['id'] + except DXSearchError as e: + raise e('Did not find exactly 1 matching project name to ID') @cached_property def canonical_resource(self): """Returns the dx file-ID of the first matched filename""" + if not self.ext: + raise ValueError('DXPath must be file with extension') if not self.resource: return None objects = [{ @@ -117,10 +160,11 @@ def canonical_resource(self): 'folder': '/' + self.resource.parent, 'project': self.canonical_project }] - object_d = next(iter(db.search.resolve_data_objects(objects=objects)[0]), None) + object_d = next(iter(dxb.search.resolve_data_objects(objects=objects)[0]), None) if object_d: return object_d['id'] + @property def canonical_path(self): """returns the first file that matches the given path""" return DXCanonicalPath(self.drive + self.canonical_project + ':') / (self.canonical_resource or '') @@ -131,26 +175,41 @@ def stat(self): class DXCanonicalPath(DXPath): + """Class Handler for DXPath of form 'dx://project-{ID}:/file-{ID}' or 'dx://project-{ID}:'""" + @property def virtual_project(self): return self.virtual_path.project + @property def virtual_resource(self): return self.virtual_path.resource @cached_property def virtual_path(self): - proj = db.dxproject.DXProject(dxid=self.project) + proj = dxb.dxproject.DXProject(dxid=self.project) virtual_p = DXVirtualPath(self.drive + proj.name + ':/') if self.resource: - file_h = db.DXFile(dxid=self.resource) + file_h = dxb.DXFile(dxid=self.resource) virtual_p = virtual_p / file_h.folder[1:] / file_h.name print(virtual_p) return virtual_p + @property + def canonical_project(self): + return self.project + + @property + def canonical_resource(self): + return self.resource + + @property + def canonical_path(self): + return self + @cached_property def stat(self): if self.resource: - return db.dxdataobject_functions.describe(self.resource) + return dxb.dxdataobject_functions.describe(self.resource) elif self.project: - return db.dxdataobject_functions.describe(self.project) + return dxb.dxdataobject_functions.describe(self.project) diff --git a/stor/tests/test_dx_path_compat.py b/stor/tests/test_dx_path_compat.py index e12a5def..3c4b8c78 100644 --- a/stor/tests/test_dx_path_compat.py +++ b/stor/tests/test_dx_path_compat.py @@ -1,55 +1,144 @@ +import pytest import unittest -from stor.dx import DXPath +from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath class TestBasics(unittest.TestCase): def test_relpath(self): with self.assertRaises(AttributeError): - DXPath('dx://project').relpathto() + DXPath('dx://project:').relpathto() with self.assertRaises(AttributeError): - DXPath('dx://project').relpath() + DXPath('dx://project:').relpath() def test_construction_from_none(self): - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): DXPath(None) + def test_construction_from_no_project(self): + with pytest.raises(ValueError, match='Must specify a project'): + DXPath('dx://') + + def test_canonical_construct_fail(self): + with pytest.raises(ValueError, match='Invalid DXPath'): + DXPath('dx://project-123456789012345678901234:/file-123456789012345678901234/a/') + + def test_canonical_construct_wo_file(self): + for path_str in [ + 'dx://project-123456789012345678901234:/', + 'dx://project-123456789012345678901234:', + 'dx://project-123456789012345678901234' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXCanonicalPath, 'Expected canonical DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + + def test_canonical_construct_w_file(self): + for path_str in [ + 'dx://project-123456789012345678901234:file-123456789012345678901234', + 'dx://project-123456789012345678901234:/file-123456789012345678901234' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXCanonicalPath, 'Expected canonical DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/file-123456789012345678901234', + 'Resource parsing error for %s' % p) + + def test_virtual_construct_wo_resource(self): + for path_str in [ + 'dx://proj123:/', + 'dx://proj123:', + 'dx://proj123' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'proj123', + 'Project parsing unsuccessful for %s' % p) + + def test_virtual_construct_wo_folder(self): + for path_str in [ + 'dx://proj123:/a.ext', + 'dx://proj123:a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'proj123', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(str(p.resource), '/a.ext', + 'Resource parsing error for %s' % p) + + for path_str in [ + 'dx://project-123456789012345678901234:/a.ext', + 'dx://project-123456789012345678901234:a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/a.ext', + 'Resource parsing error for %s' % p) + + def test_virtual_construct_w_folder(self): + for path_str in [ + 'dx://proj123:/b/c/a.ext', + 'dx://proj123:b/c/a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'proj123', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/b/c/a.ext', + 'Resource parsing error for %s' % p) + + for path_str in [ + 'dx://project-123456789012345678901234:/b/c/a.ext', + 'dx://project-123456789012345678901234:b/c/a.ext' + ]: + p = DXPath(path_str) + self.assertIsInstance(p, DXVirtualPath, 'Expected virtual DXPath for (%s)' % p) + self.assertEqual(p.project, 'project-123456789012345678901234', + 'Project parsing unsuccessful for %s' % p) + self.assertEqual(p.resource, '/b/c/a.ext', + 'Resource parsing error for %s' % p) + def test_string_compatibility(self): """ Test compatibility with ordinary strings. """ - x = DXPath('dx://xyzzy') - assert x == 'dx://xyzzy' - assert x == str('dx://xyzzy') + x = DXPath('dx://xyzzy:') + assert x == 'dx://xyzzy:' + assert x == str('dx://xyzzy:') assert 'xyz' in x assert 'analysis' not in x # sorting - items = [DXPath('dx://fhj'), - DXPath('dx://fgh'), - 'dx://E', - DXPath('dx://d'), - 'dx://A', - DXPath('dx://B'), - 'dx://c'] + items = [DXPath('dx://fhj:'), + DXPath('dx://fgh:'), + 'dx://E:', + DXPath('dx://d:'), + 'dx://A:', + DXPath('dx://B:'), + 'dx://c:'] items.sort() self.assertEqual(items, - ['dx://A', 'dx://B', 'dx://E', 'dx://c', - 'dx://d', 'dx://fgh', 'dx://fhj']) + ['dx://A:', 'dx://B:', 'dx://E:', 'dx://c:', + 'dx://d:', 'dx://fgh:', 'dx://fhj:']) # Test p1/p1. - p1 = DXPath("dx://foo") + p1 = DXPath("dx://foo:") p2 = "bar" - self.assertEqual(p1 / p2, DXPath("dx://foo/bar")) + self.assertEqual(p1 / p2, DXPath("dx://foo:/bar")) def test_properties(self): - # Create sample path object. - f = DXPath('dx://project/prefix/whatever.csv') + # Create sample DXPath object. + f = DXPath('dx://project:/prefix/whatever.csv') - self.assertEqual(f.parent, DXPath('dx://project/prefix')) + self.assertEqual(f.parent, DXPath('dx://project:/prefix')) # .name self.assertEqual(f.name, 'whatever.csv') self.assertEqual(f.parent.name, 'prefix') - self.assertEqual(f.parent.parent.name, 'project') + self.assertEqual(f.parent.parent.name, 'project:') # .ext self.assertEqual(f.ext, '.csv') diff --git a/stor/utils.py b/stor/utils.py index 2e457c90..8b96e46e 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -260,17 +260,28 @@ def find_dx_class(p): from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath parts = p[len(DXPath.drive):].split('/') if not parts or not parts[0]: - assert False, 'Only project level paths are supported for DX currently' + raise ValueError('Must specify a project for DXPath') + + # first part can be 'proj:file' or 'proj:' or 'proj' + parts_first = parts[0].split(':') + parts[0] = parts_first[0] + if len(parts_first) > 1 and parts_first[1]: + parts.insert(1, parts_first[1]) + try: # verify_string_dxid returns None if success, raises error if failed - if not verify_string_dxid(parts[0][:-1], 'project'): - if len(parts) > 1 and parts[1] and not verify_string_dxid(parts[1], 'file'): + if not verify_string_dxid(parts[0], 'project'): + if len(parts) == 1 or (len(parts) > 1 and not parts[1]): + return DXCanonicalPath + if not verify_string_dxid(parts[1], 'file'): if len(parts) > 2: - assert False, 'Invalid DX path: {}'.format(p) + raise ValueError('Invalid DXPath: {}'.format(p)) else: return DXCanonicalPath except DXError: # DXPath of form 'dx://project-{ID}:/a/b/c' or 'dx://a/b/c' return DXVirtualPath + except: + raise def is_writeable(path, swift_retry_options=None): From d3fe87acb8e924e53455220c750eee16ae15c367 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Wed, 22 Aug 2018 09:06:33 -0700 Subject: [PATCH 09/16] incorporating review comments --- stor/dx.py | 83 ++++++++++++++++++++++++++++++------------- stor/tests/test_dx.py | 24 +++++++++++++ stor/utils.py | 37 ++++++++++++------- 3 files changed, 108 insertions(+), 36 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index 199ae2af..e90a609e 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -1,12 +1,43 @@ from cached_property import cached_property +import six import dxpy.bindings as dxb -from dxpy.exceptions import DXError, DXSearchError +import dxpy.exceptions as dx_exceptions +from dxpy.exceptions import DXError +from stor import exceptions as stor_exceptions from stor import Path +from stor import utils from stor.obs import OBSPath +DNAnexusError = stor_exceptions.RemoteError +NotFoundError = stor_exceptions.NotFoundError + + +def _parse_dx_error(exc, **kwargs): + """ + Parses DXError exceptions to throw a more informative exception. + """ + msg = exc.message + exc_type = type(exc) + + if exc_type is dx_exceptions.DXSearchError: + if msg and 'found more' in msg.lower(): + return DuplicateError(msg, exc) + elif msg and 'found none' in msg.lower(): + return NotFoundError(msg, exc) + + +class DuplicateError(DNAnexusError): + """Thrown when multiple projects exist with the same name + + Currently, we throw this when trying to get the canonical project + from virtual path and two or more projects were found with same name + """ + pass + + class DXPath(OBSPath): """ Provides the ability to manipulate and access resources on swift @@ -18,6 +49,8 @@ def __new__(cls, path): drive = 'dx://' + __stat = None + def _get_parts(self): """Returns the path parts (excluding the drive) as a list of strings.""" parts = super(DXPath, self)._get_parts() @@ -35,7 +68,7 @@ def project(self): parts = self._get_parts() return parts[0] if len(parts) > 0 and parts[0] else None - #TODO(akumar) + # TODO(akumar) make sure DX_LOGIN_TOKEN is set up here def _get_dx_connection_vars(self): raise NotImplementedError @@ -48,8 +81,9 @@ def temp_url(self): @property def resource(self): - res = super(DXPath, self).resource - return self.parts_class('/'+res) if res else None + parts = self._get_parts() + joined_resource = '/'.join(parts[1:]) if len(parts) > 1 else None + return self.parts_class('/'+joined_resource) if joined_resource else None def first(self): # may not need it raise NotImplementedError @@ -115,6 +149,16 @@ def exists(self): at least one subdirectory exists""" raise NotImplementedError + def stat(self): + if not self.__stat: + + if self.canonical_resource: + self.__stat = dxb.DXFile(dxid=self.canonical_resource, + project=self.canonical_project).describe() + else: + self.__stat = dxb.DXProject(dxid=self.canonical_project).describe() + return self.__stat + class DXVirtualPath(DXPath): """Class Handler for DXPath of form 'dx://project-{ID}:/a/b/c' or 'dx://a/b/c'""" @@ -135,18 +179,21 @@ def virtual_path(self): def canonical_project(self): """Returns the first project that matches the name that user has view access to. If no match is found, returns None + + Raises: + DuplicateError - if project name is not unique on DX platform + NotFoundError - If project name doesn't exist on DNAnexus """ - try: - if not dxb.verify_string_dxid(self.project, 'project'): - return self.project - except DXError: + if utils.is_valid_dxid(self.project, 'project'): + return self.project + else: try: proj_dict = dxb.search.find_one_project( name=self.project, level='VIEW', zero_ok=True, more_ok=False) - if proj_dict: + if proj_dict and proj_dict['id']: return proj_dict['id'] - except DXSearchError as e: - raise e('Did not find exactly 1 matching project name to ID') + except DXError as e: + six.raise_from(_parse_dx_error(e), e) @cached_property def canonical_resource(self): @@ -169,10 +216,6 @@ def canonical_path(self): """returns the first file that matches the given path""" return DXCanonicalPath(self.drive + self.canonical_project + ':') / (self.canonical_resource or '') - @cached_property - def stat(self): - return self.canonical_path().stat() - class DXCanonicalPath(DXPath): """Class Handler for DXPath of form 'dx://project-{ID}:/file-{ID}' or 'dx://project-{ID}:'""" @@ -187,12 +230,11 @@ def virtual_resource(self): @cached_property def virtual_path(self): - proj = dxb.dxproject.DXProject(dxid=self.project) + proj = dxb.DXProject(dxid=self.project) virtual_p = DXVirtualPath(self.drive + proj.name + ':/') if self.resource: file_h = dxb.DXFile(dxid=self.resource) virtual_p = virtual_p / file_h.folder[1:] / file_h.name - print(virtual_p) return virtual_p @property @@ -206,10 +248,3 @@ def canonical_resource(self): @property def canonical_path(self): return self - - @cached_property - def stat(self): - if self.resource: - return dxb.dxdataobject_functions.describe(self.resource) - elif self.project: - return dxb.dxdataobject_functions.describe(self.project) diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index ca06ce9e..a495ba83 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -17,6 +17,7 @@ from stor import swift from stor import utils from stor.dx import DXPath +import stor.dx as dx from stor.test import DXTestCase from stor.tests.shared_obs import SharedOBSFileCases @@ -461,3 +462,26 @@ def test_invalid_glob_pattern(self, mock_list): dx_p.glob('invalid_*pattern', condition=None) # TODO(akumar) add tests for retries + + +# TODO(akumar) insert mocks here +class TestStat(DXTestCase): + def test_stat_failure(self): + with self.assertRaises(ValueError): + DXPath('dx://Test_Project:/Test_Folder/').stat() + with self.assertRaises(ValueError): + DXPath('dx://Test_Project:/Test_File_No_Ext').stat() + with self.assertRaises(dx.DuplicateError): + DXPath('dx://Duplicate_Project:').stat() + with self.assertRaises(dx.NotFoundError): + DXPath('dx://Random_Proj:/').stat() + + def test_stat_project(self): + dx_p = DXPath('dx://Test_Project:/') + response = dx_p.stat() + self.assertIn('region', response) # only projects have regions + + def test_stat_file(self): + dx_p = DXPath('dx://Test_Project:/Test_File.txt') + response = dx_p.stat() + self.assertIn('folder', response) # only files have folders diff --git a/stor/utils.py b/stor/utils.py index 8b96e46e..b4b1c564 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -248,6 +248,21 @@ def is_dx_path(p): return p.startswith(DXPath.drive) +def is_valid_dxid(dxid, expected_classes): + """wrapper class for verify_string_dxid, because + verify_string_dxid returns None if success, raises error if failed + + Args: Accepts same args as verify_string_dxid + + Returns + bool: Whether given dxid is a valid path of one of expected_classes + """ + try: + return verify_string_dxid(dxid, expected_classes) is None + except DXError: + return False + + def find_dx_class(p): """Finds the class of the DX path : DXVirtualPath or DXCanonicalPath @@ -268,20 +283,18 @@ def find_dx_class(p): if len(parts_first) > 1 and parts_first[1]: parts.insert(1, parts_first[1]) - try: - # verify_string_dxid returns None if success, raises error if failed - if not verify_string_dxid(parts[0], 'project'): - if len(parts) == 1 or (len(parts) > 1 and not parts[1]): + if is_valid_dxid(parts[0], 'project'): + if len(parts) == 1 or (len(parts) > 1 and not parts[1]): + return DXCanonicalPath + if is_valid_dxid(parts[1], 'file'): + if len(parts) > 2: + raise ValueError('Invalid DXPath: {}'.format(p)) + else: return DXCanonicalPath - if not verify_string_dxid(parts[1], 'file'): - if len(parts) > 2: - raise ValueError('Invalid DXPath: {}'.format(p)) - else: - return DXCanonicalPath - except DXError: # DXPath of form 'dx://project-{ID}:/a/b/c' or 'dx://a/b/c' + else: # DXPath of form 'dx://project-{ID}:/a/b/c' + return DXVirtualPath + else: # DXPath of form 'dx://a/b/c' return DXVirtualPath - except: - raise def is_writeable(path, swift_retry_options=None): From f782c2a04bff77b201ca6b87b65f93f25c353fe8 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Wed, 22 Aug 2018 13:02:28 -0700 Subject: [PATCH 10/16] simplifying find_dx_class and other review comments --- stor/dx.py | 25 ++++++++++++++++----- stor/tests/test_dx_path_compat.py | 2 +- stor/utils.py | 37 +++++++++++++------------------ 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index e90a609e..b3c5fa62 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -24,12 +24,12 @@ def _parse_dx_error(exc, **kwargs): if exc_type is dx_exceptions.DXSearchError: if msg and 'found more' in msg.lower(): - return DuplicateError(msg, exc) + return DuplicateProjectError(msg, exc) elif msg and 'found none' in msg.lower(): return NotFoundError(msg, exc) -class DuplicateError(DNAnexusError): +class DuplicateProjectError(DNAnexusError): """Thrown when multiple projects exist with the same name Currently, we throw this when trying to get the canonical project @@ -38,6 +38,15 @@ class DuplicateError(DNAnexusError): pass +class ProjectNotFoundError(NotFoundError): + """Thrown when no project exists with the given name + + Currently, we throw this when trying to get the canonical project + from virtual path and no project was found with same name + """ + pass + + class DXPath(OBSPath): """ Provides the ability to manipulate and access resources on swift @@ -45,6 +54,10 @@ class DXPath(OBSPath): """ def __new__(cls, path): + """Custom __new__ method so that the validation checks happen during creation + + This ensures invalid dx paths like DXPath('dx://) are never initialized + """ return super(DXPath, cls).__new__(Path, path) drive = 'dx://' @@ -181,7 +194,7 @@ def canonical_project(self): If no match is found, returns None Raises: - DuplicateError - if project name is not unique on DX platform + DuplicateProjectError - if project name is not unique on DX platform NotFoundError - If project name doesn't exist on DNAnexus """ if utils.is_valid_dxid(self.project, 'project'): @@ -190,8 +203,10 @@ def canonical_project(self): try: proj_dict = dxb.search.find_one_project( name=self.project, level='VIEW', zero_ok=True, more_ok=False) - if proj_dict and proj_dict['id']: - return proj_dict['id'] + if proj_dict is None: + raise ProjectNotFoundError('No projects were found with given name ({})' + .format(self.project)) + return proj_dict['id'] except DXError as e: six.raise_from(_parse_dx_error(e), e) diff --git a/stor/tests/test_dx_path_compat.py b/stor/tests/test_dx_path_compat.py index 3c4b8c78..3ec0cfc0 100644 --- a/stor/tests/test_dx_path_compat.py +++ b/stor/tests/test_dx_path_compat.py @@ -16,7 +16,7 @@ def test_construction_from_none(self): DXPath(None) def test_construction_from_no_project(self): - with pytest.raises(ValueError, match='Must specify a project'): + with pytest.raises(ValueError, match='Project is required to construct a DXPath'): DXPath('dx://') def test_canonical_construct_fail(self): diff --git a/stor/utils.py b/stor/utils.py index b4b1c564..8e37a633 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -273,27 +273,22 @@ def find_dx_class(p): cls: DXVirtualPath or DXCanonicalPath """ from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath - parts = p[len(DXPath.drive):].split('/') - if not parts or not parts[0]: - raise ValueError('Must specify a project for DXPath') - - # first part can be 'proj:file' or 'proj:' or 'proj' - parts_first = parts[0].split(':') - parts[0] = parts_first[0] - if len(parts_first) > 1 and parts_first[1]: - parts.insert(1, parts_first[1]) - - if is_valid_dxid(parts[0], 'project'): - if len(parts) == 1 or (len(parts) > 1 and not parts[1]): - return DXCanonicalPath - if is_valid_dxid(parts[1], 'file'): - if len(parts) > 2: - raise ValueError('Invalid DXPath: {}'.format(p)) - else: - return DXCanonicalPath - else: # DXPath of form 'dx://project-{ID}:/a/b/c' - return DXVirtualPath - else: # DXPath of form 'dx://a/b/c' + colon_pieces = p[len(DXPath.drive):].split(':', 1) + if not colon_pieces or not colon_pieces[0]: + raise ValueError('Project is required to construct a DXPath') + project = colon_pieces[0] + resource = (colon_pieces[1] if len(colon_pieces) == 2 else '') + resource = resource[resource.startswith('/'):] + resource_parts = resource.split('/') + root_name, rest = resource_parts[0], resource_parts[1:] + canonical_resource = is_valid_dxid(root_name, 'file') + if canonical_resource and rest: + raise ValueError('Ambiguous and Invalid DXPath! ({})'.format(p)) + canonical_project = is_valid_dxid(project, 'project') + + if canonical_project and (canonical_resource or not resource): + return DXCanonicalPath + else: return DXVirtualPath From 85fa13f024f37ef46fc45e3c2c8a9977dd0a3fb2 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Thu, 23 Aug 2018 09:01:03 -0700 Subject: [PATCH 11/16] temporary non-working commit to sync up on vcr --- stor/dx.py | 10 ++++-- stor/test.py | 58 +++++++++++++++++-------------- stor/tests/test_dx.py | 34 +++++++++++++----- stor/tests/test_dx_path_compat.py | 2 +- stor/utils.py | 9 +++-- 5 files changed, 68 insertions(+), 45 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index b3c5fa62..838f7ad2 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -166,9 +166,11 @@ def stat(self): if not self.__stat: if self.canonical_resource: + print("trying with canonical resource"+self.canonical_resource+" "+self.canonical_project) self.__stat = dxb.DXFile(dxid=self.canonical_resource, project=self.canonical_project).describe() else: + print("trying with canonical project" + self.canonical_project) self.__stat = dxb.DXProject(dxid=self.canonical_project).describe() return self.__stat @@ -213,18 +215,20 @@ def canonical_project(self): @cached_property def canonical_resource(self): """Returns the dx file-ID of the first matched filename""" - if not self.ext: - raise ValueError('DXPath must be file with extension') if not self.resource: return None + if not self.ext: + raise ValueError('DXPath ({}) must have extension'.format(self)) objects = [{ 'name': self.name, - 'folder': '/' + self.resource.parent, + 'folder': self.resource.parent, 'project': self.canonical_project }] object_d = next(iter(dxb.search.resolve_data_objects(objects=objects)[0]), None) if object_d: return object_d['id'] + else: + raise NotFoundError('The virtual resource does not exist on DNAnexus') @property def canonical_path(self): diff --git a/stor/test.py b/stor/test.py index 273514c1..27260d58 100644 --- a/stor/test.py +++ b/stor/test.py @@ -2,8 +2,9 @@ import unittest import six -from vcr_unittest import VCRMixin import dxpy +import dxpy.bindings as dxb +from vcr_unittest import VCRMixin from stor import s3 from stor.s3 import S3Path @@ -234,45 +235,48 @@ def setUp(self): pass -class DXTestCase(unittest.TestCase, DXTestMixin): +class DXTestCase(DXTestMixin, unittest.TestCase): """A TestCase class that sets up DNAnexus vars and provides additional assertions""" + + def _get_vcr_kwargs(self): + kwargs = super(DXTestCase, self)._get_vcr_kwargs() + kwargs.update({'record_mode': 'new_episodes'}) + return kwargs + @classmethod def setUpClass(cls): - #TODO(akumar) set up project in DX for testing - super(DXTestCase, cls).setup_dx_auth() - cls.proj_handler = cls.setup_project() - cls.project = cls.proj_handler.name - cls.proj_id = cls.proj_handler.get_id() - - #TODO(akumar) Is there a way around using login_token for testing purposes? - # settings.update({ - # 'dx': { - # 'dx_login_token': '__dummy__' - # } - # }) + cls.project_handler = cls.setup_project() + cls.project = cls.project_handler.name + cls.proj_id = cls.project_handler.get_id() @classmethod def setup_project(cls): - test_proj = dxpy.bindings.dxproject.DXProject() - test_proj.new('Temp_Test') + test_proj = dxb.DXProject() + test_proj.new('Temp_Proj') return test_proj def setUp(self): - with dxpy.bindings.dxfile_functions.new_dxfile() as d: - d.write('data') - # subtle bug in dnanexus: on executing the above code, - # the state of file is left in 'open' state. However, when you - # do d.describe() or d.read(), it works and the file's state - # magically changes to closed. Hence, first force close below - d.state = 'closed' - self.file_handler = d + super(DXTestCase, self).setUp() + self.project_handler.new_folder('/temp_folder') + with dxb.dxfile_functions.new_dxfile(name='Temp_File.txt', + project=self.proj_id) as f: + f.write('data') + with dxb.dxfile_functions.new_dxfile(name='Temp_File2.txt', + folder='/temp_folder', + project=self.proj_id) as ff: + ff.write('temp_data') + self.file_handler = f + self.folder_file_handler = ff def tearDown(self): + self.project_handler.remove_folder('/temp_folder', recurse=True, force=True) + # Temp_File2.txt is already removed above. + self.folder_file_handler = None self.file_handler.remove() self.file_handler = None + @classmethod def tearDownClass(cls): - #TODO(akumar) remove the project created - cls.proj_handler.destroy() - cls.proj_handler = None + cls.project_handler.destroy() + cls.project_handler = None diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index a495ba83..8a0f7719 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -5,6 +5,7 @@ import unittest import dxpy +import dxpy.bindings as dxb import freezegun import mock from testfixtures import LogCapture @@ -464,24 +465,39 @@ def test_invalid_glob_pattern(self, mock_list): # TODO(akumar) add tests for retries -# TODO(akumar) insert mocks here class TestStat(DXTestCase): - def test_stat_failure(self): + def test_stat_no_ext(self): + with dxb.dxfile_functions.new_dxfile(name='Temp_File3', + project=self.proj_id) as f: + f.write('temp_data') with self.assertRaises(ValueError): - DXPath('dx://Test_Project:/Test_Folder/').stat() + DXPath('dx://Temp_Proj:/temp_folder/').stat() with self.assertRaises(ValueError): - DXPath('dx://Test_Project:/Test_File_No_Ext').stat() - with self.assertRaises(dx.DuplicateError): - DXPath('dx://Duplicate_Project:').stat() + DXPath('dx://Test_Project:/Temp_File3').stat() + f.remove() + + def test_stat_project_error(self): + test_proj = dxb.DXProject() + test_proj.new('Temp_Proj') + + with self.assertRaises(dx.DuplicateProjectError): + DXPath('dx://Temp_Proj:').stat() + with self.assertRaises(dx.DuplicateProjectError): + DXPath('dx://Temp_Proj:/').stat() with self.assertRaises(dx.NotFoundError): - DXPath('dx://Random_Proj:/').stat() + DXPath('dx://Random_Proj:').stat() + + test_proj.destroy() def test_stat_project(self): - dx_p = DXPath('dx://Test_Project:/') + dx_p = DXPath('dx://Temp_Proj:/') response = dx_p.stat() self.assertIn('region', response) # only projects have regions def test_stat_file(self): - dx_p = DXPath('dx://Test_Project:/Test_File.txt') + dx_p = DXPath('dx://Temp_Proj:/Temp_File.txt') response = dx_p.stat() self.assertIn('folder', response) # only files have folders + dx_p = DXPath('dx://Temp_Proj:/temp_folder/Temp_File2.txt') + response = dx_p.stat() + self.assertIn('folder', response) diff --git a/stor/tests/test_dx_path_compat.py b/stor/tests/test_dx_path_compat.py index 3ec0cfc0..e4671c5d 100644 --- a/stor/tests/test_dx_path_compat.py +++ b/stor/tests/test_dx_path_compat.py @@ -20,7 +20,7 @@ def test_construction_from_no_project(self): DXPath('dx://') def test_canonical_construct_fail(self): - with pytest.raises(ValueError, match='Invalid DXPath'): + with pytest.raises(ValueError, match='ambiguous'): DXPath('dx://project-123456789012345678901234:/file-123456789012345678901234/a/') def test_canonical_construct_wo_file(self): diff --git a/stor/utils.py b/stor/utils.py index 8e37a633..0097bf1c 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -277,16 +277,15 @@ def find_dx_class(p): if not colon_pieces or not colon_pieces[0]: raise ValueError('Project is required to construct a DXPath') project = colon_pieces[0] - resource = (colon_pieces[1] if len(colon_pieces) == 2 else '') - resource = resource[resource.startswith('/'):] + resource = (colon_pieces[1] if len(colon_pieces) == 2 else '').lstrip('/') resource_parts = resource.split('/') root_name, rest = resource_parts[0], resource_parts[1:] - canonical_resource = is_valid_dxid(root_name, 'file') + canonical_resource = is_valid_dxid(root_name, 'file') or not resource if canonical_resource and rest: - raise ValueError('Ambiguous and Invalid DXPath! ({})'.format(p)) + raise ValueError('DX folder paths that start with a valid file dxid are ambiguous') canonical_project = is_valid_dxid(project, 'project') - if canonical_project and (canonical_resource or not resource): + if canonical_project and canonical_resource: return DXCanonicalPath else: return DXVirtualPath From b011328d04437c0823e7704687d1cff11bd92d48 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Fri, 24 Aug 2018 08:18:25 -0700 Subject: [PATCH 12/16] implementations of list, listdir and walkfiles --- stor/dx.py | 82 +++++++++++++++++++++++++++++++++++++------ stor/test.py | 1 + stor/tests/test_dx.py | 11 ++++-- 3 files changed, 80 insertions(+), 14 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index 838f7ad2..80ae12aa 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -119,9 +119,6 @@ def isfile(self): def getsize(self): raise NotImplementedError - def walkfiles(self, pattern=None): - raise NotImplementedError - def download_object(self, dest): """Download a single path or object to file.""" raise NotImplementedError @@ -141,13 +138,76 @@ def open(self, mode='r', encoding=None): """ raise NotImplementedError - def list(self): + def list(self, canonicalize=False): """List contents using the resource of the path as a prefix.""" - raise NotImplementedError + return list(self.walkfiles(canonicalize=canonicalize)) - def listdir(self): + def listdir(self, canonicalize=False): """list the path as a dir, returning top-level directories and files.""" - raise NotImplementedError + proj_id = self.canonical_project + proj_name = self.virtual_project + ans_list = [] + if not self.resource: + obj_dict = dxb.DXProject(dxid=proj_id).list_folder( + describe={'fields': {'name': True, 'folder': True}} + ) + elif self.endswith('/'): + obj_dict = dxb.DXProject(dxid=proj_id).list_folder( + folder=self.resource, + describe={'fields': {'name': True, 'folder': True}} + ) + else: + return ans_list + for key, values in obj_dict: + for entry in values: + if key == 'folders': + ans_list.append(entry) + elif canonicalize: + ans_list.append(DXCanonicalPath('dx://{}:/{}'.format(proj_id, entry['id']))) + else: + ans_list.append(DXVirtualPath(self.drive + proj_name + ':/') + / entry['describe']['folder'] / entry['describe']['name']) + + def walkfiles(self, pattern=None, canonicalize=False): + """Iterates over listed files that match an optional pattern. + + :param pattern: pattern to match the filenames against. + :param canonicalize: boolean indicating whether to return canonicalized paths + :return: Iter[DXPath] Iterates over listed files that match an optional pattern. + """ + proj_id = self.canonical_project + proj_name = self.virtual_project + if not self.resource: + # the query performance is similar w/wo describe field, + # hence no need to customize query on canonicalize flag + list_gen = dxb.search.find_data_objects( + project=proj_id, + name=pattern, + name_mode='glob', + describe={'fields': {'name': True, 'folder': True}} + ) + elif self.endswith('/'): + list_gen = dxb.search.find_data_objects( + project=proj_id, + name=pattern, + name_mode='glob', + folder=self.resource, + describe={'fields': {'name': True, 'folder': True}} + ) + else: + list_gen = dxb.search.find_data_objects( + project=proj_id, + name=self.virtual_resource.name, + folder=self.resource.parent, + describe={'fields': {'name': True, 'folder': True}} + ) + for obj in list_gen: + if canonicalize: + yield DXCanonicalPath('dx://{}:/{}'.format(obj['project'], obj['id'])) + else: + dx_p = DXVirtualPath(self.drive + proj_name + ':/') + dx_p = dx_p / obj['describe']['folder'] / obj['describe']['name'] + yield dx_p def glob(self, pattern): """ Glob for pattern relative to this directory. @@ -166,11 +226,9 @@ def stat(self): if not self.__stat: if self.canonical_resource: - print("trying with canonical resource"+self.canonical_resource+" "+self.canonical_project) self.__stat = dxb.DXFile(dxid=self.canonical_resource, project=self.canonical_project).describe() else: - print("trying with canonical project" + self.canonical_project) self.__stat = dxb.DXProject(dxid=self.canonical_project).describe() return self.__stat @@ -180,6 +238,8 @@ class DXVirtualPath(DXPath): @property def virtual_project(self): + if utils.is_valid_dxid(self.project, 'project'): + return dxb.DXProject(dxid=self.project).name return self.project @property @@ -217,8 +277,8 @@ def canonical_resource(self): """Returns the dx file-ID of the first matched filename""" if not self.resource: return None - if not self.ext: - raise ValueError('DXPath ({}) must have extension'.format(self)) + if self.endswith('/'): + raise ValueError('DXPath ({}) ending in folders cannot be canonicalized'.format(self)) objects = [{ 'name': self.name, 'folder': self.resource.parent, diff --git a/stor/test.py b/stor/test.py index 27260d58..12f1ac8e 100644 --- a/stor/test.py +++ b/stor/test.py @@ -241,6 +241,7 @@ class DXTestCase(DXTestMixin, unittest.TestCase): def _get_vcr_kwargs(self): kwargs = super(DXTestCase, self)._get_vcr_kwargs() kwargs.update({'record_mode': 'new_episodes'}) + kwargs.update({'filter_headers': ['authorization']}) return kwargs @classmethod diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index 8a0f7719..4af4ade2 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -470,11 +470,13 @@ def test_stat_no_ext(self): with dxb.dxfile_functions.new_dxfile(name='Temp_File3', project=self.proj_id) as f: f.write('temp_data') + response = DXPath('dx://Temp_Proj:/Temp_File3').stat() + self.assertIn('folder', response) # only files have folders + f.remove() + + def test_stat_folder(self): with self.assertRaises(ValueError): DXPath('dx://Temp_Proj:/temp_folder/').stat() - with self.assertRaises(ValueError): - DXPath('dx://Test_Project:/Temp_File3').stat() - f.remove() def test_stat_project_error(self): test_proj = dxb.DXProject() @@ -493,6 +495,9 @@ def test_stat_project(self): dx_p = DXPath('dx://Temp_Proj:/') response = dx_p.stat() self.assertIn('region', response) # only projects have regions + dx_p = DXPath('dx://Temp_Proj:') + response = dx_p.stat() + self.assertIn('region', response) def test_stat_file(self): dx_p = DXPath('dx://Temp_Proj:/Temp_File.txt') From f436ecf70c43763cc618d72689760e70b4ba2784 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Tue, 4 Sep 2018 10:56:49 -0700 Subject: [PATCH 13/16] adding functionalities and tests for DX --- stor/dx.py | 304 ++++++++++++------ stor/test.py | 62 ++-- stor/tests/test_dx.py | 701 ++++++++++++++++++++++++------------------ stor/utils.py | 2 +- 4 files changed, 649 insertions(+), 420 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index 80ae12aa..20c748d3 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -1,7 +1,7 @@ from cached_property import cached_property import six -import dxpy.bindings as dxb +import dxpy import dxpy.exceptions as dx_exceptions from dxpy.exceptions import DXError @@ -24,12 +24,21 @@ def _parse_dx_error(exc, **kwargs): if exc_type is dx_exceptions.DXSearchError: if msg and 'found more' in msg.lower(): - return DuplicateProjectError(msg, exc) + return DuplicateError(msg, exc) elif msg and 'found none' in msg.lower(): return NotFoundError(msg, exc) -class DuplicateProjectError(DNAnexusError): +class DuplicateError(DNAnexusError): + """Thrown when multiple objects exist with the same name + + Currently, we throw this when trying to get the canonical project + from virtual path and two or more projects were found with same name + """ + pass + + +class DuplicateProjectError(DuplicateError): """Thrown when multiple projects exist with the same name Currently, we throw this when trying to get the canonical project @@ -49,7 +58,7 @@ class ProjectNotFoundError(NotFoundError): class DXPath(OBSPath): """ - Provides the ability to manipulate and access resources on swift + Provides the ability to manipulate and access resources on DNAnexus with a similar interface to the path library. """ @@ -75,22 +84,48 @@ def _get_parts(self): parts.insert(1, parts_first[1]) return parts + def _is_folder(self): + return self.resource and not self.resource.ext + + def _noop(attr_name): + def wrapper(self): + return type(self)(self) + wrapper.__name__ = attr_name + wrapper.__doc__ = 'No-op for %r' % attr_name + return wrapper + + abspath = _noop('abspath') + realpath = _noop('realpath') + expanduser = _noop('expanduser') + @property def project(self): """Returns the project name from the path or None""" parts = self._get_parts() return parts[0] if len(parts) > 0 and parts[0] else None - # TODO(akumar) make sure DX_LOGIN_TOKEN is set up here - def _get_dx_connection_vars(self): - raise NotImplementedError - - #TODO(akumar) - def _dx_api_call(self): - raise NotImplementedError + def temp_url(self, lifetime=300, filename=None): + """Obtains a temporary URL to a DNAnexus data-object. - def temp_url(self): - raise NotImplementedError + Args: + lifetime (int): The time (in seconds) the temporary + URL will be valid + filename (str, optional): A urlencoded filename to use for + attachment, otherwise defaults to object name + """ + try: + if self.canonical_resource: + file_handler = dxpy.DXFile(self.canonical_resource) + return file_handler.get_download_url( + duration=lifetime, + preauthenticated=True, + filename=filename, + project=self.canonical_project + )[0] + else: + raise DXError('DX Projects cannot have a temporary download url') + except ValueError: + raise DXError('DXPaths ending in folders cannot have a temporary download url') @property def resource(self): @@ -98,26 +133,39 @@ def resource(self): joined_resource = '/'.join(parts[1:]) if len(parts) > 1 else None return self.parts_class('/'+joined_resource) if joined_resource else None - def first(self): # may not need it - raise NotImplementedError + def dirname(self): + if not self.resource: + return self + else: + return super(DXPath, self).dirname() - def download_objects(self): # may not need it + def download_objects(self): # may not need it raise NotImplementedError - + def remove(self): raise NotImplementedError - + def rmtree(self): raise NotImplementedError - + def isdir(self): - raise NotImplementedError - + if not self.resource or self._is_folder(): + return self.exists() + return False + def isfile(self): - raise NotImplementedError + try: + return self.resource and not self._is_folder() and self.exists() + except NotFoundError: + return False def getsize(self): - raise NotImplementedError + if not self.resource: + return self.stat()['dataUsage']*1e9 + elif self._is_folder(): + return 0 + else: + return self.stat()['size'] def download_object(self, dest): """Download a single path or object to file.""" @@ -138,9 +186,40 @@ def open(self, mode='r', encoding=None): """ raise NotImplementedError - def list(self, canonicalize=False): - """List contents using the resource of the path as a prefix.""" - return list(self.walkfiles(canonicalize=canonicalize)) + def list(self, + canonicalize=False, + starts_with=None, + limit=None, + category=None, + condition=None + ): + """List contents using the resource of the path as a prefix. + + Args: + canonicalize (boolean): whether to return canonicalized paths + starts_with (str): Allows for an additional search path to + be appended to the resource of the dx path. Note that this + resource path is treated as a directory + limit (int): Limit the amount of results returned + category (str): Restricting class : One of 'record', 'file', 'gtable, + 'applet', 'workflow' + condition (function(results) -> bool): The method will only return + when the results matches the condition. + + Returns: + Iter[DXPath]: Iterates over listed files that match an optional pattern. + """ + results = list(self.walkfiles( + canonicalize=canonicalize, + starts_with=starts_with, + limit=limit, + category=category, + )) + if not results or not results[0]: # when results == [[]] + results = [] + utils.validate_condition(condition) + utils.check_condition(condition, results) + return results def listdir(self, canonicalize=False): """list the path as a dir, returning top-level directories and files.""" @@ -148,88 +227,125 @@ def listdir(self, canonicalize=False): proj_name = self.virtual_project ans_list = [] if not self.resource: - obj_dict = dxb.DXProject(dxid=proj_id).list_folder( - describe={'fields': {'name': True, 'folder': True}} - ) - elif self.endswith('/'): - obj_dict = dxb.DXProject(dxid=proj_id).list_folder( - folder=self.resource, + obj_dict = dxpy.DXProject(dxid=proj_id).list_folder( describe={'fields': {'name': True, 'folder': True}} ) + elif self._is_folder(): + try: + obj_dict = dxpy.DXProject(dxid=proj_id).list_folder( + folder=self.resource, + describe={'fields': {'name': True, 'folder': True}} + ) + except dxpy.exceptions.ResourceNotFound: + raise NotFoundError('The specified folder ({}) was not found'.format( + self.resource)) else: return ans_list - for key, values in obj_dict: + for key, values in obj_dict.items(): for entry in values: - if key == 'folders': - ans_list.append(entry) - elif canonicalize: - ans_list.append(DXCanonicalPath('dx://{}:/{}'.format(proj_id, entry['id']))) + if canonicalize: + ans_list.append(DXCanonicalPath('dx://{}:/{}'.format( + proj_id, (entry.lstrip('/') if key == 'folders' else entry['id'])))) else: - ans_list.append(DXVirtualPath(self.drive + proj_name + ':/') - / entry['describe']['folder'] / entry['describe']['name']) - - def walkfiles(self, pattern=None, canonicalize=False): + if key == 'folders': + ans_list.append(DXVirtualPath(self.drive + proj_name + ':' + entry)) + else: + ans_list.append(DXVirtualPath(self.drive + proj_name + ':' + + entry['describe']['folder']) + / entry['describe']['name']) + return ans_list + + def walkfiles(self, + pattern=None, + canonicalize=False, + starts_with=None, + limit=None, + category=None): """Iterates over listed files that match an optional pattern. - :param pattern: pattern to match the filenames against. - :param canonicalize: boolean indicating whether to return canonicalized paths - :return: Iter[DXPath] Iterates over listed files that match an optional pattern. + Args: + pattern (str): glob pattern to match the filenames against. + canonicalize (boolean): whether to return canonicalized paths + starts_with (str): Allows for an additional search path to + be appended to the resource of the dx path. Note that this + resource path is treated as a directory + limit (int): Limit the amount of results returned + category (str): Restricting class : One of 'record', 'file', 'gtable, + 'applet', 'workflow' + + Returns: + Iter[DXPath]: Iterates over listed files that match an optional pattern. """ proj_id = self.canonical_project proj_name = self.virtual_project - if not self.resource: + kwargs = { + 'project': proj_id, + 'name': pattern, + 'name_mode': 'glob', # the query performance is similar w/wo describe field, - # hence no need to customize query on canonicalize flag - list_gen = dxb.search.find_data_objects( - project=proj_id, - name=pattern, - name_mode='glob', - describe={'fields': {'name': True, 'folder': True}} - ) - elif self.endswith('/'): - list_gen = dxb.search.find_data_objects( - project=proj_id, - name=pattern, - name_mode='glob', - folder=self.resource, - describe={'fields': {'name': True, 'folder': True}} - ) - else: - list_gen = dxb.search.find_data_objects( - project=proj_id, - name=self.virtual_resource.name, - folder=self.resource.parent, - describe={'fields': {'name': True, 'folder': True}} - ) + # hence no need to customize query based on canonicalize flag + 'describe': {'fields': {'name': True, 'folder': True}}, + 'classname': category, + 'limit': limit, + 'folder': (self.resource or '/') + (starts_with or '') + } + if self.resource and not self._is_folder(): # if path is a file path + yield [] + return + list_gen = dxpy.find_data_objects(**kwargs) for obj in list_gen: if canonicalize: yield DXCanonicalPath('dx://{}:/{}'.format(obj['project'], obj['id'])) else: - dx_p = DXVirtualPath(self.drive + proj_name + ':/') - dx_p = dx_p / obj['describe']['folder'] / obj['describe']['name'] + dx_p = DXVirtualPath(self.drive + proj_name + ':' + obj['describe']['folder']) + dx_p = dx_p / obj['describe']['name'] yield dx_p - def glob(self, pattern): - """ Glob for pattern relative to this directory. + def glob(self, pattern, condition=None, canonicalize=False): + """ Glob for pattern relative to this directory.""" - Note that Swift currently only supports a single trailing *""" - raise NotImplementedError + results = list(self.walkfiles( + canonicalize=canonicalize, + pattern=pattern + )) + if not results or not results[0]: # when results == [[]] + results = [] + utils.validate_condition(condition) + utils.check_condition(condition, results) + return results def exists(self): - """Checks whether path exists on local filesystem or on swift. + """Checks existence of the path. - For directories on swift, checks whether directory sentinel exists or - at least one subdirectory exists""" - raise NotImplementedError + Returns True if the path exists, False otherwise. + + Returns: + bool: True if the path exists, False otherwise. + """ + try: + # first see if there is a specific corresponding object + self.stat() + return True + except (NotFoundError, ValueError): + pass + # otherwise we could be a directory, so try to grab first + # file/subfolder + if self._is_folder(): + try: + self.list(limit=1) + return True + except NotFoundError: + return False + return False def stat(self): if not self.__stat: if self.canonical_resource: - self.__stat = dxb.DXFile(dxid=self.canonical_resource, - project=self.canonical_project).describe() + self.__stat = dxpy.DXFile(dxid=self.canonical_resource, + project=self.canonical_project).describe() else: - self.__stat = dxb.DXProject(dxid=self.canonical_project).describe() + self.__stat = dxpy.DXProject(dxid=self.canonical_project).describe() return self.__stat @@ -239,7 +355,7 @@ class DXVirtualPath(DXPath): @property def virtual_project(self): if utils.is_valid_dxid(self.project, 'project'): - return dxb.DXProject(dxid=self.project).name + return dxpy.DXProject(dxid=self.project).name return self.project @property @@ -252,7 +368,7 @@ def virtual_path(self): @cached_property def canonical_project(self): - """Returns the first project that matches the name that user has view access to. + """Returns the unique project that matches the name that user has view access to. If no match is found, returns None Raises: @@ -263,37 +379,41 @@ def canonical_project(self): return self.project else: try: - proj_dict = dxb.search.find_one_project( + proj_dict = dxpy.find_one_project( name=self.project, level='VIEW', zero_ok=True, more_ok=False) if proj_dict is None: raise ProjectNotFoundError('No projects were found with given name ({})' .format(self.project)) return proj_dict['id'] except DXError as e: - six.raise_from(_parse_dx_error(e), e) + raise DuplicateProjectError('Duplicate projects were found with given name ({})' + .format(self.project), e) @cached_property def canonical_resource(self): """Returns the dx file-ID of the first matched filename""" if not self.resource: return None - if self.endswith('/'): + if self._is_folder(): raise ValueError('DXPath ({}) ending in folders cannot be canonicalized'.format(self)) objects = [{ 'name': self.name, 'folder': self.resource.parent, 'project': self.canonical_project }] - object_d = next(iter(dxb.search.resolve_data_objects(objects=objects)[0]), None) - if object_d: - return object_d['id'] + results = dxpy.resolve_data_objects(objects=objects)[0] + if len(results) > 1: + raise DuplicateError('The virtual resource is not unique on DNAnexus') + elif len(results) == 1: + return results[0]['id'] else: raise NotFoundError('The virtual resource does not exist on DNAnexus') @property def canonical_path(self): - """returns the first file that matches the given path""" - return DXCanonicalPath(self.drive + self.canonical_project + ':') / (self.canonical_resource or '') + """Returns the first file that matches the given path""" + return DXCanonicalPath(self.drive + self.canonical_project + + ':') / (self.canonical_resource or '') class DXCanonicalPath(DXPath): @@ -309,10 +429,10 @@ def virtual_resource(self): @cached_property def virtual_path(self): - proj = dxb.DXProject(dxid=self.project) + proj = dxpy.DXProject(dxid=self.project) virtual_p = DXVirtualPath(self.drive + proj.name + ':/') if self.resource: - file_h = dxb.DXFile(dxid=self.resource) + file_h = dxpy.DXFile(dxid=self.resource) virtual_p = virtual_p / file_h.folder[1:] / file_h.name return virtual_p diff --git a/stor/test.py b/stor/test.py index 12f1ac8e..6b9b8aff 100644 --- a/stor/test.py +++ b/stor/test.py @@ -1,9 +1,9 @@ import mock import unittest import six +import os import dxpy -import dxpy.bindings as dxb from vcr_unittest import VCRMixin from stor import s3 @@ -197,14 +197,8 @@ class DXTestMixin(VCRMixin): def setup_dx_auth(self): pass - def setup_dx_mocks(self): - self.mock_dx_conn = mock.Mock() # TODO - - def assertDXListsEqual(self, r1, r2): - if six.PY2: - return self.assertItemsEqual(r1, r2) - else: - return self.assertCountEqual(r1, r2) + def assert_dx_lists_equal(self, r1, r2): + self.assertEquals(sorted(r1), sorted(r2)) class SwiftTestCase(unittest.TestCase, SwiftTestMixin): @@ -244,40 +238,48 @@ def _get_vcr_kwargs(self): kwargs.update({'filter_headers': ['authorization']}) return kwargs - @classmethod - def setUpClass(cls): - cls.project_handler = cls.setup_project() - cls.project = cls.project_handler.name - cls.proj_id = cls.project_handler.get_id() + def _get_cassette_library_dir(self): + cassette_dir = super(DXTestCase, self)._get_cassette_library_dir() + return os.path.join(cassette_dir, self.__class__.__name__) + + def _get_cassette_name(self): + return '{0}.yaml'.format(self._testMethodName) + def new_proj_name(self): + return '{0}.{1}'.format(self.__class__.__name__, + self._testMethodName) - @classmethod - def setup_project(cls): - test_proj = dxb.DXProject() - test_proj.new('Temp_Proj') + def setup_temporary_project(self): + self.project_handler = self.setup_project() + self.project = self.project_handler.name + self.proj_id = self.project_handler.get_id() + self.addCleanup(self.teardown_project) + + def setup_project(self): + test_proj = dxpy.DXProject() + test_proj.new(self.new_proj_name()) return test_proj - def setUp(self): - super(DXTestCase, self).setUp() + def setup_generic_files(self): self.project_handler.new_folder('/temp_folder') - with dxb.dxfile_functions.new_dxfile(name='Temp_File.txt', - project=self.proj_id) as f: + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: f.write('data') - with dxb.dxfile_functions.new_dxfile(name='Temp_File2.txt', - folder='/temp_folder', - project=self.proj_id) as ff: + with dxpy.new_dxfile(name='folder_file.txt', + folder='/temp_folder', + project=self.proj_id) as ff: ff.write('temp_data') self.file_handler = f self.folder_file_handler = ff + self.addCleanup(self.teardown_files) - def tearDown(self): + def teardown_files(self): self.project_handler.remove_folder('/temp_folder', recurse=True, force=True) # Temp_File2.txt is already removed above. self.folder_file_handler = None self.file_handler.remove() self.file_handler = None - @classmethod - def tearDownClass(cls): - cls.project_handler.destroy() - cls.project_handler = None + def teardown_project(self): + self.project_handler.destroy() + self.project_handler = None diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index 4af4ade2..b32bf002 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -1,8 +1,7 @@ -import logging -import ntpath -import os +import time from tempfile import NamedTemporaryFile import unittest +import vcr import dxpy import dxpy.bindings as dxb @@ -25,86 +24,53 @@ class TestBasicPathMethods(unittest.TestCase): def test_name(self): - p = Path('dx://project/path/to/resource') + p = Path('dx://project:/path/to/resource') self.assertEquals(p.name, 'resource') def test_parent(self): - p = Path('dx://project/path/to/resource') - self.assertEquals(p.parent, 'dx://project/path/to') + p = Path('dx://project:/path/to/resource') + self.assertEquals(p.parent, 'dx://project:/path/to') def test_dirname(self): - p = Path('dx://project/path/to/resource') - self.assertEquals(p.dirname(), 'dx://project/path/to') + p = Path('dx://project:/path/to/resource') + self.assertEquals(p.dirname(), 'dx://project:/path/to') def test_dirname_top_level(self): - p1 = Path('dx://') - self.assertEquals(p1.dirname(), 'dx://') + p1 = Path('dx://project') + self.assertEquals(p1.dirname(), 'dx://project') - p2 = Path('dx://a') - self.assertEquals(p2.dirname(), 'dx://') + p2 = Path('dx://project:/') + self.assertEquals(p2.dirname(), 'dx://project:/') def test_basename(self): - p = Path('dx://project/path/to/resource') + p = Path('dx://project:/path/to/resource') self.assertEquals(p.basename(), 'resource') - def test_to_url(self): - p = Path('dx://project/path/to/resource') - - #TODO(akumar) to_url may not make sense for dx paths since DNAnexus uses S3 in backend. - #TODO(akumar) Remove this test if so... - - # self.assertEquals(p.to_url(), - # 'https://mybucket.s3.amazonaws.com/path/to/resource') - - def test_failed_new(self): - with self.assertRaises(ValueError): - DXPath('/bad/dx/path') - - def test_successful_new(self): - p = DXPath('dx://project/path') - self.assertEquals(p, 'dx://project/path') - - def test_success_dx_id(self): - # dnanexus always has 24-chr key - # TODO (akumar) does initializing with project id actually initialize with abs dx path? - p = DXPath('dx://project-123456789012345678901234') - self.assertEqual(len(p.basename()), 32) # 'project-' and 24-chr key - - p = DXPath('dx://project-123456789012345678901234/file-123456789012345678901234') - self.assertEqual(len(p.dirname()), 32) # 'project-' and 24-chr key - self.assertEqual(len(p.basename()), 29) # 'file-' and 24-chr key - - def test_failed_dx_id(self): - with self.assertRaises(ValueError): - DXPath('dx://project-1234') - with self.assertRaises(ValueError): - DXPath('dx://project-123456789012345678901234/file-1234') - class TestRepr(unittest.TestCase): def test_repr(self): - dx_p = DXPath('dx://t/c/p') + dx_p = DXPath('dx://t:/c/p') self.assertEquals(eval(repr(dx_p)), dx_p) class TestPathManipulations(unittest.TestCase): def test_add(self): - dx_p = DXPath('dx://a') + dx_p = DXPath('dx://a:') dx_p = dx_p + 'b' + Path('c') self.assertTrue(isinstance(dx_p, DXPath)) - self.assertEquals(dx_p, 'dx://abc') + self.assertEquals(dx_p, 'dx://a:bc') def test_div(self): - dx_p = DXPath('dx://t') + dx_p = DXPath('dx://t:') dx_p = dx_p / 'c' / Path('p') self.assertTrue(isinstance(dx_p, DXPath)) - self.assertEquals(dx_p, 'dx://t/c/p') + self.assertEquals(dx_p, 'dx://t:/c/p') class TestProject(unittest.TestCase): def test_project_none(self): - dx_p = DXPath('dx://') - self.assertIsNone(dx_p.project) + with self.assertRaises(ValueError): + DXPath('dx://') def test_project_exists(self): dx_p = DXPath('dx://project') @@ -112,41 +78,30 @@ def test_project_exists(self): class TestResource(unittest.TestCase): - def test_resource_none_no_project(self): - dx_p = DXPath('dx://') - self.assertIsNone(dx_p.resource) def test_resource_none_w_project(self): dx_p = DXPath('dx://project/') self.assertIsNone(dx_p.resource) def test_resource_object(self): - dx_p = DXPath('dx://bucket/obj') - self.assertEquals(dx_p.resource, 'obj') + dx_p = DXPath('dx://project:/obj') + self.assertEquals(dx_p.resource, '/obj') - def test_resource_single_dir(self): - dx_p = DXPath('dx://project/dir/') - self.assertEquals(dx_p.resource, 'dir/') + def test_resource_trailing_slash(self): + dx_p = DXPath('dx://project:/dir/') + self.assertEquals(dx_p.resource, '/dir/') def test_resource_nested_obj(self): - dx_p = DXPath('dx://project/nested/obj') - self.assertEquals(dx_p.resource, 'nested/obj') + dx_p = DXPath('dx://project:/nested/obj.txt') + self.assertEquals(dx_p.resource, '/nested/obj.txt') def test_resource_nested_dir(self): - dx_p = DXPath('dx://project/nested/dir/') + dx_p = DXPath('dx://project:/nested/dir/') self.assertEquals(dx_p.resource, 'nested/dir/') -class TestGetDXConnectionCreds(unittest.TestCase): - def test_login_token(self): - dx_p = DXPath('dx://tenant/') - with settings.use({'dx': {'dx_login_token': ''}}): - #TODO(akumar) move errors to dx class - with self.assertRaises(swift.ConfigurationError): - dx_p._get_dx_connection_vars() - - -class TestDXFile(DXTestCase): +@unittest.skip("skipping") +class TestDXFile(DXTestCase): # TODO def test_read_on_open_file(self): d = dxpy.bindings.dxfile_functions.new_dxfile() @@ -224,10 +179,11 @@ def test_write_multiple_flush_multiple_upload(self, mock_upload): class TestDXShared(SharedOBSFileCases, DXTestCase): drive = 'dx://' path_class = DXPath - normal_path = DXPath('dx://project/obj') + normal_path = DXPath('dx://project:/obj') -class TestTempURL(DXTestCase): +@unittest.skip("demonstrating skipping") +class TestTempURL(DXTestCase): # TODO def test_success(self): dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name)) temp_url = dx_p.temp_url() @@ -236,273 +192,424 @@ def test_success(self): self.assertIn(str(self.file_handler.name / self.project), temp_url) -class TestList(DXTestCase): +class TestCanonicalProject(DXTestCase): + def test_no_project(self): + dx_p = DXPath('dx://Random_Project:/') + with self.assertRaises(dx.ProjectNotFoundError): + dx_p.canonical_project - def test_no_project_error(self): - dx_p = DXPath('dx://') - with self.assertRaises(ValueError): - list(dx_p.list()) + def test_unique_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.project) + self.assertTrue(utils.is_valid_dxid(dx_p.canonical_project, 'project')) + + def test_duplicate_projects(self): + self.setup_temporary_project() + test_proj = dxb.DXProject() + test_proj.new(self.new_proj_name()) + dx_p = DXPath('dx://' + self.project) + with self.assertRaises(dx.DuplicateProjectError): + dx_p.canonical_project + test_proj.destroy() - def test_list_project(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'path/to/resource1' - }, { - 'name': 'path/to/resource2' - }]) - dx_p = DXPath('dx://test-project') - results = dx_p.list() - self.assertDXListsEqual(results, [ - 'dx://test-project/path/to/resource1', - 'dx://test-project/path/to/resourc2' +class TestCanonicalResource(DXTestCase): + def test_no_resource(self): + self.setup_temporary_project() + dx_p = DXPath('dx://' + self.project + ':/random.txt') + with self.assertRaises(dx.NotFoundError): + dx_p.canonical_resource + + def test_unique_resource(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project + ':/temp_file.txt') + self.assertTrue(utils.is_valid_dxid(dx_p.canonical_resource, 'file')) + + def test_duplicate_resource(self): + self.setup_temporary_project() + self.setup_generic_files() + with dxpy.new_dxfile(name='folder_file.txt', + folder='/temp_folder', + project=self.proj_id) as ff: + ff.write('temp_data') + dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') + with self.assertRaises(dx.DuplicateError): + dx_p.canonical_resource + + +class TestListDir(DXTestCase): + def test_listdir_project(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project) + results = dx_p.listdir() + self.assert_dx_lists_equal(results, [ + 'dx://'+self.project+':/temp_folder', + 'dx://'+self.project+':/temp_file.txt', ]) - @mock.patch('time.sleep', autospec=True) - def test_list_unavailable(self, mock_sleep): - mock_list = self.mock_dx_conn.get_iterator - mock_list.side_effect = [ - DXException('unavaiable', http_status=503), - ({}, [{ - 'name': 'path/to/resource1' - }, { - 'name': 'path/to/resource2' - }]) - ] - - dx_p = DXPath('dx://project/path') - results = dx_p.list() - self.assertDXListsEqual(results, [ - 'dx://project/path/to/resource1', - 'dx://project/path/to/resource2' + def test_listdir_file(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project+':/temp_file.txt') + results = dx_p.listdir() + self.assertEquals(results, []) + + def test_listdir_empty_folder(self): + self.setup_temporary_project() + self.project_handler.new_folder('/temp_folder') + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.listdir() + self.assertEquals(results, []) + + def test_listdir_folder_w_file(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.listdir() + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) - # Verify that list was retried one time - # self.assertEquals(len(mock_list.call_args_list), 2) + def test_listdir_absent_folder(self): + self.setup_temporary_project() + dx_p = DXPath('dx://' + self.project + ':/random_folder') + with self.assertRaises(dx.NotFoundError): + dx_p.listdir() + + def test_listdir_folder_share_filename(self): + self.setup_temporary_project() + self.setup_generic_files() + with dxpy.new_dxfile(name='temp_folder', + project=self.proj_id) as f: + f.write('data') + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.listdir() + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' + ]) - @mock.patch('time.sleep', autospec=True) - def test_list_unauthorized(self, mock_sleep): - mock_list = self.mock_dx_conn.get_iterator - mock_list.side_effect = DXException( - 'unauthorized', http_status=403, http_response_headers={'X-Trans-Id': 'transactionid'}) - - dx_p = DXPath('dx://project/path') - # TODO(akumar) move errors to dx class - with self.assertRaises(swift.UnauthorizedError): - try: - dx_p.list() - except swift.UnauthorizedError as exc: - self.assertIn('X-Trans-Id: transactionid', str(exc)) - self.assertIn('X-Trans-Id: transactionid', repr(exc)) - raise - - def test_listdir(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'subdir': 'path/to/resource1/' - }, { - 'name': 'path/to/resource1' - }, { - 'name': 'path/to/resource2' - }, { - 'name': 'path/to/resource3' - }]) - - dx_p = DXPath('dx://project/path/to') - results = list(dx_p.listdir()) - self.assertDXListsEqual(results, [ - 'dx://project/path/to/resource1', - 'dx://project/path/to/resource2', - 'dx://project/path/to/resource3' + def test_listdir_canonical(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = dx_p.listdir(canonicalize=True) + self.assertIn('dx://'+self.proj_id+':/temp_folder', results) + self.assertEquals(len(results), 2) + + +class TestList(DXTestCase): + def test_list_project(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = dx_p.list() + self.assert_dx_lists_equal(results, [ + 'dx://'+self.project+':/temp_folder/folder_file.txt', + 'dx://'+self.project+':/temp_file.txt' ]) + def test_list_file(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project+':/temp_file.txt') + results = dx_p.list() + self.assertEquals(results, []) - @mock.patch('os.path', ntpath) - def test_list_windows(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'path/to/resource1' - }, { - 'name': 'path/to/resource2' - }, { - 'name': 'path/to/resource3' - }, { - 'name': 'path/to/resource4' - }]) - - dx_p = DXPath('dx://project/path') - results = list(dx_p.list()) - self.assertDXListsEqual(results, [ - 'dx://project/path/to/resource1', - 'dx://project/path/to/resource2', - 'dx://project/path/to/resource3', - 'dx://project/path/to/resource4' + def test_list_empty_folder(self): + self.setup_temporary_project() + self.project_handler.new_folder('/temp_folder') + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.list() + self.assertEquals(results, []) + + def test_list_folder_w_file(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.list() + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) - def test_list_limit(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'path/to/resource1' - }]) - - dx_p = DXPath('dx://project/path') - results = list(dx_p.list(limit=1)) - self.assertEquals(results, [ - 'dx://project/path/to/resource1' + def test_list_absent_folder(self): + self.setup_temporary_project() + dx_p = DXPath('dx://' + self.project + ':/random_folder') + results = dx_p.list() + self.assertEquals(results, []) + + def test_list_folder_share_filename(self): + self.setup_temporary_project() + self.setup_generic_files() + with dxpy.new_dxfile(name='temp_folder', + project=self.proj_id) as f: + f.write('data') + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.list() + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) + def test_list_canonical(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = dx_p.list(canonicalize=True) + self.assertTrue(all(self.proj_id in result for result in results)) + self.assertTrue(any(self.file_handler.get_id() in result for result in results)) + self.assertTrue(any(self.folder_file_handler.get_id() in result for result in results)) + + def test_list_limit(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = dx_p.list(limit=1) + self.assertEquals(len(results), 1) + def test_list_starts_with(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'r1' - }, { - 'name': 'r2' - }]) - dx_p = DXPath('dx://project/r') - results = list(dx_p.list(starts_with='prefix')) - self.assertDXListsEqual(results, [ - 'dx://project/r1', - 'dx://project/r2' + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = dx_p.list(starts_with='temp_folder') + self.assert_dx_lists_equal(results, [ + 'dx://'+self.project+':/temp_folder/folder_file.txt' ]) - mock_list.assert_called_once_with('project', - prefix='r/prefix', - limit=None, - full_listing=True) - - def test_list_starts_with_no_resource(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'r1' - }, { - 'name': 'r2' - }]) - dx_p = DXPath('dx://project') - results = list(dx_p.list(starts_with='prefix')) - self.assertDXListsEqual(results, [ - 'dx://project/r1', - 'dx://project/r2' - ]) - mock_list.assert_called_once_with('container', - prefix='prefix', - limit=None, - full_listing=True) + def test_list_w_condition(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = dx_p.list(starts_with='temp_folder', condition=lambda res: len(res) == 1) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' + ]) -class TestWalkFiles(DXTestCase): - def test_no_pattern_w_dir_markers(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'my/obj1', - 'content_type': 'application/directory' - }, { - 'name': 'my/obj2', - 'content_type': 'text/directory' - }, { - 'name': 'my/obj3', - 'content_type': 'application/octet-stream' - }, { - 'name': 'my/obj4', - 'content_type': 'application/octet-stream' - }]) - - f = list(DXPath('dx://project/').walkfiles()) - self.assertEquals(set(f), set([ - DXPath('dx://project/my/obj3'), - DXPath('dx://project/my/obj4') - ])) - - def test_w_pattern_w_dir_markers(self): - mock_list = self.mock_dx_conn.get_iterator - mock_list.return_value = ({}, [{ - 'name': 'my/obj1', - 'content_type': 'application/directory' - }, { - 'name': 'my/obj2', - 'content_type': 'text/directory' - }, { - 'name': 'my/obj3', - 'content_type': 'application/octet-stream' - }, { - 'name': 'my/obj4.sh', - 'content_type': 'application/octet-stream' - }, { - 'name': 'my/other/obj5.sh', - 'content_type': 'application/octet-stream' - }, { - 'name': 'my/dirwithpattern.sh/obj6', - 'content_type': 'application/octet-stream' - }]) - - f = list(DXPath('dx://project').walkfiles('*.sh')) - self.assertEquals(set(f), set([ - DXPath('dx://project/my/obj4.sh'), - DXPath('dx://project/my/other/obj5.sh') - ])) - - -@mock.patch.object(DXPath, 'list', autospec=True) -class TestGlob(DXTestCase): - def test_valid_pattern(self, mock_list): - dx_p = DXPath('dx://project') - dx_p.glob('pattern*') - mock_list.assert_called_once_with(mock.ANY, starts_with='pattern') + def test_list_w_category(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + dxpy.new_dxworkflow(title='Workflow', project=self.proj_id) + results = dx_p.list(category='file') + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt', + 'dx://' + self.project + ':/temp_file.txt' + ]) - def test_valid_pattern_wo_wildcard(self, mock_list): - dx_p = DXPath('dx://project') - dx_p.glob('pattern') - mock_list.assert_called_once_with(mock.ANY, starts_with='pattern') - def test_multi_glob_pattern(self, mock_list): - dx_p = DXPath('dx://project') - with self.assertRaises(ValueError): - dx_p.glob('*invalid_pattern*', condition=None) +class TestWalkFiles(DXTestCase): + def test_pattern_w_prefix(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project) + results = list(dx_p.walkfiles(pattern='fold*')) + self.assert_dx_lists_equal(results, [ + 'dx://'+self.project+':/temp_folder/folder_file.txt' + ]) - def test_invalid_glob_pattern(self, mock_list): - dx_p = DXPath('dx://project') - with self.assertRaises(ValueError): - dx_p.glob('invalid_*pattern', condition=None) + def test_pattern_w_suffix(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='*p_file.txt')) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_file.txt' + ]) - # TODO(akumar) add tests for retries + def test_pattern_w_prefix_suffix(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='*file*')) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_file.txt', + 'dx://' + self.project + ':/temp_folder/folder_file.txt' + ]) class TestStat(DXTestCase): - def test_stat_no_ext(self): - with dxb.dxfile_functions.new_dxfile(name='Temp_File3', - project=self.proj_id) as f: - f.write('temp_data') - response = DXPath('dx://Temp_Proj:/Temp_File3').stat() - self.assertIn('folder', response) # only files have folders - f.remove() - def test_stat_folder(self): + self.setup_temporary_project() + self.setup_generic_files() + with self.assertRaises(ValueError): + DXPath('dx://'+self.project+':/temp_folder/').stat() with self.assertRaises(ValueError): - DXPath('dx://Temp_Proj:/temp_folder/').stat() + DXPath('dx://'+self.project+':/temp_folder').stat() def test_stat_project_error(self): + self.setup_temporary_project() test_proj = dxb.DXProject() - test_proj.new('Temp_Proj') + test_proj.new(self.new_proj_name()) with self.assertRaises(dx.DuplicateProjectError): - DXPath('dx://Temp_Proj:').stat() + DXPath('dx://'+self.project+':').stat() with self.assertRaises(dx.DuplicateProjectError): - DXPath('dx://Temp_Proj:/').stat() + DXPath('dx://'+self.project+':/').stat() with self.assertRaises(dx.NotFoundError): DXPath('dx://Random_Proj:').stat() test_proj.destroy() - def test_stat_project(self): - dx_p = DXPath('dx://Temp_Proj:/') + def test_stat_virtual_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.project) response = dx_p.stat() self.assertIn('region', response) # only projects have regions - dx_p = DXPath('dx://Temp_Proj:') + dx_p = DXPath('dx://'+self.project+':') + response = dx_p.stat() + self.assertIn('region', response) + + def test_stat_canonical_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.proj_id+':') response = dx_p.stat() self.assertIn('region', response) def test_stat_file(self): - dx_p = DXPath('dx://Temp_Proj:/Temp_File.txt') + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://'+self.project+':/temp_file.txt') response = dx_p.stat() self.assertIn('folder', response) # only files have folders - dx_p = DXPath('dx://Temp_Proj:/temp_folder/Temp_File2.txt') + dx_p = DXPath('dx://'+self.project+':/temp_folder/folder_file.txt') response = dx_p.stat() self.assertIn('folder', response) + + +class TestExists(DXTestCase): + def test_false(self): + self.setup_temporary_project() + dx_p = DXPath('dx://'+self.project+':/random.txt') + result = dx_p.exists() + self.assertFalse(result) + + def test_false_no_folder(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/random_folder/folder_file.txt') + result = dx_p.exists() + self.assertFalse(result) + + def test_raises_on_duplicate(self): + self.setup_temporary_project() + self.setup_generic_files() + with dxpy.new_dxfile(name='folder_file.txt', + folder='/temp_folder', + project=self.proj_id) as f: + f.write('data') + dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') + with self.assertRaises(dx.DuplicateError): + dx_p.exists() + + def test_true_file(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') + result = dx_p.exists() + self.assertTrue(result) + + def test_true_dir_with_no_object(self): + self.setup_temporary_project() + self.project_handler.new_folder('/temp_folder') + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + result = dx_p.exists() + self.assertTrue(result) + + def test_true_dir_with_object(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + result = dx_p.exists() + self.assertTrue(result) + + def test_project_does_not_exist(self): + dx_p = DXPath('dx://random_project:/') + result = dx_p.exists() + self.assertFalse(result) + + +class TestGlob(DXTestCase): + def test_suffix_pattern(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.glob('*file.txt') + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' + ]) + + def test_prefix_pattern(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.glob('file*') + self.assertEqual(results, []) + + def test_valid_pattern(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.glob('*fi*le*') + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' + ]) + + def test_valid_pattern_wo_wildcard(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.glob('file') + self.assertEqual(results, []) + + def test_glob_cond_met(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + results = dx_p.glob('*fi*le*', condition=lambda res: len(res) == 1) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt' + ]) + + def test_cond_no_met(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + with self.assertRaises(exceptions.ConditionNotMetError): + dx_p.glob('*fi*le*', condition=lambda res: len(res) > 1) + + +class TestTempUrl(DXTestCase): + def test_fail_on_project(self): + self.setup_temporary_project() + dx_p = DXPath('dx://' + self.project) + with self.assertRaises(dx.DXError): + dx_p.temp_url() + + def test_fail_on_folder(self): + self.setup_temporary_project() + self.setup_generic_files() + dx_p = DXPath('dx://' + self.project + ':/temp_folder') + with self.assertRaises(dx.DXError): + dx_p.temp_url() + + def test_on_file(self): + self.setup_temporary_project() + self.setup_generic_files() + while self.file_handler._get_state().lower() != 'closed': + time.sleep(1) + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') + result = dx_p.temp_url() + self.assertIn('dl.dnanex.us', result) + + def test_on_file_timed(self): # TODO + self.setup_temporary_project() + self.setup_generic_files() + while self.file_handler._get_state().lower() != 'closed': + time.sleep(1) + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') + result = dx_p.temp_url(filename='random.txt', lifetime=1) + self.assertIn('dl.dnanex.us', result) + self.assertIn('random.txt', result) + # TODO (check that link is valid) + time.sleep(2) + # TODO (check that link is invalid) diff --git a/stor/utils.py b/stor/utils.py index 0097bf1c..abcf44c8 100644 --- a/stor/utils.py +++ b/stor/utils.py @@ -274,7 +274,7 @@ def find_dx_class(p): """ from stor.dx import DXPath, DXCanonicalPath, DXVirtualPath colon_pieces = p[len(DXPath.drive):].split(':', 1) - if not colon_pieces or not colon_pieces[0]: + if not colon_pieces or not colon_pieces[0] or '/' in colon_pieces[0]: raise ValueError('Project is required to construct a DXPath') project = colon_pieces[0] resource = (colon_pieces[1] if len(colon_pieces) == 2 else '').lstrip('/') From 6420870c5330017f6e675ba84fb3ad99fe776e72 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Tue, 4 Sep 2018 11:01:50 -0700 Subject: [PATCH 14/16] adding environment variables in toc --- tox.ini | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 05ac1983..d4ae307b 100644 --- a/tox.ini +++ b/tox.ini @@ -18,4 +18,7 @@ whitelist_externals = make nosetests passenv = SWIFT_TEST_USERNAME SWIFT_TEST_PASSWORD OS_TEMP_URL_KEY AWS_TEST_ACCESS_KEY_ID AWS_DEFAULT_REGION - AWS_TEST_SECRET_ACCESS_KEY OS_AUTH_URL LC_LANG LC_ALL PYTHONIOENCODING PYTEST_ADDOPTS + AWS_TEST_SECRET_ACCESS_KEY OS_AUTH_URL + LC_LANG LC_ALL PYTHONIOENCODING + PYTEST_ADDOPTS + DXPY_LOGIN_TOKEN \ No newline at end of file From 916735326a0770098f1a5fe34c92540130be58f5 Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Wed, 5 Sep 2018 10:39:52 -0700 Subject: [PATCH 15/16] Cleaning up code and adding list_iter etc: code review comments --- stor/dx.py | 110 ++++++++++--- stor/test.py | 37 ++--- stor/tests/test_dx.py | 370 ++++++++++++++++++++++++++++-------------- 3 files changed, 352 insertions(+), 165 deletions(-) diff --git a/stor/dx.py b/stor/dx.py index 20c748d3..ceed5fd1 100644 --- a/stor/dx.py +++ b/stor/dx.py @@ -85,7 +85,8 @@ def _get_parts(self): return parts def _is_folder(self): - return self.resource and not self.resource.ext + return self.resource and not self.resource.ext and \ + not utils.is_valid_dxid(self.resource.lstrip('/'), 'file') def _noop(attr_name): def wrapper(self): @@ -207,13 +208,13 @@ def list(self, when the results matches the condition. Returns: - Iter[DXPath]: Iterates over listed files that match an optional pattern. + List[DXPath]: Iterates over listed files that match an optional pattern. """ results = list(self.walkfiles( canonicalize=canonicalize, starts_with=starts_with, limit=limit, - category=category, + category=category )) if not results or not results[0]: # when results == [[]] results = [] @@ -221,26 +222,63 @@ def list(self, utils.check_condition(condition, results) return results - def listdir(self, canonicalize=False): - """list the path as a dir, returning top-level directories and files.""" + def list_iter(self, + canonicalize=False, + starts_with=None, + limit=None, + category=None, + ): + """Iterate over contents using the resource of the path as a prefix. + + Args: + canonicalize (boolean): whether to return canonicalized paths + starts_with (str): Allows for an additional search path to + be appended to the resource of the dx path. Note that this + resource path is treated as a directory + limit (int): Limit the amount of results returned + category (str): Restricting class : One of 'record', 'file', 'gtable, + 'applet', 'workflow' + + Returns: + Iter[DXPath]: Iterates over listed files that match an optional pattern. + """ + return self.walkfiles( + canonicalize=canonicalize, + starts_with=starts_with, + limit=limit, + category=category + ) + + def listdir(self, only='all', canonicalize=False): + """List the path as a dir, returning top-level directories and files. + + Args: + canonicalize (boolean): whether to return canonicalized paths + only (str): "objects" for only objects, "folders" for only folder, + "all" for both + + Returns: + List[DXPath]: Iterates over listed files directly within the resource + + Raises: + NotFoundError: When resource folder is not present on DX platform + """ proj_id = self.canonical_project proj_name = self.virtual_project ans_list = [] - if not self.resource: - obj_dict = dxpy.DXProject(dxid=proj_id).list_folder( - describe={'fields': {'name': True, 'folder': True}} - ) - elif self._is_folder(): - try: - obj_dict = dxpy.DXProject(dxid=proj_id).list_folder( - folder=self.resource, - describe={'fields': {'name': True, 'folder': True}} - ) - except dxpy.exceptions.ResourceNotFound: - raise NotFoundError('The specified folder ({}) was not found'.format( - self.resource)) - else: + kwargs = { + 'only': only, + 'describe': {'fields': {'name': True, 'folder': True}} + } + if self._is_folder(): + kwargs.update({'folder': self.resource}) + elif self.resource: return ans_list + try: + obj_dict = dxpy.DXProject(dxid=proj_id).list_folder(**kwargs) + except dxpy.exceptions.ResourceNotFound: + raise NotFoundError('The specified folder ({}) was not found'.format( + self.resource)) for key, values in obj_dict.items(): for entry in values: if canonicalize: @@ -255,9 +293,28 @@ def listdir(self, canonicalize=False): / entry['describe']['name']) return ans_list + def listdir_iter(self, canonicalize=False): + """Iterate the path as a dir, returning top-level directories and files. + + Args: + canonicalize (boolean): whether to return canonicalized paths + + Returns: + Iter[DXPath]: Iterates over listed files directly within the resource + """ + folders = self.listdir(only='folders', canonicalize=canonicalize) + for folder in folders: + yield folder + for data in self.walkfiles( + canonicalize=canonicalize, + recurse=False + ): + yield data + def walkfiles(self, pattern=None, canonicalize=False, + recurse=True, starts_with=None, limit=None, category=None): @@ -266,6 +323,7 @@ def walkfiles(self, Args: pattern (str): glob pattern to match the filenames against. canonicalize (boolean): whether to return canonicalized paths + recurse (boolean): whether to look in subfolders of folder as well starts_with (str): Allows for an additional search path to be appended to the resource of the dx path. Note that this resource path is treated as a directory @@ -285,6 +343,7 @@ def walkfiles(self, # the query performance is similar w/wo describe field, # hence no need to customize query based on canonicalize flag 'describe': {'fields': {'name': True, 'folder': True}}, + 'recurse' : recurse, 'classname': category, 'limit': limit, 'folder': (self.resource or '/') + (starts_with or '') @@ -391,7 +450,12 @@ def canonical_project(self): @cached_property def canonical_resource(self): - """Returns the dx file-ID of the first matched filename""" + """Returns the dx file-ID of the uniquely matched filename + + Raises: + DuplicateError: if filename is not unique + NotFoundError: if resource is not found on DX platform + """ if not self.resource: return None if self._is_folder(): @@ -411,7 +475,7 @@ def canonical_resource(self): @property def canonical_path(self): - """Returns the first file that matches the given path""" + """Returns the unique file that matches the given path""" return DXCanonicalPath(self.drive + self.canonical_project + ':') / (self.canonical_resource or '') @@ -432,7 +496,7 @@ def virtual_path(self): proj = dxpy.DXProject(dxid=self.project) virtual_p = DXVirtualPath(self.drive + proj.name + ':/') if self.resource: - file_h = dxpy.DXFile(dxid=self.resource) + file_h = dxpy.DXFile(dxid=self.canonical_resource) virtual_p = virtual_p / file_h.folder[1:] / file_h.name return virtual_p @@ -442,7 +506,7 @@ def canonical_project(self): @property def canonical_resource(self): - return self.resource + return self.resource.lstrip('/') if self.resource else None @property def canonical_path(self): diff --git a/stor/test.py b/stor/test.py index 6b9b8aff..4578e791 100644 --- a/stor/test.py +++ b/stor/test.py @@ -6,6 +6,7 @@ import dxpy from vcr_unittest import VCRMixin +from stor import Path from stor import s3 from stor.s3 import S3Path from stor.swift import SwiftPath @@ -260,25 +261,23 @@ def setup_project(self): test_proj.new(self.new_proj_name()) return test_proj - def setup_generic_files(self): - self.project_handler.new_folder('/temp_folder') - with dxpy.new_dxfile(name='temp_file.txt', - project=self.proj_id) as f: - f.write('data') - with dxpy.new_dxfile(name='folder_file.txt', - folder='/temp_folder', - project=self.proj_id) as ff: - ff.write('temp_data') - self.file_handler = f - self.folder_file_handler = ff - self.addCleanup(self.teardown_files) - - def teardown_files(self): - self.project_handler.remove_folder('/temp_folder', recurse=True, force=True) - # Temp_File2.txt is already removed above. - self.folder_file_handler = None - self.file_handler.remove() - self.file_handler = None + def setup_files(self, files): + """Sets up files for testing + + Args: + files (List[Str]): list of files relative to project root to be created. + Only virtual files are allowed + """ + for i, curr_file in enumerate(files): + dx_p = Path(curr_file) + try: + self.project_handler.new_folder(dx_p.parent, parents=True) + except dxpy.exceptions.InvalidState: + pass + with dxpy.new_dxfile(name=dx_p.name, + folder=dx_p.parent, + project=self.proj_id) as f: + f.write('data'+str(i)) def teardown_project(self): self.project_handler.destroy() diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index b32bf002..887f2247 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -1,3 +1,4 @@ +import pytest import time from tempfile import NamedTemporaryFile import unittest @@ -7,7 +8,7 @@ import dxpy.bindings as dxb import freezegun import mock -from testfixtures import LogCapture +import six.moves.urllib as urllib import stor from stor import exceptions @@ -16,7 +17,7 @@ from stor import settings from stor import swift from stor import utils -from stor.dx import DXPath +from stor.dx import DXPath, DXVirtualPath, DXCanonicalPath import stor.dx as dx from stor.test import DXTestCase from stor.tests.shared_obs import SharedOBSFileCases @@ -25,32 +26,32 @@ class TestBasicPathMethods(unittest.TestCase): def test_name(self): p = Path('dx://project:/path/to/resource') - self.assertEquals(p.name, 'resource') + self.assertEqual(p.name, 'resource') def test_parent(self): p = Path('dx://project:/path/to/resource') - self.assertEquals(p.parent, 'dx://project:/path/to') + self.assertEqual(p.parent, 'dx://project:/path/to') def test_dirname(self): p = Path('dx://project:/path/to/resource') - self.assertEquals(p.dirname(), 'dx://project:/path/to') + self.assertEqual(p.dirname(), 'dx://project:/path/to') def test_dirname_top_level(self): p1 = Path('dx://project') - self.assertEquals(p1.dirname(), 'dx://project') + self.assertEqual(p1.dirname(), 'dx://project') p2 = Path('dx://project:/') - self.assertEquals(p2.dirname(), 'dx://project:/') + self.assertEqual(p2.dirname(), 'dx://project:/') def test_basename(self): p = Path('dx://project:/path/to/resource') - self.assertEquals(p.basename(), 'resource') + self.assertEqual(p.basename(), 'resource') class TestRepr(unittest.TestCase): def test_repr(self): dx_p = DXPath('dx://t:/c/p') - self.assertEquals(eval(repr(dx_p)), dx_p) + self.assertEqual(eval(repr(dx_p)), dx_p) class TestPathManipulations(unittest.TestCase): @@ -58,46 +59,46 @@ def test_add(self): dx_p = DXPath('dx://a:') dx_p = dx_p + 'b' + Path('c') self.assertTrue(isinstance(dx_p, DXPath)) - self.assertEquals(dx_p, 'dx://a:bc') + self.assertEqual(dx_p, 'dx://a:bc') def test_div(self): dx_p = DXPath('dx://t:') dx_p = dx_p / 'c' / Path('p') self.assertTrue(isinstance(dx_p, DXPath)) - self.assertEquals(dx_p, 'dx://t:/c/p') + self.assertEqual(dx_p, 'dx://t:/c/p') class TestProject(unittest.TestCase): def test_project_none(self): - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match='Project is required'): DXPath('dx://') def test_project_exists(self): dx_p = DXPath('dx://project') - self.assertEquals(dx_p.project, 'project') + self.assertEqual(dx_p.project, 'project') class TestResource(unittest.TestCase): def test_resource_none_w_project(self): - dx_p = DXPath('dx://project/') + dx_p = DXPath('dx://project:/') self.assertIsNone(dx_p.resource) def test_resource_object(self): dx_p = DXPath('dx://project:/obj') - self.assertEquals(dx_p.resource, '/obj') + self.assertEqual(dx_p.resource, '/obj') def test_resource_trailing_slash(self): dx_p = DXPath('dx://project:/dir/') - self.assertEquals(dx_p.resource, '/dir/') + self.assertEqual(dx_p.resource, '/dir/') def test_resource_nested_obj(self): dx_p = DXPath('dx://project:/nested/obj.txt') - self.assertEquals(dx_p.resource, '/nested/obj.txt') + self.assertEqual(dx_p.resource, '/nested/obj.txt') def test_resource_nested_dir(self): dx_p = DXPath('dx://project:/nested/dir/') - self.assertEquals(dx_p.resource, 'nested/dir/') + self.assertEqual(dx_p.resource, '/nested/dir/') @unittest.skip("skipping") @@ -115,8 +116,8 @@ def test_read_on_open_file(self): def test_read_success_on_closed_file(self): dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name)) - self.assertEquals(dx_p.read_object(), b'data') - self.assertEquals(dx_p.open().read(), 'data') + self.assertEqual(dx_p.read_object(), b'data') + self.assertEqual(dx_p.open().read(), 'data') def test_iterating_over_files(self): data = b'''\ @@ -130,10 +131,10 @@ def test_iterating_over_files(self): d.state = 'closed' dx_p = DXPath('dx://{}/{}'.format(self.project, d.name)) # open().read() should return str for r - self.assertEquals(dx_p.open('r').read(), data.decode('ascii')) + self.assertEqual(dx_p.open('r').read(), data.decode('ascii')) # open().read() should return bytes for rb - self.assertEquals(dx_p.open('rb').read(), data) - self.assertEquals(dx_p.open().readlines(), + self.assertEqual(dx_p.open('rb').read(), data) + self.assertEqual(dx_p.open().readlines(), [l + '\n' for l in data.decode('ascii').split('\n')][:-1]) for i, line in enumerate(dx_p.open(), 1): self.assertEqual(line, 'line%d\n' % i) @@ -176,26 +177,17 @@ def test_write_multiple_flush_multiple_upload(self, mock_upload): self.assertEqual(open(ntf3.name).read(), 'hello world') +@unittest.skip("skipping") class TestDXShared(SharedOBSFileCases, DXTestCase): drive = 'dx://' path_class = DXPath normal_path = DXPath('dx://project:/obj') -@unittest.skip("demonstrating skipping") -class TestTempURL(DXTestCase): # TODO - def test_success(self): - dx_p = DXPath('dx://{}/{}'.format(self.project, self.file_handler.name)) - temp_url = dx_p.temp_url() - #TODO (akumar) what is the expected_url? DXFile.get_download_url doesn't seem to work - # expected = 'https://swift.com/v1/tenant/container/obj?temp_url_sig=3b1adff9452165103716d308da692e6ec9c2d55f&temp_url_expires=1456229100&inline' # nopep8 - self.assertIn(str(self.file_handler.name / self.project), temp_url) - - class TestCanonicalProject(DXTestCase): def test_no_project(self): dx_p = DXPath('dx://Random_Project:/') - with self.assertRaises(dx.ProjectNotFoundError): + with pytest.raises(dx.ProjectNotFoundError, match='No projects'): dx_p.canonical_project def test_unique_project(self): @@ -208,7 +200,7 @@ def test_duplicate_projects(self): test_proj = dxb.DXProject() test_proj.new(self.new_proj_name()) dx_p = DXPath('dx://' + self.project) - with self.assertRaises(dx.DuplicateProjectError): + with pytest.raises(dx.DuplicateProjectError, match='Duplicate projects'): dx_p.canonical_project test_proj.destroy() @@ -217,73 +209,71 @@ class TestCanonicalResource(DXTestCase): def test_no_resource(self): self.setup_temporary_project() dx_p = DXPath('dx://' + self.project + ':/random.txt') - with self.assertRaises(dx.NotFoundError): + with pytest.raises(dx.NotFoundError, match='does not exist'): dx_p.canonical_resource def test_unique_resource(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_file.txt']) dx_p = DXPath('dx://'+self.project + ':/temp_file.txt') self.assertTrue(utils.is_valid_dxid(dx_p.canonical_resource, 'file')) def test_duplicate_resource(self): self.setup_temporary_project() - self.setup_generic_files() - with dxpy.new_dxfile(name='folder_file.txt', - folder='/temp_folder', - project=self.proj_id) as ff: - ff.write('temp_data') + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') - with self.assertRaises(dx.DuplicateError): + with pytest.raises(dx.DuplicateError, match='not unique'): dx_p.canonical_resource class TestListDir(DXTestCase): def test_listdir_project(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://'+self.project) results = dx_p.listdir() self.assert_dx_lists_equal(results, [ 'dx://'+self.project+':/temp_folder', - 'dx://'+self.project+':/temp_file.txt', + 'dx://'+self.project+':/temp_file.txt' ]) def test_listdir_file(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_file.txt']) dx_p = DXPath('dx://'+self.project+':/temp_file.txt') results = dx_p.listdir() - self.assertEquals(results, []) + self.assertEqual(results, []) def test_listdir_empty_folder(self): self.setup_temporary_project() self.project_handler.new_folder('/temp_folder') dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.listdir() - self.assertEquals(results, []) + self.assertEqual(results, []) def test_listdir_folder_w_file(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.listdir() self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/temp_file.txt', 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) def test_listdir_absent_folder(self): self.setup_temporary_project() dx_p = DXPath('dx://' + self.project + ':/random_folder') - with self.assertRaises(dx.NotFoundError): + with pytest.raises(dx.NotFoundError, match='specified folder'): dx_p.listdir() def test_listdir_folder_share_filename(self): self.setup_temporary_project() - self.setup_generic_files() - with dxpy.new_dxfile(name='temp_folder', - project=self.proj_id) as f: - f.write('data') + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.listdir() self.assert_dx_lists_equal(results, [ @@ -292,17 +282,57 @@ def test_listdir_folder_share_filename(self): def test_listdir_canonical(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = dx_p.listdir(canonicalize=True) self.assertIn('dx://'+self.proj_id+':/temp_folder', results) - self.assertEquals(len(results), 2) + self.assertEqual(len(results), 2) + + def test_listdir_on_canonical_project(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://' + self.proj_id) + results = dx_p.listdir() + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder', + 'dx://' + self.project + ':/temp_file.txt' + ]) + + def test_listdir_on_canonical_resource(self): + self.setup_temporary_project() + self.setup_files(['/temp_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt').canonical_path + results = dx_p.listdir() + self.assertEqual(results, []) + + def test_listdir_iter_project(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://' + self.project) + results = list(dx_p.listdir_iter()) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder', + 'dx://' + self.project + ':/temp_file.txt' + ]) + + def test_listdir_iter_canon_on_canon(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/random/temp_file.txt']) + dx_p = DXPath('dx://' + self.proj_id + ':/temp_folder') + results = list(dx_p.listdir_iter(canonicalize=True)) + self.assertIn('dx://' + self.proj_id + ':/temp_folder/random', results) + self.assertEqual(len(results), 2) class TestList(DXTestCase): def test_list_project(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = dx_p.list() self.assert_dx_lists_equal(results, [ @@ -312,24 +342,26 @@ def test_list_project(self): def test_list_file(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_file.txt']) dx_p = DXPath('dx://'+self.project+':/temp_file.txt') results = dx_p.list() - self.assertEquals(results, []) + self.assertEqual(results, []) def test_list_empty_folder(self): self.setup_temporary_project() self.project_handler.new_folder('/temp_folder') dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.list() - self.assertEquals(results, []) + self.assertEqual(results, []) - def test_list_folder_w_file(self): + def test_list_folder_w_files(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.list() self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/temp_file.txt', 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) @@ -337,14 +369,12 @@ def test_list_absent_folder(self): self.setup_temporary_project() dx_p = DXPath('dx://' + self.project + ':/random_folder') results = dx_p.list() - self.assertEquals(results, []) + self.assertEqual(results, []) def test_list_folder_share_filename(self): self.setup_temporary_project() - self.setup_generic_files() - with dxpy.new_dxfile(name='temp_folder', - project=self.proj_id) as f: - f.write('data') + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.list() self.assert_dx_lists_equal(results, [ @@ -353,23 +383,25 @@ def test_list_folder_share_filename(self): def test_list_canonical(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = dx_p.list(canonicalize=True) self.assertTrue(all(self.proj_id in result for result in results)) - self.assertTrue(any(self.file_handler.get_id() in result for result in results)) - self.assertTrue(any(self.folder_file_handler.get_id() in result for result in results)) + self.assertEqual(len(results), 2) def test_list_limit(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = dx_p.list(limit=1) - self.assertEquals(len(results), 1) + self.assertEqual(len(results), 1) def test_list_starts_with(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = dx_p.list(starts_with='temp_folder') self.assert_dx_lists_equal(results, [ @@ -378,16 +410,26 @@ def test_list_starts_with(self): def test_list_w_condition(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = dx_p.list(starts_with='temp_folder', condition=lambda res: len(res) == 1) self.assert_dx_lists_equal(results, [ 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) + def test_list_fail_w_condition(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://' + self.project) + with pytest.raises(exceptions.ConditionNotMetError, match='not met'): + results = dx_p.list(condition=lambda res: len(res) == 1) + def test_list_w_category(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) dxpy.new_dxworkflow(title='Workflow', project=self.proj_id) results = dx_p.list(category='file') @@ -396,11 +438,51 @@ def test_list_w_category(self): 'dx://' + self.project + ':/temp_file.txt' ]) + def test_list_on_canonical_project(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://' + self.proj_id) + results = dx_p.list() + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt', + 'dx://' + self.project + ':/temp_file.txt' + ]) + + def test_list_on_canonical_resource(self): + self.setup_temporary_project() + self.setup_files(['/temp_file.txt']) + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt').canonical_path + results = dx_p.list() + self.assertEqual(results, []) + + def test_list_iter_project(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) + dx_p = DXPath('dx://' + self.project) + results = list(dx_p.list_iter()) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/folder_file.txt', + 'dx://' + self.project + ':/temp_file.txt' + ]) + + def test_list_iter_canon_on_canon(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/random/temp_file.txt']) + dx_p = DXPath('dx://' + self.proj_id + ':/temp_folder') + results = list(dx_p.list_iter(canonicalize=True)) + self.assertTrue(all(self.proj_id in result for result in results)) + self.assertTrue(all('temp_folder' not in result for result in results)) + self.assertEqual(len(results), 2) + class TestWalkFiles(DXTestCase): def test_pattern_w_prefix(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://' + self.project) results = list(dx_p.walkfiles(pattern='fold*')) self.assert_dx_lists_equal(results, [ @@ -409,43 +491,63 @@ def test_pattern_w_prefix(self): def test_pattern_w_suffix(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_file.txt']) dx_p = DXPath('dx://'+self.project) - results = list(dx_p.walkfiles(pattern='*p_file.txt')) + results = list(dx_p.walkfiles(pattern='*.txt')) self.assert_dx_lists_equal(results, [ 'dx://' + self.project + ':/temp_file.txt' ]) def test_pattern_w_prefix_suffix(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_file.txt']) dx_p = DXPath('dx://'+self.project) results = list(dx_p.walkfiles(pattern='*file*')) self.assert_dx_lists_equal(results, [ 'dx://' + self.project + ':/temp_file.txt', - 'dx://' + self.project + ':/temp_folder/folder_file.txt' + 'dx://' + self.project + ':/temp_folder/folder_file.csv' + ]) + + def test_pattern_share_folder_match(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_folder.txt']) + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='temp_folder*')) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder.txt' ]) + def test_pattern_no_match(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/temp_file.txt']) + dx_p = DXPath('dx://'+self.project) + results = list(dx_p.walkfiles(pattern='*tmp*')) + self.assertEqual(results, []) + class TestStat(DXTestCase): def test_stat_folder(self): self.setup_temporary_project() - self.setup_generic_files() - with self.assertRaises(ValueError): + self.setup_files(['/temp_folder/folder_file.csv']) + with pytest.raises(ValueError, match='ending in folders'): DXPath('dx://'+self.project+':/temp_folder/').stat() - with self.assertRaises(ValueError): + with pytest.raises(ValueError, match='ending in folders'): DXPath('dx://'+self.project+':/temp_folder').stat() def test_stat_project_error(self): - self.setup_temporary_project() + self.setup_temporary_project() # creates project with name in self.project test_proj = dxb.DXProject() - test_proj.new(self.new_proj_name()) + test_proj.new(self.new_proj_name()) # creates duplicate project - with self.assertRaises(dx.DuplicateProjectError): + with pytest.raises(dx.DuplicateProjectError, match='Duplicate projects'): DXPath('dx://'+self.project+':').stat() - with self.assertRaises(dx.DuplicateProjectError): + with pytest.raises(dx.DuplicateProjectError, match='Duplicate projects'): DXPath('dx://'+self.project+':/').stat() - with self.assertRaises(dx.NotFoundError): + with pytest.raises(dx.NotFoundError, match='No projects'): DXPath('dx://Random_Proj:').stat() test_proj.destroy() @@ -467,7 +569,8 @@ def test_stat_canonical_project(self): def test_stat_file(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_file.txt']) dx_p = DXPath('dx://'+self.project+':/temp_file.txt') response = dx_p.stat() self.assertIn('folder', response) # only files have folders @@ -475,6 +578,13 @@ def test_stat_file(self): response = dx_p.stat() self.assertIn('folder', response) + def test_stat_canonical_resource(self): + self.setup_temporary_project() + self.setup_files(['/temp_file.txt']) + dx_p = DXPath('dx://'+self.project+':/temp_file.txt').canonical_path + response = dx_p.stat() + self.assertIn('folder', response) # only files have folders + class TestExists(DXTestCase): def test_false(self): @@ -485,30 +595,27 @@ def test_false(self): def test_false_no_folder(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/random_folder/folder_file.txt') result = dx_p.exists() self.assertFalse(result) def test_raises_on_duplicate(self): self.setup_temporary_project() - self.setup_generic_files() - with dxpy.new_dxfile(name='folder_file.txt', - folder='/temp_folder', - project=self.proj_id) as f: - f.write('data') + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') - with self.assertRaises(dx.DuplicateError): + with pytest.raises(dx.DuplicateError, match='not unique'): dx_p.exists() def test_true_file(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder/folder_file.txt') result = dx_p.exists() self.assertTrue(result) - def test_true_dir_with_no_object(self): + def test_true_empty_dir(self): self.setup_temporary_project() self.project_handler.new_folder('/temp_folder') dx_p = DXPath('dx://' + self.project + ':/temp_folder') @@ -517,7 +624,7 @@ def test_true_dir_with_no_object(self): def test_true_dir_with_object(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') result = dx_p.exists() self.assertTrue(result) @@ -531,39 +638,46 @@ def test_project_does_not_exist(self): class TestGlob(DXTestCase): def test_suffix_pattern(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.csv']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') - results = dx_p.glob('*file.txt') + results = dx_p.glob('*.txt') self.assert_dx_lists_equal(results, [ 'dx://' + self.project + ':/temp_folder/folder_file.txt' ]) def test_prefix_pattern(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/file.csv']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.glob('file*') - self.assertEqual(results, []) + self.assert_dx_lists_equal(results, [ + 'dx://' + self.project + ':/temp_folder/file.csv' + ]) def test_valid_pattern(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.csv']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.glob('*fi*le*') self.assert_dx_lists_equal(results, [ - 'dx://' + self.project + ':/temp_folder/folder_file.txt' + 'dx://' + self.project + ':/temp_folder/folder_file.txt', + 'dx://' + self.project + ':/temp_folder/temp_file.csv' ]) def test_valid_pattern_wo_wildcard(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.csv']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.glob('file') self.assertEqual(results, []) def test_glob_cond_met(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') results = dx_p.glob('*fi*le*', condition=lambda res: len(res) == 1) self.assert_dx_lists_equal(results, [ @@ -572,44 +686,54 @@ def test_glob_cond_met(self): def test_cond_no_met(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt', + '/temp_folder/temp_file.csv']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') - with self.assertRaises(exceptions.ConditionNotMetError): - dx_p.glob('*fi*le*', condition=lambda res: len(res) > 1) + with pytest.raises(exceptions.ConditionNotMetError, match='not met'): + dx_p.glob('*fi*le*', condition=lambda res: len(res) == 1) class TestTempUrl(DXTestCase): def test_fail_on_project(self): self.setup_temporary_project() dx_p = DXPath('dx://' + self.project) - with self.assertRaises(dx.DXError): + with pytest.raises(dx.DXError, match='DX Projects'): dx_p.temp_url() def test_fail_on_folder(self): self.setup_temporary_project() - self.setup_generic_files() + self.setup_files(['/temp_folder/folder_file.txt']) dx_p = DXPath('dx://' + self.project + ':/temp_folder') - with self.assertRaises(dx.DXError): + with pytest.raises(dx.DXError, match='DXPaths ending in folders'): dx_p.temp_url() def test_on_file(self): self.setup_temporary_project() - self.setup_generic_files() - while self.file_handler._get_state().lower() != 'closed': - time.sleep(1) + self.setup_files(['/temp_file.txt']) + time.sleep(2) # to allow for the file state to go to closed after calling close() dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') result = dx_p.temp_url() self.assertIn('dl.dnanex.us', result) - def test_on_file_timed(self): # TODO + def test_on_file_canonical(self): + self.setup_temporary_project() + self.setup_files(['/temp_file.txt']) + time.sleep(2) # to allow for the file state to go to closed after calling close() + dx_p = DXPath('dx://' + self.project + ':/temp_file.txt').canonical_path + result = dx_p.temp_url() + self.assertIn('dl.dnanex.us', result) + + def test_on_file_named_timed(self): # TODO self.setup_temporary_project() - self.setup_generic_files() - while self.file_handler._get_state().lower() != 'closed': - time.sleep(1) + self.setup_files(['/temp_file.txt']) + time.sleep(2) # to allow for the file state to go to closed after calling close() dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') result = dx_p.temp_url(filename='random.txt', lifetime=1) self.assertIn('dl.dnanex.us', result) self.assertIn('random.txt', result) - # TODO (check that link is valid) - time.sleep(2) - # TODO (check that link is invalid) + url = urllib.request.urlopen(result) + self.assertIn('attachment', url.headers['content-disposition']) + self.assertIn('random.txt', url.headers['content-disposition']) + time.sleep(2) # for link to expire + with pytest.raises(urllib.error.HTTPError): + urllib.request.urlopen(result) From 2b3520fec6d29d98ab429c7e1bce9f605951847b Mon Sep 17 00:00:00 2001 From: Anuj Kumar Date: Wed, 5 Sep 2018 11:37:36 -0700 Subject: [PATCH 16/16] implementing last review comment on glob/walkfiles --- stor/tests/test_dx.py | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/stor/tests/test_dx.py b/stor/tests/test_dx.py index 887f2247..948aff0b 100644 --- a/stor/tests/test_dx.py +++ b/stor/tests/test_dx.py @@ -523,9 +523,9 @@ def test_pattern_share_folder_match(self): def test_pattern_no_match(self): self.setup_temporary_project() self.setup_files(['/temp_folder/folder_file.csv', - '/temp_file.txt']) + '/random_file.txt']) dx_p = DXPath('dx://'+self.project) - results = list(dx_p.walkfiles(pattern='*tmp*')) + results = list(dx_p.walkfiles(pattern='*temp*')) self.assertEqual(results, []) @@ -675,6 +675,14 @@ def test_valid_pattern_wo_wildcard(self): results = dx_p.glob('file') self.assertEqual(results, []) + def test_pattern_no_file_match(self): + self.setup_temporary_project() + self.setup_files(['/temp_folder/folder_file.csv', + '/random_file.txt']) + dx_p = DXPath('dx://'+self.project) + results = dx_p.glob('*temp*') + self.assertEqual(results, []) + def test_glob_cond_met(self): self.setup_temporary_project() self.setup_files(['/temp_folder/folder_file.txt']) @@ -709,24 +717,33 @@ def test_fail_on_folder(self): def test_on_file(self): self.setup_temporary_project() - self.setup_files(['/temp_file.txt']) - time.sleep(2) # to allow for the file state to go to closed after calling close() + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: + f.write('data') + while f._get_state().lower() != 'closed': + time.sleep(1) # to allow for the file state to go to closed after calling close() dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') result = dx_p.temp_url() self.assertIn('dl.dnanex.us', result) def test_on_file_canonical(self): self.setup_temporary_project() - self.setup_files(['/temp_file.txt']) - time.sleep(2) # to allow for the file state to go to closed after calling close() + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: + f.write('data') + while f._get_state().lower() != 'closed': + time.sleep(1) # to allow for the file state to go to closed after calling close() dx_p = DXPath('dx://' + self.project + ':/temp_file.txt').canonical_path result = dx_p.temp_url() self.assertIn('dl.dnanex.us', result) def test_on_file_named_timed(self): # TODO self.setup_temporary_project() - self.setup_files(['/temp_file.txt']) - time.sleep(2) # to allow for the file state to go to closed after calling close() + with dxpy.new_dxfile(name='temp_file.txt', + project=self.proj_id) as f: + f.write('data') + while f._get_state().lower() != 'closed': + time.sleep(1) # to allow for the file state to go to closed after calling close() dx_p = DXPath('dx://' + self.project + ':/temp_file.txt') result = dx_p.temp_url(filename='random.txt', lifetime=1) self.assertIn('dl.dnanex.us', result)