diff --git a/hsclient/hydroshare.py b/hsclient/hydroshare.py index 0b1feaa..bda4698 100644 --- a/hsclient/hydroshare.py +++ b/hsclient/hydroshare.py @@ -1,4 +1,5 @@ import getpass +import threading import os import pathlib import pickle @@ -62,12 +63,13 @@ class File(str): :param checksum: the md5 checksum of the file """ - def __new__(cls, value, file_url, checksum): + def __new__(cls, value, file_url, checksum, aggregation): return super(File, cls).__new__(cls, value) - def __init__(self, value, file_url, checksum): + def __init__(self, value, file_url, checksum, aggregation): self._file_url = file_url self._checksum = checksum + self._aggregation = aggregation @property def path(self) -> str: @@ -92,6 +94,10 @@ def folder(self) -> str: @property def checksum(self): """The md5 checksum of the file""" + if self._checksum is None: + _ = self._aggregation._checksums + path = urljoin('data/contents', quote(self.path)) + self._checksum = self._aggregation._checksums.get(path, "") return self._checksum @property @@ -173,7 +179,12 @@ def _files(self): "data/contents/", )[1] ) - f = File(file_path, unquote(file.path), self._checksums[file_checksum_path]) + if self._parsed_checksums is not None: + checksum = self._checksums[file_checksum_path] + else: + checksum = None + f = File(file_path, unquote(file.path), checksum, self) + self._parsed_files.append(f) return self._parsed_files @@ -181,13 +192,13 @@ def _files(self): def _aggregations(self): def populate_metadata(_aggr): - _aggr._metadata + _ = _aggr._metadata - if not self._parsed_aggregations: + if self._parsed_aggregations is None: self._parsed_aggregations = [] for file in self._map.describes.files: if is_aggregation(str(file)): - self._parsed_aggregations.append(Aggregation(unquote(file.path), self._hs_session, self._checksums)) + self._parsed_aggregations.append(Aggregation(unquote(file.path), self._hs_session, None)) # load metadata for all aggregations (metadata is needed to create any typed aggregation) with ThreadPoolExecutor() as executor: @@ -258,8 +269,22 @@ def _download(self, save_path: str = "", unzip_to: str = None) -> str: return unzip_to return downloaded_zip + def _reset(self): + self._retrieved_map = None + self._retrieved_metadata = None + self._parsed_files = None + self._parsed_aggregations = None + self._parsed_checksums = None + self._main_file_path = None + + def _refetch(self): + # not refreshing the checksums here - they will be refreshed when needed + _ = self._map + _ = self._metadata + _ = self._files + @property - def metadata_file(self): + def metadata_file(self) -> str: """The path to the metadata file""" return self.metadata_path.split("/data/contents/", 1)[1] @@ -290,7 +315,6 @@ def main_file_path(self) -> str: self._main_file_path = self.files()[0].path return self._main_file_path - @refresh def save(self) -> None: """ Saves the metadata back to HydroShare @@ -316,7 +340,7 @@ def files(self, search_aggregations: bool = False, **kwargs) -> List[File]: files = files + list(aggregation.files(search_aggregations=search_aggregations, **kwargs)) return files - def file(self, search_aggregations=False, **kwargs) -> File: + def file(self, search_aggregations=False, **kwargs) -> Union[File, None]: """ Returns a single file in the resource that matches the filtering parameters :param search_aggregations: Defaults False, set to true to search aggregations @@ -349,7 +373,7 @@ def aggregations(self, **kwargs) -> List[BaseMetadata]: aggregations = filter(lambda agg: attribute_filter(agg.metadata, key, value), aggregations) return list(aggregations) - def aggregation(self, **kwargs) -> BaseMetadata: + def aggregation(self, **kwargs) -> Union[BaseMetadata, None]: """ Returns a single Aggregation in the resource that matches the filtering parameters. Uses the same filtering rules described in the aggregations method. @@ -363,17 +387,12 @@ def aggregation(self, **kwargs) -> BaseMetadata: def refresh(self) -> None: """ - Forces the retrieval of the resource map and metadata files. Currently this is implemented to be lazy and will - only retrieve those files again after another call to access them is made. This will be later updated to be - eager and retrieve the files asynchronously. + Forces the retrieval of the resource map and metadata files. Files are retrieved asynchronously. """ - # TODO, refresh should destroy the aggregation objects and async fetch everything. - self._retrieved_map = None - self._retrieved_metadata = None - self._parsed_files = None - self._parsed_aggregations = None - self._parsed_checksums = None - self._main_file_path = None + + self._reset() + t = threading.Thread(target=self._refetch, daemon=True) + t.start() def delete(self) -> None: """Deletes this aggregation from HydroShare""" @@ -385,7 +404,7 @@ def delete(self) -> None: self.main_file_path, ) self._hs_session.delete(path, status_code=200) - self.refresh() + self._reset() class DataObjectSupportingAggregation(Aggregation): @@ -890,7 +909,7 @@ def resource_id(self) -> str: return self._map.identifier @property - def metadata_file(self): + def metadata_file(self) -> str: """The path to the metadata file""" return self.metadata_path.split("/data/", 1)[1] @@ -925,7 +944,7 @@ def access_permission(self): # resource operations - def new_version(self): + def new_version(self) -> 'Resource': """ Creates a new version of the resource on HydroShare :return: A Resource object of the newly created resource version @@ -935,7 +954,7 @@ def new_version(self): resource_id = response.text return Resource("/resource/{}/data/resourcemap.xml".format(resource_id), self._hs_session) - def copy(self): + def copy(self) -> 'Resource': """ Copies this Resource into a new resource on HydroShare returns: A Resource object of the newly copied resource @@ -953,7 +972,6 @@ def download(self, save_path: str = "") -> str: """ return self._hs_session.retrieve_bag(self._hsapi_path, save_path=save_path) - @refresh def delete(self) -> None: """ Deletes the resource on HydroShare @@ -961,8 +979,9 @@ def delete(self) -> None: """ hsapi_path = self._hsapi_path self._hs_session.delete(hsapi_path, status_code=204) + # refresh the resource to clear the cache + self._reset() - @refresh def save(self) -> None: """ Saves the metadata to HydroShare @@ -972,6 +991,10 @@ def save(self) -> None: path = urljoin(self._hsapi_path, "ingest_metadata") self._hs_session.upload_file(path, files={'file': ('resourcemetadata.xml', metadata_string)}) + # if new creators were added, refresh the resource to get the creator order + creators_with_no_order = [cr for cr in self.metadata.creators if cr.creator_order is None] + if creators_with_no_order: + self.refresh() # referenced content operations @refresh @@ -1008,7 +1031,6 @@ def reference_update(self, file_name: str, url: str, path: str = '') -> None: # file operations - @refresh def folder_create(self, folder: str) -> None: """ Creates a folder on HydroShare @@ -1037,7 +1059,7 @@ def folder_delete(self, path: str = None) -> None: """ self._delete_file_folder(path) - def folder_download(self, path: str, save_path: str = ""): + def folder_download(self, path: str, save_path: str = "") -> str: """ Downloads a folder from HydroShare :param path: The path to folder @@ -1048,7 +1070,7 @@ def folder_download(self, path: str, save_path: str = ""): urljoin(self._resource_path, "data", "contents", path), save_path, params={"zipped": "true"} ) - def file_download(self, path: str, save_path: str = "", zipped: bool = False): + def file_download(self, path: str, save_path: str = "", zipped: bool = False) -> str: """ Downloads a file from HydroShare :param path: The path to the file @@ -1072,17 +1094,34 @@ def file_delete(self, path: str = None) -> None: """ self._delete_file(path) - @refresh - def file_rename(self, path: str, new_path: str) -> None: + def file_rename(self, path: str, new_path: str, refresh=False) -> None: """ Rename a file on HydroShare :param path: The path to the file :param new_path: the renamed path to the file + :param refresh: Defaults to False, set to True to automatically refresh the resource from HydroShare :return: None """ rename_path = urljoin(self._hsapi_path, "functions", "move-or-rename") self._hs_session.post(rename_path, status_code=200, data={"source_path": path, "target_path": new_path}) + if refresh: + self.refresh() + return + if self._parsed_files is None: + self.refresh() + return + if path in self._parsed_files: + # path is a file path - just refresh checksums from hydroshare and update the cached parsed_files + self._parsed_checksums = None + # update the checksums + _ = self._checksums + + # update the parsed_files + checksum_path = urljoin("data", "contents", new_path) + new_file = File(new_path, unquote(new_path), self._checksums[checksum_path], self) + self._parsed_files = [new_file if file == path else file for file in self._parsed_files] + @refresh def file_zip(self, path: str, zip_name: str = None, remove_file: bool = True) -> None: """ @@ -1113,7 +1152,7 @@ def file_unzip(self, path: str, overwrite: bool = True, ingest_metadata=True) -> unzip_path, status_code=200, data={"overwrite": overwrite, "ingest_metadata": ingest_metadata} ) - def file_aggregate(self, path: str, agg_type: AggregationType, refresh: bool = True): + def file_aggregate(self, path: str, agg_type: AggregationType, refresh: bool = True) -> Union[Aggregation, None]: """ Aggregate a file to a HydroShare aggregation type. Aggregating files allows you to specify metadata specific to the files associated with the aggregation. To set a FileSet aggregation, include the path to the folder or @@ -1179,8 +1218,9 @@ def aggregation_remove(self, aggregation: Aggregation) -> None: aggregation.metadata.type.value + "LogicalFile", aggregation.main_file_path, ) + aggregation._hs_session.post(path, status_code=200) - aggregation.refresh() + self._parsed_aggregations = [agg for agg in self._parsed_aggregations if agg != aggregation] @refresh def aggregation_move(self, aggregation: Aggregation, dst_path: str = "") -> None: @@ -1205,24 +1245,25 @@ def aggregation_move(self, aggregation: Aggregation, dst_path: str = "") -> None if status in ("Not ready", "progress"): while aggregation._hs_session.check_task(task_id) != 'true': time.sleep(1) - aggregation.refresh() - @refresh def aggregation_delete(self, aggregation: Aggregation) -> None: """ Deletes an aggregation from HydroShare. This deletes the files and metadata in the aggregation. :param aggregation: The aggregation object to delete :return: None """ + # remove the aggregation from the cache + self._parsed_aggregations = [agg for agg in self._parsed_aggregations if agg != aggregation] aggregation.delete() - def aggregation_download(self, aggregation: Aggregation, save_path: str = "", unzip_to: str = None) -> str: + @staticmethod + def aggregation_download(aggregation: Aggregation, save_path: str = "", unzip_to: str = None) -> str: """ Download an aggregation from HydroShare :param aggregation: The aggregation to download :param save_path: The local path to save the aggregation to, defaults to the current directory :param unzip_to: If set, the resulting download will be unzipped to the specified path - :return: None + :return: The path to the downloaded file """ return aggregation._download(save_path=save_path, unzip_to=unzip_to) @@ -1464,15 +1505,16 @@ def search( ): """ Query the GET /hsapi/resource/ REST end point of the HydroShare server. - :param creator: Filter results by the HydroShare username or email - :param author: Filter results by the HydroShare username or email + :param creator: Filter results by the HydroShare username or email of creator + :param contributor: Filter results by the HydroShare username or email of contributor :param owner: Filter results by the HydroShare username or email :param group_name: Filter results by the HydroShare group name associated with resources :param from_date: Filter results to those created after from_date. Must be datetime.date. :param to_date: Filter results to those created before to_date. Must be datetime.date. Because dates have no time information, you must specify date+1 day to get results for date (e.g. use 2015-05-06 to get resources created up to and including 2015-05-05) - :param types: Filter results to particular HydroShare resource types (Deprecated, all types are Composite) + :param resource_types: Filter results to particular HydroShare resource types + (Deprecated, all types are Composite) :param subject: Filter by comma separated list of subjects :param full_text_search: Filter by full text search :param edit_permission: Filter by boolean edit permission @@ -1548,7 +1590,7 @@ def resource(self, resource_id: str, validate: bool = True, use_cache: bool = Tr res = Resource("/resource/{}/data/resourcemap.xml".format(resource_id), self._hs_session) if validate: - res.metadata + _ = res.metadata if use_cache: self._resource_object_cache[resource_id] = res @@ -1575,7 +1617,7 @@ def user(self, user_id: int) -> User: response = self._hs_session.get(f'/hsapi/userDetails/{user_id}/', status_code=200) return User(**response.json()) - def my_user_info(self): + def my_user_info(self) -> dict: """ Retrieves the user info of the user's credentials provided :return: JSON object representing the user info diff --git a/tests/test_functional.py b/tests/test_functional.py index 39e4d24..9d8eb76 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -89,12 +89,16 @@ def test_filtering_aggregations_by_files(timeseries_resource): assert not timeseries_resource.aggregation(file__path="No_match.sqlite") assert len(timeseries_resource.aggregations(files__path="No_match.sqlite")) == 0 assert not timeseries_resource.aggregation(files__path="No_match.sqlite") + # get the first aggregation + aggr = timeseries_resource.aggregations()[0] + file = aggr.file(path="ODM2_Multi_Site_One_Variable.sqlite") + assert file.checksum is not None def test_filtering_files(resource): - resource.folder_create("asdf", refresh=False) + resource.folder_create("asdf") resource.file_upload("data/test_resource_metadata_files/asdf/testing.xml", destination_path="asdf", refresh=False) - resource.folder_create("referenced_time_series", refresh=False) + resource.folder_create("referenced_time_series") resource.file_upload( "data/test_resource_metadata_files/msf_version.refts.json", destination_path="referenced_time_series" ) @@ -138,8 +142,11 @@ def test_creator_order(new_resource): res.metadata.creators = reversed res.save() # check creator_order does not change - assert res.metadata.creators[1].name == "Testing" - assert res.metadata.creators[1].creator_order == 2 + for cr in res.metadata.creators: + if cr.name == "Testing": + assert cr.creator_order == 2 + else: + assert cr.creator_order == 1 def test_resource_metadata_updating(new_resource): @@ -176,7 +183,7 @@ def test_resource_delete(hydroshare, new_resource): res_id = new_resource.resource_id new_resource.delete() try: - res = hydroshare.resource(res_id, use_cache=False) + _ = hydroshare.resource(res_id, use_cache=False) assert False except Exception as e: assert f"No resource was found for resource id:{res_id}" in str(e) @@ -291,7 +298,7 @@ def test_move_aggregation(resource_with_netcdf_aggr): def test_file_upload_and_rename(new_resource): assert len(new_resource.files()) == 0 - new_resource.file_upload("data/other.txt", refresh=False) + new_resource.file_upload("data/other.txt") new_resource.file_rename("other.txt", "updated.txt") assert len(new_resource.files()) == 1 assert new_resource.files()[0].name == "updated.txt" @@ -299,9 +306,12 @@ def test_file_upload_and_rename(new_resource): def test_file_aggregate(new_resource): assert len(new_resource.files()) == 0 - new_resource.folder_create("folder", refresh=False) - new_resource.file_upload("data/other.txt", destination_path="folder", refresh=False) - new_resource.file_aggregate("folder/other.txt", agg_type=AggregationType.SingleFileAggregation) + assert len(new_resource.aggregations()) == 0 + new_resource.folder_create("folder") + new_resource.file_upload("data/other.txt", destination_path="folder") + aggr = new_resource.file_aggregate("folder/other.txt", agg_type=AggregationType.SingleFileAggregation) + assert aggr is not None + assert aggr.metadata.type == AggregationType.SingleFileAggregation assert len(new_resource.files()) == 0 assert len(new_resource.aggregations()) == 1 assert len(new_resource.aggregations()[0].files()) == 1 @@ -346,12 +356,12 @@ def test_delete_file(new_resource): def test_access_rules(new_resource): - ap = new_resource.access_permission + _ = new_resource.access_permission pass def test_refresh(resource): - resource.metadata + _ = resource.metadata resource.files() resource.aggregations() @@ -446,7 +456,7 @@ def test_aggregations(new_resource, files): def test_aggregation_fileset(new_resource, files): root_path = "data/test_resource_metadata_files/" file_count = len(files) - 2 # exclude rdf/xml file - new_resource.folder_create("asdf", refresh=False) + new_resource.folder_create("asdf") new_resource.file_upload(*[os.path.join(root_path, file) for file in files], destination_path="asdf") assert len(new_resource.aggregations()) == 1 assert len(new_resource.files()) == 0 @@ -472,7 +482,7 @@ def test_aggregation_fileset(new_resource, files): def test_folder_zip(new_resource): - new_resource.folder_create("test_folder", refresh=False) + new_resource.folder_create("test_folder") new_resource.file_upload("data/other.txt", destination_path="test_folder", refresh=False) new_resource.file_zip("test_folder") assert new_resource.file().path == "test_folder.zip" @@ -480,7 +490,7 @@ def test_folder_zip(new_resource): def test_folder_zip_specify_name(new_resource): - new_resource.folder_create("test_folder", refresh=False) + new_resource.folder_create("test_folder") new_resource.file_upload("data/other.txt", destination_path="test_folder", refresh=False) new_resource.file_zip("test_folder", "test.zip", False) assert new_resource.file(path="test.zip").path == "test.zip" @@ -488,14 +498,14 @@ def test_folder_zip_specify_name(new_resource): def test_folder_rename(new_resource): - new_resource.folder_create("test_folder", refresh=False) + new_resource.folder_create("test_folder") new_resource.file_upload("data/other.txt", destination_path="test_folder", refresh=False) new_resource.folder_rename("test_folder", "renamed_folder") assert new_resource.file(path="renamed_folder/other.txt") def test_folder_delete(new_resource): - new_resource.folder_create("test_folder", refresh=False) + new_resource.folder_create("test_folder") new_resource.file_upload("data/other.txt", destination_path="test_folder") assert len(new_resource.files()) == 1 new_resource.folder_delete("test_folder") @@ -510,7 +520,7 @@ def test_zipped_file_download(resource): def test_folder_download(new_resource): - new_resource.folder_create("test_folder", refresh=False) + new_resource.folder_create("test_folder") new_resource.file_upload("data/other.txt", destination_path="test_folder") assert len(new_resource.files()) == 1 with tempfile.TemporaryDirectory() as td: @@ -520,9 +530,9 @@ def test_folder_download(new_resource): def test_filename_spaces(hydroshare): res = hydroshare.create() - res.folder_create("with spaces", refresh=False) + res.folder_create("with spaces") res.file_upload("data/other.txt", destination_path="with spaces", refresh=False) - res.file_rename("with spaces/other.txt", "with spaces/with spaces file.txt") + res.file_rename("with spaces/other.txt", "with spaces/with spaces file.txt", refresh=True) file = res.file(path="with spaces/with spaces file.txt") with tempfile.TemporaryDirectory() as td: filename = res.file_download(file, save_path=td)