From 7281585aafce83e170fba73b1b82cc23c4849422 Mon Sep 17 00:00:00 2001 From: Igor Lebedev Date: Tue, 19 May 2026 06:41:42 +0300 Subject: [PATCH] feat: Added snapshot-resolving to search in Cloud providers --- .../v2/artifacts_finder/artifact_finder.py | 2 +- .../providers/aws_code_artifact.py | 78 ++++++++--- .../providers/azure_artifacts.py | 129 +++++++++++------- .../providers/gcp_artifact_registry.py | 56 +++++++- .../utils/artifact_finder_utils.py | 8 ++ 5 files changed, 199 insertions(+), 74 deletions(-) diff --git a/qubership_pipelines_common_library/v2/artifacts_finder/artifact_finder.py b/qubership_pipelines_common_library/v2/artifacts_finder/artifact_finder.py index d5a8910..65ea2bb 100644 --- a/qubership_pipelines_common_library/v2/artifacts_finder/artifact_finder.py +++ b/qubership_pipelines_common_library/v2/artifacts_finder/artifact_finder.py @@ -12,7 +12,7 @@ class ArtifactFinder: Supports different repository providers: Artifactory, Nexus, AWS, GCP, Azure - Providers might slightly differ in functionality, refer to the Provider docs (e.g. not all providers support snapshot resolving) + Providers might slightly differ in functionality, refer to the Provider docs Provides different auth methods for Cloud Providers, implementing `CloudCredentialsProvider` interface diff --git a/qubership_pipelines_common_library/v2/artifacts_finder/providers/aws_code_artifact.py b/qubership_pipelines_common_library/v2/artifacts_finder/providers/aws_code_artifact.py index 7d6f5de..5d43c94 100644 --- a/qubership_pipelines_common_library/v2/artifacts_finder/providers/aws_code_artifact.py +++ b/qubership_pipelines_common_library/v2/artifacts_finder/providers/aws_code_artifact.py @@ -6,6 +6,7 @@ from qubership_pipelines_common_library.v2.artifacts_finder.model.artifact import Artifact from qubership_pipelines_common_library.v2.artifacts_finder.model.artifact_provider import ArtifactProvider from qubership_pipelines_common_library.v2.artifacts_finder.model.credentials import Credentials +from qubership_pipelines_common_library.v2.artifacts_finder.utils.artifact_finder_utils import ArtifactFinderUtils class AwsCodeArtifactProvider(ArtifactProvider): @@ -15,7 +16,7 @@ def __init__(self, credentials: Credentials, domain: str, repository: str, packa Initializes this client to work with **AWS Code Artifact** for generic or maven artifacts. Requires `Credentials` provided by `AwsCredentialsProvider`. - This provider currently doesn't support searching for `-SNAPSHOT` versions and/or resolving them + This provider supports resolving `-SNAPSHOT` artifacts into latest version """ super().__init__(**kwargs) self._credentials = credentials @@ -43,39 +44,78 @@ def download_artifact(self, resource_url: str, local_path: str | Path, **kwargs) file.write(response.get('asset').read()) def search_artifacts(self, artifact: Artifact, **kwargs) -> list[str]: - list_packages_response = self._aws_client.list_packages( - domain=self._domain, repository=self._repository, - format=self._format, packagePrefix=artifact.artifact_id - ) - logging.debug(f"list_packages_response: {list_packages_response}") - - namespaces = [package.get('namespace') for package in list_packages_response.get('packages') - if package.get('package') == artifact.artifact_id] - logging.debug(f"namespaces: {namespaces}") - - if not namespaces: - logging.warning(f"Found no packages with artifactId = {artifact.artifact_id}!") - return [] - if len(namespaces) > 1: - logging.warning(f"Found multiple namespaces with same artifactId = {artifact.artifact_id}:\n{namespaces}") + if artifact.group_id: + namespaces = [artifact.group_id] + else: + list_packages_response = self._aws_client.list_packages( + domain=self._domain, repository=self._repository, + format=self._format, packagePrefix=artifact.artifact_id + ) + logging.debug(f"list_packages_response: {list_packages_response}") + namespaces = [package.get('namespace') for package in list_packages_response.get('packages') + if package.get('package') == artifact.artifact_id] + logging.debug(f"namespaces: {namespaces}") + if not namespaces: + logging.warning(f"Found no packages with artifactId = {artifact.artifact_id}!") + return [] + if len(namespaces) > 1: + logging.warning(f"Found multiple namespaces with same artifactId = {artifact.artifact_id}:\n{namespaces}") results = [] for namespace in namespaces: + package_version = artifact.version + if artifact.is_snapshot(): + resolved = self._resolve_snapshot_version(artifact, namespace) + if not resolved: + continue + package_version = resolved + logging.debug(f"Resolved SNAPSHOT version '{artifact.version}' -> '{package_version}' (namespace: {namespace})") try: assets_response = self._aws_client.list_package_version_assets( domain=self._domain, repository=self._repository, format=self._format, package=artifact.artifact_id, - packageVersion=artifact.version, namespace=namespace + packageVersion=package_version, namespace=namespace ) - logging.debug(f"assets: {assets_response}") for asset in assets_response.get('assets'): if asset.get('name').lower().endswith(artifact.extension.lower()): results.append(f"{assets_response.get('namespace')}/{assets_response.get('package')}/" f"{assets_response.get('version')}/{asset.get('name')}") except Exception: - logging.warning(f"Specific version ({artifact.version}) of package ({namespace}.{artifact.artifact_id}) not found!") + logging.warning(f"Specific version ({package_version}) of package ({namespace}.{artifact.artifact_id}) not found!") logging.info(f"AWS search results: {results}") return results + def _resolve_snapshot_version(self, artifact: Artifact, namespace: str) -> str | None: + candidate_versions = [] + next_token = None + while True: + kwargs = { + 'domain': self._domain, + 'repository': self._repository, + 'format': self._format, + 'package': artifact.artifact_id, + 'namespace': namespace, + } + if next_token: + kwargs['nextToken'] = next_token + + response = self._aws_client.list_package_versions(**kwargs) + for version_entry in response.get('versions', []): + ver = version_entry.get('version', '') + parsed = ArtifactFinderUtils.parse_snapshot_timestamp_version(ver) + if parsed and f"{parsed[0]}-SNAPSHOT" == artifact.version: + candidate_versions.append((parsed[1], ver)) + + next_token = response.get('nextToken') + if not next_token: + break + + if not candidate_versions: + logging.debug(f"No snapshot versions found for {artifact.artifact_id}:{artifact.version} in namespace '{namespace}'") + return None + + candidate_versions.sort(key=lambda x: x[0], reverse=True) + return candidate_versions[0][1] + def get_provider_name(self) -> str: return "aws_code_artifact" diff --git a/qubership_pipelines_common_library/v2/artifacts_finder/providers/azure_artifacts.py b/qubership_pipelines_common_library/v2/artifacts_finder/providers/azure_artifacts.py index 84f04dd..ffa93fc 100644 --- a/qubership_pipelines_common_library/v2/artifacts_finder/providers/azure_artifacts.py +++ b/qubership_pipelines_common_library/v2/artifacts_finder/providers/azure_artifacts.py @@ -6,6 +6,7 @@ from qubership_pipelines_common_library.v2.artifacts_finder.model.artifact import Artifact from qubership_pipelines_common_library.v2.artifacts_finder.model.artifact_provider import ArtifactProvider from qubership_pipelines_common_library.v2.artifacts_finder.model.credentials import Credentials +from qubership_pipelines_common_library.v2.artifacts_finder.utils.artifact_finder_utils import ArtifactFinderUtils class AzureArtifactsProvider(ArtifactProvider): @@ -15,7 +16,7 @@ def __init__(self, credentials: Credentials, organization: str, project: str, fe Initializes this client to work with **Azure Artifacts** for generic artifacts. Requires `Credentials` provided by `AzureCredentialsProvider`. - This provider currently doesn't support searching for `-SNAPSHOT` versions and/or resolving them + This provider supports resolving `-SNAPSHOT` artifacts into latest version (in maven-format feeds) """ super().__init__(**kwargs) self._credentials = credentials @@ -32,11 +33,12 @@ def search_artifacts(self, artifact: Artifact, **kwargs) -> list[str]: if timestamp_version_match := re.match(self.TIMESTAMP_VERSION_PATTERN, artifact.version): acceptable_versions.append(timestamp_version_match.group(1) + "SNAPSHOT") - # Try to find package with name ~ "artifact_id" + # Search all packages with matching artifact_id feeds_search_url = f"https://feeds.dev.azure.com/{self.organization}/{self.project}/_apis/packaging/feeds/{self.feed}/packages" + name_query = f"{artifact.group_id}:{artifact.artifact_id}" if artifact.group_id else artifact.artifact_id feed_search_params = { - "includeAllVersions": "true", - "packageNameQuery": artifact.artifact_id, + "includeAllVersions": "false", + "packageNameQuery": name_query, "protocolType": "maven", "api-version": "7.1", } @@ -47,54 +49,81 @@ def search_artifacts(self, artifact: Artifact, **kwargs) -> list[str]: raise Exception(f"Could not find '{artifact.artifact_id}' - search request returned {feeds_response.status_code}!") logging.debug(f"Feeds search response: {feeds_response_json}") - if feeds_response_json.get("count") > 1: - logging.warning("Found more than 1 feeds. Use the first one.") - elif feeds_response_json.get("count") == 0: - logging.warning("No feeds were found.") + packages = feeds_response_json.get("value", []) + if not packages: + logging.warning("No packages were found.") return [] - feed = feeds_response_json.get("value")[0] - feed_links = feed.get("_links", {}) - - # Get feed versions - feed_versions_url = feed_links.get("versions", {}).get("href", "") - feed_versions_response = self._session.get(url=feed_versions_url, timeout=self.timeout) - feed_versions_response_json = feed_versions_response.json() - if feed_versions_response.status_code != 200: - logging.error(f"Feed versions error ({feed_versions_response.status_code}) response: {feed_versions_response_json}") - raise Exception(f"Could not find feed versions, search request returned {feed_versions_response.status_code}!") - logging.debug(f"Feed versions response: {feed_versions_response_json}") - feed_versions = feed_versions_response_json.get("value") - - # Filter by acceptable versions - logging.debug(f"Filtering by acceptable versions: '{acceptable_versions}'") - feed_version = [f for f in feed_versions if (f.get('protocolMetadata').get('data').get('version') in acceptable_versions)] - if len(feed_version) == 0: - logging.warning("All feed versions filtered.") - return [] - filtered_feed_version = feed_version[0] + if len(packages) > 1: + logging.debug(f"Found multiple packages (groups) for '{artifact.artifact_id}', processing all") - # Search for target file - files = [f for f in filtered_feed_version.get("files") if f.get('name').startswith(f"{artifact.artifact_id}-{artifact.version}") and f.get('name').endswith(artifact.extension)] - logging.debug(f"Files found: {files}") - if len(files) == 0: - logging.warning("All files filtered.") - return [] - target_file = files[0] - - # Build download url - feed_id = feed_links.get("feed").get("href").split("/")[-1] # take id from link to feed - feed_version = filtered_feed_version.get("version") - group_id = filtered_feed_version.get('protocolMetadata').get('data').get("groupId") - artifact_id = filtered_feed_version.get('protocolMetadata').get('data').get("artifactId") - target_file_name = target_file.get("name") - - download_url = ( - f"https://pkgs.dev.azure.com/{self.organization}/{self.project}/_apis/packaging/feeds/{feed_id}/maven/" - f"{group_id}/{artifact_id}/{feed_version}/{target_file_name}/content" - f"?api-version=7.1-preview.1" - ) - logging.info(f"Azure search resulting url: {download_url}") - return [download_url] + result_urls = [] + for feed_pkg in packages: + pkg_links = feed_pkg.get("_links", {}) + pkg_versions_url = pkg_links.get("versions", {}).get("href", "") + if not pkg_versions_url: + continue + + pkg_versions_response = self._session.get(url=pkg_versions_url, params={"isDeleted": "false"}, timeout=self.timeout) + if pkg_versions_response.status_code != 200: + logging.warning(f"Skipping package, versions request returned {pkg_versions_response.status_code}") + continue + + feed_versions = pkg_versions_response.json().get("value", []) + if not feed_versions: + continue + + # Filter by acceptable versions (stores snapshot versions literally: "5.0.0-SNAPSHOT") + feed_version = [ + f for f in feed_versions + if f.get("protocolMetadata", {}).get("data", {}).get("version") in acceptable_versions + ] + if not feed_version: + continue + filtered_feed_version = feed_version[0] + feed_id = pkg_links.get("feed").get("href").split("/")[-1] + feed_version = filtered_feed_version.get("version") + group_id = filtered_feed_version.get("protocolMetadata", {}).get("data", {}).get("groupId") + artifact_id = filtered_feed_version.get("protocolMetadata", {}).get("data", {}).get("artifactId") + + all_version_files = filtered_feed_version.get("files") or [] + if artifact.is_snapshot(): + base_version = artifact.version.removesuffix("-SNAPSHOT") + candidate_files = [] + for f in all_version_files: + name = f.get("name", "") + if not name.startswith(f"{artifact.artifact_id}-") or not name.endswith(f".{artifact.extension}"): + continue + version_part = name.removeprefix(f"{artifact.artifact_id}-").removesuffix(f".{artifact.extension}") + parsed = ArtifactFinderUtils.parse_snapshot_timestamp_version(version_part) + if parsed and parsed[0] == base_version: + candidate_files.append((parsed[1], parsed[2], f)) + if not candidate_files: + logging.warning("No snapshot files found.") + continue + candidate_files.sort(key=lambda x: (x[0], x[1]), reverse=True) + target_file = candidate_files[0][2] + logging.debug(f"Resolved SNAPSHOT version '{artifact.version}' -> '{target_file.get('name')}' (group_id: {group_id})") + else: + target_file = None + for f in all_version_files: + name = f.get("name", "") + if name.startswith(f"{artifact.artifact_id}-") and name.endswith(f".{artifact.extension}"): + target_file = f + break + if not target_file: + continue + + # Build download url + target_file_name = target_file.get("name") + + download_url = ( + f"https://pkgs.dev.azure.com/{self.organization}/{self.project}/_apis/packaging/feeds/{feed_id}/maven/" + f"{group_id}/{artifact_id}/{feed_version}/{target_file_name}/content" + f"?api-version=7.1-preview.1" + ) + result_urls.append(download_url) + + return result_urls def get_provider_name(self) -> str: return "azure_artifacts" diff --git a/qubership_pipelines_common_library/v2/artifacts_finder/providers/gcp_artifact_registry.py b/qubership_pipelines_common_library/v2/artifacts_finder/providers/gcp_artifact_registry.py index d663a8a..fe9018d 100644 --- a/qubership_pipelines_common_library/v2/artifacts_finder/providers/gcp_artifact_registry.py +++ b/qubership_pipelines_common_library/v2/artifacts_finder/providers/gcp_artifact_registry.py @@ -1,18 +1,25 @@ +import logging + from pathlib import Path +from urllib.parse import unquote from google.cloud import artifactregistry_v1 from qubership_pipelines_common_library.v2.artifacts_finder.model.artifact import Artifact from qubership_pipelines_common_library.v2.artifacts_finder.model.artifact_provider import ArtifactProvider from qubership_pipelines_common_library.v2.artifacts_finder.model.credentials import Credentials +from qubership_pipelines_common_library.v2.artifacts_finder.utils.artifact_finder_utils import ArtifactFinderUtils class GcpArtifactRegistryProvider(ArtifactProvider): + GAR_URL_PREFIX = "https://artifactregistry.googleapis.com/download/v1/" + GAR_URL_SUFFIX = ":download?alt=media" + def __init__(self, credentials: Credentials, project: str, region_name: str, repository: str, **kwargs): """ Initializes this client to work with **GCP Artifact Registry** for generic artifacts. Requires `Credentials` provided by `GcpCredentialsProvider`. - This provider currently doesn't support searching for `-SNAPSHOT` versions and/or resolving them + This provider supports resolving `-SNAPSHOT` artifacts into latest version (in maven-format repositories) """ super().__init__(**kwargs) self._credentials = credentials @@ -33,20 +40,61 @@ def download_artifact(self, resource_url: str, local_path: str | Path, **kwargs) file.write(response.content) def search_artifacts(self, artifact: Artifact, **kwargs) -> list[str]: - # works with both "Maven" and "Generic" type repositories + if artifact.is_snapshot(): + return self._search_snapshot_artifacts(artifact) + name_filter = f"{self._repo_resource_id}/files/*{artifact.artifact_id}-{artifact.version}.{artifact.extension}" list_files_request = artifactregistry_v1.ListFilesRequest( parent=f"{self._repo_resource_id}", filter=f'name="{name_filter}"', ) files = self._gcp_client.list_files(request=list_files_request) - # logging.debug(f"[GCP search_artifacts] files: {files}") + group_filter = None + if artifact.group_id: + group_filter = f"/{artifact.group_id.replace('.', '/')}/" urls = [] for file in files: - download_url = f"https://artifactregistry.googleapis.com/download/v1/{file.name}:download?alt=media" + if group_filter and group_filter not in unquote(file.name): + continue + download_url = f"{self.GAR_URL_PREFIX}{file.name}{self.GAR_URL_SUFFIX}" urls.append(download_url) return urls + def _search_snapshot_artifacts(self, artifact: Artifact) -> list[str]: + prefix = "*" + if artifact.group_id: + prefix = f"*{artifact.group_id.replace('.', '/')}/" + name_filter = f"{self._repo_resource_id}/files/{prefix}{artifact.artifact_id}/{artifact.version}/maven-metadata.xml" + list_files_request = artifactregistry_v1.ListFilesRequest( + parent=self._repo_resource_id, + filter=f'name="{name_filter}"', + ) + files = self._gcp_client.list_files(request=list_files_request) + + maven_base_url = f"https://{self._region_name}-maven.pkg.dev/{self._project}/{self._repository}" + base_version = artifact.version.removesuffix("-SNAPSHOT") + result_urls = [] + for file in files: + relative = unquote(file.name.removeprefix(f"{self._repo_resource_id}/files/")) + suffix = f"{artifact.artifact_id}/{artifact.version}/maven-metadata.xml" + if not relative.endswith(suffix): + continue + group_path = relative.removesuffix(suffix).rstrip("/") + if not group_path: + continue + + metadata_url = f"{self.GAR_URL_PREFIX}{file.name}{self.GAR_URL_SUFFIX}" + response = self._authorized_session.get(url=metadata_url, timeout=self.timeout) + response.raise_for_status() + timestamp = ArtifactFinderUtils.extract_metadata_snapshot_timestamp(response.content) + resolved_version = f"{base_version}-{timestamp}" + + url = f"{maven_base_url}/{group_path}/{artifact.artifact_id}/{artifact.version}/{artifact.artifact_id}-{resolved_version}.{artifact.extension}" + logging.debug(f"Resolved SNAPSHOT version '{artifact.version}' -> '{resolved_version}' (group: {group_path})") + result_urls.append(url) + + return result_urls + def get_provider_name(self) -> str: return "gcp_artifact_registry" diff --git a/qubership_pipelines_common_library/v2/artifacts_finder/utils/artifact_finder_utils.py b/qubership_pipelines_common_library/v2/artifacts_finder/utils/artifact_finder_utils.py index 7df9a23..9d3921a 100644 --- a/qubership_pipelines_common_library/v2/artifacts_finder/utils/artifact_finder_utils.py +++ b/qubership_pipelines_common_library/v2/artifacts_finder/utils/artifact_finder_utils.py @@ -221,6 +221,14 @@ def extract_group_id_from_artifact_url(artifact_url: str) -> str: raise ValueError(f"Can't extract group_id from URL: {artifact_url}") return ".".join(components[:-3]) + @staticmethod + def parse_snapshot_timestamp_version(version_str: str) -> tuple | None: + import re + match = re.match(r'^(.+)-(\d{8}\.\d{6}-(\d+))$', version_str) + if match: + return match.group(1), match.group(2), int(match.group(3)) + return None + @staticmethod def extract_metadata_snapshot_timestamp(metadata_content: str) -> str: import xml.etree.ElementTree as ET