import re


def _is_scp_style_git_url(url: str) -> bool:
    """Return True when *url* uses git's SCP-like ``user@host:path`` syntax.

    Example: ``git@gitlab.internal:org/repo.git``.

    Anything carrying a ``scheme://`` is rejected, so ``ssh://git@host/repo``
    and ``https://user@host:8080/repo`` are not SCP-style.

    The host part must not contain ``/``: git only recognises the scp-like
    form when no slash appears before the first colon, so a string such as
    ``me@work/notes:v2.yml`` is a local path, not a remote.
    """
    if "://" in url:
        return False
    # user: no "/" ; host: no "/" and no ":" ; then ":" and a non-empty path.
    return re.match(r"^[^/]+@[^/:]+:.+", url) is not None
@classmethod
def _from_git_url(cls, url: str, name: str) -> "Marketplace":
    """Fetch the marketplace catalog from a git repository.

    Accepted URL forms:
        git+https://gitlab.internal/org/marketplace.git
        git+ssh://git@gitlab.internal/org/marketplace.git
        git+https://gitlab.internal/org/marketplace.git#path/to/market.yml
        git@gitlab.internal:org/marketplace.git   (SCP-style, no fragment)
        https://github.com/org/marketplace.git    (plain URL ending in .git)

    The optional fragment (``#path/to/file.yml``) names the catalog file
    inside the repository. Without it the file is chosen by
    ``_pick_marketplace_yaml``.

    Args:
        url: Source URL as given by the user; stored unchanged on the result
            so a later update can re-clone from it.
        name: Marketplace name; also used to look for ``<name>.yml``.

    Returns:
        A new, enabled ``Marketplace`` built from the catalog's
        ``description``, ``version`` and ``modules`` keys.

    Raises:
        ValueError: git is missing, times out or fails; the fragment or the
            checked-out file points outside the clone; the file is absent;
            or the YAML document is not a mapping.
    """
    import shutil
    import subprocess  # nosec B404 - required for git clone
    from urllib.parse import urlparse, urlunparse

    # Prevent git from prompting for credentials interactively
    git_env = {**os.environ, "GIT_TERMINAL_PROMPT": "0"}

    def run_git(cmd, timeout, failure):
        """Run one git command, reporting every failure mode as ValueError."""
        try:
            completed = subprocess.run(  # nosec B603 B607 - list args (no shell), git from PATH
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                env=git_env,
            )
        except FileNotFoundError as exc:
            raise ValueError(f"{failure}: git executable not found") from exc
        except subprocess.TimeoutExpired as exc:
            raise ValueError(f"{failure}: git timed out after {timeout}s") from exc
        if completed.returncode != 0:
            raise ValueError(f"{failure}: {completed.stderr.strip()}")
        return completed

    def is_inside(path, root):
        """True when resolved *path* is *root* itself or lies beneath it."""
        return path == root or root in path.parents

    # Strip "git+" prefix if present: git+https://... -> https://...
    # SCP-style URLs (git@host:path) are passed through as-is.
    git_url = url[4:] if url.startswith("git+") else url

    if _is_scp_style_git_url(git_url):
        # SCP-style URLs are not parsed for a fragment.
        file_fragment = None
        git_url_clean = git_url
    else:
        parsed = urlparse(git_url)
        file_fragment = parsed.fragment or None
        git_url_clean = urlunparse(parsed._replace(fragment=""))

    with tempfile.TemporaryDirectory() as tmp_dir:
        repo_dir = Path(tmp_dir) / "repo"
        try:
            # Blob-less, sparse, shallow clone; "--" stops option parsing so
            # a URL starting with "-" cannot be read as a git flag.
            run_git(
                [
                    "git",
                    "clone",
                    "--filter=blob:none",
                    "--sparse",
                    "--depth",
                    "1",
                    "--",
                    git_url_clean,
                    str(repo_dir),
                ],
                60,
                "Failed to clone marketplace repository",
            )
            repo_root = repo_dir.resolve()

            if file_fragment:
                # Guard against path traversal via fragment
                target_path = (repo_dir / file_fragment).resolve()
                if not is_inside(target_path, repo_root):
                    raise ValueError(
                        f"Path traversal detected in fragment: {file_fragment}"
                    )
                checkout_path = file_fragment
            else:
                # Use git ls-tree to choose the file from the root listing
                checkout_path = cls._pick_marketplace_yaml(repo_dir, name)

            # NOTE(review): newer git releases default sparse-checkout to
            # cone mode, which is directory-oriented. Confirm `set <file>`
            # works against a real repository (the unit tests mock git).
            run_git(
                [
                    "git",
                    "-C",
                    str(repo_dir),
                    "sparse-checkout",
                    "set",
                    checkout_path,
                ],
                30,
                "Failed to sparse-checkout file",
            )

            yaml_file = repo_dir / checkout_path
            if not yaml_file.is_file():
                raise ValueError(f"File '{checkout_path}' not found in repository")
            # Re-check after checkout: a symlink committed to the repository
            # only exists on disk now and may point outside the clone.
            if not is_inside(yaml_file.resolve(), repo_root):
                raise ValueError(
                    f"File '{checkout_path}' resolves outside the repository"
                )
            # Explicit encoding: the platform default is locale-dependent.
            with open(yaml_file, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        finally:
            # Clean up .git before the temp dir context manager does it,
            # to avoid issues with read-only git objects on some platforms.
            git_dir = repo_dir / ".git"
            if git_dir.exists():
                shutil.rmtree(git_dir, ignore_errors=True)

    if not isinstance(data, dict):
        raise ValueError(
            f"Marketplace file '{checkout_path}' must contain a YAML mapping, "
            f"got {type(data).__name__}"
        )

    return cls(
        name=name,
        url=url,  # Store the original URL (git+ prefix included) for future updates
        enabled=True,
        description=data.get("description", ""),
        version=data.get("version", ""),
        modules=data.get("modules", []),
    )
@staticmethod
def _pick_marketplace_yaml(repo_dir: Path, name: str) -> str:
    """Choose the marketplace YAML file at the root of a cloned repository.

    The root listing comes from ``git ls-tree --name-only HEAD``.

    Search order:
        1. ``<name>.yml`` / ``<name>.yaml`` (matches the marketplace name)
        2. ``marketplace.yml`` / ``marketplace.yaml`` (common convention)
        3. The only non-hidden ``.yml``/``.yaml`` file at the root
        4. The only YAML file at the root, hidden ones included

    ``.pre-commit-config.yaml`` is never a candidate. Step 3 keeps hidden
    tool configuration (names starting with ".") from making an otherwise
    unambiguous repository ambiguous.

    Args:
        repo_dir: Working directory of the clone.
        name: Marketplace name used for the ``<name>.yml`` lookup.

    Returns:
        The chosen file name, relative to the repository root.

    Raises:
        ValueError: git is missing, times out or fails, or the root holds
            no YAML file or more than one possible candidate.
    """
    import os
    import subprocess  # nosec B404

    failure = "Failed to list repository contents"
    ls_cmd = ["git", "-C", str(repo_dir), "ls-tree", "--name-only", "HEAD"]
    try:
        result = subprocess.run(  # nosec B603 B607
            ls_cmd,
            capture_output=True,
            text=True,
            timeout=15,
            # Same non-interactive environment as the clone/checkout calls.
            env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
        )
    except FileNotFoundError as exc:
        raise ValueError(f"{failure}: git executable not found") from exc
    except subprocess.TimeoutExpired as exc:
        raise ValueError(f"{failure}: git timed out after 15s") from exc
    if result.returncode != 0:
        raise ValueError(f"{failure}: {result.stderr.strip()}")

    root_files = [entry for entry in result.stdout.splitlines() if entry]

    # Name-specific file first, then the conventional names.
    preferred = (
        f"{name}.yml",
        f"{name}.yaml",
        "marketplace.yml",
        "marketplace.yaml",
    )
    for candidate in preferred:
        if candidate in root_files:
            return candidate

    yml_files = [
        entry
        for entry in root_files
        if entry.endswith((".yml", ".yaml")) and entry != ".pre-commit-config.yaml"
    ]
    if not yml_files:
        raise ValueError("No YAML files found in repository root")

    visible = [entry for entry in yml_files if not entry.startswith(".")]
    if len(visible) == 1:
        return visible[0]
    if not visible and len(yml_files) == 1:
        # Only a hidden YAML file exists; keep accepting it as the sole candidate.
        return yml_files[0]

    file_list = ", ".join(sorted(yml_files))
    raise ValueError(
        f"Multiple YAML files found in repository root: {file_list}. "
        f"Specify the file with a fragment: git+<url>#filename.yml"
    )
class TestMarketplaceFromGitUrl:
    """Tests for Marketplace.from_url() when the source is a git repository.

    ``subprocess.run`` is patched in every test, so no real git command runs.
    """

    YAML_CONTENT = "".join(
        line + "\n"
        for line in (
            "name: Git Marketplace",
            "description: Self-hosted catalog",
            "version: 1.0.0",
            "modules:",
            "  - name: internal-module",
            "    description: An internal module",
            "    version: 1.0.0",
            "    repository: https://gitlab.internal/org/module.git",
        )
    )

    @staticmethod
    def _fake_git(ls_tree_stdout="", checkout_content=None):
        """Build a ``subprocess.run`` stand-in that always reports success.

        - ``git clone ... <repo_dir>`` creates the target directory.
        - ``git ... ls-tree ...`` answers with *ls_tree_stdout*.
        - ``git -C <repo_dir> sparse-checkout set <path>`` writes
          *checkout_content* to ``<repo_dir>/<path>`` when content is given,
          otherwise it leaves the directory untouched.
        """

        def run(cmd, **kwargs):
            from pathlib import Path
            from unittest.mock import MagicMock

            completed = MagicMock()
            completed.returncode = 0
            completed.stderr = ""
            completed.stdout = ""

            if cmd[1] == "clone":
                # The clone destination is the last argument.
                Path(cmd[-1]).mkdir(parents=True, exist_ok=True)
            elif "ls-tree" in cmd:
                completed.stdout = ls_tree_stdout
            elif "sparse-checkout" in cmd and checkout_content is not None:
                # cmd[2] is the value given to -C, cmd[-1] the requested path.
                target = Path(cmd[2]) / cmd[-1]
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_text(checkout_content)

            return completed

        return run

    def _mock_git_clone(self, yaml_content, filename="my-market.yml"):
        """Return a side_effect covering the whole sparse-clone workflow.

        The clone creates the repo dir, ls-tree lists *filename*, and the
        sparse checkout writes *yaml_content* to the requested path.
        """
        return self._fake_git(
            ls_tree_stdout=filename + "\n", checkout_content=yaml_content
        )

    def test_from_git_url_https(self):
        """Fetch marketplace from git+https:// URL."""
        source = "git+https://gitlab.internal/org/marketplace.git"
        fake = self._mock_git_clone(self.YAML_CONTENT)
        with patch("subprocess.run", side_effect=fake):
            marketplace = Marketplace.from_url(source, "my-market")

        assert marketplace.name == "my-market"
        assert marketplace.url == "git+https://gitlab.internal/org/marketplace.git"
        assert marketplace.description == "Self-hosted catalog"
        assert marketplace.version == "1.0.0"
        assert len(marketplace.modules) == 1
        assert marketplace.modules[0]["name"] == "internal-module"

    def test_from_git_url_ssh(self):
        """Fetch marketplace from git+ssh:// URL."""
        source = "git+ssh://git@gitlab.internal/org/marketplace.git"
        fake = self._mock_git_clone(self.YAML_CONTENT)
        with patch("subprocess.run", side_effect=fake):
            marketplace = Marketplace.from_url(source, "my-market")

        assert marketplace.name == "my-market"
        assert marketplace.url == "git+ssh://git@gitlab.internal/org/marketplace.git"
        assert marketplace.description == "Self-hosted catalog"

    def test_from_git_url_scp_style(self):
        """Fetch marketplace from SCP-style git@host:org/repo.git URL."""
        source = "git@gitlab.internal:org/marketplace.git"
        fake = self._mock_git_clone(self.YAML_CONTENT)
        with patch("subprocess.run", side_effect=fake):
            marketplace = Marketplace.from_url(source, "my-market")

        assert marketplace.name == "my-market"
        assert marketplace.url == "git@gitlab.internal:org/marketplace.git"
        assert marketplace.description == "Self-hosted catalog"
        assert marketplace.version == "1.0.0"
        assert len(marketplace.modules) == 1

    def test_from_git_url_https_dot_git_suffix(self):
        """Auto-detect HTTPS URL ending in .git as a git source."""
        source = "https://github.com/org/marketplace.git"
        fake = self._mock_git_clone(self.YAML_CONTENT)
        with patch("subprocess.run", side_effect=fake):
            marketplace = Marketplace.from_url(source, "my-market")

        assert marketplace.name == "my-market"
        assert marketplace.url == "https://github.com/org/marketplace.git"
        assert marketplace.description == "Self-hosted catalog"
        assert marketplace.version == "1.0.0"
        assert len(marketplace.modules) == 1

    def test_from_git_url_with_fragment(self):
        """Use fragment to specify YAML file path in the repo."""
        fake = self._mock_git_clone(self.YAML_CONTENT, "catalogs/market.yml")
        with patch("subprocess.run", side_effect=fake):
            marketplace = Marketplace.from_url(
                "git+https://gitlab.internal/org/repo.git#catalogs/market.yml",
                "my-market",
            )

        assert marketplace.name == "my-market"
        assert marketplace.description == "Self-hosted catalog"

    def test_from_git_url_clone_failure(self):
        """Raise ValueError when git clone fails."""

        def fail_clone(cmd, **kwargs):
            from unittest.mock import MagicMock

            failed = MagicMock()
            failed.returncode = 128
            failed.stderr = "fatal: repository not found"
            return failed

        expected = "Failed to clone marketplace repository"
        with patch("subprocess.run", side_effect=fail_clone):
            with pytest.raises(ValueError, match=expected):
                Marketplace.from_url(
                    "git+https://gitlab.internal/org/missing.git", "test"
                )

    def test_from_git_url_file_not_found_in_repo(self):
        """Raise ValueError when fragment points to missing file."""
        # sparse-checkout "succeeds" but the file never materializes
        fake = self._fake_git()
        with patch("subprocess.run", side_effect=fake):
            with pytest.raises(ValueError, match="not found in repository"):
                Marketplace.from_url(
                    "git+https://gitlab.internal/org/repo.git#missing.yml", "test"
                )

    def test_from_git_url_no_yaml_in_repo(self):
        """Raise ValueError when repo has no YAML files."""
        fake = self._fake_git(ls_tree_stdout="README.md\n")
        with patch("subprocess.run", side_effect=fake):
            with pytest.raises(ValueError, match="No YAML files found"):
                Marketplace.from_url("git+https://gitlab.internal/org/repo.git", "test")

    def test_from_git_url_multiple_yaml_ambiguous(self):
        """Raise ValueError when multiple YAML files found and name doesn't match."""
        fake = self._fake_git(ls_tree_stdout="one.yml\ntwo.yml\n")
        with patch("subprocess.run", side_effect=fake):
            with pytest.raises(ValueError, match="Multiple YAML files found"):
                Marketplace.from_url("git+https://gitlab.internal/org/repo.git", "test")

    def test_from_git_url_fragment_traversal_blocked(self):
        """Block path traversal in fragment."""
        fake = self._fake_git()
        with patch("subprocess.run", side_effect=fake):
            with pytest.raises(ValueError, match="Path traversal detected"):
                Marketplace.from_url(
                    "git+https://gitlab.internal/org/repo.git#../../etc/passwd",
                    "test",
                )

    def test_from_git_url_stored_for_updates(self):
        """The git+ URL is preserved so market update can re-clone."""
        fake = self._mock_git_clone(self.YAML_CONTENT)
        with patch("subprocess.run", side_effect=fake):
            marketplace = Marketplace.from_url(
                "git+https://gitlab.internal/org/marketplace.git", "my-market"
            )

        # The stored URL keeps the git+ prefix for future from_url calls
        assert marketplace.url.startswith("git+")
self.handler.can_handle("https://gitlab.internal.company.com/org/repo") + is True + ) + assert self.handler.can_handle("https://git.example.com/user/repo") is True + assert self.handler.can_handle("http://192.168.1.100:3000/org/repo") is True + + def test_cannot_handle_no_host(self): + """Don't handle URLs without a valid host.""" + assert self.handler.can_handle("https://") is False def test_cannot_handle_local_path(self): """Don't handle local paths."""