diff --git a/.github/workflows/build-ide-bundles.yml b/.github/workflows/build-ide-bundles.yml index ab2223e..2d11af3 100644 --- a/.github/workflows/build-ide-bundles.yml +++ b/.github/workflows/build-ide-bundles.yml @@ -64,6 +64,8 @@ jobs: zip -r ../ide-rules-codex.zip .agents/skills/ zip -r ../ide-rules-openclaw.zip .openclaw/ zip -r ../ide-rules-hermes.zip .hermes/ + zip -r ../ide-rules-cline.zip .clinerules/ + zip -r ../ide-rules-continuedev.zip .continue/ cd .. zip -r ide-rules-all.zip dist/ ls -lh ide-rules-*.zip @@ -84,5 +86,7 @@ jobs: ide-rules-codex.zip \ ide-rules-openclaw.zip \ ide-rules-hermes.zip \ + ide-rules-cline.zip \ + ide-rules-continuedev.zip \ --clobber diff --git a/.github/workflows/test-mcp-server.yml b/.github/workflows/test-mcp-server.yml new file mode 100644 index 0000000..b976fe6 --- /dev/null +++ b/.github/workflows/test-mcp-server.yml @@ -0,0 +1,52 @@ +--- +name: Test MCP Server + +permissions: + contents: read + +on: + pull_request: + paths: + - 'src/codeguard-mcp/**' + - 'sources/rules/core/**' + push: + branches: + - main + - develop + paths: + - 'src/codeguard-mcp/**' + - 'sources/rules/core/**' + workflow_dispatch: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', '3.12', '3.13'] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + enable-cache: true + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + working-directory: src/codeguard-mcp + run: uv sync --group dev + + - name: Run linter (ruff) + working-directory: src/codeguard-mcp + run: uv run ruff check src/ tests/ + + - name: Run tests + working-directory: src/codeguard-mcp + run: uv run pytest tests/ -v --tb=short diff --git a/.github/workflows/update-codeguard-rules.yml b/.github/workflows/update-codeguard-rules.yml index 
278025b..cce724e 100644 --- a/.github/workflows/update-codeguard-rules.yml +++ b/.github/workflows/update-codeguard-rules.yml @@ -41,6 +41,8 @@ jobs: [".agents/rules"]="ide-rules-antigravity.zip" [".opencode/skills/software-security/rules"]="ide-rules-opencode.zip" [".agents/skills/software-security/rules"]="ide-rules-codex.zip" + [".clinerules"]="ide-rules-cline.zip" + [".continue/rules"]="ide-rules-continuedev.zip" ) # File patterns for each format @@ -51,6 +53,8 @@ jobs: [".agents/rules"]="codeguard-*.md" [".opencode/skills/software-security/rules"]="codeguard-*.md" [".agents/skills/software-security/rules"]="codeguard-*.md" + [".clinerules"]="codeguard-*.md" + [".continue/rules"]="codeguard-*.md" ) # Legacy paths: warn so users migrate instead of silently going stale. diff --git a/.github/workflows/validate-rules.yml b/.github/workflows/validate-rules.yml index 798d198..995f4c5 100644 --- a/.github/workflows/validate-rules.yml +++ b/.github/workflows/validate-rules.yml @@ -139,6 +139,16 @@ jobs: exit 1 fi + if [ ! -d "test-output/.clinerules" ]; then + echo "❌ Cline rules not generated at .clinerules/" + exit 1 + fi + + if [ ! -d "test-output/.continue/rules" ]; then + echo "❌ Continue.dev rules not generated at .continue/rules/" + exit 1 + fi + AGENT_HOSTS=( ".claude:.claude/skills/software-security/rules" ".cursor:.cursor/rules" diff --git a/sources/agents/codeguard-reviewer/AGENT.md b/sources/agents/codeguard-reviewer/AGENT.md index ee301c0..6321dea 100644 --- a/sources/agents/codeguard-reviewer/AGENT.md +++ b/sources/agents/codeguard-reviewer/AGENT.md @@ -29,7 +29,7 @@ The CodeGuard rule files live at `{RULES_DIR}/codeguard-*{RULE_EXT}` (one per ru - Your own rule directory `{RULES_DIR}/` and any CodeGuard host directories (`.claude/`, `.cursor/`, `.codex/`, `.opencode/`, `.agents/`, `.windsurf/`, `.github/instructions/`, `.openclaw/`, - `.hermes/`). These contain the rule bodies themselves (with example + `.hermes/`, `.clinerules/`, `.continue/`). 
These contain the rule bodies themselves (with example secrets and banned-API snippets) and must never be reported as findings. - Vendored/generated paths: `.git/`, `node_modules/`, `vendor/`, diff --git a/src/codeguard-mcp/src/codeguard_mcp/rule_processor.py b/src/codeguard-mcp/src/codeguard_mcp/rule_processor.py index 559a118..44f3663 100644 --- a/src/codeguard-mcp/src/codeguard_mcp/rule_processor.py +++ b/src/codeguard-mcp/src/codeguard_mcp/rule_processor.py @@ -26,6 +26,7 @@ class ProcessedRule: always_apply: bool = False content: str = "" filename: str = "" + tags: list[str] = field(default_factory=list) class RuleProcessor: @@ -36,6 +37,7 @@ def __init__(self, rules_dir: str | Path | None = None) -> None: self.rules_dir = Path(settings.RULES_DIR) else: self.rules_dir = Path(rules_dir) + self._cache: list[ProcessedRule] | None = None @staticmethod def _split_frontmatter(text: str) -> tuple[dict | None, str]: @@ -91,6 +93,9 @@ def parse_rule(self, filepath: Path) -> ProcessedRule: f"{', '.join(languages)}." 
) + tags_raw = fm.get("tags", []) + tags = [t.lower().strip() for t in tags_raw] if isinstance(tags_raw, list) else [] + return ProcessedRule( rule_id=filepath.stem, description=tool_desc, @@ -98,9 +103,14 @@ def parse_rule(self, filepath: Path) -> ProcessedRule: always_apply=always_apply, content=body, filename=filepath.name, + tags=tags, ) def get_all_rules(self) -> list[ProcessedRule]: + """Return all parsed rules, using a cached copy after the first load.""" + if self._cache is not None: + return self._cache + if not self.rules_dir.exists(): logger.error("Rules directory missing: %s", self.rules_dir) return [] @@ -109,7 +119,11 @@ def get_all_rules(self) -> list[ProcessedRule]: for md in sorted(self.rules_dir.glob("*.md")): if "template" in md.name.lower(): continue - rules.append(self.parse_rule(md)) + try: + rules.append(self.parse_rule(md)) + except (ValueError, OSError) as exc: + logger.warning("Skipping malformed rule %s: %s", md.name, exc) logger.info("Loaded %d security rules from %s", len(rules), self.rules_dir) + self._cache = rules return rules diff --git a/src/codeguard-mcp/src/codeguard_mcp/server.py b/src/codeguard-mcp/src/codeguard_mcp/server.py index 26f3f16..83e965f 100644 --- a/src/codeguard-mcp/src/codeguard_mcp/server.py +++ b/src/codeguard-mcp/src/codeguard_mcp/server.py @@ -71,11 +71,22 @@ def _register_rules() -> None: factory = RuleToolFactory() rules = processor.get_all_rules() + if not rules: + logger.warning( + "No rules loaded from %s - the server will start with zero " + "rule tools. 
def create_search_tool(self, rules: list[ProcessedRule]) -> Tool:
    """Create a ``search_rules`` tool that filters the rule catalogue.

    Filters by language, tag, or free-text keyword. All filters are
    optional and combined with AND logic. A filter that is ``None``,
    empty, or whitespace-only is treated as "not supplied".

    Args:
        rules: The rule catalogue to search. The list is read on every
            invocation of the tool and is never mutated here.

    Returns:
        A ``Tool`` named ``search_rules`` wrapping the async search handler.
    """

    def _normalize(value: str | None) -> str:
        # Same normalisation the filters always used (lower + strip), but
        # applied up front so blank input can be detected and skipped.
        return value.lower().strip() if value else ""

    def _summary(description: str) -> str:
        # First non-blank line of the description; "" when there is none.
        # Indexing ``splitlines()[0]`` directly raises IndexError on "".
        for line in description.splitlines():
            if line.strip():
                return line.strip()
        return ""

    async def _search(
        language: str | None = None,
        tag: str | None = None,
        keyword: str | None = None,
    ) -> str:
        """Search CodeGuard security rules by language, tag, or keyword.

        Args:
            language: Filter by programming language (e.g. 'python').
            tag: Filter by security domain tag (e.g. 'authentication').
            keyword: Free-text search across rule ID and description.

        Returns:
            A formatted list of matching rules with their metadata.
        """
        wanted_language = _normalize(language)
        wanted_tag = _normalize(tag)
        wanted_keyword = _normalize(keyword)

        matches = list(rules)

        if wanted_language:
            # Rules flagged always_apply are language-agnostic, so they
            # pass the language filter regardless of their language list.
            matches = [
                r for r in matches
                if r.always_apply
                or any(wanted_language == lang.lower().strip() for lang in r.languages)
            ]

        if wanted_tag:
            matches = [
                r for r in matches
                if any(wanted_tag == t.lower().strip() for t in r.tags)
            ]

        if wanted_keyword:
            matches = [
                r for r in matches
                if wanted_keyword in r.rule_id.lower()
                or wanted_keyword in r.description.lower()
            ]

        if not matches:
            return "No rules matched the given filters."

        lines = [f"Found {len(matches)} matching rule(s):\n"]
        for r in matches:
            langs = ", ".join(r.languages) if r.languages else "all"
            tags = ", ".join(r.tags) if r.tags else "none"
            entry = f"- {r.rule_id} [languages: {langs}] [tags: {tags}]"
            summary = _summary(r.description)
            if summary:
                entry += f"\n  {summary}"
            lines.append(entry)
        return "\n".join(lines)

    return Tool.from_function(
        fn=_search,
        name="search_rules",
        description=(
            "Search and filter CodeGuard security rules by programming "
            "language, security domain tag, or free-text keyword. "
            "Returns a summary list of matching rules."
        ),
    )
class TestSearchTool:
    """Exercise the ``search_rules`` tool against a three-rule catalogue."""

    def setup_method(self):
        # One always-apply rule plus two language-scoped rules that share
        # the "web" tag, so single and combined filters can be told apart.
        credentials_rule = ProcessedRule(
            rule_id="codeguard-1-hardcoded-credentials",
            description="No hardcoded creds",
            always_apply=True,
            content="# Creds",
            filename="codeguard-1-hardcoded-credentials.md",
            tags=["secrets"],
        )
        injection_rule = ProcessedRule(
            rule_id="codeguard-0-input-validation-injection",
            description="Input validation and injection prevention",
            languages=["python", "java", "javascript"],
            content="# Injection",
            filename="codeguard-0-input-validation-injection.md",
            tags=["web", "input-validation"],
        )
        auth_rule = ProcessedRule(
            rule_id="codeguard-0-authentication-mfa",
            description="Authentication and MFA best practices",
            languages=["python", "java"],
            content="# Auth",
            filename="codeguard-0-authentication-mfa.md",
            tags=["authentication", "web"],
        )
        self.factory = RuleToolFactory()
        self.rules = [credentials_rule, injection_rule, auth_rule]
        self.search_tool = self.factory.create_search_tool(self.rules)

    @pytest.mark.asyncio
    async def test_search_by_language(self):
        listing = await self.search_tool.fn(language="javascript")
        # The javascript rule matches directly; the always_apply rule
        # matches every language.
        for expected_id in (
            "codeguard-0-input-validation-injection",
            "codeguard-1-hardcoded-credentials",
        ):
            assert expected_id in listing
        # The python/java-only rule must be filtered out.
        assert "codeguard-0-authentication-mfa" not in listing

    @pytest.mark.asyncio
    async def test_search_by_tag(self):
        listing = await self.search_tool.fn(tag="secrets")
        assert "codeguard-1-hardcoded-credentials" in listing
        assert "codeguard-0-input-validation-injection" not in listing

    @pytest.mark.asyncio
    async def test_search_by_keyword(self):
        listing = await self.search_tool.fn(keyword="injection")
        assert "codeguard-0-input-validation-injection" in listing
        assert "codeguard-1-hardcoded-credentials" not in listing

    @pytest.mark.asyncio
    async def test_search_combined_filters(self):
        listing = await self.search_tool.fn(language="python", tag="web")
        for expected_id in (
            "codeguard-0-input-validation-injection",
            "codeguard-0-authentication-mfa",
        ):
            assert expected_id in listing
        # always_apply passes the language filter but lacks the 'web' tag.
        assert "codeguard-1-hardcoded-credentials" not in listing

    @pytest.mark.asyncio
    async def test_search_no_matches(self):
        listing = await self.search_tool.fn(tag="nonexistent-tag")
        assert "No rules matched" in listing

    @pytest.mark.asyncio
    async def test_search_no_filters_returns_all(self):
        listing = await self.search_tool.fn()
        assert "Found 3 matching rule(s)" in listing
.clinerules/ +- ContinueDevFormat: Generates .md files for Continue.dev under .continue/rules/ Usage: - from formats import BaseFormat, ProcessedRule, CursorFormat, WindsurfFormat, CopilotFormat, AgentSkillsFormat, AntigravityFormat, OpenCodeFormat, CodexFormat, OpenClawFormat, HermesFormat, ClaudeFormat + from formats import BaseFormat, ProcessedRule, CursorFormat, WindsurfFormat, CopilotFormat, AgentSkillsFormat, AntigravityFormat, OpenCodeFormat, CodexFormat, OpenClawFormat, HermesFormat, ClaudeFormat, ClineFormat, ContinueDevFormat version = "1.0.0" formats = [ @@ -30,6 +32,8 @@ OpenClawFormat(version), HermesFormat(version), ClaudeFormat(version), + ClineFormat(version), + ContinueDevFormat(version), ] """ @@ -44,6 +48,8 @@ from formats.openclaw import OpenClawFormat from formats.hermes import HermesFormat from formats.claude import ClaudeFormat +from formats.cline import ClineFormat +from formats.continuedev import ContinueDevFormat __all__ = [ "BaseFormat", @@ -58,4 +64,6 @@ "OpenClawFormat", "HermesFormat", "ClaudeFormat", + "ClineFormat", + "ContinueDevFormat", ] diff --git a/src/formats/cline.py b/src/formats/cline.py new file mode 100644 index 0000000..c1a8c69 --- /dev/null +++ b/src/formats/cline.py @@ -0,0 +1,65 @@ +""" +Cline Format Implementation + +Generates .md rule files for Cline AI coding agent. +""" + +from formats.base import BaseFormat, ProcessedRule + + +class ClineFormat(BaseFormat): + """ + Cline format implementation (.md rule files). 
class ContinueDevFormat(BaseFormat):
    """
    Continue.dev format implementation (.md rule files).

    Continue.dev uses .md files in a .continue/rules/ directory with YAML frontmatter containing:
    - name: Display name/title for the rule
    - globs: File matching patterns for conditional activation
      (a single quoted string, or a YAML list when there are several)
    - alwaysApply: Whether the rule always loads
    - version: Rule version
    """

    def get_format_name(self) -> str:
        """Return Continue.dev format identifier."""
        return "continuedev"

    def get_file_extension(self) -> str:
        """Return Continue.dev format file extension."""
        return ".md"

    def get_output_subpath(self) -> str:
        """Return Continue.dev output subdirectory."""
        return ".continue/rules"

    @staticmethod
    def _split_globs(globs: str) -> list[str]:
        """Split a comma-separated glob string into individual patterns.

        Commas inside a ``{a,b}`` brace group belong to the pattern and do
        not split it. Empty and whitespace-only entries are dropped.
        """
        patterns: list[str] = []
        current: list[str] = []
        depth = 0
        for ch in globs:
            if ch == "{":
                depth += 1
            elif ch == "}" and depth:
                depth -= 1
            if ch == "," and depth == 0:
                patterns.append("".join(current).strip())
                current = []
            else:
                current.append(ch)
        patterns.append("".join(current).strip())
        return [p for p in patterns if p]

    @staticmethod
    def _yaml_quote(value: str) -> str:
        """Return *value* as a YAML double-quoted scalar."""
        # Backslash first, otherwise the escapes added for quotes would be
        # escaped a second time.
        escaped = value.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def generate(self, rule: ProcessedRule, globs: str) -> str:
        """
        Generate Continue.dev .md format with YAML frontmatter.

        Args:
            rule: The processed rule to format
            globs: Comma-separated glob patterns for file matching

        Returns:
            Formatted .md content
        """
        yaml_lines = []

        # Continue.dev uses 'name' instead of 'description'
        name = self._format_yaml_field("name", rule.description)
        if name:
            yaml_lines.append(name)

        # Glob-based activation or always-on
        if rule.always_apply:
            yaml_lines.append("alwaysApply: true")
        else:
            patterns = self._split_globs(globs or "")
            if len(patterns) > 1:
                # Several patterns must be a YAML sequence: a single string
                # "a,b" would be read as one glob containing a comma.
                quoted = ", ".join(self._yaml_quote(p) for p in patterns)
                yaml_lines.append(f"globs: [{quoted}]")
            else:
                # Zero or one pattern keeps the original scalar form,
                # including globs: "" when no pattern is given.
                only = patterns[0] if patterns else ""
                yaml_lines.append(f"globs: {self._yaml_quote(only)}")

        # Add version
        yaml_lines.append(f"version: {self.version}")

        return self._build_yaml_frontmatter(yaml_lines, rule.content)
""" # Known tags used in rules # Add new tags here as they are introduced in rules KNOWN_TAGS = { + # Identity and access "authentication", + "authorization", + "session-management", + # Data protection "data-security", - "infrastructure", + "cryptography", "privacy", "secrets", + # Application security "web", + "api-security", + "input-validation", + "injection-prevention", + "client-side-security", + "error-handling", + # Infrastructure and operations + "infrastructure", + "cloud-security", + "container-security", + "network-security", + "ci-cd", + # Supply chain and dependencies + "supply-chain", + "dependency-management", + # Platform-specific + "mobile-security", + # Compliance and governance + "logging", + "configuration", + # Serialization and data formats + "serialization", + "xml-security", }