NVIDIA · WhereIs38 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/contrib/multilingual/.env.example b/contrib/multilingual/.env.example
@@ -0,0 +1,27 @@
+# SkillSpector Contrib Batch Scanner — Environment Configuration
+#
+# Copy to the repository root as .env:
+#   cp contrib/multilingual/.env.example .env
+#
+# The scanner also respects the upstream .env.example keys
+# (OPENAI_API_KEY, SKILLSPECTOR_PROVIDER, SKILLSPECTOR_MODEL).
+
+# Provider configuration
+SKILLSPECTOR_PROVIDER=openai
+SKILLSPECTOR_MODEL=deepseek-v4-flash
+
+# Single-key mode (standard OpenAI-compatible)
+OPENAI_API_KEY=sk-or-xxxxxxxxxxxxxxxxxxxxxxxx
+OPENAI_BASE_URL=https://api.deepseek.com/v1
+
+# Multi-key pool (recommended for batch scans).
+# Pipe-delimited: key|base_url|model.  Separate entries with newlines
+# or semicolons.  Supports up to 10 keys.  Leave unset to use
+# single-key mode above.
+# SKILLSPECTOR_API_KEYS="
+#   sk-or-xxx1|https://api.deepseek.com/v1|deepseek-v4-flash
+#   sk-or-xxx2|https://api.deepseek.com/v1|deepseek-v4-flash
+# "
+
+# Logging (DEBUG | INFO | WARNING | ERROR)
+SKILLSPECTOR_LOG_LEVEL=WARNING
diff --git a/contrib/multilingual/__init__.py b/contrib/multilingual/__init__.py
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Multilingual batch scan for SkillSpector.
+
+Community-contributed tool for scanning directories of AI agent skills
+in non-English languages.  Extends SkillSpector's built-in analyzers
+with targeted LLM gap-fill for vulnerability categories that static
+English-keyword regex rules cannot detect.
+
+Public API
+----------
+- :func:`~.discovery.discover_skills`
+- :func:`~.detection.detect_language`
+- :func:`~.detection.detect_skill_language`
+- :func:`~.annotation.is_language_compatible`
+- :func:`~.annotation.annotate_findings`
+- :func:`~.gap_fill.run_gap_fill`
+- :func:`~.runner.run_one`
+"""
+
+from __future__ import annotations
+
+# -- .env MUST load before any skillspector import.  Python runs this
+#    __init__.py before the batch_scan module body; without the early
+#    load, constants.py resolves the provider from stale env vars.
+#    override=True lets .env values replace variables already set.
+try:
+    import dotenv as _dotenv
+except ImportError:
+    pass  # python-dotenv is not installed: keep the environment as it is
+else:
+    _dotenv.load_dotenv(_dotenv.find_dotenv(usecwd=True), override=True)
+
+from .annotation import annotate_findings, is_language_compatible
+from .api_pool import ApiKey, ApiKeyPool, PooledChatModel, create_api_key_pool_from_env
+from .detection import detect_language, detect_skill_language
+from .discovery import discover_skills
+from .gap_fill import GapFillAnalyzer, GapFillFinding, GapFillResult, run_gap_fill
+from .runner import run_one
+
+__all__ = [  # every name listed here is imported from a submodule above
+    "annotate_findings",
+    "ApiKey",
+    "ApiKeyPool",
+    "create_api_key_pool_from_env",
+    "detect_language",
+    "detect_skill_language",
+    "discover_skills",
+    "GapFillAnalyzer",
+    "GapFillFinding",
+    "GapFillResult",
+    "is_language_compatible",
+    "PooledChatModel",
+    "run_gap_fill",
+    "run_one",
+]
diff --git a/contrib/multilingual/annotation.py b/contrib/multilingual/annotation.py
@@ -0,0 +1,100 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Finding language-compatibility annotation.
+
+Classifies each finding's ``rule_id`` against known buckets so downstream
+reports can flag which findings are reliable for non-English skills.
+"""
+
+from __future__ import annotations
+
+# ---------------------------------------------------------------------------
+# Rule classification
+# ---------------------------------------------------------------------------
+
+# Rule IDs from LLM-based semantic analyzers — inherently multilingual.
+_SEMANTIC_RULES: frozenset[str] = frozenset(
+    {
+        "SSD1", "SSD2", "SSD3", "SSD4",
+        "SDI1", "SDI2", "SDI3", "SDI4",
+        "SQP1", "SQP2", "SQP3",
+        "TP4",
+    }
+)
+
+# Rule IDs from the gap-fill pass (P5 / P6-P8 / MP1-MP3 / RA1-RA2) —
+# these are LLM-generated for non-English skills.
+_GAP_FILL_RULES: frozenset[str] = frozenset(
+    {"P5", "P6", "P7", "P8", "MP1", "MP2", "MP3", "RA1", "RA2"}
+)
+
+# Rule IDs from code-level analyzers — language-independent by design.
+_CODE_RULES: frozenset[str] = frozenset(
+    {
+        "AST1", "AST2", "AST3", "AST4", "AST5", "AST6", "AST7", "AST8",
+        "TT1", "TT2", "TT3", "TT4", "TT5",
+        "YR1", "YR2", "YR3", "YR4",
+        "SC1", "SC2", "SC3", "SC4", "SC5", "SC6",
+        "LP1", "LP2", "LP3", "LP4",
+        "TP1", "TP2", "TP3",
+        "TM1", "TM2", "TM3",
+    }
+)
+
+# English-keyword static rules that have semantic-equivalent coverage
+# via SSD / SDI / SQP for non-English skills.  Listed for documentation
+# only: is_language_compatible() never reads this set, and any rule ID
+# outside the three sets above is flagged incompatible for non-English.
+_ENGLISH_KEYWORD_RULES: frozenset[str] = frozenset(
+    {
+        "P1", "P2", "P3", "P4",
+        "E1", "E2", "E3", "E4",
+        "PE1", "PE2", "PE3",
+        "EA1", "EA2", "EA3", "EA4",
+        "OH1", "OH2", "OH3",
+        "TR1", "TR2", "TR3",
+    }
+)
+
+
+def is_language_compatible(rule_id: str, detected_language: str) -> bool:
+    """Report whether *rule_id* can be trusted for *detected_language*.
+
+    Every rule is trusted when the language is exactly ``"en"``.  For any
+    other language only semantic, code-level, and gap-fill rule IDs are
+    trusted; English-keyword rules and unrecognised IDs give ``False``.
+    """
+    if detected_language != "en":
+        trusted_buckets = (_SEMANTIC_RULES, _CODE_RULES, _GAP_FILL_RULES)
+        return any(rule_id in bucket for bucket in trusted_buckets)
+    return True
+
+
+def annotate_findings(
+    issues: list[dict[str, object]],
+    detected_language: str,
+) -> list[dict[str, object]]:
+    """Return shallow copies of *issues*, each with ``language_compatible`` set."""
+    # A missing "id" is looked up as ""; the input list and dicts stay untouched.
+    return [
+        {
+            **finding,
+            "language_compatible": is_language_compatible(
+                str(finding.get("id", "")), detected_language
+            ),
+        }
+        for finding in issues
+    ]