Ashex · Ashex · Feb 22, 2026 · Feb 18, 2026 · Feb 22, 2026 · Feb 22, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,33 @@
+name: Tests
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  unittest:
+    name: Run unittest suite
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6.2.0
+        with:
+          python-version: '3.12'
+          cache: 'pip'
+          cache-dependency-path: |
+            pyproject.toml
+            uv.lock
+
+      - name: Install project
+        run: |
+          python -m pip install --upgrade pip
+          pip install .
+
+      - name: Run tests
+        run: python -m unittest discover -s tests -p 'test*.py'
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
 dist/
 uv.lock
-.venv/
+.venv/
+*.pyc
+__pycache__
+.vscode
diff --git a/POWER.md b/POWER.md
@@ -0,0 +1,44 @@
+---
+name: "atproto"
+displayName: "AT Protocol Docs & Lexicons"
+description: "Search AT Protocol docs, lexicons, Bluesky API docs, and cookbook examples with atproto-mcp"
+keywords: ["atproto", "bluesky", "lexicon", "mcp", "api docs", "cookbook", "federation", "firehose"]
+---
+
+# Onboarding
+
+## Step 1: Verify runtime tools
+
+Before using this power, ensure one of these is available:
+
+- `uvx` (recommended): verify with `uvx --version`
+- `python` + `pip`: verify with `python --version` and `pip --version`
+
+## Step 2: Configure environment (optional)
+
+You can customize cache/index behavior with environment variables:
+
+- `ATPROTO_MCP_CACHE_DIR`
+- `ATPROTO_MCP_REFRESH_HOURS`
+- `ATPROTO_MCP_EMBEDDING_MODEL`
+
+## Best Practices
+
+- Start broad with `search_atproto_docs`, then narrow with `get_lexicon` and `search_bsky_api`.
+- Use `list_lexicons` to discover valid NSIDs before requesting a full schema.
+- Use `list_cookbook_examples` before `get_cookbook_example` when you need language-specific starter code.
+- Use `refresh_sources` when you suspect upstream docs changed.
+
+## Suggested Workflows
+
+### Explore a namespace
+
+1. Run `list_lexicons` with a namespace prefix (for example `app.bsky.feed`).
+2. Fetch target schemas with `get_lexicon`.
+3. Cross-check implementation details with `search_atproto_docs`.
+
+### Build a feature
+
+1. Search concepts and endpoint behavior with `search_atproto_docs`.
+2. Inspect canonical schemas with `get_lexicon`.
+3. Find implementation references with cookbook tools.
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # atproto-mcp
 
+[![Tests](https://github.com/Ashex/atproto-mcp/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/Ashex/atproto-mcp/actions/workflows/tests.yml)
+
 MCP server providing a searchable knowledge base for the [AT Protocol](https://atproto.com/) ecosystem — protocol documentation, lexicon schemas, Bluesky developer API docs, and cookbook examples — powered by [txtai](https://github.com/neuml/txtai) semantic search.
 
 ## Data Sources
@@ -76,6 +78,12 @@ Add to `.vscode/mcp.json` in your workspace:
 }
 ```
 
+### Kiro Power
+
+1. Open **Kiro → Powers**
+2. Select **Import power from GitHub**
+3. Enter `https://github.com/Ashex/atproto-mcp`
+
 ### Claude Desktop
 
 Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
@@ -105,10 +113,10 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
 
 On first launch, the server:
 
-1. **Clones** the 4 source repositories into `~/.cache/atproto-mcp/repos/` (shallow clones, ~minutes)
-2. **Parses** MDX documentation, lexicon JSON schemas, and cookbook examples into text chunks
-3. **Indexes** all chunks using txtai with the `all-MiniLM-L6-v2` sentence-transformer model (~80MB, runs locally on CPU)
-4. **Persists** the index to `~/.cache/atproto-mcp/index/` for fast subsequent starts
+1. Shallow clones the repos into `~/.cache/atproto-mcp/repos/`
+2. Parses MDX docs, lexicon schemas, and cookbook examples into text chunks
+3. Indexes the chunks using txtai with the `all-MiniLM-L6-v2` sentence-transformer model (~80MB, runs locally)
+4. Persists the index to `~/.cache/atproto-mcp/index/` for faster subsequent starts
 
 On subsequent launches, the cached index loads in seconds. Repos older than 24 hours are automatically refreshed with `git pull`.
 

diff --git a/mcp.json b/mcp.json
@@ -0,0 +1,15 @@
+{
+  "mcpServers": {
+    "atproto": {
+      "command": "uvx",
+      "args": [
+        "atproto-mcp"
+      ],
+      "env": {
+        "ATPROTO_MCP_CACHE_DIR": "${ATPROTO_MCP_CACHE_DIR}",
+        "ATPROTO_MCP_REFRESH_HOURS": "${ATPROTO_MCP_REFRESH_HOURS}",
+        "ATPROTO_MCP_EMBEDDING_MODEL": "${ATPROTO_MCP_EMBEDDING_MODEL}"
+      }
+    }
+  }
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "atproto-mcp"
-version = "0.1.1"
+version = "0.1.2"
 description = "MCP server providing AT Protocol documentation, lexicons, Bluesky API docs, and cookbook examples as a searchable knowledge base powered by txtai semantic search."
 readme = "README.md"
 requires-python = ">=3.10"

diff --git a/src/atproto_mcp/indexer.py b/src/atproto_mcp/indexer.py
@@ -139,18 +139,13 @@ def search(
         if not self._embeddings:
             return []
 
-        if source:
-            results = self._embeddings.search(
-                f"select id, text, score from txtai where similar('{_escape_sql(query)}') and "
-                f"tags is not null limit {limit}"
-            )
-        else:
-            results = self._embeddings.search(query, limit=limit)
+        fetch_limit = limit if not source else max(limit * 5, 50)
+        results = self._embeddings.search(query, limit=fetch_limit)
 
         return self._enrich_results(
             list(results) if isinstance(results, list) else [],  # type: ignore[arg-type]
             source_filter=source,
-        )
+        )[:limit]
 
     def search_lexicons(self, query: str, limit: int = 10) -> list[dict[str, object]]:
         """Semantic search specifically within lexicons."""
@@ -256,11 +251,6 @@ def lexicon_count(self) -> int:
         return len(self._lexicon_map)
 
 
-def _escape_sql(value: str) -> str:
-    """Escape single quotes for txtai SQL queries."""
-    return value.replace("'", "''")
-
-
 def build_knowledge_base(config: Config, chunks: list[ContentChunk]) -> KnowledgeBase:
     """Build a new knowledge base from parsed content chunks."""
     kb = KnowledgeBase(config)

diff --git a/tests/test_source_filtered_search_regression.py b/tests/test_source_filtered_search_regression.py
@@ -0,0 +1,88 @@
+"""Regression tests for source-filtered semantic search."""
+
+from __future__ import annotations
+
+import unittest
+
+from atproto_mcp.config import Config
+from atproto_mcp.indexer import KnowledgeBase
+from atproto_mcp.parser import ContentChunk
+
+
+class _FakeEmbeddings:
+    def __init__(self, rows: list[dict[str, object]]) -> None:
+        self._rows = rows
+
+    def search(self, query: str, limit: int = 10) -> list[dict[str, object]]:
+        return self._rows[:limit]
+
+
+class SourceFilteredSearchRegressionTests(unittest.TestCase):
+    def test_search_lexicons_returns_source_filtered_results(self) -> None:
+        kb = KnowledgeBase(Config())
+
+        kb._embeddings = _FakeEmbeddings(
+            [
+                {"id": "a", "text": "lexicon schema", "score": 0.9},
+                {"id": "b", "text": "lexicon schema", "score": 0.8},
+            ]
+        )
+        kb._chunks_by_uid = {
+            "a": ContentChunk(
+                text="",
+                source="bsky-docs",
+                file_path="docs/a.md",
+                title="A",
+            ),
+            "b": ContentChunk(
+                text="",
+                source="lexicons",
+                file_path="lexicons/b.json",
+                title="B",
+                nsid="com.atproto.lexicon.schema",
+            ),
+        }
+
+        results = kb.search_lexicons("com.atproto.lexicon.schema record type", limit=1)
+
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]["source"], "lexicons")
+        self.assertEqual(results[0]["nsid"], "com.atproto.lexicon.schema")
+
+    def test_search_atproto_website_source_filter_returns_matching_rows(self) -> None:
+        kb = KnowledgeBase(Config())
+
+        kb._embeddings = _FakeEmbeddings(
+            [
+                {"id": "x", "text": "dns txt", "score": 0.7},
+                {"id": "y", "text": "dns txt", "score": 0.6},
+            ]
+        )
+        kb._chunks_by_uid = {
+            "x": ContentChunk(
+                text="",
+                source="lexicons",
+                file_path="lexicons/x.json",
+                title="X",
+            ),
+            "y": ContentChunk(
+                text="",
+                source="atproto-website",
+                file_path="specs/y.mdx",
+                title="En > DNS TXT Method",
+            ),
+        }
+
+        results = kb.search(
+            "lexicon schema record DNS TXT _lexicon authority resolution PDS serving",
+            source="atproto-website",
+            limit=1,
+        )
+
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]["source"], "atproto-website")
+        self.assertEqual(results[0]["title"], "En > DNS TXT Method")
+
+
+if __name__ == "__main__":
+    unittest.main()