diff --git a/.changeset/fix-model-dir-mismatch.md b/.changeset/fix-model-dir-mismatch.md new file mode 100644 index 0000000..838622b --- /dev/null +++ b/.changeset/fix-model-dir-mismatch.md @@ -0,0 +1,5 @@ +--- +"@prosdevlab/dev-agent": patch +--- + +Fix `dev setup` reporting model ready while `dev index` fails with "model not found". The CLI's `hasModel`/`pullModel` used `~/.termite/models` but the running server looked in `~/.antfly/models`. Both now use a shared `--models-dir` pointing at the server's data directory. diff --git a/README.md b/README.md index 09cede9..833ef96 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,8 @@ dev-agent indexes your codebase and provides 6 MCP tools to AI assistants. Inste ```bash # Install npm install -g @prosdevlab/dev-agent -brew install --cask antflydb/antfly/antfly -# One-time setup +# One-time setup (installs Antfly, pulls embedding model, starts server) dev setup # Index your repository @@ -138,7 +137,7 @@ Server health, Antfly connectivity, and repository access. ## Prerequisites - Node.js 22+ (LTS) -- [Antfly](https://antfly.io) — `brew install --cask antflydb/antfly/antfly` +- [Antfly](https://antfly.io) — installed automatically by `dev setup` ## Development diff --git a/packages/cli/src/utils/__tests__/antfly.test.ts b/packages/cli/src/utils/__tests__/antfly.test.ts new file mode 100644 index 0000000..c2d3888 --- /dev/null +++ b/packages/cli/src/utils/__tests__/antfly.test.ts @@ -0,0 +1,92 @@ +/** + * Tests for antfly utility helpers. + * + * Regression for: hasModel() false positive when antfly termite list defaulted + * to ~/.termite/models (different from the server's ~/.antfly/models), causing + * "Embedding model ready" in `dev setup` but "model not found" in `dev index`. + */ + +import { describe, expect, it } from 'vitest'; + +// modelPresentInOutput is not exported, so these tests run against a local copy of +// its pure matching logic; keep the copy in sync with the implementation. 
+// This keeps the test focused on the matching logic without requiring CLI env. + +function modelPresentInOutput(model: string, output: string): boolean { + if (output.includes(model)) return true; + + const shortName = model.split('/').pop() ?? model; + const escaped = shortName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(`(? { + const FULL_NAME = 'BAAI/bge-small-en-v1.5'; + const SHORT_NAME = 'bge-small-en-v1.5'; + + // Simulates `antfly termite list --models-dir ~/.antfly/models` output when + // the model IS present (full name in NAME column, also in SOURCE column). + const PRESENT_OUTPUT = `Local models in /Users/dev/.antfly/models: + +NAME TYPE SIZE VARIANTS SOURCE +BAAI/bge-small-en-v1.5 embedder 127.8 MB BAAI/bge-small-en-v1.5 +`; + + // Output when NO models are installed (the bug scenario: server's models-dir + // is empty, but ~/.termite/models has the model — the old code read the wrong + // directory and would never see "No models found"). + const EMPTY_OUTPUT = `Local models in /Users/dev/.antfly/models: + +NAME TYPE SIZE VARIANTS SOURCE +No models found locally. + +Use 'antfly termite pull ' to download models. +Use 'antfly termite list --remote' to see available models. +`; + + // Output with a DIFFERENT model that happens to contain the short name as a + // suffix — the old substring check would incorrectly return true here. 
+ const OTHER_MODEL_OUTPUT = `Local models in /Users/dev/.antfly/models: + +NAME TYPE SIZE VARIANTS SOURCE +vendor/other-bge-small-en-v1.5 embedder 200.0 MB vendor/other-bge-small-en-v1.5 +`; + + it('returns true when full model name is present in output', () => { + expect(modelPresentInOutput(FULL_NAME, PRESENT_OUTPUT)).toBe(true); + }); + + it('returns true when only short name is present as a standalone token', () => { + const outputWithShortName = `Local models:\n\n${SHORT_NAME} embedder 127 MB\n`; + expect(modelPresentInOutput(FULL_NAME, outputWithShortName)).toBe(true); + }); + + it('returns false when models directory is empty (server has no models)', () => { + // This is the core regression: old code checked ~/.termite/models which had + // the model, new code checks ~/.antfly/models which was empty. When empty, + // hasModel must return false so pullModel is invoked. + expect(modelPresentInOutput(FULL_NAME, EMPTY_OUTPUT)).toBe(false); + }); + + it('returns false when a different model shares the short name as a suffix', () => { + // Old bug: output.includes("bge-small-en-v1.5") matched + // "vendor/other-bge-small-en-v1.5" — false positive. 
+ expect(modelPresentInOutput(FULL_NAME, OTHER_MODEL_OUTPUT)).toBe(false); + }); + + it('returns false for completely unrelated output', () => { + expect(modelPresentInOutput(FULL_NAME, 'No models found locally.')).toBe(false); + }); + + it('handles model names without an org prefix', () => { + // model = "mxbai-embed-large-v1" (no slash) + const bareModel = 'mxbai-embed-large-v1'; + const output = `NAME TYPE\nmxbai-embed-large-v1 embedder\n`; + expect(modelPresentInOutput(bareModel, output)).toBe(true); + }); + + it('handles bare model not present', () => { + const bareModel = 'mxbai-embed-large-v1'; + expect(modelPresentInOutput(bareModel, EMPTY_OUTPUT)).toBe(false); + }); +}); diff --git a/packages/cli/src/utils/antfly.ts b/packages/cli/src/utils/antfly.ts index 250e713..bb31d52 100644 --- a/packages/cli/src/utils/antfly.ts +++ b/packages/cli/src/utils/antfly.ts @@ -5,6 +5,8 @@ */ import { execSync, spawn } from 'node:child_process'; +import { homedir } from 'node:os'; +import { join } from 'node:path'; import { logger } from './logger.js'; const DEFAULT_ANTFLY_URL = process.env.ANTFLY_URL ?? 'http://localhost:18080/api/v1'; @@ -14,6 +16,18 @@ const DOCKER_PORT = 18080; const STARTUP_TIMEOUT_MS = 30_000; const POLL_INTERVAL_MS = 500; +/** + * The Termite models directory used by the running Antfly swarm server. + * + * `antfly swarm` uses `--data-dir` (default: ~/.antfly) as its root for all + * storage, including Termite models at {data-dir}/models. + * `antfly termite list/pull` defaults to --models-dir ~/.termite/models, which + * is a DIFFERENT path. We must always pass --models-dir explicitly so that + * `pullModel` and `hasModel` operate on the same directory the server uses. + */ +const ANTFLY_DATA_DIR = process.env.ANTFLY_DATA_DIR ?? join(homedir(), '.antfly'); +const TERMITE_MODELS_DIR = join(ANTFLY_DATA_DIR, 'models'); + /** * Ensure antfly is running. Auto-starts if needed. 
* @@ -32,10 +46,14 @@ export async function ensureAntfly(options?: { quiet?: boolean }): Promise