From d2c7f81ce014d67459f58dbef5e31540b0dad605 Mon Sep 17 00:00:00 2001
From: Faraazuddin Mohammed <mohdfaraaz1@gmail.com>
Date: Sun, 10 May 2026 00:29:09 -0400
Subject: [PATCH] fix(web): Vercel SPA routing + docs/SEO sweep for all v1.0.0
 features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two unrelated fixes bundled:

1. Vercel SPA routing — playground routes added in Wave 3 (/diff, /by-file,
   /sarif, /vision, /config-builder, /init, /models, /models/<id>, /editor,
   /claude-code) returned 404 in production. Vercel's Vite framework preset
   serves the static dist/ output literally; React Router's client-side
   paths weren't being rewritten to index.html. Added a `rewrites` rule in
   vercel.json that falls through to index.html for everything except
   assets/, robots.txt, and sitemap.xml.

2. Docs + SEO sweep — Waves 2 + 3 added significant feature surface that
   the existing READMEs and package.json metadata didn't reflect. Updated
   to land cleanly with the v1.0.0 launch:

   - Root README: refreshed hero, comparison table now shows Latency / VS
     Code / Claude Code / Mistral / Cohere / Vision / SARIF columns; added
     "five-line use" with VS Code + Claude Code panels; methodology table
     gains Mistral (split exact-for-SP / approximate-for-Tekken) and Cohere
     rows; status bumped to "approaching v1.0.0".
   - packages/cli/README: flag table at top; sections per output format,
     vision tokens, per-file attribution, latency mode (TTFT, p50/p95),
     auto provider detection, .tokenometer.yml config.
   - packages/core/README: full export list (parseConfig, loadConfig,
     toSarif, the three vision-token helpers, measureLatency + types) plus
     Providers section listing all 5 with offline + empirical exactness.
   - packages/action/README: `top-n-files` input documented; example
     sticky-comment shape now shows the per-file Δ table + collapsible
     all-files block.
   - packages/vscode/README: settings, commands, supported file types,
     placeholder Marketplace install (coming in Phase I).
   - packages/claude-code-skill/README: install one-liner verified.
   - All package.json: refreshed `description`; extended `keywords`
     across CLI / core / action / vscode / claude-code-skill with mistral,
     cohere, latency, ttft, vision-tokens, vscode, cursor, claude-code,
     sarif, prompt-cost-regression, model-comparison, etc.
   - .github/repo-metadata.md: 7 new GH topics (mistral, cohere,
     vscode-extension, claude-code, latency, vision-tokens, sarif).
   - .github/release-notes-v1.0.0.md: removed [planned] markers for
     features now shipped; added "v1.0.0 launch surface" section.
   - .github/awesome-list-prs/*.md (5 files): updated one-liner to cite
     the full surface (CLI + Action + VS Code + Claude Code skill, 63
     models, 5 providers).
   - .changeset/wave-4-docs-seo-sweep.md added.

Phase I unified release pipeline (vsce publish, ovsx publish, smoke test
job, Marketplace verify) is intentionally NOT in this PR — it lands
separately so this docs/playground fix can ship immediately and the
Phase I work can take its time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .changeset/wave-4-docs-seo-sweep.md           |  11 ++
 .github/awesome-list-prs/awesome-actions.md   |   2 +-
 .github/awesome-list-prs/awesome-ai-tools.md  |   2 +-
 .github/awesome-list-prs/awesome-llm-apps.md  |   2 +-
 .github/awesome-list-prs/awesome-llmops.md    |   2 +-
 .../awesome-prompt-engineering.md             |   2 +-
 .github/release-notes-v1.0.0.md               | 125 +++++++++++++-----
 .github/repo-metadata.md                      |  25 +++-
 README.md                                     |  90 ++++++++++---
 package.json                                  |   2 +-
 packages/action/README.md                     |  24 +++-
 packages/action/package.json                  |  20 ++-
 packages/claude-code-skill/README.md          |  11 +-
 packages/claude-code-skill/package.json       |  14 +-
 packages/cli/README.md                        |  82 +++++++-----
 packages/cli/package.json                     |  24 +++-
 packages/core/README.md                       | 102 +++++++++++++-
 packages/core/package.json                    |  24 +++-
 packages/vscode/README.md                     |  14 +-
 packages/vscode/package.json                  |  28 +++-
 packages/web/package.json                     |   2 +-
 vercel.json                                   |   5 +-
 22 files changed, 479 insertions(+), 134 deletions(-)
 create mode 100644 .changeset/wave-4-docs-seo-sweep.md
diff --git a/.changeset/wave-4-docs-seo-sweep.md b/.changeset/wave-4-docs-seo-sweep.md
new file mode 100644
index 0000000..a69f9c4
--- /dev/null
+++ b/.changeset/wave-4-docs-seo-sweep.md
@@ -0,0 +1,11 @@
+---
+"tokenometer": minor
+"@tokenometer/core": minor
+---
+
+Documentation + SEO sweep: README, per-package READMEs, package.json
+descriptions and keywords updated to reflect every shipped feature
+(CLI flags, GitHub Action, VS Code extension, Claude Code skill,
+Mistral + Cohere providers, latency benchmarking, vision tokens, SARIF).
+Repo metadata + awesome-list templates regenerated. No source code
+changes.
diff --git a/.github/awesome-list-prs/awesome-actions.md b/.github/awesome-list-prs/awesome-actions.md
index f4855be..bb3bf7d 100644
--- a/.github/awesome-list-prs/awesome-actions.md
+++ b/.github/awesome-list-prs/awesome-actions.md
@@ -22,5 +22,5 @@ It is a first-class GitHub Action that posts a sticky PR comment diffing prompt-
 ## Markdown line for the list
 
 ```
-- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native.
+- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail.
 ```
diff --git a/.github/awesome-list-prs/awesome-ai-tools.md b/.github/awesome-list-prs/awesome-ai-tools.md
index a5ed81f..aa84d84 100644
--- a/.github/awesome-list-prs/awesome-ai-tools.md
+++ b/.github/awesome-list-prs/awesome-ai-tools.md
@@ -22,5 +22,5 @@ The directory already includes generation, agent, and eval tooling but very litt
 ## Markdown line for the list
 
 ```
-- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native.
+- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail.
 ```
diff --git a/.github/awesome-list-prs/awesome-llm-apps.md b/.github/awesome-list-prs/awesome-llm-apps.md
index cdb1eea..2736839 100644
--- a/.github/awesome-list-prs/awesome-llm-apps.md
+++ b/.github/awesome-list-prs/awesome-llm-apps.md
@@ -22,5 +22,5 @@ Awesome-llm-apps already curates apps and tooling that help builders ship with L
 ## Markdown line for the list
 
 ```
-- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native.
+- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail.
 ```
diff --git a/.github/awesome-list-prs/awesome-llmops.md b/.github/awesome-list-prs/awesome-llmops.md
index daf740e..06f9791 100644
--- a/.github/awesome-list-prs/awesome-llmops.md
+++ b/.github/awesome-list-prs/awesome-llmops.md
@@ -22,5 +22,5 @@ Cost is a first-class LLMOps concern but most platforms treat it as a post-hoc d
 ## Markdown line for the list
 
 ```
-- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native.
+- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail.
 ```
diff --git a/.github/awesome-list-prs/awesome-prompt-engineering.md b/.github/awesome-list-prs/awesome-prompt-engineering.md
index 3d7f5fa..7d23fd0 100644
--- a/.github/awesome-list-prs/awesome-prompt-engineering.md
+++ b/.github/awesome-list-prs/awesome-prompt-engineering.md
@@ -22,5 +22,5 @@ Prompt engineers iterate on wording, structure, and few-shot examples — all of
 ## Markdown line for the list
 
 ```
-- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native.
+- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail.
 ```
diff --git a/.github/release-notes-v1.0.0.md b/.github/release-notes-v1.0.0.md
index 9d1d138..e73db31 100644
--- a/.github/release-notes-v1.0.0.md
+++ b/.github/release-notes-v1.0.0.md
@@ -1,43 +1,83 @@
-# Tokenometer v1.0.0 — Production-ready LLM cost calculator and CI guardrail.
+# Tokenometer v1.0.0 — Production-ready LLM cost calculator, latency benchmark, and CI guardrail.
 
-> Note: this is a **DRAFT**. Some items below are marked `[planned]` because the
-> phase has not yet landed on `main`. Edit before publishing the GitHub Release.
+> Note: this is a **DRAFT**. Edit before publishing the GitHub Release. All
+> items below have already landed on `main` (Waves 2 + 3); the only items
+> still in-flight at the time this draft was written are the marketplace
+> publish, smoke tests, and release pipeline polish (Phase I).
 
 ## Highlights
 
-- **CLI + GitHub Action shipped** — `npx tokenometer` and the marketplace action
-  share a single core, so local results match CI results.
-- **Multi-provider** — Claude (Opus / Sonnet / Haiku), OpenAI GPT-4o family, and
-  Google Gemini all run through the same interface.
-- **Empirical mode** — real `countTokens` API calls (free for Anthropic and
-  Google) instead of ad-hoc heuristics; falls back to `approximate` only when
-  the user opts out or no key is configured.
-- **CI cost-guardrail** — sticky PR comment with a per-model diff, plus a
-  `budget` input that fails the run when the head delta exceeds your USD cap.
-- **Auto-updating prices** — pricing and context limits are sourced from the
-  `tokenlens` registry, not a hand-maintained table.
-- **Honest `approximate` flag** — every output row is tagged so you always know
-  whether a number came from a real tokenizer or an estimate.
-- **Vision-token cost** `[planned — Phase D]` — image-aware token accounting
-  for multi-modal prompts.
-- **Per-file attribution** `[planned — Phase C.4]` — costs broken down by source
-  file in PR comments.
-- **SARIF output** `[planned — Phase C.5]` — lets cost regressions surface in
-  GitHub code scanning.
-- **Auto provider detection** `[planned — Phase C.1]` — infer the provider from
-  the model id without an extra flag.
-- **`.tokenometer.yml` config** `[planned — Phase C.2]` — repo-level defaults
-  for paths, models, and budgets.
-- **VS Code / Cursor extension** `[planned — Phase E.1]` — inline cost lens for
-  prompt files in the editor.
-- **Claude Code skill** `[planned — Phase E.2]` — first-class skill so Claude
-  Code can call Tokenometer when iterating on prompts.
-- **Mistral + Cohere providers** `[planned — Phase H]` — additional model
-  families behind the same CLI surface.
-- **Latency mode** `[planned]` — `--latency` flag to surface per-model latency
-  alongside cost.
-- **Unified release pipeline** — one workflow publishes both `tokenometer` and
-  `@tokenometer/core` so the CLI and library never drift.
+- **CLI + GitHub Action + VS Code extension + Claude Code skill — one core.**
+  `npx tokenometer`, the marketplace Action, the editor status bar, and the
+  Claude Code skill all share `@tokenometer/core`, so the numbers you get
+  locally, in CI, in the editor, and from an agent are always the same.
+- **Multi-provider** — Claude (Opus / Sonnet / Haiku, Claude 3.x), OpenAI
+  GPT-4o family + o1, Google Gemini 2.5 / 1.5, Mistral (19 models — open
+  weights, large, codestral, NeMo, Pixtral, Magistral, Ministral, Devstral,
+  Mistral Medium 2505), and Cohere (command-r, command-r-plus). 63 models
+  total.
+- **Empirical mode** — real `countTokens` API calls (free for Anthropic,
+  Google, and Cohere; tiktoken-anchored for OpenAI) instead of ad-hoc
+  heuristics; falls back to `approximate` only when the user opts out or
+  no key is configured. Mistral has no public token-count endpoint —
+  offline counts use `mistral-tokenizer-js` (exact) for SentencePiece-family
+  models and a `chars/4` estimate (approximate) for Tekken-family models.
+- **Latency benchmarking (`--latency`)** — TTFT, total ms, and tokens/sec
+  reported as p50 / p95 / mean over `n` real generations (default `n=3`,
+  configurable with `--latency-trials 1..10`). Supported on Anthropic,
+  OpenAI, Google, Cohere, and Mistral. The default `--max-spend` is
+  bumped from `$0.05` to `$0.25` when `--latency` is set.
+- **CI cost-guardrail** — sticky PR comment with a per-model summary and a
+  per-file Δ table (configurable via `top-n-files`, with the rest folded
+  into a `<details>` block). The `budget` input fails the run when the
+  head delta exceeds your USD cap.
+- **Per-file attribution** (`--by-file` in the CLI; per-file Δ in the
+  Action comment) so you know which prompt files dominate cost.
+- **SARIF output** (`--output sarif`) — drop the file into GitHub Code
+  Scanning or any SARIF viewer to surface cost regressions next to lint
+  findings.
+- **Vision-token cost** (`--image <path>`) — image-aware accounting for
+  Claude (`(w*h)/750`, capped at 1600), GPT-4o (high-detail tile cost),
+  and Gemini (`258 × ⌈w/768⌉ × ⌈h/768⌉`).
+- **Auto provider detection** — infer the default model from whichever
+  `*_API_KEY` env var is set. No flag needed for the common case.
+- **`.tokenometer.yml` config** — repo-level defaults for paths, models,
+  formats, and budgets. Walks up from cwd, stopping at `.git`. CLI
+  flags always win.
+- **VS Code / Cursor extension** — status bar shows live `model · tokens
+  · USD` for the active prompt file. Settings: `tokenometer.model`,
+  `tokenometer.format`, `tokenometer.warnOnCostAbove`. Commands:
+  *Tokenometer: Switch model*, *Tokenometer: Show details*. Marketplace
+  listing arrives with this release; until then, `npm run package:vsix
+  --workspace=@tokenometer/vscode` builds a side-loadable `.vsix`.
+- **Claude Code skill** (`tokenometer-cost-check`) — drop into
+  `~/.claude/skills/tokenometer/SKILL.md` and Claude Code agents will
+  reach for `npx tokenometer` when asked anything cost- or latency-shaped.
+- **Auto-updating prices** — pricing and context limits are sourced from
+  the `tokenlens` registry, not a hand-maintained table. A small
+  `LOCAL_OVERRIDES` map covers bleeding-edge models (and the entire
+  Cohere catalog, which `@tokenlens/models` doesn't ship at v1.3.0).
+- **Honest `approximate` flag** — every output row is tagged so you
+  always know whether a number came from a real tokenizer or an estimate.
+- **Unified release pipeline** — one workflow publishes both
+  `tokenometer` and `@tokenometer/core` so the CLI and library never
+  drift.
+
+## v1.0.0 launch surface
+
+| Surface | Where | Notes |
+|---|---|---|
+| CLI | `npx tokenometer` / `npm i -g tokenometer` | All flags above |
+| GitHub Action | `faraa2m/tokenometer@v1` (Marketplace) | Sticky PR comment with per-file Δ + budget gate |
+| VS Code extension | Marketplace + Open VSX (Cursor / VSCodium) | Status bar live cost |
+| Claude Code skill | `~/.claude/skills/tokenometer/SKILL.md` | Agentic prompt-cost awareness |
+| Web playground | https://tokenometer.vercel.app | Calculator, diff, by-file, SARIF, vision, config builder, init wizard, Cost Atlas |
+| Library | `@tokenometer/core` on npm | Engine for everything above |
+
+- **Providers (5):** Anthropic, OpenAI, Google, Mistral, Cohere
+- **Models (63):** Claude 4.x + 3.x, GPT-4o family + o1, Gemini 2.5 + 1.5, Mistral 19-model catalog, Cohere command-r family
+- **Output formats:** table, JSON, SARIF
+- **Latency providers:** Anthropic, OpenAI, Google, Cohere, Mistral
 
 ## Empirical findings
 
@@ -61,6 +101,19 @@ GitHub Action:
       prompts/**/*.md
     models: claude-opus-4-7,claude-sonnet-4-6,gpt-4o
     budget: '0.50'
+    top-n-files: 5
+```
+
+VS Code / Cursor:
+
+```
+ext install faraa2m.tokenometer-vscode
+```
+
+Claude Code skill:
+
+```
+cp -R packages/claude-code-skill ~/.claude/skills/tokenometer
 ```
 
 ## What's next
diff --git a/.github/repo-metadata.md b/.github/repo-metadata.md
index f8e9bdd..63ccce5 100644
--- a/.github/repo-metadata.md
+++ b/.github/repo-metadata.md
@@ -2,8 +2,29 @@
 
 Apply via:
 ```
-gh repo edit faraa2m/tokenometer --description "Tokenometer — LLM cost calculator, token counter, and CI cost-guardrail Action for Claude, GPT-4o, Gemini."
-gh repo edit faraa2m/tokenometer --add-topic llm --add-topic cost-tracking --add-topic prompt-engineering --add-topic tokenizer --add-topic claude --add-topic gpt-4 --add-topic gemini --add-topic github-actions --add-topic cli --add-topic developer-tools --add-topic ai --add-topic cost-optimization --add-topic anthropic --add-topic openai
+gh repo edit faraa2m/tokenometer --description "Tokenometer — LLM cost calculator, token counter, latency benchmark, and CI cost-guardrail Action for Claude, GPT-4o, Gemini, Mistral, and Cohere. CLI + GitHub Action + VS Code extension + Claude Code skill."
+gh repo edit faraa2m/tokenometer \
+  --add-topic ai \
+  --add-topic anthropic \
+  --add-topic claude \
+  --add-topic claude-code \
+  --add-topic cli \
+  --add-topic cohere \
+  --add-topic cost-optimization \
+  --add-topic cost-tracking \
+  --add-topic developer-tools \
+  --add-topic gemini \
+  --add-topic github-actions \
+  --add-topic gpt-4 \
+  --add-topic latency \
+  --add-topic llm \
+  --add-topic mistral \
+  --add-topic openai \
+  --add-topic prompt-engineering \
+  --add-topic sarif \
+  --add-topic tokenizer \
+  --add-topic vision-tokens \
+  --add-topic vscode-extension
 ```
 
 ## Marketplace categories to choose during release publish
diff --git a/README.md b/README.md
index 210829c..2d12f3c 100644
--- a/README.md
+++ b/README.md
@@ -7,26 +7,32 @@
 [![GitHub stars](https://img.shields.io/github/stars/faraa2m/tokenometer.svg?style=social)](https://github.com/faraa2m/tokenometer/stargazers)
 <!-- TODO: add marketplace badge after v1.0.0 publish -->
 
-> Tokenometer — LLM cost calculator, token counter, and CI cost-guardrail Action for Claude, GPT-4o, Gemini.
+> Tokenometer — LLM cost calculator, token counter, latency benchmark, and CI cost-guardrail for Claude, GPT-4o, Gemini, Mistral, and Cohere. CLI + GitHub Action + VS Code extension + Claude Code skill.
 > **Live: https://tokenometer.vercel.app**
 
-Tokenometer answers a simple, expensive question: **does it actually cost less to send your prompt as YAML, JSON, XML, or Markdown — across Claude, GPT-4o, and Gemini?** It started as a [\$23 question](https://hackernoon.com/i-spent-$23-testing-the-yaml-saves-tokens-hack-it-doesnt-work). This is the tool anyone can run — offline, empirically, or as a PR guardrail.
+Tokenometer answers a simple, expensive question: **does it actually cost less to send your prompt as YAML, JSON, XML, or Markdown — across Claude, GPT-4o, Gemini, Mistral, and Cohere — and how fast does each provider actually respond?** It started as a [\$23 question](https://hackernoon.com/i-spent-$23-testing-the-yaml-saves-tokens-hack-it-doesnt-work). Today it's the only LLM cost CLI that also tells you latency, ships a PR-blocking GitHub Action, lights up your editor's status bar, and teaches Claude Code agents to think in dollars.
 
 ## Why Tokenometer vs alternatives
 
 |                                       | Tokenometer | [tokencost](https://github.com/AgentOps-AI/tokencost) (AgentOps) | [tiktoken](https://github.com/openai/tiktoken) (OpenAI) | [gpt-tokenizer](https://github.com/niieani/gpt-tokenizer) | [promptfoo](https://github.com/promptfoo/promptfoo) | gpt-token-counter-live (VS Code) |
 |---------------------------------------|:-----------:|:--------:|:--------:|:--------:|:--------:|:--------:|
 | Multi-provider (Anthropic / OpenAI / Google) | ✓ | ✓ | – | – | ✓ | – |
+| Mistral support                       | ✓ | – | – | – | partial | – |
+| Cohere support                        | ✓ | – | – | – | partial | – |
 | Multi-format compare (JSON / YAML / XML / MD / text) | ✓ | – | – | – | – | – |
 | Empirical mode (real provider `countTokens`) | ✓ | – | – | – | partial | – |
-| CLI                                   | ✓ | ✓ | – | – | ✓ | – |
-| GitHub Action (PR cost-diff guardrail) | ✓ | – | – | – | partial | – |
-| VS Code / Cursor extension            | – (planned) | – | – | – | – | ✓ |
+| Latency (TTFT + tokens/sec, p50/p95)  | ✓ | – | – | – | partial | – |
+| Vision-token cost (image inputs)      | ✓ | – | – | – | – | – |
 | Cost (USD), not just tokens           | ✓ | ✓ | – | – | partial | – |
 | Honest "approximate" flag when offline is a proxy | ✓ | – | – | – | – | – |
+| CLI                                   | ✓ | ✓ | – | – | ✓ | – |
+| GitHub Action (PR cost-diff guardrail) | ✓ | – | – | – | partial | – |
 | Per-file attribution in CI            | ✓ | – | – | – | – | – |
+| SARIF output (GitHub code scanning)   | ✓ | – | – | – | – | – |
+| VS Code / Cursor extension            | ✓ | – | – | – | – | ✓ |
+| Claude Code skill                     | ✓ | – | – | – | – | – |
 
-Tokenometer is the only tool in this list that combines **multi-provider + multi-format + empirical mode + USD cost + a PR-blocking GitHub Action + an honest approximate-vs-exact flag**. tokencost is the closest match for cost-in-USD across providers, but it doesn't compare formats or run as a CI guardrail. tiktoken and gpt-tokenizer are great single-provider primitives — Tokenometer uses gpt-tokenizer under the hood for the offline path. promptfoo is the broadest evaluator overall, but cost is one input among many; it isn't a dedicated cost-guardrail. The VS Code extension is real-time-in-editor only.
+Tokenometer is the only tool in this list that combines **multi-provider (5 providers, 63 models) + multi-format + empirical mode + latency benchmarking + USD cost + a PR-blocking GitHub Action + an editor extension + a Claude Code skill + an honest approximate-vs-exact flag**. tokencost is the closest match for cost-in-USD across providers, but it doesn't compare formats, measure latency, or run as a CI guardrail. tiktoken and gpt-tokenizer are great single-provider primitives — Tokenometer uses gpt-tokenizer under the hood for the offline path. promptfoo is the broadest evaluator overall, but cost is one input among many; it isn't a dedicated cost-guardrail. gpt-token-counter-live is a real-time, in-editor token counter only.
 
 ## Findings (Anthropic, n=150 cells across 10 prompt shapes)
 
@@ -52,7 +58,7 @@ $ tokenometer ./prompt.md --model claude-opus-4-7 --format json,yaml,markdown
   Priciest: claude-opus-4-7 as json     ($0.0186, 1.08x more)
 ```
 
-The `Approx` column shows `✓` when the count is a proxy (Anthropic / Google offline) and is empty when it's an exact match (OpenAI offline, or any provider with `--empirical`).
+The `Approx` column shows `✓` when the count is a proxy (Anthropic / Google / Mistral-Tekken / Cohere offline) and is empty when it's an exact match (OpenAI offline, Mistral SentencePiece-family offline, or any provider with `--empirical`).
 
 > Real demo (with empirical mode + GIF) at **https://tokenometer.vercel.app**.
 
@@ -60,13 +66,14 @@ The `Approx` column shows `✓` when the count is a proxy (Anthropic / Google of
 
 **Cost AND latency in one CLI — the only tool that does both.** `tiktoken` and `@anthropic-ai/tokenizer` give you a token count for one provider. They don't tell you:
 
-- What the same prompt costs across **multiple providers and models**
-- How **fast** each provider actually responds (TTFT + tokens/sec) — a real generation, not a synthetic benchmark
+- What the same prompt costs across **multiple providers and models** (Claude, GPT-4o, Gemini, Mistral, Cohere)
+- How **fast** each provider actually responds (TTFT + tokens/sec, p50/p95/mean) — a real generation, not a synthetic benchmark
 - Whether **format conversion** (YAML ↔ JSON ↔ XML ↔ MD) actually moves the needle
 - The **empirical** cost — what your provider actually charged on a real call, after prompt caching
 - Whether a PR introduced a **prompt-cost regression**
+- The **vision-token** cost when your prompt includes images
 
-Tokenometer is dev-time, multi-provider, multi-format, optionally empirical, latency-aware, and CI-native.
+Tokenometer is dev-time, multi-provider, multi-format, optionally empirical, latency-aware, vision-aware, and CI-native. And the same core powers the CLI, the GitHub Action, the VS Code / Cursor status bar, and the Claude Code skill — counts, pricing, and tokenizer choices stay identical across surfaces.
 
 ## Install
 
@@ -80,7 +87,7 @@ Global:
 
 ```bash
 npm i -g tokenometer
-tokenometer ./prompt.md --format yaml,json,xml,markdown,text --model claude-opus-4-7,gpt-4o
+tokenometer ./prompt.md --format yaml,json,xml,markdown,text --model claude-opus-4-7,gpt-4o,mistral-large-latest,command-r-plus
 ```
 
 Stdin works too:
@@ -89,9 +96,9 @@ Stdin works too:
 echo "prompt body" | tokenometer - --model claude-sonnet-4-6
 ```
 
-Run `tokenometer --help` for the full flag list and the current set of known model ids.
+Run `tokenometer --help` for the full flag list and the current set of known model ids (63 across 5 providers).
 
-## Three-line use
+## Five-line use
 
 ### 1. Compare formats for a single prompt (offline, no API key)
 
@@ -99,7 +106,7 @@ Run `tokenometer --help` for the full flag list and the current set of known mod
 tokenometer ./prompt.md --model claude-opus-4-7
 ```
 
-Prints estimated tokens + USD across each format × the chosen model(s). Default model is `claude-opus-4-7`; default formats are all of `json,markdown,text,xml,yaml`.
+Prints estimated tokens + USD across each format × the chosen model(s). Default model is `claude-opus-4-7` (or auto-detected from `*_API_KEY` env vars); default formats are all of `json,markdown,text,xml,yaml`.
 
 ### 2. Empirical mode (real provider `countTokens`, with a hard ceiling)
 
@@ -112,8 +119,10 @@ For each `(model × format)` cell, calls the provider's exact token-count API:
 - Anthropic → `messages.countTokens` (free)
 - Google → `model.countTokens` (free)
 - OpenAI → tiktoken `o200k_base` (matches OpenAI's production count exactly, no API call)
+- Cohere → `POST /v1/tokenize` (free, requires `COHERE_API_KEY`)
+- Mistral → unsupported (no public token-count endpoint); the offline path is used instead — `mistral-tokenizer-js` (exact) for SentencePiece-family models, `chars/4` (approximate) for Tekken-family models.
 
-Set `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini models. `--offline` forces the offline path even if `--empirical` is also passed.
+Set `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini, `MISTRAL_API_KEY` for Mistral, `COHERE_API_KEY` for Cohere. `--offline` forces the offline path even if `--empirical` is also passed.
 
 ### 3. CI guardrail (GitHub Action)
 
@@ -123,14 +132,27 @@ Set `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini models. `--offline` forces
     paths: prompts/**/*.md,prompts/**/*.json
     models: claude-opus-4-7,claude-sonnet-4-6,gpt-4o
     formats: json,yaml,markdown
-    budget: '0.50'   # USD; omit to disable the gate
+    budget: '0.50'      # USD; omit to disable the gate
+    top-n-files: 5      # rows shown in the per-file Δ table; the rest fold into <details>
 ```
 
-Posts a sticky PR comment with the cost diff vs the base branch. Fails the check when the total Δ exceeds `budget`. See [`packages/action/README.md`](packages/action/README.md) for all inputs and outputs.
+Posts a sticky PR comment with the cost diff vs the base branch, including a per-file Δ table and a collapsible "all files" block. Fails the check when the total Δ exceeds `budget`. See [`packages/action/README.md`](packages/action/README.md) for all inputs and outputs.
 
-### More flags
+### 4. Live cost in your editor (VS Code / Cursor)
 
-The CLI also supports `--output json|sarif` for machine-readable output, `--by-file` for per-file attribution, `--image <path>` for vision-token cost on Claude / GPT-4o / Gemini, and `.tokenometer.yml` config files (auto-discovered, walk-up). See [`packages/cli/README.md`](packages/cli/README.md) for the full list.
+```
+ext install faraa2m.tokenometer-vscode   # marketplace listing arrives with v1.0.0
+```
+
+Status bar shows `model · tokens · USD` for the active prompt file, updates on every keystroke (debounced), and turns warning-colored when you exceed `tokenometer.warnOnCostAbove`. Same `@tokenometer/core` as the CLI — what you see in the editor matches what CI computes. See [`packages/vscode/README.md`](packages/vscode/README.md).
+
+### 5. Claude Code skill (agentic prompt-cost awareness)
+
+```bash
+cp -R packages/claude-code-skill ~/.claude/skills/tokenometer
+```
+
+Installs the `tokenometer-cost-check` skill so Claude Code agents can answer "what does this prompt cost?" with a real number — they shell out to `npx tokenometer` instead of guessing from `tiktoken`. See [`packages/claude-code-skill/README.md`](packages/claude-code-skill/README.md).
 
 ## Methodology
 
@@ -141,10 +163,34 @@ Tokenometer picks a tokenizer per provider and flags the count as approximate (`
 | OpenAI    | `gpt-tokenizer` `o200k_base`                   | exact       | same `o200k_base` (matches OpenAI production count) |
 | Anthropic | `gpt-tokenizer` `cl100k_base`                  | approximate | `messages.countTokens` (exact, free) |
 | Google    | `chars / 4` heuristic                          | approximate | `model.countTokens` (exact, free) |
-| Mistral   | `mistral-tokenizer-js` (V1/V2/V3) · `chars/4` for Tekken | approximate | unsupported (no public token-count endpoint) |
+| Mistral   | `mistral-tokenizer-js` (V1/V2/V3) · `chars/4` for Tekken family | exact for SP-family · approximate for Tekken | unsupported (no public token-count endpoint) |
 | Cohere    | `chars / 4` heuristic                          | approximate | `POST /v1/tokenize` (exact, free, requires `COHERE_API_KEY`) |
 
-Cost = `tokens / 1000 × per-1k input rate`. Pricing and context windows are sourced from the [`tokenlens`](https://www.npmjs.com/package/tokenlens) registry, with a small set of local overrides for bleeding-edge models the registry hasn't picked up yet — see [`packages/core/src/rates.ts`](packages/core/src/rates.ts) (`RATES_VERSION`).
+Cost = `tokens / 1000 × per-1k input rate`. Pricing and context windows are sourced from the [`tokenlens`](https://www.npmjs.com/package/tokenlens) registry, with a small set of local overrides for bleeding-edge models the registry hasn't picked up yet (and the full Cohere catalog, which `@tokenlens/models` doesn't ship at v1.3.0) — see [`packages/core/src/rates.ts`](packages/core/src/rates.ts) (`RATES_VERSION`).
+
+## Output formats
+
+The CLI is multi-surface by design:
+
+- **`--output table`** (default) — human-readable per-cell table.
+- **`--output json`** — emits a `TokenometerResult` shape (`{ files: [{ path, results: [...] }] }`); pipe to `jq`.
+- **`--output sarif`** — emits SARIF 2.1.0; drop into GitHub Code Scanning or any SARIF viewer.
+- **`--by-file`** — appends a per-file token + USD summary table for multi-file inputs.
+- **`--image <path>`** (repeatable) — adds vision-token cost for Claude / GPT-4o / Gemini.
+- **`--latency`** — measures real generation latency (TTFT + total ms + tokens/sec, p50/p95/mean over `n` trials, default 3). Implies `--empirical`. Supported on Anthropic, OpenAI, Google, Cohere, and Mistral.
+
+```bash
+npx tokenometer ./prompt.md --output sarif > tokenometer.sarif
+npx tokenometer ./prompts/*.md --by-file --output json | jq '.files[].results | map(.inputCost) | add'
+ANTHROPIC_API_KEY=… OPENAI_API_KEY=… npx tokenometer ./prompt.md --latency --model claude-opus-4-7,gpt-4o
+```
+
+Full flag reference: [`packages/cli/README.md`](packages/cli/README.md).
+
+## Editor + Claude Code
+
+- **VS Code / Cursor** — [`@tokenometer/vscode`](packages/vscode/README.md). Status bar with live token count + USD cost; settings for model, format, and a warn-above-USD threshold; `Tokenometer: Switch model` and `Tokenometer: Show details` commands.
+- **Claude Code skill** — [`@tokenometer/claude-code-skill`](packages/claude-code-skill/README.md). Drop in `~/.claude/skills/tokenometer/SKILL.md` and Claude Code agents will reach for `npx tokenometer …` when you ask them anything cost-shaped.
 
 ## Project health
 
@@ -156,7 +202,7 @@ Cost = `tokens / 1000 × per-1k input rate`. Pricing and context windows are sou
 
 ## Status
 
-Early. v0.0.x — see [milestones](https://github.com/faraa2m/tokenometer/milestones). Roadmap to v1.0.0 in progress: VS Code extension, Claude Code skill, vision-token cost.
+Approaching v1.0.0. See [milestones](https://github.com/faraa2m/tokenometer/milestones) for what's left in Phase I (marketplace publish, smoke tests, release pipeline) before the v1 cut.
 
 ## License
 
diff --git a/package.json b/package.json
index 220fa26..f6f9de8 100644
--- a/package.json
+++ b/package.json
@@ -2,7 +2,7 @@
   "name": "tokenometer-monorepo",
   "private": true,
   "version": "0.0.0",
-  "description": "Tokenometer — LLM cost calculator, token counter, and CI cost-guardrail Action for Claude, GPT-4o, Gemini.",
+  "description": "Tokenometer — LLM cost calculator, token counter, latency benchmark, and CI cost-guardrail Action for Claude, GPT-4o, Gemini, Mistral, and Cohere. Includes VS Code extension and Claude Code skill.",
   "license": "MIT",
   "author": "Faraazuddin Mohammed <mohdfaraaz1@gmail.com>",
   "homepage": "https://github.com/faraa2m/tokenometer",
diff --git a/packages/action/README.md b/packages/action/README.md
index b58c596..fa7fd8f 100644
--- a/packages/action/README.md
+++ b/packages/action/README.md
@@ -3,7 +3,7 @@
 [![License: MIT](https://img.shields.io/github/license/faraa2m/tokenometer.svg)](https://github.com/faraa2m/tokenometer/blob/main/LICENSE)
 <!-- TODO: add marketplace badge after v1.0.0 publish -->
 
-Posts a sticky PR comment with the prompt-cost diff between your branch and its base. Fails the check when the delta exceeds a budget.
+Posts a sticky PR comment with the prompt-cost diff between your branch and its base, including a per-file Δ table. Fails the check when the delta exceeds a budget.
 
 See the [root README](https://github.com/faraa2m/tokenometer#readme) for findings, methodology, and the full project overview.
 
@@ -31,7 +31,8 @@ jobs:
           paths: prompts/**/*.md,prompts/**/*.json
           models: claude-opus-4-7,claude-sonnet-4-6,gpt-4o
           formats: json,yaml,markdown
-          budget: '0.50' # USD; omit to disable the gate
+          budget: '0.50'      # USD; omit to disable the gate
+          top-n-files: 5      # rows shown in the per-file Δ table
 ```
 
 ## Inputs
@@ -39,7 +40,7 @@ jobs:
 | Name | Default | Notes |
 |---|---|---|
 | `paths` | `prompts/**/*.{md,json,yaml,yml,txt}` | Comma- or newline-separated globs |
-| `models` | `claude-opus-4-7,claude-sonnet-4-6,gpt-4o` | Any tokenometer-supported model id |
+| `models` | `claude-opus-4-7,claude-sonnet-4-6,gpt-4o` | Any tokenometer-supported model id (63 across Anthropic / OpenAI / Google / Mistral / Cohere) |
 | `formats` | `json,yaml,xml,markdown,text` | Subset of supported formats |
 | `budget` | _empty_ | Max acceptable total Δ in USD. Empty = disabled |
 | `base-ref` | _auto_ | Falls back to `origin/<pr-base>` for PRs, `HEAD~1` otherwise |
@@ -56,9 +57,16 @@ jobs:
 
 ## Comment shape
 
-The sticky comment opens with the existing total-cost line and per-file table, then appends a "Top changed files" section. When more files changed than `top-n-files`, the rest are folded into a collapsible block:
+The sticky comment opens with the total-cost line and per-model summary, then appends a "Top changed files" Δ table. When more files changed than `top-n-files`, the rest are folded into a collapsible `<details>` block:
 
 ```markdown
+**Total prompt cost Δ:** +$0.0124 (head $0.4823, base $0.4699)
+
+| Model | Base | Head | Δ |
+|---|---:|---:|---:|
+| `claude-opus-4-7` | $0.2402 | $0.2484 | +$0.0082 |
+| `gpt-4o` | $0.2297 | $0.2339 | +$0.0042 |
+
 ### Top changed files (5)
 
 | File | Tokens Δ | USD Δ |
@@ -74,7 +82,7 @@ The sticky comment opens with the existing total-cost line and per-file table, t
 </details>
 ```
 
-Sorting: `|Δ USD|` desc → `Δ tokens` desc → path. Added files are tagged `(+)`, deleted files `(−)`.
+Sorting: `|Δ USD|` desc → `Δ tokens` desc → path. Added files are tagged `(+)`, deleted files `(−)`. The per-file aggregator that produces this table is unit-tested (`packages/action/src/per-file-diff.test.ts`) so you can rely on the math.
 
 ## What it measures
 
@@ -82,9 +90,11 @@ Same offline tokenizer dispatch as the CLI:
 
 - OpenAI: `gpt-tokenizer` `o200k_base` (exact)
 - Anthropic: `gpt-tokenizer` `cl100k_base` (approximation — Anthropic does not ship a public Claude 3+ tokenizer)
-- Google: `chars / 4` heuristic
+- Google: `chars / 4` heuristic (approximate)
+- Mistral: `mistral-tokenizer-js` for SentencePiece-family models (exact); `chars / 4` heuristic for Tekken-family models (approximate)
+- Cohere: `chars / 4` heuristic (approximate)
 
-Empirical mode (real provider `countTokens` calls) is intentionally **not** wired into the Action — the Action runs on every PR and would either need an Anthropic key in repo secrets (risk) or limit itself to OpenAI (asymmetric). For exact Claude / Gemini numbers, run `npx tokenometer <file> --empirical` locally.
+Empirical mode (real provider `countTokens` calls) is intentionally **not** wired into the Action — the Action runs on every PR and would either need provider keys in repo secrets (risk) or limit itself to OpenAI (asymmetric). For exact Claude / Gemini / Cohere numbers, run `npx tokenometer <file> --empirical` locally. Same for `--latency` — it requires real generation calls and is dev-machine-only.
 
 ## License
 
diff --git a/packages/action/package.json b/packages/action/package.json
index 001818e..aa67089 100644
--- a/packages/action/package.json
+++ b/packages/action/package.json
@@ -1,10 +1,26 @@
 {
   "name": "@tokenometer/action",
   "version": "0.0.2",
-  "description": "Tokenometer GitHub Action — sticky PR comment with prompt-cost diff and budget gate.",
+  "description": "Tokenometer GitHub Action — sticky PR comment with prompt-cost diff, per-file attribution, and budget gate.",
   "license": "MIT",
   "private": true,
-  "keywords": ["ci", "code-quality", "github-action", "llm-cost", "prompt-cost"],
+  "keywords": [
+    "anthropic",
+    "ci",
+    "ci-cd",
+    "claude",
+    "code-quality",
+    "code-scanning",
+    "cohere",
+    "github-action",
+    "gpt-4o",
+    "llm-cost",
+    "mistral",
+    "openai",
+    "prompt-cost",
+    "prompt-cost-regression",
+    "sarif"
+  ],
   "type": "module",
   "main": "./dist/index.js",
   "scripts": {
diff --git a/packages/claude-code-skill/README.md b/packages/claude-code-skill/README.md
index 78a07a8..8b9ae56 100644
--- a/packages/claude-code-skill/README.md
+++ b/packages/claude-code-skill/README.md
@@ -38,11 +38,12 @@ discoverable.
 ## What's it do
 
 The skill registers trigger phrases like "what does this prompt cost",
-"is this cheaper as JSON or YAML", and "did my change increase prompt
-cost". When matched, Claude Code knows to shell out to
-`npx tokenometer …` instead of guessing — then surfaces real token
-counts and USD cost across Claude / GPT-4o / Gemini, with an
-honesty flag for approximate vs exact counts.
+"is this cheaper as JSON or YAML", "is GPT-4o or Claude faster on this
+prompt", and "did my change increase prompt cost". When matched, Claude
+Code knows to shell out to `npx tokenometer …` instead of guessing —
+then surfaces real token counts, USD cost, and (with `--latency`) TTFT
+and tokens/sec across Claude, GPT-4o, Gemini, Mistral, and Cohere, with
+an honesty flag for approximate vs exact counts.
 
 See [`SKILL.md`](./SKILL.md) for the full skill content (trigger
 phrases, invocation patterns, output handling, and the empirical
diff --git a/packages/claude-code-skill/package.json b/packages/claude-code-skill/package.json
index 13be3a3..0148808 100644
--- a/packages/claude-code-skill/package.json
+++ b/packages/claude-code-skill/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@tokenometer/claude-code-skill",
   "version": "0.0.2",
-  "description": "Claude Code skill that wraps the tokenometer CLI.",
+  "description": "Claude Code skill that wraps the tokenometer CLI for prompt-cost-aware agents.",
   "license": "MIT",
   "author": "Faraazuddin Mohammed <mohdfaraaz1@gmail.com>",
   "homepage": "https://tokenometer.vercel.app",
@@ -15,13 +15,23 @@
   },
   "private": true,
   "keywords": [
+    "anthropic",
     "claude",
     "claude-code",
     "claude-code-skill",
+    "cohere",
+    "gemini",
+    "gpt-4o",
+    "latency",
+    "llm",
     "llm-cost",
+    "mistral",
+    "openai",
     "prompt-cost",
+    "prompt-engineering",
     "skill",
-    "tokenometer"
+    "tokenometer",
+    "vision-tokens"
   ],
   "files": ["SKILL.md", "README.md", "install.sh"],
   "scripts": {
diff --git a/packages/cli/README.md b/packages/cli/README.md
index c77424e..83d69c0 100644
--- a/packages/cli/README.md
+++ b/packages/cli/README.md
@@ -3,7 +3,7 @@
 [![npm tokenometer](https://img.shields.io/npm/v/tokenometer.svg?label=tokenometer)](https://www.npmjs.com/package/tokenometer)
 [![License: MIT](https://img.shields.io/github/license/faraa2m/tokenometer.svg)](https://github.com/faraa2m/tokenometer/blob/main/LICENSE)
 
-> Empirical token-cost benchmarking for LLM prompts. Tells you what your prompt actually costs across Claude, GPT-4o, and Gemini, in every format.
+> Empirical token-cost + latency benchmarking for LLM prompts. Tells you what your prompt actually costs and how fast each provider responds across Claude, GPT-4o, Gemini, Mistral, and Cohere — in every format.
 
 See the [root README](https://github.com/faraa2m/tokenometer#readme) for findings, methodology, and the full project overview.
 
@@ -25,17 +25,57 @@ Cheapest: gpt-4o as json ($0.000192)
 Priciest: claude-opus-4-7 as yaml ($0.001260, 6.74x more)
 ```
 
-A leading `~` marks an approximate count (offline mode for Claude / Gemini, since neither vendor publishes a public tokenizer).
+A leading `~` marks an approximate count (offline mode for Claude / Gemini / Mistral-Tekken / Cohere, since none of those vendors publishes a public production tokenizer that ships in JS).
+
+## Flags
+
+| Flag | Default | Notes |
+|---|---|---|
+| `--model <id[,id…]>` | `claude-opus-4-7` (or auto-detected) | Any registered model id (63 across 5 providers). |
+| `--format <fmt[,fmt…]>` | `json,yaml,xml,markdown,text` | Subset of supported formats. |
+| `--output <fmt>` | `table` | `table` \| `json` \| `sarif`. |
+| `--by-file` | _off_ | Append a per-file token/USD table (multi-file only). |
+| `--image <path>` | _none_ | Add vision-token cost for the image (repeatable). |
+| `--config <path>` | _none_ | Load this exact config file (skips walk-up). |
+| `--no-config` | _off_ | Skip `.tokenometer.yml` loading entirely. |
+| `--empirical` | _off_ | Use provider `countTokens` APIs (free, exact). |
+| `--latency` | _off_ | Measure real generation latency (TTFT, total ms, tokens/sec). Implies `--empirical`. |
+| `--latency-trials <n>` | `3` | Trials per cell when `--latency` is set (1–10). |
+| `--max-spend <usd>` | `0.05` (or `0.25` with `--latency`) | Hard ceiling for empirical / latency mode. |
+| `--offline` | _off_ | Force offline path (overrides `--empirical`). |
+| `-h`, `--help` |  | Print help. |
+| `-v`, `--version` |  | Print version. |
+
+```
+tokenometer <file> [options]
+echo "prompt" | tokenometer - [options]
+```
+
+## Models supported
+
+63 models across 5 providers. Run `tokenometer --help` for the full list at runtime, or browse the [Cost Atlas](https://tokenometer.vercel.app/models) for sortable per-model pages.
+
+| Provider | Examples | Offline tokenizer | Empirical |
+|---|---|---|---|
+| Anthropic | `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5`, Claude 3.x family | `gpt-tokenizer` `cl100k_base` (approximate) | `messages.countTokens` (free, exact) |
+| OpenAI | `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-3.5-turbo`, `o1` family | `gpt-tokenizer` `o200k_base` (exact) | same `o200k_base` (matches production) |
+| Google | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-1.5-pro`, `gemini-1.5-flash` | `chars / 4` (approximate) | `model.countTokens` (free, exact) |
+| Mistral (19 models) | `open-mistral-7b`, `open-mixtral-8x22b`, `mistral-large-latest`, `codestral-latest`, `mistral-nemo`, `pixtral-large-latest`, `mistral-medium-2505`, `magistral-small`, `ministral-3b-latest`, `devstral-small-2505` | `mistral-tokenizer-js` for SentencePiece V1/V2/V3 (exact); `chars/4` for Tekken (approximate) | unsupported (no public token-count API) |
+| Cohere | `command-r`, `command-r-plus` | `chars / 4` (approximate) | `POST /v1/tokenize` (free, exact, requires `COHERE_API_KEY`) |
+
+Pricing comes from the [`tokenlens`](https://www.npmjs.com/package/tokenlens) registry with a small set of local overrides for bleeding-edge models. Cohere pricing lives entirely in `LOCAL_OVERRIDES` because `@tokenlens/models` doesn't yet ship a Cohere catalog at v1.3.0.
 
 ## Empirical mode
 
-For exact, vendor-billed counts on Claude and Gemini, set the right env var and pass `--empirical`. The tool calls the providers' free `countTokens` endpoints — no charge.
+For exact, vendor-billed counts on Claude, Gemini, and Cohere, set the right env var and pass `--empirical`. The tool calls each provider's free `countTokens`-equivalent endpoint — no charge.
 
 ```bash
-ANTHROPIC_API_KEY=… GOOGLE_API_KEY=… \
-  npx tokenometer ./prompt.md --empirical
+ANTHROPIC_API_KEY=… GOOGLE_API_KEY=… COHERE_API_KEY=… \
+  npx tokenometer ./prompt.md --empirical --model claude-opus-4-7,gemini-2.5-pro,command-r-plus
 ```
 
+OpenAI's empirical path uses the `o200k_base` encoding locally (via `gpt-tokenizer`) — that encoding matches OpenAI's production count exactly, so no API call is needed. Mistral has no public token-count endpoint; the offline `mistral-tokenizer-js` path is used regardless.
+
 ## Auto provider detection
 
 When `--model` is omitted, tokenometer picks a default based on which provider key is set in your environment:
@@ -43,6 +83,8 @@ When `--model` is omitted, tokenometer picks a default based on which provider k
 - `ANTHROPIC_API_KEY` only → `claude-opus-4-7`
 - `OPENAI_API_KEY` only → `gpt-4o`
 - `GOOGLE_API_KEY` / `GEMINI_API_KEY` only → first known `gemini-*` model (falls back to `gemini-2.5-pro`)
+- `MISTRAL_API_KEY` only → first known `mistral-*` model
+- `COHERE_API_KEY` only → `command-r-plus`
 - Multiple keys set → falls back to `claude-opus-4-7` and prints a stderr note. Pass `--model` to disambiguate.
 - No keys set → existing default (`claude-opus-4-7`).
 
@@ -53,7 +95,7 @@ This means `npx tokenometer prompt.md` does the right thing in any of those envi
 Drop a `.tokenometer.yml` (or `.yaml`) at the project root and tokenometer will pick it up automatically (walks up from the cwd, stopping at `.git`):
 
 ```yaml
-models: [claude-opus-4-7, gpt-4o]
+models: [claude-opus-4-7, gpt-4o, mistral-large-latest]
 formats: [json, yaml, markdown]
 paths: [prompts/**/*.md]
 budgets:
@@ -76,7 +118,7 @@ npx tokenometer ./prompt.md --output sarif > tokenometer.sarif
 npx tokenometer ./prompt.md --output json | jq '.files[].results | map(.inputCost) | add'
 ```
 
-### Latency
+## Latency
 
 `--latency` measures real generation latency in addition to token cost. For each `(model, format)` cell, tokenometer streams `n` real chat completions (default `n=3`, override with `--latency-trials 1..10`) capped at `max_tokens=200`, and reports:
 
@@ -107,7 +149,7 @@ By file:
   prompts/router.md   872    $0.0131
 ```
 
-Useful for figuring out which prompt files dominate the cost of a multi-file pipeline.
+Useful for figuring out which prompt files dominate the cost of a multi-file pipeline. The aggregator that produces this table is also what powers the GitHub Action's per-file Δ comment, and is unit-tested in [`packages/action`](https://github.com/faraa2m/tokenometer/tree/main/packages/action).
 
 ## Vision tokens
 
@@ -123,34 +165,12 @@ Each image's dimensions are read with `image-size` (no native deps), then dispat
 - GPT-4o → OpenAI's high-detail tiling: `85 + 170 × ceil(w/512) × ceil(h/512)` after the 2048/768 resize step.
 - Gemini → Google's `258 × ceil(w/768) × ceil(h/768)` (with a flat 258 for ≤384×384 images).
 
-Vision-token cells are always marked `approximate: true` since they're formula-derived. Each image also gets its own row in the `--by-file` table as a virtual file `<image-path> [vision]`.
+Mistral and Cohere don't have published vision-token formulas, so vision images are skipped for those providers (with a stderr note). Vision-token cells are always marked `approximate: true` since they're formula-derived. Each image also gets its own row in the `--by-file` table as a virtual file `<image-path> [vision]`.
 
 ## Why not just `tiktoken`?
 
 `tiktoken`'s `cl100k_base` (the encoding most "Claude tokenizer" libraries fall back on) **under-counts Opus 4.7 by a median of +62%** across a 10-prompt benchmark. Sonnet 4.6 and Haiku 4.5 are closer (~17%). Format choice is a wash. Model choice swings cost by 12×. See [README](https://github.com/faraa2m/tokenometer#findings-anthropic-n150-cells-across-10-prompt-shapes) for the dataset findings.
 
-## Flags
-
-```
-tokenometer <file> [options]
-echo "prompt" | tokenometer - [options]
-
---model <id[,id…]>     Default: claude-opus-4-7, or auto-detected from env
---format <fmt[,fmt…]>  Default: all (json,yaml,xml,markdown,text)
---output <fmt>         table (default) | json | sarif
---by-file              Append a per-file token/cost table (multi-file only)
---image <path>         Add vision-token cost for the image (repeatable)
---config <path>        Load this exact config file
---no-config            Skip .tokenometer.yml loading
---empirical            Use provider countTokens APIs (free, exact)
---latency              Measure real generation latency (TTFT, total ms, tokens/s)
---latency-trials <n>   Trials per cell when --latency is set (1-10, default 3)
---max-spend <usd>      Hard ceiling for empirical mode (default 0.05; with --latency, 0.25)
---offline              Force offline (overrides --empirical)
--h, --help
--v, --version
-```
-
 ## License
 
 MIT
diff --git a/packages/cli/package.json b/packages/cli/package.json
index 029cce4..9d271cd 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -1,7 +1,7 @@
 {
   "name": "tokenometer",
   "version": "0.0.2",
-  "description": "Tokenometer CLI — measure prompt token cost across Claude, GPT-4o, Gemini. Multi-provider, multi-format, empirical mode.",
+  "description": "Tokenometer CLI — LLM token cost + latency benchmarking across Claude, GPT-4o, Gemini, Mistral, and Cohere. Multi-format, empirical mode, vision tokens, SARIF output.",
   "license": "MIT",
   "author": "Faraazuddin Mohammed <mohdfaraaz1@gmail.com>",
   "homepage": "https://tokenometer.vercel.app",
@@ -18,24 +18,44 @@
     "anthropic",
     "ci-cd",
     "claude",
+    "claude-code",
+    "claude-code-skill",
     "cli",
+    "code-scanning",
+    "codestral",
+    "cohere",
+    "command-r",
     "cost",
     "cost-calculator",
+    "cursor",
     "gemini",
     "github-action",
     "gpt",
     "gpt-4o",
+    "latency",
     "llm",
     "llm-cost",
+    "mistral",
+    "mistral-7b",
+    "mistral-large",
+    "mixtral",
+    "model-comparison",
+    "multimodal",
     "openai",
+    "pixtral",
     "prompt",
     "prompt-cost",
+    "prompt-cost-regression",
     "prompt-engineering",
     "prompt-regression",
+    "sarif",
     "tiktoken",
     "token",
     "token-budget",
-    "tokenizer"
+    "tokenizer",
+    "ttft",
+    "vision-tokens",
+    "vscode"
   ],
   "type": "module",
   "main": "./dist/index.js",
diff --git a/packages/core/README.md b/packages/core/README.md
index 1ee4e11..e0463ee 100644
--- a/packages/core/README.md
+++ b/packages/core/README.md
@@ -3,32 +3,81 @@
 [![npm @tokenometer/core](https://img.shields.io/npm/v/@tokenometer/core.svg?label=@tokenometer/core)](https://www.npmjs.com/package/@tokenometer/core)
 [![License: MIT](https://img.shields.io/github/license/faraa2m/tokenometer.svg)](https://github.com/faraa2m/tokenometer/blob/main/LICENSE)
 
-> Core library powering [tokenometer](https://www.npmjs.com/package/tokenometer): tokenizer dispatch, format converters, versioned cost rate matrix, and an empirical-mode `countTokens` adapter for Anthropic, OpenAI, and Google.
+> Core library powering [tokenometer](https://www.npmjs.com/package/tokenometer): tokenizer dispatch, format converters, versioned cost rate matrix, vision-token estimators, latency measurement, SARIF emitter, config loader, and an empirical-mode `countTokens` adapter for Anthropic, OpenAI, Google, and Cohere (Mistral has no public token-count endpoint, so it always uses the offline tokenizer).
 
 See the [root README](https://github.com/faraa2m/tokenometer#readme) for findings, methodology, and the full project overview.
 
 [**Live playground**](https://tokenometer.vercel.app) · [Source](https://github.com/faraa2m/tokenometer) · MIT
 
-If you just want a CLI, `npm install -g tokenometer`. This package is for programmatic use.
+If you just want a CLI, `npm install -g tokenometer`. This package is for programmatic use — it's the engine the CLI, the GitHub Action, the VS Code / Cursor extension, and the playground all share, so counts and pricing stay identical across every surface.
 
 ## API
 
 ```ts
 import {
+  // Core tokenization
   tokenize,
   tokenizeMatrix,
+  countTokens,
+  // Empirical (real provider countTokens / tokenize endpoints)
   tokenizeEmpirical,
   tokenizeMatrixEmpirical,
-  countTokens,
+  // Latency benchmarking
+  measureLatency,
+  nthPercentile,
+  // Format conversion
   toFormat,
   isFormat,
   allFormats,
+  // Config (.tokenometer.yml)
+  loadConfig,
+  parseConfig,
+  // SARIF + JSON emitter
+  toSarif,
+  // Vision-token estimators
+  anthropicVisionTokens,
+  openaiVisionTokens,
+  googleVisionTokens,
+  // Pricing / model registry
   KNOWN_MODELS,
+  MODELS,
   RATES,
   RATES_VERSION,
   getModel,
   getRate,
 } from '@tokenometer/core';
+
+import type {
+  // Token results
+  CountResult,
+  TokenizeResult,
+  EmpiricalResult,
+  EmpiricalCountResult,
+  EmpiricalEnv,
+  // Latency
+  LatencyResult,
+  LatencyTrial,
+  LatencyStats,
+  LatencyDeps,
+  MeasureLatencyOptions,
+  // Aggregates / formatters
+  TokenometerResult,
+  TokenometerFileResult,
+  ToSarifOptions,
+  // Config
+  TokenometerConfig,
+  ConfigFormat,
+  // Vision input shapes
+  AnthropicVisionInput,
+  OpenAIVisionInput,
+  GoogleVisionInput,
+  // Registry
+  ModelDescriptor,
+  Provider,
+  RateEntry,
+  Format,
+  TokenizerKind,
+} from '@tokenometer/core';
 ```
 
 ### Offline (deterministic, no API key)
@@ -62,9 +111,54 @@ const result = await tokenizeEmpirical({
 // approximate: false  ← uses Anthropic's messages.countTokens
 ```
 
+### Latency benchmarking
+
+```ts
+const result = await measureLatency({
+  modelId: 'claude-opus-4-7',
+  prompt: 'Write a haiku about CI.',
+  trials: 3,
+  env: { anthropicApiKey: process.env.ANTHROPIC_API_KEY! },
+});
+// LatencyResult: { trials: LatencyTrial[], stats: { ttftMs, totalMs, tokensPerSec } }
+// Each stat is { p50, p95, mean }.
+```
+
+Supported providers: Anthropic (`messages.stream`), OpenAI (`/v1/chat/completions` SSE), Google (`generateContentStream`), Cohere (`/v1/chat` NDJSON), Mistral (`/v1/chat/completions` SSE). Each call is capped at `max_tokens=200`; trials retry once on transient failures.
+
+### Vision tokens
+
+```ts
+const tokens = anthropicVisionTokens({ width: 1280, height: 720 });
+// 1228 (capped at 1600 for very large images)
+```
+
+The `openaiVisionTokens` and `googleVisionTokens` exports are formula-equivalent to the OpenAI high-detail tile cost and Gemini's 258-per-768²-tile cost respectively.
+
+### SARIF + JSON output
+
+```ts
+const sarif = toSarif({ files: [{ path: 'prompt.md', results: [...] }] });
+// SARIF 2.1.0 — drop into GitHub Code Scanning or any SARIF viewer.
+```
+
 ### Rate table
 
-`RATES` is a `Record<modelId, { inputPer1k, outputPer1k, cachedInputPer1k? }>`. `RATES_VERSION` ships as a date string so consumers can pin or audit.
+`RATES` is a `Record<modelId, { inputPer1k, outputPer1k, cachedInputPer1k? }>`. `RATES_VERSION` ships as a date string so consumers can pin or audit. `KNOWN_MODELS` is the union of every provider's models (currently 63 across 5 providers).
+
+## Providers
+
+| Provider  | Models | Offline tokenizer | Exactness | Empirical (`tokenizeEmpirical`) |
+|-----------|--------|-------------------|-----------|----------------------------------|
+| OpenAI    | `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-3.5-turbo`, `o1` family | `gpt-tokenizer` `o200k_base` | exact | same `o200k_base` (matches production) |
+| Anthropic | `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5`, Claude 3.x family | `gpt-tokenizer` `cl100k_base` | approximate | `messages.countTokens` (free, exact) |
+| Google    | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-1.5-pro`, `gemini-1.5-flash` | `chars / 4` heuristic | approximate | `model.countTokens` (free, exact) |
+| Mistral (19 models) | `open-mistral-7b`, `open-mixtral-8x22b`, `mistral-large-latest`, `codestral-latest`, `mistral-nemo`, `pixtral-large-latest`, `mistral-medium-2505`, `magistral-small`, `ministral-3b-latest`, `devstral-small-2505` | `mistral-tokenizer-js` (V1/V2/V3 SentencePiece); `chars/4` for Tekken family (NeMo, Pixtral, Mistral Small 2409+, Devstral, Mistral Medium 2505+, Magistral, Ministral) | exact for SentencePiece · approximate for Tekken | unsupported (no public token-count endpoint) |
+| Cohere    | `command-r`, `command-r-plus` | `chars / 4` heuristic | approximate | `POST /v1/tokenize` (free, exact, requires `COHERE_API_KEY`) |
+
+Pricing comes from `@tokenlens/models` plus a small `LOCAL_OVERRIDES` map for bleeding-edge models the registry hasn't picked up yet. Cohere lives entirely in `LOCAL_OVERRIDES` because `@tokenlens/models` does not yet ship a Cohere catalog at v1.3.0; pull from `cohere.com/pricing` whenever `RATES_VERSION` bumps.
+
+The internal dispatch helpers `mistralCount`, `cohereCount`, `cohereTokenizeApi`, and `isTekken` (in `tokenize-mistral.ts` / `tokenize-cohere.ts`) are not part of the public API — they're called from `tokenize` / `tokenizeEmpirical`. If you need them, import them from those files directly, but be aware they may move without notice.
 
 ## License
 
diff --git a/packages/core/package.json b/packages/core/package.json
index 7876808..9d5c87b 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@tokenometer/core",
   "version": "0.0.2",
-  "description": "Tokenometer core library — multi-provider LLM token cost estimation with empirical countTokens fallback.",
+  "description": "Tokenometer core library — multi-provider LLM token cost, latency, and vision-token estimation with empirical countTokens fallback (Anthropic, OpenAI, Google, Mistral, Cohere).",
   "license": "MIT",
   "author": "Faraazuddin Mohammed <mohdfaraaz1@gmail.com>",
   "homepage": "https://tokenometer.vercel.app",
@@ -19,24 +19,44 @@
     "ci-cd",
     "cl100k",
     "claude",
+    "claude-code",
+    "claude-code-skill",
+    "code-scanning",
+    "codestral",
+    "cohere",
+    "command-r",
     "cost",
     "cost-calculator",
+    "cursor",
     "gemini",
     "github-action",
     "gpt",
     "gpt-4o",
+    "latency",
     "llm",
     "llm-cost",
+    "mistral",
+    "mistral-7b",
+    "mistral-large",
+    "mixtral",
+    "model-comparison",
+    "multimodal",
     "o200k",
     "openai",
+    "pixtral",
     "prompt",
     "prompt-cost",
+    "prompt-cost-regression",
     "prompt-engineering",
     "prompt-regression",
+    "sarif",
     "tiktoken",
     "token",
     "token-budget",
-    "tokenizer"
+    "tokenizer",
+    "ttft",
+    "vision-tokens",
+    "vscode"
   ],
   "type": "module",
   "main": "./dist/index.js",
diff --git a/packages/vscode/README.md b/packages/vscode/README.md
index d3d3229..85c7765 100644
--- a/packages/vscode/README.md
+++ b/packages/vscode/README.md
@@ -10,12 +10,16 @@ Most "token counter" extensions stop at counts. Tokenometer adds the dollar valu
 
 ## Install
 
-Once published:
+### Marketplace (coming with v1.0.0)
 
-- **VS Code Marketplace:** _coming soon_
-- **Open VSX (Cursor / VSCodium):** _coming soon_
+The Marketplace install button arrives with the v1.0.0 cut — Phase I owns that publish step. Track it on the [milestones page](https://github.com/faraa2m/tokenometer/milestones).
 
-While you're waiting, you can build a `.vsix` locally from this repo:
+- **VS Code Marketplace:** _coming with v1.0.0_
+- **Open VSX (Cursor / VSCodium):** _coming with v1.0.0_
+
+### Build locally now (`.vsix`)
+
+While the Marketplace listing is in flight, build a `.vsix` from this repo and side-load it:
 
 ```bash
 npm install
@@ -24,6 +28,8 @@ npm run package:vsix --workspace=@tokenometer/vscode
 code --install-extension packages/vscode/tokenometer-*.vsix
 ```
 
+The same `.vsix` works in Cursor and VSCodium.
+
 ## What it shows
 
 The status bar (right side) shows three things, separated by ` · `:
diff --git a/packages/vscode/package.json b/packages/vscode/package.json
index 52dd8b9..8c6615c 100644
--- a/packages/vscode/package.json
+++ b/packages/vscode/package.json
@@ -2,7 +2,7 @@
   "name": "@tokenometer/vscode",
   "version": "0.0.2",
   "displayName": "Tokenometer",
-  "description": "Live token count and USD cost for LLM prompts in the editor (Claude, GPT-4o, Gemini).",
+  "description": "Tokenometer for VS Code / Cursor — live token count and USD cost in the status bar for LLM prompts (Claude, GPT-4o, Gemini, Mistral, Cohere).",
   "publisher": "faraa2m",
   "license": "MIT",
   "private": true,
@@ -21,15 +21,29 @@
   },
   "categories": ["Programming Languages", "Other"],
   "keywords": [
-    "llm",
-    "tokens",
-    "cost",
+    "ai",
+    "anthropic",
     "claude",
-    "gpt",
+    "cohere",
+    "command-r",
+    "cost",
+    "cost-calculator",
+    "cursor",
     "gemini",
+    "gpt",
+    "gpt-4o",
+    "llm",
+    "llm-cost",
+    "mistral",
+    "multimodal",
+    "openai",
+    "prompt",
+    "prompt-cost",
+    "prompt-engineering",
+    "token",
     "tokenizer",
-    "ai",
-    "prompt-engineering"
+    "tokens",
+    "vscode"
   ],
   "activationEvents": ["onStartupFinished"],
   "main": "./dist/extension.js",
diff --git a/packages/web/package.json b/packages/web/package.json
index 6dca8c4..46757f8 100644
--- a/packages/web/package.json
+++ b/packages/web/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@tokenometer/web",
   "version": "0.0.2",
-  "description": "Tokenometer playground — calculator, vision-cost, prompt-diff, model atlas.",
+  "description": "Tokenometer playground — calculator, vision-token cost, prompt diff, SARIF, model atlas, init wizard.",
   "license": "MIT",
   "private": true,
   "type": "module",
diff --git a/vercel.json b/vercel.json
index 3f4a6d7..f3fbb57 100644
--- a/vercel.json
+++ b/vercel.json
@@ -3,5 +3,8 @@
   "framework": "vite",
   "installCommand": "npm install",
   "buildCommand": "npm run build && npm run build -w @tokenometer/web",
-  "outputDirectory": "packages/web/dist"
+  "outputDirectory": "packages/web/dist",
+  "rewrites": [
+    { "source": "/:path((?!assets/|robots.txt|sitemap.xml).*)", "destination": "/index.html" }
+  ]
 }