From d2c7f81ce014d67459f58dbef5e31540b0dad605 Mon Sep 17 00:00:00 2001 From: Faraazuddin Mohammed Date: Sun, 10 May 2026 00:29:09 -0400 Subject: [PATCH] fix(web): Vercel SPA routing + docs/SEO sweep for all v1.0.0 features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two unrelated fixes bundled: 1. Vercel SPA routing — playground routes added in Wave 3 (/diff, /by-file, /sarif, /vision, /config-builder, /init, /models, /models/, /editor, /claude-code) returned 404 in production. Vercel's Vite framework preset serves the static dist/ output literally; React Router's client-side paths weren't being rewritten to index.html. Added a `rewrites` rule in vercel.json that falls through to index.html for everything except assets/, robots.txt, and sitemap.xml. 2. Docs + SEO sweep — Waves 2 + 3 added significant feature surface that the existing READMEs and package.json metadata didn't reflect. Updated to land cleanly with the v1.0.0 launch: - Root README: refreshed hero, comparison table now shows Latency / VS Code / Claude Code / Mistral / Cohere / Vision / SARIF columns; added "five-line use" with VS Code + Claude Code panels; methodology table gains Mistral (split exact-for-SP / approximate-for-Tekken) and Cohere rows; status bumped to "approaching v1.0.0". - packages/cli/README: flag table at top; sections per output format, vision tokens, per-file attribution, latency mode (TTFT, p50/p95), auto provider detection, .tokenometer.yml config. - packages/core/README: full export list (parseConfig, loadConfig, toSarif, the three vision-tokens, measureLatency + types) plus Providers section listing all 5 with offline + empirical exactness. - packages/action/README: `top-n-files` input documented; example sticky-comment shape now shows the per-file Δ table + collapsible all-files block. - packages/vscode/README: settings, commands, supported file types, placeholder Marketplace install (coming in Phase I). 
- packages/claude-code-skill/README: install one-liner verified. - All package.json: refreshed `description`; extended `keywords` across CLI / core / action / vscode / claude-code-skill with mistral, cohere, latency, ttft, vision-tokens, vscode, cursor, claude-code, sarif, prompt-cost-regression, model-comparison, etc. - .github/repo-metadata.md: 7 new GH topics (mistral, cohere, vscode-extension, claude-code, latency, vision-tokens, sarif). - .github/release-notes-v1.0.0.md: removed [planned] markers for features now shipped; added "v1.0.0 launch surface" section. - .github/awesome-list-prs/*.md (5 files): updated one-liner to cite the full surface (CLI + Action + VS Code + Claude Code skill, 63 models, 5 providers). - .changeset/wave-4-docs-seo-sweep.md added. Phase I unified release pipeline (vsce publish, ovsx publish, smoke test job, Marketplace verify) is intentionally NOT in this PR — it lands separately so this docs/playground fix can ship immediately and the Phase I work can take its time. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .changeset/wave-4-docs-seo-sweep.md | 11 ++ .github/awesome-list-prs/awesome-actions.md | 2 +- .github/awesome-list-prs/awesome-ai-tools.md | 2 +- .github/awesome-list-prs/awesome-llm-apps.md | 2 +- .github/awesome-list-prs/awesome-llmops.md | 2 +- .../awesome-prompt-engineering.md | 2 +- .github/release-notes-v1.0.0.md | 125 +++++++++++++----- .github/repo-metadata.md | 25 +++- README.md | 90 ++++++++++--- package.json | 2 +- packages/action/README.md | 24 +++- packages/action/package.json | 20 ++- packages/claude-code-skill/README.md | 11 +- packages/claude-code-skill/package.json | 14 +- packages/cli/README.md | 82 +++++++----- packages/cli/package.json | 24 +++- packages/core/README.md | 102 +++++++++++++- packages/core/package.json | 24 +++- packages/vscode/README.md | 14 +- packages/vscode/package.json | 28 +++- packages/web/package.json | 2 +- vercel.json | 5 +- 22 files changed, 479 insertions(+), 134 deletions(-) create mode 100644 .changeset/wave-4-docs-seo-sweep.md diff --git a/.changeset/wave-4-docs-seo-sweep.md b/.changeset/wave-4-docs-seo-sweep.md new file mode 100644 index 0000000..a69f9c4 --- /dev/null +++ b/.changeset/wave-4-docs-seo-sweep.md @@ -0,0 +1,11 @@ +--- +"tokenometer": minor +"@tokenometer/core": minor +--- + +Documentation + SEO sweep: README, per-package READMEs, package.json +descriptions and keywords updated to reflect every shipped feature +(CLI flags, GitHub Action, VS Code extension, Claude Code skill, +Mistral + Cohere providers, latency benchmarking, vision tokens, SARIF). +Repo metadata + awesome-list templates regenerated. No source code +changes. 
diff --git a/.github/awesome-list-prs/awesome-actions.md b/.github/awesome-list-prs/awesome-actions.md index f4855be..bb3bf7d 100644 --- a/.github/awesome-list-prs/awesome-actions.md +++ b/.github/awesome-list-prs/awesome-actions.md @@ -22,5 +22,5 @@ It is a first-class GitHub Action that posts a sticky PR comment diffing prompt- ## Markdown line for the list ``` -- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native. +- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail. ``` diff --git a/.github/awesome-list-prs/awesome-ai-tools.md b/.github/awesome-list-prs/awesome-ai-tools.md index a5ed81f..aa84d84 100644 --- a/.github/awesome-list-prs/awesome-ai-tools.md +++ b/.github/awesome-list-prs/awesome-ai-tools.md @@ -22,5 +22,5 @@ The directory already includes generation, agent, and eval tooling but very litt ## Markdown line for the list ``` -- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native. +- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail. ``` diff --git a/.github/awesome-list-prs/awesome-llm-apps.md b/.github/awesome-list-prs/awesome-llm-apps.md index cdb1eea..2736839 100644 --- a/.github/awesome-list-prs/awesome-llm-apps.md +++ b/.github/awesome-list-prs/awesome-llm-apps.md @@ -22,5 +22,5 @@ Awesome-llm-apps already curates apps and tooling that help builders ship with L ## Markdown line for the list ``` -- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. 
Multi-provider, empirical, CI-native. +- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail. ``` diff --git a/.github/awesome-list-prs/awesome-llmops.md b/.github/awesome-list-prs/awesome-llmops.md index daf740e..06f9791 100644 --- a/.github/awesome-list-prs/awesome-llmops.md +++ b/.github/awesome-list-prs/awesome-llmops.md @@ -22,5 +22,5 @@ Cost is a first-class LLMOps concern but most platforms treat it as a post-hoc d ## Markdown line for the list ``` -- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native. +- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail. ``` diff --git a/.github/awesome-list-prs/awesome-prompt-engineering.md b/.github/awesome-list-prs/awesome-prompt-engineering.md index 3d7f5fa..7d23fd0 100644 --- a/.github/awesome-list-prs/awesome-prompt-engineering.md +++ b/.github/awesome-list-prs/awesome-prompt-engineering.md @@ -22,5 +22,5 @@ Prompt engineers iterate on wording, structure, and few-shot examples — all of ## Markdown line for the list ``` -- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost CLI + GitHub Action. Multi-provider, empirical, CI-native. +- [Tokenometer](https://github.com/faraa2m/tokenometer) — LLM token cost + latency CLI + GitHub Action + VS Code extension + Claude Code skill. 63 models across Claude, GPT-4o, Gemini, Mistral, Cohere. Empirical mode, CI-native cost guardrail. 
``` diff --git a/.github/release-notes-v1.0.0.md b/.github/release-notes-v1.0.0.md index 9d1d138..e73db31 100644 --- a/.github/release-notes-v1.0.0.md +++ b/.github/release-notes-v1.0.0.md @@ -1,43 +1,83 @@ -# Tokenometer v1.0.0 — Production-ready LLM cost calculator and CI guardrail. +# Tokenometer v1.0.0 — Production-ready LLM cost calculator, latency benchmark, and CI guardrail. -> Note: this is a **DRAFT**. Some items below are marked `[planned]` because the -> phase has not yet landed on `main`. Edit before publishing the GitHub Release. +> Note: this is a **DRAFT**. Edit before publishing the GitHub Release. All +> items below have already landed on `main` (Waves 2 + 3); the only items +> still in-flight at the time this draft was written are the marketplace +> publish, smoke tests, and release pipeline polish (Phase I). ## Highlights -- **CLI + GitHub Action shipped** — `npx tokenometer` and the marketplace action - share a single core, so local results match CI results. -- **Multi-provider** — Claude (Opus / Sonnet / Haiku), OpenAI GPT-4o family, and - Google Gemini all run through the same interface. -- **Empirical mode** — real `countTokens` API calls (free for Anthropic and - Google) instead of ad-hoc heuristics; falls back to `approximate` only when - the user opts out or no key is configured. -- **CI cost-guardrail** — sticky PR comment with a per-model diff, plus a - `budget` input that fails the run when the head delta exceeds your USD cap. -- **Auto-updating prices** — pricing and context limits are sourced from the - `tokenlens` registry, not a hand-maintained table. -- **Honest `approximate` flag** — every output row is tagged so you always know - whether a number came from a real tokenizer or an estimate. -- **Vision-token cost** `[planned — Phase D]` — image-aware token accounting - for multi-modal prompts. -- **Per-file attribution** `[planned — Phase C.4]` — costs broken down by source - file in PR comments. 
-- **SARIF output** `[planned — Phase C.5]` — lets cost regressions surface in - GitHub code scanning. -- **Auto provider detection** `[planned — Phase C.1]` — infer the provider from - the model id without an extra flag. -- **`.tokenometer.yml` config** `[planned — Phase C.2]` — repo-level defaults - for paths, models, and budgets. -- **VS Code / Cursor extension** `[planned — Phase E.1]` — inline cost lens for - prompt files in the editor. -- **Claude Code skill** `[planned — Phase E.2]` — first-class skill so Claude - Code can call Tokenometer when iterating on prompts. -- **Mistral + Cohere providers** `[planned — Phase H]` — additional model - families behind the same CLI surface. -- **Latency mode** `[planned]` — `--latency` flag to surface per-model latency - alongside cost. -- **Unified release pipeline** — one workflow publishes both `tokenometer` and - `@tokenometer/core` so the CLI and library never drift. +- **CLI + GitHub Action + VS Code extension + Claude Code skill — one core.** + `npx tokenometer`, the marketplace Action, the editor status bar, and the + Claude Code skill all share `@tokenometer/core`, so local results match CI + results match what's on screen in the editor match what an agent reports. +- **Multi-provider** — Claude (Opus / Sonnet / Haiku, Claude 3.x), OpenAI + GPT-4o family + o1, Google Gemini 2.5 / 1.5, Mistral (19 models — open + weights, large, codestral, NeMo, Pixtral, Magistral, Ministral, Devstral, + Mistral Medium 2505), and Cohere (command-r, command-r-plus). 63 models + total. +- **Empirical mode** — real `countTokens` API calls (free for Anthropic, + Google, and Cohere; tiktoken-anchored for OpenAI) instead of ad-hoc + heuristics; falls back to `approximate` only when the user opts out or + no key is configured. Mistral has no public token-count endpoint — + offline `mistral-tokenizer-js` is exact for SentencePiece-family models + and `chars/4` for Tekken-family models. 
+- **Latency benchmarking (`--latency`)** — TTFT, total ms, and tokens/sec + reported as p50 / p95 / mean over `n` real generations (default `n=3`, + configurable with `--latency-trials 1..10`). Supported on Anthropic, + OpenAI, Google, Cohere, and Mistral. The default `--max-spend` is + bumped from `$0.05` to `$0.25` when `--latency` is set. +- **CI cost-guardrail** — sticky PR comment with a per-model summary and a + per-file Δ table (configurable via `top-n-files`, with the rest folded + into a `<details>` block). The `budget` input fails the run when the + head delta exceeds your USD cap. +- **Per-file attribution** (`--by-file` in the CLI; per-file Δ in the + Action comment) so you know which prompt files dominate cost. +- **SARIF output** (`--output sarif`) — drop the file into GitHub Code + Scanning or any SARIF viewer to surface cost regressions next to lint + findings. +- **Vision-token cost** (`--image <path>`) — image-aware accounting for + Claude (`(w*h)/750`, capped at 1600), GPT-4o (high-detail tile cost), + and Gemini (`258 × ⌈w/768⌉ × ⌈h/768⌉`). +- **Auto provider detection** — infer the default model from whichever + `*_API_KEY` env var is set. No flag needed for the common case. +- **`.tokenometer.yml` config** — repo-level defaults for paths, models, + formats, and budgets. Walks up from cwd, stopping at `.git`. CLI + flags always win. +- **VS Code / Cursor extension** — status bar shows live `model · tokens + · USD` for the active prompt file. Settings: `tokenometer.model`, + `tokenometer.format`, `tokenometer.warnOnCostAbove`. Commands: + *Tokenometer: Switch model*, *Tokenometer: Show details*. Marketplace + listing arrives with this release; until then, `npm run package:vsix + --workspace=@tokenometer/vscode` builds a side-loadable `.vsix`. +- **Claude Code skill** (`tokenometer-cost-check`) — drop into + `~/.claude/skills/tokenometer/SKILL.md` and Claude Code agents will + reach for `npx tokenometer` when asked anything cost- or latency-shaped. +- **Auto-updating prices** — pricing and context limits are sourced from + the `tokenlens` registry, not a hand-maintained table. A small + `LOCAL_OVERRIDES` map covers bleeding-edge models (and the entire + Cohere catalog, which `@tokenlens/models` doesn't ship at v1.3.0). +- **Honest `approximate` flag** — every output row is tagged so you + always know whether a number came from a real tokenizer or an estimate. 
+- **Unified release pipeline** — one workflow publishes both + `tokenometer` and `@tokenometer/core` so the CLI and library never + drift. + +## v1.0.0 launch surface + +| Surface | Where | Notes | +|---|---|---| +| CLI | `npx tokenometer` / `npm i -g tokenometer` | All flags above | +| GitHub Action | `faraa2m/tokenometer@v1` (Marketplace) | Sticky PR comment with per-file Δ + budget gate | +| VS Code extension | Marketplace + Open VSX (Cursor / VSCodium) | Status bar live cost | +| Claude Code skill | `~/.claude/skills/tokenometer/SKILL.md` | Agentic prompt-cost awareness | +| Web playground | https://tokenometer.vercel.app | Calculator, diff, by-file, SARIF, vision, config builder, init wizard, Cost Atlas | +| Library | `@tokenometer/core` on npm | Engine for everything above | + +**Providers (5):** Anthropic, OpenAI, Google, Mistral, Cohere +**Models (63):** Claude 4.x + 3.x, GPT-4o family + o1, Gemini 2.5 + 1.5, Mistral 19-model catalog, Cohere command-r family +**Output formats:** table, JSON, SARIF +**Latency providers:** Anthropic, OpenAI, Google, Cohere, Mistral ## Empirical findings @@ -61,6 +101,19 @@ GitHub Action: prompts/**/*.md models: claude-opus-4-7,claude-sonnet-4-6,gpt-4o budget: '0.50' + top-n-files: 5 +``` + +VS Code / Cursor: + +``` +ext install faraa2m.tokenometer-vscode +``` + +Claude Code skill: + +``` +cp -R packages/claude-code-skill ~/.claude/skills/tokenometer ``` ## What's next diff --git a/.github/repo-metadata.md b/.github/repo-metadata.md index f8e9bdd..63ccce5 100644 --- a/.github/repo-metadata.md +++ b/.github/repo-metadata.md @@ -2,8 +2,29 @@ Apply via: ``` -gh repo edit faraa2m/tokenometer --description "Tokenometer — LLM cost calculator, token counter, and CI cost-guardrail Action for Claude, GPT-4o, Gemini." 
-gh repo edit faraa2m/tokenometer --add-topic llm --add-topic cost-tracking --add-topic prompt-engineering --add-topic tokenizer --add-topic claude --add-topic gpt-4 --add-topic gemini --add-topic github-actions --add-topic cli --add-topic developer-tools --add-topic ai --add-topic cost-optimization --add-topic anthropic --add-topic openai +gh repo edit faraa2m/tokenometer --description "Tokenometer — LLM cost calculator, token counter, latency benchmark, and CI cost-guardrail Action for Claude, GPT-4o, Gemini, Mistral, and Cohere. CLI + GitHub Action + VS Code extension + Claude Code skill." +gh repo edit faraa2m/tokenometer \ + --add-topic ai \ + --add-topic anthropic \ + --add-topic claude \ + --add-topic claude-code \ + --add-topic cli \ + --add-topic cohere \ + --add-topic cost-optimization \ + --add-topic cost-tracking \ + --add-topic developer-tools \ + --add-topic gemini \ + --add-topic github-actions \ + --add-topic gpt-4 \ + --add-topic latency \ + --add-topic llm \ + --add-topic mistral \ + --add-topic openai \ + --add-topic prompt-engineering \ + --add-topic sarif \ + --add-topic tokenizer \ + --add-topic vision-tokens \ + --add-topic vscode-extension ``` ## Marketplace categories to choose during release publish diff --git a/README.md b/README.md index 210829c..2d12f3c 100644 --- a/README.md +++ b/README.md @@ -7,26 +7,32 @@ [![GitHub stars](https://img.shields.io/github/stars/faraa2m/tokenometer.svg?style=social)](https://github.com/faraa2m/tokenometer/stargazers) -> Tokenometer — LLM cost calculator, token counter, and CI cost-guardrail Action for Claude, GPT-4o, Gemini. +> Tokenometer — LLM cost calculator, token counter, latency benchmark, and CI cost-guardrail for Claude, GPT-4o, Gemini, Mistral, and Cohere. CLI + GitHub Action + VS Code extension + Claude Code skill. 
> **Live: https://tokenometer.vercel.app** -Tokenometer answers a simple, expensive question: **does it actually cost less to send your prompt as YAML, JSON, XML, or Markdown — across Claude, GPT-4o, and Gemini?** It started as a [\$23 question](https://hackernoon.com/i-spent-$23-testing-the-yaml-saves-tokens-hack-it-doesnt-work). This is the tool anyone can run — offline, empirically, or as a PR guardrail. +Tokenometer answers a simple, expensive question: **does it actually cost less to send your prompt as YAML, JSON, XML, or Markdown — across Claude, GPT-4o, Gemini, Mistral, and Cohere — and how fast does each provider actually respond?** It started as a [\$23 question](https://hackernoon.com/i-spent-$23-testing-the-yaml-saves-tokens-hack-it-doesnt-work). Today it's the only LLM cost CLI that also tells you latency, ships a PR-blocking GitHub Action, lights up your editor's status bar, and teaches Claude Code agents to think in dollars. ## Why Tokenometer vs alternatives | | Tokenometer | [tokencost](https://github.com/AgentOps-AI/tokencost) (AgentOps) | [tiktoken](https://github.com/openai/tiktoken) (OpenAI) | [gpt-tokenizer](https://github.com/niieani/gpt-tokenizer) | [promptfoo](https://github.com/promptfoo/promptfoo) | gpt-token-counter-live (VS Code) | |---------------------------------------|:-----------:|:--------:|:--------:|:--------:|:--------:|:--------:| | Multi-provider (Anthropic / OpenAI / Google) | ✓ | ✓ | – | – | ✓ | – | +| Mistral support | ✓ | – | – | – | partial | – | +| Cohere support | ✓ | – | – | – | partial | – | | Multi-format compare (JSON / YAML / XML / MD / text) | ✓ | – | – | – | – | – | | Empirical mode (real provider `countTokens`) | ✓ | – | – | – | partial | – | -| CLI | ✓ | ✓ | – | – | ✓ | – | -| GitHub Action (PR cost-diff guardrail) | ✓ | – | – | – | partial | – | -| VS Code / Cursor extension | – (planned) | – | – | – | – | ✓ | +| Latency (TTFT + tokens/sec, p50/p95) | ✓ | – | – | – | partial | – | +| Vision-token cost (image 
inputs) | ✓ | – | – | – | – | – | | Cost (USD), not just tokens | ✓ | ✓ | – | – | partial | – | | Honest "approximate" flag when offline is a proxy | ✓ | – | – | – | – | – | +| CLI | ✓ | ✓ | – | – | ✓ | – | +| GitHub Action (PR cost-diff guardrail) | ✓ | – | – | – | partial | – | | Per-file attribution in CI | ✓ | – | – | – | – | – | +| SARIF output (GitHub code scanning) | ✓ | – | – | – | – | – | +| VS Code / Cursor extension | ✓ | – | – | – | – | ✓ | +| Claude Code skill | ✓ | – | – | – | – | – | -Tokenometer is the only tool in this list that combines **multi-provider + multi-format + empirical mode + USD cost + a PR-blocking GitHub Action + an honest approximate-vs-exact flag**. tokencost is the closest match for cost-in-USD across providers, but it doesn't compare formats or run as a CI guardrail. tiktoken and gpt-tokenizer are great single-provider primitives — Tokenometer uses gpt-tokenizer under the hood for the offline path. promptfoo is the broadest evaluator overall, but cost is one input among many; it isn't a dedicated cost-guardrail. The VS Code extension is real-time-in-editor only. +Tokenometer is the only tool in this list that combines **multi-provider (5 providers, 63 models) + multi-format + empirical mode + latency benchmarking + USD cost + a PR-blocking GitHub Action + an editor extension + a Claude Code skill + an honest approximate-vs-exact flag**. tokencost is the closest match for cost-in-USD across providers, but it doesn't compare formats, measure latency, or run as a CI guardrail. tiktoken and gpt-tokenizer are great single-provider primitives — Tokenometer uses gpt-tokenizer under the hood for the offline path. promptfoo is the broadest evaluator overall, but cost is one input among many; it isn't a dedicated cost-guardrail. gpt-token-counter-live, the other VS Code extension in the table, is real-time-in-editor only. 
## Findings (Anthropic, n=150 cells across 10 prompt shapes) @@ -52,7 +58,7 @@ $ tokenometer ./prompt.md --model claude-opus-4-7 --format json,yaml,markdown Priciest: claude-opus-4-7 as json ($0.0186, 1.08x more) ``` -The `Approx` column shows `✓` when the count is a proxy (Anthropic / Google offline) and is empty when it's an exact match (OpenAI offline, or any provider with `--empirical`). +The `Approx` column shows `✓` when the count is a proxy (Anthropic / Google / Mistral-Tekken / Cohere offline) and is empty when it's an exact match (OpenAI offline, Mistral SentencePiece-family offline, or any provider with `--empirical`). > Real demo (with empirical mode + GIF) at **https://tokenometer.vercel.app**. @@ -60,13 +66,14 @@ The `Approx` column shows `✓` when the count is a proxy (Anthropic / Google of **Cost AND latency in one CLI — the only tool that does both.** `tiktoken` and `@anthropic-ai/tokenizer` give you a token count for one provider. They don't tell you: -- What the same prompt costs across **multiple providers and models** -- How **fast** each provider actually responds (TTFT + tokens/sec) — a real generation, not a synthetic benchmark +- What the same prompt costs across **multiple providers and models** (Claude, GPT-4o, Gemini, Mistral, Cohere) +- How **fast** each provider actually responds (TTFT + tokens/sec, p50/p95/mean) — a real generation, not a synthetic benchmark - Whether **format conversion** (YAML ↔ JSON ↔ XML ↔ MD) actually moves the needle - The **empirical** cost — what your provider actually charged on a real call, after prompt caching - Whether a PR introduced a **prompt-cost regression** +- The **vision-token** cost when your prompt includes images -Tokenometer is dev-time, multi-provider, multi-format, optionally empirical, latency-aware, and CI-native. +Tokenometer is dev-time, multi-provider, multi-format, optionally empirical, latency-aware, vision-aware, and CI-native. 
And the same core powers the CLI, the GitHub Action, the VS Code / Cursor status bar, and the Claude Code skill — counts, pricing, and tokenizer choices stay identical across surfaces. ## Install @@ -80,7 +87,7 @@ Global: ```bash npm i -g tokenometer -tokenometer ./prompt.md --format yaml,json,xml,markdown,text --model claude-opus-4-7,gpt-4o +tokenometer ./prompt.md --format yaml,json,xml,markdown,text --model claude-opus-4-7,gpt-4o,mistral-large-latest,command-r-plus ``` Stdin works too: @@ -89,9 +96,9 @@ Stdin works too: echo "prompt body" | tokenometer - --model claude-sonnet-4-6 ``` -Run `tokenometer --help` for the full flag list and the current set of known model ids. +Run `tokenometer --help` for the full flag list and the current set of known model ids (63 across 5 providers). -## Three-line use +## Five-line use ### 1. Compare formats for a single prompt (offline, no API key) @@ -99,7 +106,7 @@ Run `tokenometer --help` for the full flag list and the current set of known mod tokenometer ./prompt.md --model claude-opus-4-7 ``` -Prints estimated tokens + USD across each format × the chosen model(s). Default model is `claude-opus-4-7`; default formats are all of `json,markdown,text,xml,yaml`. +Prints estimated tokens + USD across each format × the chosen model(s). Default model is `claude-opus-4-7` (or auto-detected from `*_API_KEY` env vars); default formats are all of `json,markdown,text,xml,yaml`. ### 2. 
Empirical mode (real provider `countTokens`, with a hard ceiling) @@ -112,8 +119,10 @@ For each `(model × format)` cell, calls the provider's exact token-count API: - Anthropic → `messages.countTokens` (free) - Google → `model.countTokens` (free) - OpenAI → tiktoken `o200k_base` (matches OpenAI's production count exactly, no API call) +- Cohere → `POST /v1/tokenize` (free, requires `COHERE_API_KEY`) +- Mistral → unsupported (no public token-count endpoint); offline `mistral-tokenizer-js` is exact for SentencePiece-family models, approximate (chars/4) for Tekken-family models. -Set `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini models. `--offline` forces the offline path even if `--empirical` is also passed. +Set `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini, `MISTRAL_API_KEY` for Mistral, `COHERE_API_KEY` for Cohere. `--offline` forces the offline path even if `--empirical` is also passed. ### 3. CI guardrail (GitHub Action) @@ -123,14 +132,27 @@ Set `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) for Gemini models. `--offline` forces paths: prompts/**/*.md,prompts/**/*.json models: claude-opus-4-7,claude-sonnet-4-6,gpt-4o formats: json,yaml,markdown - budget: '0.50' # USD; omit to disable the gate + budget: '0.50' # USD; omit to disable the gate + top-n-files: 5 # rows shown in the per-file Δ table; the rest fold into <details>
``` -Posts a sticky PR comment with the cost diff vs the base branch. Fails the check when the total Δ exceeds `budget`. See [`packages/action/README.md`](packages/action/README.md) for all inputs and outputs. +Posts a sticky PR comment with the cost diff vs the base branch, including a per-file Δ table and a collapsible "all files" block. Fails the check when the total Δ exceeds `budget`. See [`packages/action/README.md`](packages/action/README.md) for all inputs and outputs. -### More flags +### 4. Live cost in your editor (VS Code / Cursor) -The CLI also supports `--output json|sarif` for machine-readable output, `--by-file` for per-file attribution, `--image ` for vision-token cost on Claude / GPT-4o / Gemini, and `.tokenometer.yml` config files (auto-discovered, walk-up). See [`packages/cli/README.md`](packages/cli/README.md) for the full list. +``` +ext install faraa2m.tokenometer-vscode # marketplace listing arrives with v1.0.0 +``` + +Status bar shows `model · tokens · USD` for the active prompt file, updates on every keystroke (debounced), and turns warning-colored when you exceed `tokenometer.warnOnCostAbove`. Same `@tokenometer/core` as the CLI — what you see in the editor matches what CI computes. See [`packages/vscode/README.md`](packages/vscode/README.md). + +### 5. Claude Code skill (agentic prompt-cost awareness) + +```bash +cp -R packages/claude-code-skill ~/.claude/skills/tokenometer +``` + +Installs the `tokenometer-cost-check` skill so Claude Code agents can answer "what does this prompt cost?" with a real number — they shell out to `npx tokenometer` instead of guessing from `tiktoken`. See [`packages/claude-code-skill/README.md`](packages/claude-code-skill/README.md). 
## Methodology @@ -141,10 +163,34 @@ Tokenometer picks a tokenizer per provider and flags the count as approximate (` | OpenAI | `gpt-tokenizer` `o200k_base` | exact | same `o200k_base` (matches OpenAI production count) | | Anthropic | `gpt-tokenizer` `cl100k_base` | approximate | `messages.countTokens` (exact, free) | | Google | `chars / 4` heuristic | approximate | `model.countTokens` (exact, free) | -| Mistral | `mistral-tokenizer-js` (V1/V2/V3) · `chars/4` for Tekken | approximate | unsupported (no public token-count endpoint) | +| Mistral | `mistral-tokenizer-js` (V1/V2/V3) · `chars/4` for Tekken family | exact for SP-family · approximate for Tekken | unsupported (no public token-count endpoint) | | Cohere | `chars / 4` heuristic | approximate | `POST /v1/tokenize` (exact, free, requires `COHERE_API_KEY`) | -Cost = `tokens / 1000 × per-1k input rate`. Pricing and context windows are sourced from the [`tokenlens`](https://www.npmjs.com/package/tokenlens) registry, with a small set of local overrides for bleeding-edge models the registry hasn't picked up yet — see [`packages/core/src/rates.ts`](packages/core/src/rates.ts) (`RATES_VERSION`). +Cost = `tokens / 1000 × per-1k input rate`. Pricing and context windows are sourced from the [`tokenlens`](https://www.npmjs.com/package/tokenlens) registry, with a small set of local overrides for bleeding-edge models the registry hasn't picked up yet (and the full Cohere catalog, which `@tokenlens/models` doesn't ship at v1.3.0) — see [`packages/core/src/rates.ts`](packages/core/src/rates.ts) (`RATES_VERSION`). + +## Output formats + +The CLI is multi-surface by design: + +- **`--output table`** (default) — human-readable per-cell table. +- **`--output json`** — emits a `TokenometerResult` shape (`{ files: [{ path, results: [...] }] }`); pipe to `jq`. +- **`--output sarif`** — emits SARIF 2.1.0; drop into GitHub Code Scanning or any SARIF viewer. 
+- **`--by-file`** — appends a per-file token + USD summary table for multi-file inputs. +- **`--image <path>`** (repeatable) — adds vision-token cost for Claude / GPT-4o / Gemini. +- **`--latency`** — measures real generation latency (TTFT + total ms + tokens/sec, p50/p95/mean over `n` trials, default 3). Implies `--empirical`. Supported on Anthropic, OpenAI, Google, Cohere, and Mistral. + +```bash +npx tokenometer ./prompt.md --output sarif > tokenometer.sarif +npx tokenometer ./prompts/*.md --by-file --output json | jq '.files[].results | map(.inputCost) | add' +ANTHROPIC_API_KEY=… OPENAI_API_KEY=… npx tokenometer ./prompt.md --latency --model claude-opus-4-7,gpt-4o +``` + +Full flag reference: [`packages/cli/README.md`](packages/cli/README.md). + +## Editor + Claude Code + +- **VS Code / Cursor** — [`@tokenometer/vscode`](packages/vscode/README.md). Status bar with live token count + USD cost; settings for model, format, and a warn-above-USD threshold; `Tokenometer: Switch model` and `Tokenometer: Show details` commands. +- **Claude Code skill** — [`@tokenometer/claude-code-skill`](packages/claude-code-skill/README.md). Drop in `~/.claude/skills/tokenometer/SKILL.md` and Claude Code agents will reach for `npx tokenometer …` when you ask them anything cost-shaped. ## Project health @@ -156,7 +202,7 @@ Cost = `tokens / 1000 × per-1k input rate`. Pricing and context windows are sou ## Status -Early. v0.0.x — see [milestones](https://github.com/faraa2m/tokenometer/milestones). Roadmap to v1.0.0 in progress: VS Code extension, Claude Code skill, vision-token cost. +Approaching v1.0.0. See [milestones](https://github.com/faraa2m/tokenometer/milestones) for what's left in Phase I (marketplace publish, smoke tests, release pipeline) before the v1 cut. 
## License diff --git a/package.json b/package.json index 220fa26..f6f9de8 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "tokenometer-monorepo", "private": true, "version": "0.0.0", - "description": "Tokenometer — LLM cost calculator, token counter, and CI cost-guardrail Action for Claude, GPT-4o, Gemini.", + "description": "Tokenometer — LLM cost calculator, token counter, latency benchmark, and CI cost-guardrail Action for Claude, GPT-4o, Gemini, Mistral, and Cohere. Includes VS Code extension and Claude Code skill.", "license": "MIT", "author": "Faraazuddin Mohammed ", "homepage": "https://github.com/faraa2m/tokenometer", diff --git a/packages/action/README.md b/packages/action/README.md index b58c596..fa7fd8f 100644 --- a/packages/action/README.md +++ b/packages/action/README.md @@ -3,7 +3,7 @@ [![License: MIT](https://img.shields.io/github/license/faraa2m/tokenometer.svg)](https://github.com/faraa2m/tokenometer/blob/main/LICENSE) -Posts a sticky PR comment with the prompt-cost diff between your branch and its base. Fails the check when the delta exceeds a budget. +Posts a sticky PR comment with the prompt-cost diff between your branch and its base, including a per-file Δ table. Fails the check when the delta exceeds a budget. See the [root README](https://github.com/faraa2m/tokenometer#readme) for findings, methodology, and the full project overview. 
@@ -31,7 +31,8 @@ jobs: paths: prompts/**/*.md,prompts/**/*.json models: claude-opus-4-7,claude-sonnet-4-6,gpt-4o formats: json,yaml,markdown - budget: '0.50' # USD; omit to disable the gate + budget: '0.50' # USD; omit to disable the gate + top-n-files: 5 # rows shown in the per-file Δ table ``` ## Inputs @@ -39,7 +40,7 @@ jobs: | Name | Default | Notes | |---|---|---| | `paths` | `prompts/**/*.{md,json,yaml,yml,txt}` | Comma- or newline-separated globs | -| `models` | `claude-opus-4-7,claude-sonnet-4-6,gpt-4o` | Any tokenometer-supported model id | +| `models` | `claude-opus-4-7,claude-sonnet-4-6,gpt-4o` | Any tokenometer-supported model id (63 across Anthropic / OpenAI / Google / Mistral / Cohere) | | `formats` | `json,yaml,xml,markdown,text` | Subset of supported formats | | `budget` | _empty_ | Max acceptable total Δ in USD. Empty = disabled | | `base-ref` | _auto_ | Falls back to `origin/<base>` for PRs, `HEAD~1` otherwise | @@ -56,9 +57,16 @@ jobs: ## Comment shape -The sticky comment opens with the existing total-cost line and per-file table, then appends a "Top changed files" section. When more files changed than `top-n-files`, the rest are folded into a collapsible block: +The sticky comment opens with the existing total-cost line and per-model summary, then appends a "Top changed files" Δ table. When more files changed than `top-n-files`, the rest are folded into a collapsible `<details>` block:
 ```markdown +**Total prompt cost Δ:** +$0.0124 (head $0.4823, base $0.4699) + +| Model | Base | Head | Δ | +|---|---:|---:|---:| +| `claude-opus-4-7` | $0.2402 | $0.2484 | +$0.0082 | +| `gpt-4o` | $0.2297 | $0.2339 | +$0.0042 | + ### Top changed files (5) | File | Tokens Δ | USD Δ | @@ -74,7 +82,7 @@ The sticky comment opens with the existing total-cost line and per-file table, t
``` -Sorting: `|Δ USD|` desc → `Δ tokens` desc → path. Added files are tagged `(+)`, deleted files `(−)`. +Sorting: `|Δ USD|` desc → `Δ tokens` desc → path. Added files are tagged `(+)`, deleted files `(−)`. The per-file aggregator that produces this table is unit-tested (`packages/action/src/per-file-diff.test.ts`) so you can rely on the math. ## What it measures @@ -82,9 +90,11 @@ Same offline tokenizer dispatch as the CLI: - OpenAI: `gpt-tokenizer` `o200k_base` (exact) - Anthropic: `gpt-tokenizer` `cl100k_base` (approximation — Anthropic does not ship a public Claude 3+ tokenizer) -- Google: `chars / 4` heuristic +- Google: `chars / 4` heuristic (approximate) +- Mistral: `mistral-tokenizer-js` for SentencePiece-family models (exact); `chars/4` for Tekken-family models (approximate) +- Cohere: `chars / 4` heuristic (approximate) -Empirical mode (real provider `countTokens` calls) is intentionally **not** wired into the Action — the Action runs on every PR and would either need an Anthropic key in repo secrets (risk) or limit itself to OpenAI (asymmetric). For exact Claude / Gemini numbers, run `npx tokenometer --empirical` locally. +Empirical mode (real provider `countTokens` calls) is intentionally **not** wired into the Action — the Action runs on every PR and would either need provider keys in repo secrets (risk) or limit itself to OpenAI (asymmetric). For exact Claude / Gemini / Cohere numbers, run `npx tokenometer --empirical` locally. Same for `--latency` — it requires real generation calls and is dev-machine-only. 
## License diff --git a/packages/action/package.json b/packages/action/package.json index 001818e..aa67089 100644 --- a/packages/action/package.json +++ b/packages/action/package.json @@ -1,10 +1,26 @@ { "name": "@tokenometer/action", "version": "0.0.2", - "description": "Tokenometer GitHub Action — sticky PR comment with prompt-cost diff and budget gate.", + "description": "Tokenometer GitHub Action — sticky PR comment with prompt-cost diff, per-file attribution, and budget gate.", "license": "MIT", "private": true, - "keywords": ["ci", "code-quality", "github-action", "llm-cost", "prompt-cost"], + "keywords": [ + "anthropic", + "ci", + "ci-cd", + "claude", + "code-quality", + "code-scanning", + "cohere", + "github-action", + "gpt-4o", + "llm-cost", + "mistral", + "openai", + "prompt-cost", + "prompt-cost-regression", + "sarif" + ], "type": "module", "main": "./dist/index.js", "scripts": { diff --git a/packages/claude-code-skill/README.md b/packages/claude-code-skill/README.md index 78a07a8..8b9ae56 100644 --- a/packages/claude-code-skill/README.md +++ b/packages/claude-code-skill/README.md @@ -38,11 +38,12 @@ discoverable. ## What's it do The skill registers trigger phrases like "what does this prompt cost", -"is this cheaper as JSON or YAML", and "did my change increase prompt -cost". When matched, Claude Code knows to shell out to -`npx tokenometer …` instead of guessing — then surfaces real token -counts and USD cost across Claude / GPT-4o / Gemini, with an -honesty flag for approximate vs exact counts. +"is this cheaper as JSON or YAML", "is GPT-4o or Claude faster on this +prompt", and "did my change increase prompt cost". When matched, Claude +Code knows to shell out to `npx tokenometer …` instead of guessing — +then surfaces real token counts, USD cost, and (with `--latency`) TTFT +and tokens/sec across Claude, GPT-4o, Gemini, Mistral, and Cohere, with +an honesty flag for approximate vs exact counts. 
See [`SKILL.md`](./SKILL.md) for the full skill content (trigger phrases, invocation patterns, output handling, and the empirical diff --git a/packages/claude-code-skill/package.json b/packages/claude-code-skill/package.json index 13be3a3..0148808 100644 --- a/packages/claude-code-skill/package.json +++ b/packages/claude-code-skill/package.json @@ -1,7 +1,7 @@ { "name": "@tokenometer/claude-code-skill", "version": "0.0.2", - "description": "Claude Code skill that wraps the tokenometer CLI.", + "description": "Claude Code skill that wraps the tokenometer CLI for prompt-cost-aware agents.", "license": "MIT", "author": "Faraazuddin Mohammed ", "homepage": "https://tokenometer.vercel.app", @@ -15,13 +15,23 @@ }, "private": true, "keywords": [ + "anthropic", "claude", "claude-code", "claude-code-skill", + "cohere", + "gemini", + "gpt-4o", + "latency", + "llm", "llm-cost", + "mistral", + "openai", "prompt-cost", + "prompt-engineering", "skill", - "tokenometer" + "tokenometer", + "vision-tokens" ], "files": ["SKILL.md", "README.md", "install.sh"], "scripts": { diff --git a/packages/cli/README.md b/packages/cli/README.md index c77424e..83d69c0 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -3,7 +3,7 @@ [![npm tokenometer](https://img.shields.io/npm/v/tokenometer.svg?label=tokenometer)](https://www.npmjs.com/package/tokenometer) [![License: MIT](https://img.shields.io/github/license/faraa2m/tokenometer.svg)](https://github.com/faraa2m/tokenometer/blob/main/LICENSE) -> Empirical token-cost benchmarking for LLM prompts. Tells you what your prompt actually costs across Claude, GPT-4o, and Gemini, in every format. +> Empirical token-cost + latency benchmarking for LLM prompts. Tells you what your prompt actually costs and how fast each provider responds across Claude, GPT-4o, Gemini, Mistral, and Cohere — in every format. See the [root README](https://github.com/faraa2m/tokenometer#readme) for findings, methodology, and the full project overview. 
@@ -25,17 +25,57 @@ Cheapest: gpt-4o as json ($0.000192) Priciest: claude-opus-4-7 as yaml ($0.001260, 6.74x more) ``` -A leading `~` marks an approximate count (offline mode for Claude / Gemini, since neither vendor publishes a public tokenizer). +A leading `~` marks an approximate count (offline mode for Claude / Gemini / Mistral-Tekken / Cohere, since none of those vendors publishes a public production tokenizer that ships in JS). + +## Flags + +| Flag | Default | Notes | +|---|---|---| +| `--model <ids>` | `claude-opus-4-7` (or auto-detected) | Any registered model id (63 across 5 providers). | +| `--format <list>` | `json,yaml,xml,markdown,text` | Subset of supported formats. | +| `--output <format>` | `table` | `table` \| `json` \| `sarif`. | +| `--by-file` | _off_ | Append a per-file token/USD table (multi-file only). | +| `--image <path>` | _none_ | Add vision-token cost for the image (repeatable). | +| `--config <path>` | _none_ | Load this exact config file (skips walk-up). | +| `--no-config` | _off_ | Skip `.tokenometer.yml` loading entirely. | +| `--empirical` | _off_ | Use provider `countTokens` APIs (free, exact). | +| `--latency` | _off_ | Measure real generation latency (TTFT, total ms, tokens/sec). Implies `--empirical`. | +| `--latency-trials <n>` | `3` | Trials per cell when `--latency` is set (1–10). | +| `--max-spend <usd>` | `0.05` (or `0.25` with `--latency`) | Hard ceiling for empirical / latency mode. | +| `--offline` | _off_ | Force offline path (overrides `--empirical`). | +| `-h`, `--help` | | Print help. | +| `-v`, `--version` | | Print version. | + +``` +tokenometer <file...> [options] +echo "prompt" | tokenometer - [options] +``` + +## Models supported + +63 models across 5 providers. Run `tokenometer --help` for the full list at runtime, or browse the [Cost Atlas](https://tokenometer.vercel.app/models) for sortable per-model pages. 
+ +| Provider | Examples | Offline tokenizer | Empirical | +|---|---|---|---| +| Anthropic | `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5`, Claude 3.x family | `gpt-tokenizer` `cl100k_base` (approximate) | `messages.countTokens` (free, exact) | +| OpenAI | `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-3.5-turbo`, `o1` family | `gpt-tokenizer` `o200k_base` (exact) | same `o200k_base` (matches production) | +| Google | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-1.5-pro`, `gemini-1.5-flash` | `chars / 4` (approximate) | `model.countTokens` (free, exact) | +| Mistral (19 models) | `open-mistral-7b`, `open-mixtral-8x22b`, `mistral-large-latest`, `codestral-latest`, `mistral-nemo`, `pixtral-large-latest`, `mistral-medium-2505`, `magistral-small`, `ministral-3b-latest`, `devstral-small-2505` | `mistral-tokenizer-js` for SentencePiece V1/V2/V3 (exact); `chars/4` for Tekken (approximate) | unsupported (no public token-count API) | +| Cohere | `command-r`, `command-r-plus` | `chars / 4` (approximate) | `POST /v1/tokenize` (free, exact, requires `COHERE_API_KEY`) | + +Pricing comes from the [`tokenlens`](https://www.npmjs.com/package/tokenlens) registry with a small set of local overrides for bleeding-edge models. Cohere pricing lives entirely in `LOCAL_OVERRIDES` because `@tokenlens/models` doesn't yet ship a Cohere catalog at v1.3.0. ## Empirical mode -For exact, vendor-billed counts on Claude and Gemini, set the right env var and pass `--empirical`. The tool calls the providers' free `countTokens` endpoints — no charge. +For exact, vendor-billed counts on Claude, Gemini, and Cohere, set the right env var and pass `--empirical`. The tool calls each provider's free `countTokens`-equivalent endpoint — no charge. 
```bash -ANTHROPIC_API_KEY=… GOOGLE_API_KEY=… \ - npx tokenometer ./prompt.md --empirical +ANTHROPIC_API_KEY=… GOOGLE_API_KEY=… COHERE_API_KEY=… \ + npx tokenometer ./prompt.md --empirical --model claude-opus-4-7,gemini-2.5-pro,command-r-plus ``` +OpenAI's empirical path uses the `o200k_base` encoding locally (via `gpt-tokenizer`) — that encoding matches OpenAI's production count exactly, so no API call is needed. Mistral has no public token-count endpoint; the offline path (`mistral-tokenizer-js` for SentencePiece models, `chars/4` for Tekken models) is used regardless. + ## Auto provider detection When `--model` is omitted, tokenometer picks a default based on which provider key is set in your environment: @@ -43,6 +83,8 @@ When `--model` is omitted, tokenometer picks a default based on which provider k - `ANTHROPIC_API_KEY` only → `claude-opus-4-7` - `OPENAI_API_KEY` only → `gpt-4o` - `GOOGLE_API_KEY` / `GEMINI_API_KEY` only → first known `gemini-*` model (falls back to `gemini-2.5-pro`) +- `MISTRAL_API_KEY` only → first known `mistral-*` model +- `COHERE_API_KEY` only → `command-r-plus` - Multiple keys set → falls back to `claude-opus-4-7` and prints a stderr note. Pass `--model` to disambiguate. - No keys set → existing default (`claude-opus-4-7`). @@ -53,7 +95,7 @@ This means `npx tokenometer prompt.md` does the right thing in any of those envi Drop a `.tokenometer.yml` (or `.yaml`) at the project root and tokenometer will pick it up automatically (walks up from the cwd, stopping at `.git`): ```yaml -models: [claude-opus-4-7, gpt-4o] +models: [claude-opus-4-7, gpt-4o, mistral-large-latest] formats: [json, yaml, markdown] paths: [prompts/**/*.md] budgets: @@ -76,7 +118,7 @@ npx tokenometer ./prompt.md --output sarif > tokenometer.sarif npx tokenometer ./prompt.md --output json | jq '.files[].results | map(.inputCost) | add' ``` -### Latency +## Latency `--latency` measures real generation latency in addition to token cost. 
For each `(model, format)` cell, tokenometer streams `n` real chat completions (default `n=3`, override with `--latency-trials 1..10`) capped at `max_tokens=200`, and reports: @@ -107,7 +149,7 @@ By file: prompts/router.md 872 $0.0131 ``` -Useful for figuring out which prompt files dominate the cost of a multi-file pipeline. +Useful for figuring out which prompt files dominate the cost of a multi-file pipeline. The aggregator that produces this table is also what powers the GitHub Action's per-file Δ comment, and is unit-tested in [`packages/action`](https://github.com/faraa2m/tokenometer/tree/main/packages/action). ## Vision tokens @@ -123,34 +165,12 @@ Each image's dimensions are read with `image-size` (no native deps), then dispat - GPT-4o → OpenAI's high-detail tiling: `85 + 170 × ceil(w/512) × ceil(h/512)` after the 2048/768 resize step. - Gemini → Google's `258 × ceil(w/768) × ceil(h/768)` (with a flat 258 for ≤384×384 images). -Vision-token cells are always marked `approximate: true` since they're formula-derived. Each image also gets its own row in the `--by-file` table as a virtual file ` [vision]`. +Mistral and Cohere don't have published vision-token formulas, so vision images are skipped for those providers (with a stderr note). Vision-token cells are always marked `approximate: true` since they're formula-derived. Each image also gets its own row in the `--by-file` table as a virtual file ` [vision]`. ## Why not just `tiktoken`? `tiktoken`'s `cl100k_base` (the encoding most "Claude tokenizer" libraries fall back on) **under-counts Opus 4.7 by a median of +62%** across a 10-prompt benchmark. Sonnet 4.6 and Haiku 4.5 are closer (~17%). Format choice is a wash. Model choice swings cost by 12×. See [README](https://github.com/faraa2m/tokenometer#findings-anthropic-n150-cells-across-10-prompt-shapes) for the dataset findings. 
-## Flags - -``` -tokenometer [options] -echo "prompt" | tokenometer - [options] - ---model Default: claude-opus-4-7, or auto-detected from env ---format Default: all (json,yaml,xml,markdown,text) ---output table (default) | json | sarif ---by-file Append a per-file token/cost table (multi-file only) ---image Add vision-token cost for the image (repeatable) ---config Load this exact config file ---no-config Skip .tokenometer.yml loading ---empirical Use provider countTokens APIs (free, exact) ---latency Measure real generation latency (TTFT, total ms, tokens/s) ---latency-trials Trials per cell when --latency is set (1-10, default 3) ---max-spend Hard ceiling for empirical mode (default 0.05; with --latency, 0.25) ---offline Force offline (overrides --empirical) --h, --help --v, --version -``` - ## License MIT diff --git a/packages/cli/package.json b/packages/cli/package.json index 029cce4..9d271cd 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,7 +1,7 @@ { "name": "tokenometer", "version": "0.0.2", - "description": "Tokenometer CLI — measure prompt token cost across Claude, GPT-4o, Gemini. Multi-provider, multi-format, empirical mode.", + "description": "Tokenometer CLI — LLM token cost + latency benchmarking across Claude, GPT-4o, Gemini, Mistral, and Cohere. 
Multi-format, empirical mode, vision tokens, SARIF output.", "license": "MIT", "author": "Faraazuddin Mohammed ", "homepage": "https://tokenometer.vercel.app", @@ -18,24 +18,44 @@ "anthropic", "ci-cd", "claude", + "claude-code", + "claude-code-skill", "cli", + "code-scanning", + "codestral", + "cohere", + "command-r", "cost", "cost-calculator", + "cursor", "gemini", "github-action", "gpt", "gpt-4o", + "latency", "llm", "llm-cost", + "mistral", + "mistral-7b", + "mistral-large", + "mixtral", + "model-comparison", + "multimodal", "openai", + "pixtral", "prompt", "prompt-cost", + "prompt-cost-regression", "prompt-engineering", "prompt-regression", + "sarif", "tiktoken", "token", "token-budget", - "tokenizer" + "tokenizer", + "ttft", + "vision-tokens", + "vscode" ], "type": "module", "main": "./dist/index.js", diff --git a/packages/core/README.md b/packages/core/README.md index 1ee4e11..e0463ee 100644 --- a/packages/core/README.md +++ b/packages/core/README.md @@ -3,32 +3,81 @@ [![npm @tokenometer/core](https://img.shields.io/npm/v/@tokenometer/core.svg?label=@tokenometer/core)](https://www.npmjs.com/package/@tokenometer/core) [![License: MIT](https://img.shields.io/github/license/faraa2m/tokenometer.svg)](https://github.com/faraa2m/tokenometer/blob/main/LICENSE) -> Core library powering [tokenometer](https://www.npmjs.com/package/tokenometer): tokenizer dispatch, format converters, versioned cost rate matrix, and an empirical-mode `countTokens` adapter for Anthropic, OpenAI, and Google. +> Core library powering [tokenometer](https://www.npmjs.com/package/tokenometer): tokenizer dispatch, format converters, versioned cost rate matrix, vision-token estimators, latency measurement, SARIF emitter, config loader, and an empirical-mode `countTokens` adapter for Anthropic, OpenAI, Google, and Cohere (Mistral has no public token-count endpoint, so it always takes the offline path). See the [root README](https://github.com/faraa2m/tokenometer#readme) for findings, methodology, and the full project overview. 
[**Live playground**](https://tokenometer.vercel.app) · [Source](https://github.com/faraa2m/tokenometer) · MIT -If you just want a CLI, `npm install -g tokenometer`. This package is for programmatic use. +If you just want a CLI, `npm install -g tokenometer`. This package is for programmatic use — it's the engine the CLI, the GitHub Action, the VS Code / Cursor extension, and the playground all share, so counts and pricing stay identical across every surface. ## API ```ts import { + // Core tokenization tokenize, tokenizeMatrix, + countTokens, + // Empirical (real provider countTokens / tokenize endpoints) tokenizeEmpirical, tokenizeMatrixEmpirical, - countTokens, + // Latency benchmarking + measureLatency, + nthPercentile, + // Format conversion toFormat, isFormat, allFormats, + // Config (.tokenometer.yml) + loadConfig, + parseConfig, + // SARIF + JSON emitter + toSarif, + // Vision-token estimators + anthropicVisionTokens, + openaiVisionTokens, + googleVisionTokens, + // Pricing / model registry KNOWN_MODELS, + MODELS, RATES, RATES_VERSION, getModel, getRate, } from '@tokenometer/core'; + +import type { + // Token results + CountResult, + TokenizeResult, + EmpiricalResult, + EmpiricalCountResult, + EmpiricalEnv, + // Latency + LatencyResult, + LatencyTrial, + LatencyStats, + LatencyDeps, + MeasureLatencyOptions, + // Aggregates / formatters + TokenometerResult, + TokenometerFileResult, + ToSarifOptions, + // Config + TokenometerConfig, + ConfigFormat, + // Vision input shapes + AnthropicVisionInput, + OpenAIVisionInput, + GoogleVisionInput, + // Registry + ModelDescriptor, + Provider, + RateEntry, + Format, + TokenizerKind, +} from '@tokenometer/core'; ``` ### Offline (deterministic, no API key) @@ -62,9 +111,54 @@ const result = await tokenizeEmpirical({ // approximate: false ← uses Anthropic's messages.countTokens ``` +### Latency benchmarking + +```ts +const stats = await measureLatency({ + modelId: 'claude-opus-4-7', + prompt: 'Write a haiku about CI.', + 
trials: 3, + env: { anthropicApiKey: process.env.ANTHROPIC_API_KEY! }, +}); +// LatencyResult: { trials: LatencyTrial[], stats: { ttftMs, totalMs, tokensPerSec } } +// Each stat is { p50, p95, mean }. +``` + +Supported providers: Anthropic (`messages.stream`), OpenAI (`/v1/chat/completions` SSE), Google (`generateContentStream`), Cohere (`/v1/chat` NDJSON), Mistral (`/v1/chat/completions` SSE). Each call is capped at `max_tokens=200`; trials retry once on transient failures. + +### Vision tokens + +```ts +const tokens = anthropicVisionTokens({ width: 1280, height: 720 }); +// 1228 (capped at 1600 for very large images) +``` + +The `openaiVisionTokens` and `googleVisionTokens` exports are formula-equivalent to the OpenAI high-detail tile cost and Gemini's 258-per-768²-tile cost respectively. + +### SARIF + JSON output + +```ts +const sarif = toSarif({ files: [{ path: 'prompt.md', results: [...] }] }); +// SARIF 2.1.0 — drop into GitHub Code Scanning or any SARIF viewer. +``` + ### Rate table -`RATES` is a `Record`. `RATES_VERSION` ships as a date string so consumers can pin or audit. +`RATES` is a `Record`. `RATES_VERSION` ships as a date string so consumers can pin or audit. `KNOWN_MODELS` is the union (currently 63 across 5 providers). 
+ +## Providers + +| Provider | Models | Offline tokenizer | Exactness | Empirical (`tokenizeEmpirical`) | +|-----------|--------|-------------------|-----------|----------------------------------| +| OpenAI | `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-3.5-turbo`, `o1` family | `gpt-tokenizer` `o200k_base` | exact | same `o200k_base` (matches production) | +| Anthropic | `claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5`, Claude 3.x family | `gpt-tokenizer` `cl100k_base` | approximate | `messages.countTokens` (free, exact) | +| Google | `gemini-2.5-pro`, `gemini-2.5-flash`, `gemini-1.5-pro`, `gemini-1.5-flash` | `chars / 4` heuristic | approximate | `model.countTokens` (free, exact) | +| Mistral (19 models) | `open-mistral-7b`, `open-mixtral-8x22b`, `mistral-large-latest`, `codestral-latest`, `mistral-nemo`, `pixtral-large-latest`, `mistral-medium-2505`, `magistral-small`, `ministral-3b-latest`, `devstral-small-2505` | `mistral-tokenizer-js` (V1/V2/V3 SentencePiece); `chars/4` for Tekken family (NeMo, Pixtral, Mistral Small 2409+, Devstral, Mistral Medium 2505+, Magistral, Ministral) | exact for SentencePiece · approximate for Tekken | unsupported (no public token-count endpoint) | +| Cohere | `command-r`, `command-r-plus` | `chars / 4` heuristic | approximate | `POST /v1/tokenize` (free, exact, requires `COHERE_API_KEY`) | + +Pricing comes from `@tokenlens/models` plus a small `LOCAL_OVERRIDES` map for bleeding-edge models the registry hasn't picked up yet. Cohere lives entirely in `LOCAL_OVERRIDES` because `@tokenlens/models` does not yet ship a Cohere catalog at v1.3.0; pull from `cohere.com/pricing` whenever `RATES_VERSION` bumps. + +Internally the dispatch helpers `mistralCount`, `cohereCount`, `cohereTokenizeApi`, and `isTekken` (in `tokenize-mistral.ts` / `tokenize-cohere.ts`) are not part of the public API — they're called from `tokenize` / `tokenizeEmpirical`. If you need them, import the files directly; they may move. 
## License diff --git a/packages/core/package.json b/packages/core/package.json index 7876808..9d5c87b 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,7 +1,7 @@ { "name": "@tokenometer/core", "version": "0.0.2", - "description": "Tokenometer core library — multi-provider LLM token cost estimation with empirical countTokens fallback.", + "description": "Tokenometer core library — multi-provider LLM token cost, latency, and vision-token estimation with empirical countTokens fallback (Anthropic, OpenAI, Google, Mistral, Cohere).", "license": "MIT", "author": "Faraazuddin Mohammed ", "homepage": "https://tokenometer.vercel.app", @@ -19,24 +19,44 @@ "ci-cd", "cl100k", "claude", + "claude-code", + "claude-code-skill", + "code-scanning", + "codestral", + "cohere", + "command-r", "cost", "cost-calculator", + "cursor", "gemini", "github-action", "gpt", "gpt-4o", + "latency", "llm", "llm-cost", + "mistral", + "mistral-7b", + "mistral-large", + "mixtral", + "model-comparison", + "multimodal", "o200k", "openai", + "pixtral", "prompt", "prompt-cost", + "prompt-cost-regression", "prompt-engineering", "prompt-regression", + "sarif", "tiktoken", "token", "token-budget", - "tokenizer" + "tokenizer", + "ttft", + "vision-tokens", + "vscode" ], "type": "module", "main": "./dist/index.js", diff --git a/packages/vscode/README.md b/packages/vscode/README.md index d3d3229..85c7765 100644 --- a/packages/vscode/README.md +++ b/packages/vscode/README.md @@ -10,12 +10,16 @@ Most "token counter" extensions stop at counts. Tokenometer adds the dollar valu ## Install -Once published: +### Marketplace (coming with v1.0.0) -- **VS Code Marketplace:** _coming soon_ -- **Open VSX (Cursor / VSCodium):** _coming soon_ +The Marketplace install button arrives with the v1.0.0 cut — Phase I owns that publish step. Track it on the [milestones page](https://github.com/faraa2m/tokenometer/milestones). 
-While you're waiting, you can build a `.vsix` locally from this repo: +- **VS Code Marketplace:** _coming with v1.0.0_ +- **Open VSX (Cursor / VSCodium):** _coming with v1.0.0_ + +### Build locally now (`.vsix`) + +While the Marketplace listing is in flight, build a `.vsix` from this repo and side-load it: ```bash npm install @@ -24,6 +28,8 @@ npm run package:vsix --workspace=@tokenometer/vscode code --install-extension packages/vscode/tokenometer-*.vsix ``` +The same `.vsix` works in Cursor and VSCodium. + ## What it shows The status bar (right side) shows three things, separated by ` · `: diff --git a/packages/vscode/package.json b/packages/vscode/package.json index 52dd8b9..8c6615c 100644 --- a/packages/vscode/package.json +++ b/packages/vscode/package.json @@ -2,7 +2,7 @@ "name": "@tokenometer/vscode", "version": "0.0.2", "displayName": "Tokenometer", - "description": "Live token count and USD cost for LLM prompts in the editor (Claude, GPT-4o, Gemini).", + "description": "Tokenometer for VS Code / Cursor — live token count and USD cost in the status bar for LLM prompts (Claude, GPT-4o, Gemini, Mistral, Cohere).", "publisher": "faraa2m", "license": "MIT", "private": true, @@ -21,15 +21,29 @@ }, "categories": ["Programming Languages", "Other"], "keywords": [ - "llm", - "tokens", - "cost", + "ai", + "anthropic", "claude", - "gpt", + "cohere", + "command-r", + "cost", + "cost-calculator", + "cursor", "gemini", + "gpt", + "gpt-4o", + "llm", + "llm-cost", + "mistral", + "multimodal", + "openai", + "prompt", + "prompt-cost", + "prompt-engineering", + "token", "tokenizer", - "ai", - "prompt-engineering" + "tokens", + "vscode" ], "activationEvents": ["onStartupFinished"], "main": "./dist/extension.js", diff --git a/packages/web/package.json b/packages/web/package.json index 6dca8c4..46757f8 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -1,7 +1,7 @@ { "name": "@tokenometer/web", "version": "0.0.2", - "description": "Tokenometer playground — 
calculator, vision-cost, prompt-diff, model atlas.", + "description": "Tokenometer playground — calculator, vision-token cost, prompt diff, SARIF, model atlas, init wizard.", "license": "MIT", "private": true, "type": "module", diff --git a/vercel.json b/vercel.json index 3f4a6d7..f3fbb57 100644 --- a/vercel.json +++ b/vercel.json @@ -3,5 +3,8 @@ "framework": "vite", "installCommand": "npm install", "buildCommand": "npm run build && npm run build -w @tokenometer/web", - "outputDirectory": "packages/web/dist" + "outputDirectory": "packages/web/dist", + "rewrites": [ + { "source": "/:path((?!assets/|robots.txt|sitemap.xml).*)", "destination": "/index.html" } + ] }