From 459cfcbacdefba5239e441ef062adf4066b87373 Mon Sep 17 00:00:00 2001 From: Faraazuddin Mohammed Date: Thu, 7 May 2026 02:55:16 -0400 Subject: [PATCH] chore: prep packages/core + packages/cli for public npm publish Adds the metadata npmjs.com renders on package pages and that npm clients use for resolution: - description, author, homepage (= tokenometer.vercel.app), repository (with directory pointer for monorepo), bugs, keywords - publishConfig.access = public on both (required for the scoped @tokenometer/core; harmless on tokenometer) - publishConfig.registry pinned to public registry (overrides any host ~/.npmrc that points to a private mirror) - per-package READMEs that show install + 1-glance usage (npm renders these on the package page) Tarball sizes after polish: - @tokenometer/core: 8.8 kB packed / 34.0 kB unpacked / 26 files - tokenometer: 6.8 kB packed / 23.6 kB unpacked / 14 files After this merges, publish flow is `npm login` once, then `npm publish` from each package dir. --- packages/cli/README.md | 55 +++++++++++++++++++++++++++++++ packages/cli/package.json | 44 +++++++++++++++++-------- packages/core/README.md | 66 ++++++++++++++++++++++++++++++++++++++ packages/core/package.json | 31 +++++++++++++++++- 4 files changed, 181 insertions(+), 15 deletions(-) create mode 100644 packages/cli/README.md create mode 100644 packages/core/README.md diff --git a/packages/cli/README.md b/packages/cli/README.md new file mode 100644 index 0000000..664f462 --- /dev/null +++ b/packages/cli/README.md @@ -0,0 +1,55 @@ +# tokenometer + +> Empirical token-cost benchmarking for LLM prompts. Tells you what your prompt actually costs across Claude, GPT-4o, and Gemini, in every format. + +[**Live playground: tokenometer.vercel.app**](https://tokenometer.vercel.app) · [Source](https://github.com/faraa2m/tokenometer) · MIT + +```bash +npx tokenometer ./prompt.md --model claude-opus-4-7,gpt-4o +``` + +``` +model format tokens est. 
cost tokenizer +--------------- -------- ------ --------- -------------- +claude-opus-4-7 json ~78 $0.001170 cl100k_base +claude-opus-4-7 yaml ~84 $0.001260 cl100k_base +gpt-4o json 77 $0.000192 o200k_base +gpt-4o yaml 83 $0.000208 o200k_base + +Cheapest: gpt-4o as json ($0.000192) +Priciest: claude-opus-4-7 as yaml ($0.001260, 6.56x more) +``` + +A leading `~` marks an approximate count (offline mode for Claude / Gemini, since neither vendor publishes a public tokenizer). + +## Empirical mode + +For exact, vendor-billed counts on Claude and Gemini, set the right env var and pass `--empirical`. The tool calls the providers' free `countTokens` endpoints — no charge. + +```bash +ANTHROPIC_API_KEY=… GOOGLE_API_KEY=… \ + npx tokenometer ./prompt.md --empirical +``` + +## Why not just `tiktoken`? + +`tiktoken`'s `cl100k_base` (the encoding most "Claude tokenizer" libraries fall back on) **under-counts Opus 4.7 by a median of +62%** across a 10-prompt benchmark. Sonnet 4.6 and Haiku 4.5 are closer (~17%). Format choice is a wash. Model choice swings cost by 12×. See [README](https://github.com/faraa2m/tokenometer#findings-anthropic-n150-cells-across-10-prompt-shapes) for the dataset findings. + +## Flags + +``` +tokenometer [options] +echo "prompt" | tokenometer - [options] + +--model Default: claude-opus-4-7 +--format Default: all (json,yaml,xml,markdown,text) +--empirical Use provider countTokens APIs (free, exact) +--max-spend Hard ceiling for empirical mode (default 0.05) +--offline Force offline (overrides --empirical) +-h, --help +-v, --version +``` + +## License + +MIT diff --git a/packages/cli/package.json b/packages/cli/package.json index 5eaca1e..c4de1d4 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -1,8 +1,32 @@ { "name": "tokenometer", "version": "0.0.1", - "description": "Empirical token-cost benchmarking CLI for LLM prompts.", + "description": "Empirical token-cost benchmarking CLI for LLM prompts. 
Tells you what your prompt actually costs across Claude, GPT-4o, and Gemini, in every format.", "license": "MIT", + "author": "Faraazuddin Mohammed ", + "homepage": "https://tokenometer.vercel.app", + "repository": { + "type": "git", + "url": "git+https://github.com/faraa2m/tokenometer.git", + "directory": "packages/cli" + }, + "bugs": { + "url": "https://github.com/faraa2m/tokenometer/issues" + }, + "keywords": [ + "ai", + "anthropic", + "claude", + "cli", + "cost", + "gemini", + "gpt", + "llm", + "openai", + "prompt", + "token", + "tokenizer" + ], "type": "module", "main": "./dist/index.js", "types": "./dist/index.d.ts", @@ -16,6 +40,10 @@ } }, "files": ["dist", "README.md"], + "publishConfig": { + "access": "public", + "registry": "https://registry.npmjs.org/" + }, "scripts": { "build": "tsc -b", "clean": "rm -rf dist" @@ -27,17 +55,5 @@ "@types/node": "^22.10.5", "typescript": "^5.7.2", "vitest": "^3.0.0" - }, - "keywords": [ - "ai", - "anthropic", - "claude", - "cost", - "gpt", - "llm", - "openai", - "prompt", - "token", - "tokenizer" - ] + } } diff --git a/packages/core/README.md b/packages/core/README.md new file mode 100644 index 0000000..ec45fc3 --- /dev/null +++ b/packages/core/README.md @@ -0,0 +1,66 @@ +# @tokenometer/core + +> Core library powering [tokenometer](https://www.npmjs.com/package/tokenometer): tokenizer dispatch, format converters, versioned cost rate matrix, and an empirical-mode `countTokens` adapter for Anthropic, OpenAI, and Google. + +[**Live playground**](https://tokenometer.vercel.app) · [Source](https://github.com/faraa2m/tokenometer) · MIT + +If you just want a CLI, `npm install -g tokenometer`. This package is for programmatic use. 
+ +## API + +```ts +import { + tokenize, + tokenizeMatrix, + tokenizeEmpirical, + tokenizeMatrixEmpirical, + countTokens, + toFormat, + isFormat, + allFormats, + KNOWN_MODELS, + RATES, + RATES_VERSION, + getModel, + getRate, +} from '@tokenometer/core'; +``` + +### Offline (deterministic, no API key) + +```ts +const result = tokenize({ + prompt: '{"hello": "world"}', + format: 'yaml', + modelId: 'claude-opus-4-7', +}); +// { +// model: 'claude-opus-4-7', +// provider: 'anthropic', +// format: 'yaml', +// tokenizer: 'cl100k_base', +// inputTokens: 12, +// inputCost: 0.00018, +// approximate: true // ← Anthropic does not publish a public Claude 3+ tokenizer +// } +``` + +### Empirical (real provider counts, free) + +```ts +const result = await tokenizeEmpirical({ + prompt: '{"hello": "world"}', + format: 'yaml', + modelId: 'claude-opus-4-7', + env: { anthropicApiKey: process.env.ANTHROPIC_API_KEY! }, +}); +// approximate: false ← uses Anthropic's messages.countTokens +``` + +### Rate table + +`RATES` is a `Record`. `RATES_VERSION` ships as a date string so consumers can pin or audit. 
+ +## License + +MIT diff --git a/packages/core/package.json b/packages/core/package.json index 4507f78..d83093f 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -1,8 +1,33 @@ { "name": "@tokenometer/core", "version": "0.0.1", - "description": "Core: tokenizer dispatch, format conversion, cost rate matrix.", + "description": "Empirical token-cost benchmarking for LLM prompts — core library (tokenizers, format converters, rate matrix, empirical countTokens dispatch).", "license": "MIT", + "author": "Faraazuddin Mohammed ", + "homepage": "https://tokenometer.vercel.app", + "repository": { + "type": "git", + "url": "git+https://github.com/faraa2m/tokenometer.git", + "directory": "packages/core" + }, + "bugs": { + "url": "https://github.com/faraa2m/tokenometer/issues" + }, + "keywords": [ + "ai", + "anthropic", + "claude", + "cl100k", + "cost", + "gemini", + "gpt", + "llm", + "o200k", + "openai", + "prompt", + "token", + "tokenizer" + ], "type": "module", "main": "./dist/index.js", "types": "./dist/index.d.ts", @@ -13,6 +38,10 @@ } }, "files": ["dist", "README.md"], + "publishConfig": { + "access": "public", + "registry": "https://registry.npmjs.org/" + }, "scripts": { "build": "tsc -b", "clean": "rm -rf dist"