From 459cfcbacdefba5239e441ef062adf4066b87373 Mon Sep 17 00:00:00 2001
From: Faraazuddin Mohammed <mohdfaraaz1@gmail.com>
Date: Thu, 7 May 2026 02:55:16 -0400
Subject: [PATCH] chore: prep packages/core + packages/cli for public npm
 publish

Adds the metadata npmjs.com renders on package pages and that npm clients use for resolution:

- description, author, homepage (= tokenometer.vercel.app), repository (with directory pointer for monorepo), bugs, keywords
- publishConfig.access = public on both (required for the scoped @tokenometer/core; harmless on tokenometer)
- publishConfig.registry pinned to public registry (overrides any host ~/.npmrc that points to a private mirror)
- per-package READMEs that show install + at-a-glance usage (npm renders these on the package page)

Tarball sizes after polish:
- @tokenometer/core: 8.8 kB packed / 34.0 kB unpacked / 26 files
- tokenometer:        6.8 kB packed / 23.6 kB unpacked / 14 files

After this merges, the publish flow is: run `npm login` once, then `npm publish` from each package directory.
---
 packages/cli/README.md     | 55 +++++++++++++++++++++++++++++++
 packages/cli/package.json  | 44 +++++++++++++++++--------
 packages/core/README.md    | 66 ++++++++++++++++++++++++++++++++++++++
 packages/core/package.json | 31 +++++++++++++++++-
 4 files changed, 181 insertions(+), 15 deletions(-)
 create mode 100644 packages/cli/README.md
 create mode 100644 packages/core/README.md
diff --git a/packages/cli/README.md b/packages/cli/README.md
new file mode 100644
index 0000000..664f462
--- /dev/null
+++ b/packages/cli/README.md
@@ -0,0 +1,55 @@
+# tokenometer
+
+> Empirical token-cost benchmarking for LLM prompts. Tells you what your prompt actually costs across Claude, GPT-4o, and Gemini, in every format.
+
+[**Live playground: tokenometer.vercel.app**](https://tokenometer.vercel.app) · [Source](https://github.com/faraa2m/tokenometer) · MIT
+
+```bash
+npx tokenometer ./prompt.md --model claude-opus-4-7,gpt-4o
+```
+
+```
+model            format    tokens  est. cost  tokenizer
+---------------  --------  ------  ---------  --------------
+claude-opus-4-7  json         ~78  $0.001170  cl100k_base
+claude-opus-4-7  yaml         ~84  $0.001260  cl100k_base
+gpt-4o           json          77  $0.000192  o200k_base
+gpt-4o           yaml          83  $0.000208  o200k_base
+
+Cheapest: gpt-4o as json ($0.000192)
+Priciest: claude-opus-4-7 as yaml ($0.001260, 6.56x more)
+```
+
+A leading `~` marks an approximate count. In offline mode, Claude and Gemini counts are estimates, since neither vendor publishes a public tokenizer.
+
+## Empirical mode
+
+For exact, vendor-billed counts on Claude and Gemini, set the matching API key (`ANTHROPIC_API_KEY` for Claude, `GOOGLE_API_KEY` for Gemini) and pass `--empirical`. The tool calls the providers' free `countTokens` endpoints, so there is no charge.
+
+```bash
+ANTHROPIC_API_KEY=… GOOGLE_API_KEY=… \
+  npx tokenometer ./prompt.md --empirical
+```
+
+## Why not just `tiktoken`?
+
+`tiktoken`'s `cl100k_base` (the encoding most "Claude tokenizer" libraries fall back on) **under-counts Opus 4.7 by a median of 62%** across a 10-prompt benchmark. Sonnet 4.6 and Haiku 4.5 are closer (~17%). Format choice makes little difference; model choice swings cost by 12×. See the [project README](https://github.com/faraa2m/tokenometer#findings-anthropic-n150-cells-across-10-prompt-shapes) for the dataset findings.
+
+## Flags
+
+```
+tokenometer <file> [options]
+echo "prompt" | tokenometer - [options]
+
+--model <id[,id…]>     Default: claude-opus-4-7
+--format <fmt[,fmt…]>  Default: all (json,yaml,xml,markdown,text)
+--empirical            Use provider countTokens APIs (free, exact)
+--max-spend <usd>      Hard ceiling for empirical mode (default 0.05)
+--offline              Force offline (overrides --empirical)
+-h, --help
+-v, --version
+```
+
+## License
+
+MIT
diff --git a/packages/cli/package.json b/packages/cli/package.json
index 5eaca1e..c4de1d4 100644
--- a/packages/cli/package.json
+++ b/packages/cli/package.json
@@ -1,8 +1,32 @@
 {
   "name": "tokenometer",
   "version": "0.0.1",
-  "description": "Empirical token-cost benchmarking CLI for LLM prompts.",
+  "description": "Empirical token-cost benchmarking CLI for LLM prompts. Tells you what your prompt actually costs across Claude, GPT-4o, and Gemini, in every format.",
   "license": "MIT",
+  "author": "Faraazuddin Mohammed <mohdfaraaz1@gmail.com>",
+  "homepage": "https://tokenometer.vercel.app",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/faraa2m/tokenometer.git",
+    "directory": "packages/cli"
+  },
+  "bugs": {
+    "url": "https://github.com/faraa2m/tokenometer/issues"
+  },
+  "keywords": [
+    "ai",
+    "anthropic",
+    "claude",
+    "cli",
+    "cost",
+    "gemini",
+    "gpt",
+    "llm",
+    "openai",
+    "prompt",
+    "token",
+    "tokenizer"
+  ],
   "type": "module",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -16,6 +40,10 @@
     }
   },
   "files": ["dist", "README.md"],
+  "publishConfig": {
+    "access": "public",
+    "registry": "https://registry.npmjs.org/"
+  },
   "scripts": {
     "build": "tsc -b",
     "clean": "rm -rf dist"
@@ -27,17 +55,5 @@
     "@types/node": "^22.10.5",
     "typescript": "^5.7.2",
     "vitest": "^3.0.0"
-  },
-  "keywords": [
-    "ai",
-    "anthropic",
-    "claude",
-    "cost",
-    "gpt",
-    "llm",
-    "openai",
-    "prompt",
-    "token",
-    "tokenizer"
-  ]
+  }
 }
diff --git a/packages/core/README.md b/packages/core/README.md
new file mode 100644
index 0000000..ec45fc3
--- /dev/null
+++ b/packages/core/README.md
@@ -0,0 +1,66 @@
+# @tokenometer/core
+
+> Core library powering [tokenometer](https://www.npmjs.com/package/tokenometer): tokenizer dispatch, format converters, versioned cost rate matrix, and an empirical-mode `countTokens` adapter for Anthropic, OpenAI, and Google.
+
+[**Live playground**](https://tokenometer.vercel.app) · [Source](https://github.com/faraa2m/tokenometer) · MIT
+
+If you just want the CLI, run `npm install -g tokenometer`. This package is for programmatic use.
+
+## API
+
+```ts
+import {
+  tokenize,
+  tokenizeMatrix,
+  tokenizeEmpirical,
+  tokenizeMatrixEmpirical,
+  countTokens,
+  toFormat,
+  isFormat,
+  allFormats,
+  KNOWN_MODELS,
+  RATES,
+  RATES_VERSION,
+  getModel,
+  getRate,
+} from '@tokenometer/core';
+```
+
+### Offline (deterministic, no API key)
+
+```ts
+const result = tokenize({
+  prompt: '{"hello": "world"}',
+  format: 'yaml',
+  modelId: 'claude-opus-4-7',
+});
+// {
+//   model: 'claude-opus-4-7',
+//   provider: 'anthropic',
+//   format: 'yaml',
+//   tokenizer: 'cl100k_base',
+//   inputTokens: 12,
+//   inputCost: 0.00018,
+//   approximate: true   // ← Anthropic does not publish a public Claude 3+ tokenizer
+// }
+```
+
+### Empirical (real provider counts, free)
+
+```ts
+const result = await tokenizeEmpirical({
+  prompt: '{"hello": "world"}',
+  format: 'yaml',
+  modelId: 'claude-opus-4-7',
+  env: { anthropicApiKey: process.env.ANTHROPIC_API_KEY! },
+});
+// approximate: false  ← uses Anthropic's messages.countTokens
+```
+
+### Rate table
+
+`RATES` is a `Record<modelId, { inputPer1k, outputPer1k, cachedInputPer1k? }>`. `RATES_VERSION` ships as a date string so consumers can pin or audit.
+
+## License
+
+MIT
diff --git a/packages/core/package.json b/packages/core/package.json
index 4507f78..d83093f 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -1,8 +1,33 @@
 {
   "name": "@tokenometer/core",
   "version": "0.0.1",
-  "description": "Core: tokenizer dispatch, format conversion, cost rate matrix.",
+  "description": "Empirical token-cost benchmarking for LLM prompts — core library (tokenizers, format converters, rate matrix, empirical countTokens dispatch).",
   "license": "MIT",
+  "author": "Faraazuddin Mohammed <mohdfaraaz1@gmail.com>",
+  "homepage": "https://tokenometer.vercel.app",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/faraa2m/tokenometer.git",
+    "directory": "packages/core"
+  },
+  "bugs": {
+    "url": "https://github.com/faraa2m/tokenometer/issues"
+  },
+  "keywords": [
+    "ai",
+    "anthropic",
+    "claude",
+    "cl100k",
+    "cost",
+    "gemini",
+    "gpt",
+    "llm",
+    "o200k",
+    "openai",
+    "prompt",
+    "token",
+    "tokenizer"
+  ],
   "type": "module",
   "main": "./dist/index.js",
   "types": "./dist/index.d.ts",
@@ -13,6 +38,10 @@
     }
   },
   "files": ["dist", "README.md"],
+  "publishConfig": {
+    "access": "public",
+    "registry": "https://registry.npmjs.org/"
+  },
   "scripts": {
     "build": "tsc -b",
     "clean": "rm -rf dist"