-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdebug_tokens.py
More file actions
25 lines (19 loc) · 842 Bytes
/
debug_tokens.py
File metadata and controls
25 lines (19 loc) · 842 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import json
import tiktoken
def count_tokens_in_file(file_path):
    """Estimate the LLM token cost of a JSON file's serialized payload.

    Loads the JSON at *file_path*, re-serializes it with ``json.dumps`` to
    approximate the string that would be sent to the LLM, and prints the
    character and token counts.

    Args:
        file_path: Path to the JSON file to measure.

    Returns:
        The approximate token count as an int, or ``None`` if the file
        could not be read or parsed.
    """
    try:
        # JSON is UTF-8 per RFC 8259; pin the encoding rather than relying
        # on the platform default.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Expected failure modes only (missing/unreadable file, bad JSON);
        # programming errors propagate instead of being silently printed.
        print(f"Error: {e}")
        return None
    # Approximate the string representation that would be sent to the LLM.
    # This includes all keys and values as a JSON string.
    text_payload = json.dumps(data)
    # cl100k_base is a common encoding for modern LLMs (GPT-4 family;
    # roughly comparable to Gemini's tokenizer for estimation purposes).
    encoding = tiktoken.get_encoding("cl100k_base")
    token_count = len(encoding.encode(text_payload))
    print(f"File: {file_path}")
    print(f"Approximate Character Count: {len(text_payload)}")
    print(f"Approximate Token Count: {token_count}")
    return token_count
if __name__ == "__main__":
    # Script entry point: report token usage for the backend request payload.
    target_path = "backend/request_body.json"
    count_tokens_in_file(target_path)