Python · Beginner

Token Counting with tiktoken

Count tokens, split text by token limits, and estimate API costs using the tiktoken library.

Python (press ⌘/Ctrl + Shift + C to copy)

import tiktoken

# Price table keyed by model name; estimate_cost multiplies (tokens / 1000)
# by the entry for the requested model.
# NOTE(review): values are presumably USD per 1,000 tokens (going by the name
# and the '$' in the demo output) -- confirm against current provider pricing.
COST_PER_1K = {
    'gpt-4o': 0.005,
    'gpt-4o-mini': 0.00015,
    'gpt-3.5-turbo': 0.0005,
}

def count_tokens(text: str, model: str = 'gpt-4o-mini') -> int:
    """Return the number of tokens *text* encodes to for *model*.

    Args:
        text: The text to tokenize. May be empty.
        model: Model name used to select the tiktoken encoding.

    Returns:
        The length of the token-id list produced by the model's encoding.

    Raises:
        KeyError: Propagated from ``tiktoken.encoding_for_model`` when it
            cannot map *model* to an encoding.
    """
    enc = tiktoken.encoding_for_model(model)
    # By default tiktoken raises ValueError when the input merely contains the
    # spelling of a special token (e.g. "<|endoftext|>"). For counting
    # arbitrary text we want such substrings tokenized as ordinary text.
    return len(enc.encode(text, disallowed_special=()))

def estimate_cost(text: str, model: str = 'gpt-4o-mini') -> float:
    """Estimate the price of *text* from its token count and COST_PER_1K.

    Args:
        text: The text whose cost is being estimated.
        model: Model name; used both to count tokens and as the key into
            ``COST_PER_1K``.

    Returns:
        ``(token count / 1000) * COST_PER_1K[model]``.

    Raises:
        KeyError: If *model* has no entry in ``COST_PER_1K`` (the lookup is a
            plain subscript with no fallback).
    """
    token_total = count_tokens(text, model)
    # Keep the divide-then-multiply order so the float result is unchanged.
    thousands = token_total / 1000
    rate = COST_PER_1K[model]
    return thousands * rate

def split_by_tokens(text: str, max_tokens: int = 4000, model: str = 'gpt-4o-mini') -> list[str]:
    """Split *text* into consecutive pieces of at most *max_tokens* tokens.

    The text is encoded once, the token ids are sliced into fixed-size
    windows, and each window is decoded back to a string.

    Args:
        text: The text to split. An empty string yields an empty list.
        max_tokens: Maximum number of tokens per piece; must be >= 1.
        model: Model name used to select the tiktoken encoding.

    Returns:
        The decoded pieces in their original order. Every piece except
        possibly the last holds exactly *max_tokens* tokens.

    Raises:
        ValueError: If *max_tokens* is less than 1.
        KeyError: Propagated from ``tiktoken.encoding_for_model`` when it
            cannot map *model* to an encoding.
    """
    # A non-positive step would either make range() raise an opaque error (0)
    # or produce no windows at all (negative), silently dropping the text.
    if max_tokens < 1:
        raise ValueError(f'max_tokens must be a positive integer, got {max_tokens!r}')

    enc = tiktoken.encoding_for_model(model)
    # Treat special-token spellings (e.g. "<|endoftext|>") as ordinary text
    # instead of letting tiktoken raise ValueError on them.
    ids = enc.encode(text, disallowed_special=())

    # NOTE(review): a window boundary may fall inside a multi-byte character;
    # tiktoken documents decode() as lossy in that situation -- confirm this
    # is acceptable for the intended inputs.
    return [
        enc.decode(ids[start:start + max_tokens])
        for start in range(0, len(ids), max_tokens)
    ]

# Demo: build a repetitive sample and report its token count, estimated cost,
# and how many chunks it splits into, all with the default model/limits.
text = 'Machine learning is a branch of AI. ' * 100

token_total = count_tokens(text)
print(f'Tokens: {token_total}')

estimated = estimate_cost(text)
print(f'Est cost: ${estimated:.6f}')

pieces = split_by_tokens(text)
print(f'Chunks: {len(pieces)}')