python · intermediate
Batch Process Embeddings Efficiently
Generate embeddings for large text datasets efficiently with batching, caching, and rate limiting.
python — Press ⌘/Ctrl + Shift + C to copy
import time
import json
import hashlib
from pathlib import Path
from openai import OpenAI
client = OpenAI()
class EmbeddingCache:
    """Store text embeddings in a JSON file, keyed by a hash of the text.

    The whole cache lives in memory as a dict (``self.cache``) and is only
    written back to disk when ``save()`` is called.
    """

    def __init__(self, cache_file: str = "embeddings_cache.json"):
        """Load the cache from *cache_file*, starting empty if it is unusable.

        A missing file, a file that is not valid JSON (for example one left
        truncated by an interrupted write), or JSON whose top level is not an
        object all yield an empty cache instead of raising.
        """
        self.cache_file = Path(cache_file)
        self.cache = self._load()

    def _load(self) -> dict:
        """Read the cache file and return its contents, or ``{}`` if unusable."""
        try:
            data = json.loads(self.cache_file.read_text(encoding="utf-8"))
        except FileNotFoundError:
            return {}
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError: the cache is
            # regenerable data, so a corrupt file is discarded, not fatal.
            return {}
        # get()/set() need a mapping; any other JSON value is unusable.
        return data if isinstance(data, dict) else {}

    def _key(self, text: str) -> str:
        """Return the cache key: the first 16 hex digits of SHA-256(text)."""
        return hashlib.sha256(text.encode()).hexdigest()[:16]

    def get(self, text: str):
        """Return the embedding stored for *text*, or ``None`` if absent."""
        return self.cache.get(self._key(text))

    def set(self, text: str, embedding: list):
        """Store *embedding* for *text* in memory (call ``save()`` to persist)."""
        self.cache[self._key(text)] = embedding

    def save(self):
        """Write the cache to disk, replacing the previous file atomically.

        The JSON is written to a sibling ``<name>.tmp`` file first and then
        moved over the real file, so an interrupted save cannot leave a
        half-written cache behind.
        """
        tmp_file = self.cache_file.with_name(self.cache_file.name + ".tmp")
        tmp_file.write_text(json.dumps(self.cache), encoding="utf-8")
        tmp_file.replace(self.cache_file)
def batch_embed(
    texts: list[str],
    model: str = "text-embedding-3-small",
    batch_size: int = 100,
    requests_per_min: int = 500
) -> list[list[float]]:
    """Return one embedding per entry of *texts*, in input order.

    Texts found in the on-disk ``EmbeddingCache`` are served from it. Every
    remaining *distinct* text is sent to the embeddings API once, in batches,
    and the result is copied to each position where that text occurs.

    Args:
        texts: Strings to embed; duplicates are allowed.
        model: Embedding model name passed to the API.
        batch_size: Maximum number of texts per API request (at least 1).
        requests_per_min: Request budget; the function sleeps
            ``60 / requests_per_min`` seconds after each request.

    Returns:
        A list the same length as *texts*. Positions holding the same text
        share one embedding list object.

    Raises:
        ValueError: If *batch_size* is below 1 or *requests_per_min* is not
            positive.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if requests_per_min <= 0:
        raise ValueError("requests_per_min must be positive")

    # NOTE(review): cache keys are derived from the text alone, so entries
    # written under one model are returned for any other model too. Use a
    # separate cache file per model until the key includes the model name.
    cache = EmbeddingCache()
    results = [None] * len(texts)

    # Distinct uncached text -> every index where it appears, so a repeated
    # text costs one API input instead of one per occurrence.
    pending = {}
    for i, text in enumerate(texts):
        cached = cache.get(text)
        if cached:
            results[i] = cached
        else:
            pending.setdefault(text, []).append(i)

    miss_count = sum(len(positions) for positions in pending.values())
    print(f"Cache hits: {len(texts) - miss_count}/{len(texts)}")

    # Batch API calls for uncached texts.
    to_fetch = list(pending)
    delay = 60.0 / requests_per_min
    try:
        for batch_start in range(0, len(to_fetch), batch_size):
            batch = to_fetch[batch_start:batch_start + batch_size]
            response = client.embeddings.create(model=model, input=batch)
            # Relies on response.data being in the same order as the inputs.
            for text, emb in zip(batch, response.data):
                for idx in pending[text]:
                    results[idx] = emb.embedding
                cache.set(text, emb.embedding)
            time.sleep(delay)
    finally:
        # Persist even when a request fails part-way, so completed batches
        # are not paid for again on the next run.
        cache.save()
    return results
# Usage
# "Hello world" appears twice. batch_embed checks the cache for every text
# before it makes any API request, so the repeated entry is a cache hit only
# when an earlier run already saved it to the cache file.
texts = ["Hello world", "Machine learning", "Hello world"] # contains a duplicate
embeddings = batch_embed(texts)
print(f"Generated {len(embeddings)} embeddings")
Use Cases
- Large-scale indexing
- Search engine building
- Document processing
Tags
Related Snippets
Similar patterns you can reuse in the same workflow.
typescript · advanced
Semantic Caching Layer for LLM Calls
Cache LLM responses by semantic similarity of prompts to reduce API costs and improve latency.
Best for: Reducing LLM API costs for repeated queries
#caching #embeddings
typescript · advanced
RAG Pipeline Implementation
Build a retrieval-augmented generation pipeline that grounds LLM answers in your own documents.
Best for: Grounding LLM answers in private documents
#ai #rag
python · advanced
Build a RAG Pipeline with LangChain
Implement retrieval-augmented generation using LangChain, embeddings, and a vector store.
Best for: Knowledge base Q&A
#ai #langchain
python · intermediate
Semantic Similarity Search with Embeddings
Compute and compare text embeddings for semantic search and matching.
Best for: Semantic search
#ai #embeddings