python · intermediate

Semantic Similarity Search with Embeddings

Compute and compare text embeddings for semantic search and matching.

python
import numpy as np
from openai import OpenAI

# Module-level OpenAI client shared by get_embeddings below. It is built with
# no explicit arguments, so credentials/config come from the library defaults
# (presumably the OPENAI_API_KEY environment variable — confirm for your setup).
client = OpenAI()

def get_embeddings(texts: list[str]) -> np.ndarray:
    """Embed a batch of texts with the ``text-embedding-3-small`` model.

    Args:
        texts: Strings to embed. All of them are sent in one API request.

    Returns:
        An array built from the returned embeddings, one row per item in the
        response. For an empty ``texts`` an array of shape ``(0, 0)`` is
        returned and no API request is made.
    """
    if not texts:
        # Nothing to embed: skip the network round-trip instead of sending an
        # empty input list (which the endpoint is expected to reject).
        return np.empty((0, 0))

    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return np.array([item.embedding for item in response.data])

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity between two 1-D vectors.

    Args:
        a: First vector.
        b: Second vector, with the same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean norms, as a built-in ``float``. When either vector has zero
        norm the similarity is undefined, so ``0.0`` is returned.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid 0/0 -> nan (plus a RuntimeWarning); a nan score would also
        # make any later sort by similarity unreliable.
        return 0.0
    return float(np.dot(a, b) / denom)

# Corpus to search: every document is embedded once, up front.
documents = [
    "Python is a programming language",
    "Machine learning uses statistical models",
    "React is a JavaScript UI library",
    "Neural networks are inspired by the brain",
    "CSS is used for styling web pages"
]
doc_embeddings = get_embeddings(documents)

# Embed the query with the same model; the batch call returns one row, so
# take the first (and only) one.
query = "deep learning frameworks"
query_embedding = get_embeddings([query])[0]

# One similarity score per document, in the same order as `documents`.
scores = [
    cosine_similarity(query_embedding, embedding)
    for embedding in doc_embeddings
]

# Pair each score with its document and order the pairs from best to worst.
results = list(zip(scores, documents))
results.sort(reverse=True)

# Report the three closest matches.
for score, doc in results[:3]:
    print(f"  {score:.3f}: {doc}")

Use Cases

  • Semantic search
  • Document matching
  • Recommendation systems

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.