Python · Intermediate

Cosine Similarity Semantic Search in Python

Implement semantic search with NumPy cosine similarity over OpenAI embeddings.

python
import numpy as np
from openai import OpenAI

# Shared API client used by embed(). It is constructed with no arguments, so
# its configuration comes from the SDK's defaults -- presumably the
# OPENAI_API_KEY environment variable; confirm against the SDK docs.
client = OpenAI()

def embed(texts: list[str], model: str = 'text-embedding-3-small') -> np.ndarray:
    """Embed *texts* with the module-level OpenAI client.

    Args:
        texts: Strings to embed; all of them are sent in a single request.
        model: Name of the embedding model to request. Defaults to the
            model this function previously hard-coded.

    Returns:
        A float32 array with one row per item in the response data, in
        response order. When ``texts`` is empty no request is made and an
        empty array of shape ``(0, 0)`` is returned, because the embedding
        width cannot be known without a response.
    """
    if not texts:
        # Skip the round trip for an empty batch. NOTE(review): the API is
        # expected to reject an empty input list -- confirm against the
        # embeddings API reference.
        return np.empty((0, 0), dtype='float32')

    resp = client.embeddings.create(input=texts, model=model)
    return np.array([e.embedding for e in resp.data], dtype='float32')

def cosine_search(query: str, docs: list[str], top_k: int = 3) -> list[tuple[float, str]]:
    """Rank *docs* by cosine similarity to *query* and return the best matches.

    Args:
        query: Text to search for.
        docs: Candidate documents; every one is embedded on each call.
        top_k: Maximum number of results to return.

    Returns:
        Up to ``top_k`` ``(score, document)`` pairs, highest score first.
        The list is empty when ``docs`` is empty or ``top_k`` is not
        positive; in those cases no embeddings are requested.
    """
    # Nothing to rank: return before spending any embedding requests. This
    # also stops a negative top_k from slicing as "all but the last k".
    if not docs or top_k <= 0:
        return []

    doc_embs = embed(docs)
    query_emb = embed([query])[0]

    dots = doc_embs @ query_emb
    norms = np.linalg.norm(doc_embs, axis=1) * np.linalg.norm(query_emb)
    # A zero-length vector would give 0/0 = NaN; argsort places NaN last, so
    # after the reversal below such rows would rank first. Score them 0.
    scores = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

    top_idx = np.argsort(scores)[::-1][:top_k]
    return [(float(scores[i]), docs[i]) for i in top_idx]

# Demo corpus: four short topic strings to rank against a single query.
docs = [
    'Python for data science',
    'JavaScript web frameworks',
    'SQL database queries',
    'Machine learning pipelines',
]

# Print every hit returned for the query (default top_k) as "<score>: <text>",
# with the score shown to three decimal places.
for score, doc in cosine_search('ML and data analysis', docs):
    print(f'{score:.3f}: {doc}')

Use Cases

  • semantic search
  • document retrieval
  • Q&A systems

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.