python · intermediate

ChromaDB Vector Database Operations

Store and query vector embeddings using ChromaDB for semantic search and RAG applications.

python
import os

import chromadb
from chromadb.utils import embedding_functions

# Persistent client: the collection is stored on disk under ./chroma_db, so
# its contents carry over from one run of this script to the next.
client = chromadb.PersistentClient(path="./chroma_db")

# Read the OpenAI key from the environment rather than hard-coding a secret
# in source. A missing variable raises KeyError here, before any API call.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)

# "hnsw:space" selects the distance metric used by the collection's index.
collection = client.get_or_create_collection(
    name="documents",
    embedding_function=openai_ef,
    metadata={"hnsw:space": "cosine"},
)

# Seed documents (embeddings are generated by the collection's embedding
# function). upsert is used instead of add because the ids are fixed and the
# store is persistent: on a re-run these ids may already exist.
collection.upsert(
    documents=[
        "Python is a versatile programming language",
        "JavaScript powers the modern web",
        "Rust provides memory safety without GC",
    ],
    metadatas=[
        {"lang": "python", "type": "general"},
        {"lang": "javascript", "type": "web"},
        {"lang": "rust", "type": "systems"},
    ],
    ids=["doc1", "doc2", "doc3"],
)

# Semantic search, restricted by the metadata filter to records whose "type"
# is "web". Only one seeded document carries that value, so fewer than
# n_results hits may come back.
results = collection.query(
    query_texts=["Which language is best for web?"],
    n_results=2,
    where={"type": "web"},
)
print(results["documents"])
print(results["distances"])

# Update: replace both the text and the metadata of an existing record.
collection.update(
    ids=["doc1"],
    documents=["Python is widely used in AI and data science"],
    metadatas=[{"lang": "python", "type": "ai"}],
)

# Delete by id, then report how many records remain in the collection.
collection.delete(ids=["doc3"])
print(f"Total: {collection.count()}")

Use Cases

  • semantic search
  • RAG context retrieval
  • document similarity

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.