TypeScript · Advanced

RAG Pipeline (Retrieve + Augment + Generate)

Minimal RAG implementation: embed a query, retrieve top-k chunks, inject into prompt.

Example (TypeScript):
import OpenAI from "openai";

// Shared OpenAI client; reads the API key from the OPENAI_API_KEY env var.
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// A retrievable unit of text paired with its precomputed embedding vector.
type Chunk = { text: string; embedding: number[] };

/**
 * Cosine similarity between two equal-length vectors.
 *
 * Returns 0 when either vector has zero magnitude instead of NaN from
 * division by zero — NaN scores would make the sort in `rag` unstable
 * and effectively random for those chunks.
 *
 * Assumes `a` and `b` have the same length (true for embeddings from
 * the same model).
 */
function cosine(a: number[], b: number[]): number {
  const dot = a.reduce((s, v, i) => s + v * b[i], 0);
  const mag = (v: number[]) => Math.sqrt(v.reduce((s, x) => s + x * x, 0));
  const denom = mag(a) * mag(b);
  return denom === 0 ? 0 : dot / denom;
}

/**
 * Embed one piece of text with OpenAI's `text-embedding-3-small` model
 * and return the raw embedding vector.
 */
async function embed(text: string) {
  const response = await client.embeddings.create({
    model: "text-embedding-3-small",
    input: text,
  });
  return response.data[0].embedding;
}

export async function rag(query: string, chunks: Chunk[], topK = 3) {
  const qEmbed = await embed(query);

  const ranked = chunks
    .map(c => ({ ...c, score: cosine(qEmbed, c.embedding) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);

  const context = ranked.map(c => c.text).join("\n\n");

  const res = await client.chat.completions.create({
    model: "gpt-4o",
    messages: [
      { role: "system", content: `Answer using only this context:\n${context}` },
      { role: "user",   content: query },
    ],
  });

  return res.choices[0].message.content;
}

Use Cases

  • Document Q&A
  • Knowledge-base chat
  • Context-aware AI assistants

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.