typescript · intermediate
Ollama Local LLM Inference
Run local LLM inference using Ollama REST API with streaming and model management.
typescript — Press ⌘/Ctrl + Shift + C to copy
// Ollama local inference
/**
 * Sends a single prompt to the local Ollama `/api/generate` endpoint with
 * streaming disabled and returns the generated text.
 *
 * @param prompt - Prompt text forwarded to the model.
 * @param model - Name of the Ollama model to run.
 * @returns The `response` string from the JSON body returned by the server.
 * @throws Error when the server answers with a non-2xx status, or when the
 *   JSON body does not carry a string `response` field.
 */
async function chatWithOllama(
  prompt: string,
  model = 'llama3.1'
): Promise<string> {
  const response = await fetch('http://localhost:11434/api/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt, stream: false }),
  });

  // Without this check an error payload would be parsed like a success and
  // the function would resolve to `undefined` despite its declared type.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama /api/generate failed with HTTP ${response.status}` +
        (detail ? `: ${detail}` : '')
    );
  }

  const data = (await response.json()) as { response?: unknown } | null;
  const text = data?.response;
  if (typeof text !== 'string') {
    throw new Error('Ollama /api/generate returned no text "response" field');
  }
  return text;
}
// Streaming response
/**
 * Streams a completion from the local Ollama `/api/generate` endpoint,
 * yielding each non-empty `response` fragment as it arrives.
 *
 * The body is read as newline-delimited JSON. Network chunks do not have to
 * line up with line boundaries, so incomplete trailing text is buffered until
 * the rest of the line arrives.
 *
 * @param prompt - Prompt text forwarded to the model.
 * @param model - Name of the Ollama model to run.
 * @returns An async generator of text fragments in arrival order.
 * @throws Error when the server answers with a non-2xx status or the response
 *   has no body; `SyntaxError` if a complete line is not valid JSON.
 */
async function* streamOllama(
  prompt: string,
  model = 'llama3.1'
): AsyncGenerator<string> {
  const response = await fetch('http://localhost:11434/api/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt, stream: true }),
  });

  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama /api/generate failed with HTTP ${response.status}` +
        (detail ? `: ${detail}` : '')
    );
  }
  if (response.body == null) {
    throw new Error('Ollama /api/generate returned a response without a body');
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  // Text received so far that does not yet end in a newline.
  let buffered = '';

  try {
    let finished = false;
    while (!finished) {
      const { done, value } = await reader.read();
      finished = done;

      // `stream: true` keeps a multi-byte character that straddles two chunks
      // intact. On the final pass the decoder is flushed and the buffer is
      // newline-terminated so a last line without a trailing newline is
      // still processed.
      buffered += done
        ? decoder.decode() + '\n'
        : decoder.decode(value, { stream: true });

      const lines = buffered.split('\n');
      // The last element is either '' or an incomplete line: keep it for later.
      buffered = lines.pop() ?? '';

      for (const line of lines) {
        if (line.trim() === '') continue;
        const json = JSON.parse(line) as { response?: string };
        if (json.response) yield json.response;
      }
    }
  } finally {
    // Runs on normal completion, on error, and when the consumer stops
    // iterating early; cancelling releases the underlying stream.
    await reader.cancel().catch(() => undefined);
  }
}
// Chat with conversation history
/**
 * Sends a full message history to the local Ollama `/api/chat` endpoint with
 * streaming disabled and resolves to the parsed JSON body of the reply.
 *
 * @param messages - Conversation so far, as role/content pairs.
 * @param model - Name of the Ollama model to run.
 * @returns The parsed JSON response body, passed through as-is.
 * @throws Error when the server answers with a non-2xx status.
 */
async function chatConversation(
  messages: { role: string; content: string }[],
  model = 'llama3.1'
) {
  const response = await fetch('http://localhost:11434/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, messages, stream: false }),
  });

  // Surface HTTP failures instead of handing an error payload back to the
  // caller as though it were a chat result.
  if (!response.ok) {
    const detail = await response.text().catch(() => '');
    throw new Error(
      `Ollama /api/chat failed with HTTP ${response.status}` +
        (detail ? `: ${detail}` : '')
    );
  }

  return response.json();
}
const result = await chatWithOllama('Explain closures in JavaScript');
Use Cases
- local development
- privacy-sensitive inference
- offline AI
Tags
Related Snippets
Similar patterns you can reuse in the same workflow.
python · beginner
Local LLM with Ollama Python Client
Run local open-source models with Ollama and stream responses using the Python API.
Best for: local AI
#ollama #local-llm
typescript · intermediate
Next.js AI Streaming Route Handler
Stream OpenAI responses from a Next.js App Router route handler using the Vercel AI SDK.
Best for: AI chatbot backend
#nextjs #openai
python · beginner
Stream LLM Chat Responses
Stream OpenAI chat completions token-by-token for real-time UI updates.
Best for: Chat UIs
#ai #streaming
typescript · intermediate
OpenAI Chat Completion with Streaming
Stream GPT responses token-by-token using the OpenAI SDK with async iteration.
Best for: chatbot UI
#openai #streaming