typescript · intermediate

OpenAI Vision API Image Analysis

Analyze images using GPT-4o vision capabilities with base64 and URL inputs.

typescript
import OpenAI from 'openai';
import * as fs from 'fs';

// Shared OpenAI client used by the helper functions below. No options are
// passed, so credentials and configuration come from the SDK's defaults
// (presumably the OPENAI_API_KEY environment variable — confirm against the
// SDK documentation).
const openai = new OpenAI();

/**
 * Ask GPT-4o about an image referenced by URL.
 *
 * Sends one user message made of the text prompt followed by the image
 * reference (requested at `detail: 'high'`), with the completion capped at
 * 500 tokens.
 *
 * @param imageUrl - URL forwarded unchanged as the `image_url` of the request.
 * @param prompt - Text instruction sent alongside the image.
 * @returns The `content` of the first choice's message.
 */
async function analyzeImageUrl(imageUrl: string, prompt: string) {
  const textPart = { type: 'text' as const, text: prompt };
  const imagePart = {
    type: 'image_url' as const,
    image_url: { url: imageUrl, detail: 'high' as const },
  };

  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: [textPart, imagePart] }],
    max_tokens: 500,
  });

  // Only the first returned choice is surfaced to the caller.
  const [firstChoice] = completion.choices;
  return firstChoice.message.content;
}

/**
 * Ask GPT-4o about an image stored on the local filesystem.
 *
 * The file is read asynchronously, base64-encoded, and embedded in the request
 * as a `data:` URL. The MIME type in that URL is derived from the file
 * extension (case-insensitively); extensions that are not recognised fall back
 * to `image/jpeg`, matching the previous default.
 *
 * @param imagePath - Path of the image file to read.
 * @param prompt - Text instruction sent alongside the image.
 * @returns The `content` of the first choice's message.
 * @throws Rejects with the underlying filesystem error if the file cannot be
 *   read, or with whatever error the API call raises.
 */
async function analyzeLocalImage(imagePath: string, prompt: string): Promise<string | null> {
  // A Map (rather than a plain object) so that extensions such as
  // "constructor" cannot resolve to inherited Object.prototype members.
  const mimeByExtension = new Map<string, string>([
    ['png', 'image/png'],
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
  ]);

  // Take the text after the last dot; a path without a dot has no extension.
  const dotIndex = imagePath.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : imagePath.slice(dotIndex + 1).toLowerCase();
  const mimeType = mimeByExtension.get(extension) ?? 'image/jpeg';

  // Non-blocking read: a synchronous read would stall the event loop for the
  // duration of the disk access inside this async function.
  const imageBuffer = await fs.promises.readFile(imagePath);
  const base64Image = imageBuffer.toString('base64');

  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [{
      role: 'user',
      content: [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64Image}` } },
      ],
    }],
    max_tokens: 500,
  });
  return response.choices[0].message.content;
}

/**
 * Ask GPT-4o a single question about several images at once.
 *
 * Builds one user message whose content is the text prompt followed by one
 * `image_url` part per entry in `imageUrls` (in the given order), with the
 * completion capped at 1000 tokens.
 *
 * @param imageUrls - URLs forwarded unchanged, one image part each.
 * @param prompt - Text instruction sent ahead of the images.
 * @returns The `content` of the first choice's message.
 */
async function compareImages(imageUrls: string[], prompt: string) {
  // Text part first, then every image in caller-supplied order.
  const parts = [
    { type: 'text' as const, text: prompt },
    ...imageUrls.map((url) => ({ type: 'image_url' as const, image_url: { url } })),
  ];

  const completion = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [{ role: 'user', content: parts }],
    max_tokens: 1000,
  });

  // Only the first returned choice is surfaced to the caller.
  const [firstChoice] = completion.choices;
  return firstChoice.message.content;
}

Use Cases

  • image captioning
  • visual QA
  • document OCR

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.