# Whisper Audio Transcription

Transcribe audio files to text using the OpenAI Whisper API, with automatic language detection and segment-level timestamps.
```typescript
import OpenAI from 'openai';
import * as fs from 'fs';

// Reads OPENAI_API_KEY from the environment by default.
const openai = new OpenAI();

export async function transcribe(filePath: string) {
  const file = fs.createReadStream(filePath);

  const transcription = await openai.audio.transcriptions.create({
    file,
    model: 'whisper-1',
    // verbose_json is required to receive language, duration, and segments.
    response_format: 'verbose_json',
    timestamp_granularities: ['segment'],
  });

  return {
    text: transcription.text,
    language: transcription.language, // language detected by Whisper
    duration: transcription.duration, // audio length in seconds
    segments: transcription.segments?.map((s) => ({
      start: s.start, // segment start, in seconds
      end: s.end,
      text: s.text,
    })),
  };
}

// Usage:
// const result = await transcribe('./podcast-episode.mp3');
// console.log(result.text);
// result.segments?.forEach(s => console.log(`[${s.start}s] ${s.text}`));
```

## Use Cases
- Podcast transcription
- Meeting notes
- Voice command processing
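For the subtitle-style use cases above, the segment timestamps can be turned directly into SRT subtitle text. Below is a minimal sketch; the `Segment` interface mirrors the shape returned by `transcribe()`, and the `segmentsToSrt` and `toTimestamp` names are illustrative helpers, not part of the OpenAI SDK.

```typescript
// Shape of one entry in the `segments` array returned by transcribe().
interface Segment {
  start: number; // seconds
  end: number;   // seconds
  text: string;
}

// Format seconds as an SRT timestamp: HH:MM:SS,mmm
function toTimestamp(seconds: number): string {
  const ms = Math.round(seconds * 1000);
  const pad = (n: number, w = 2) => String(n).padStart(w, '0');
  const h = Math.floor(ms / 3600000);
  const m = Math.floor((ms % 3600000) / 60000);
  const s = Math.floor((ms % 60000) / 1000);
  return `${pad(h)}:${pad(m)}:${pad(s)},${pad(ms % 1000, 3)}`;
}

// Render segments as SRT: a numbered cue, a time range, then the text.
export function segmentsToSrt(segments: Segment[]): string {
  return segments
    .map(
      (seg, i) =>
        `${i + 1}\n${toTimestamp(seg.start)} --> ${toTimestamp(seg.end)}\n${seg.text.trim()}\n`,
    )
    .join('\n');
}
```

Writing the result to a `.srt` file next to the source audio gives subtitles most video players can load directly.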
## Related Snippets

Similar patterns you can reuse in the same workflow:

- **OpenAI Text-to-Speech**: generate natural speech audio from text using the OpenAI TTS API, with multiple voice options and formats.
- **OpenAI Chat Completion with Streaming**: stream GPT responses token-by-token using the OpenAI SDK with async iteration.
- **Generate Text Embeddings with OpenAI**: create vector embeddings for semantic search and similarity matching using text-embedding-3-small.
- **RAG Pipeline (Retrieve + Augment + Generate)**: a minimal RAG implementation that embeds a query, retrieves the top-k chunks, and injects them into the prompt.