Python · Beginner

Whisper Audio Transcription Pipeline

Transcribe audio files to text using the OpenAI Whisper API, with language detection and timestamps.

python
from openai import OpenAI
from pathlib import Path

# Module-level API client; transcribe() below sends its requests through it.
# NOTE(review): no credentials are passed here, so the SDK presumably reads
# them from the environment — confirm before deploying.
client = OpenAI()

def transcribe(audio_path: str, language: str | None = None) -> dict:
    """Transcribe an audio file with the Whisper API and summarize the result.

    Args:
        audio_path: Path of the audio file to upload.
        language: Optional language hint forwarded to the API. When ``None``
            the parameter is left out of the request instead of being sent
            as an explicit null/empty value.

    Returns:
        A dict with the keys ``'text'``, ``'language'``, ``'duration'`` and
        ``'segments'``. ``'segments'`` is a list of dicts, each holding the
        ``'start'``, ``'end'`` and ``'text'`` of one segment; it is empty
        when the response carries no segments.

    Raises:
        OSError: If ``audio_path`` cannot be opened for reading.
    """
    request_options: dict = {
        'model': 'whisper-1',
        'response_format': 'verbose_json',
        # Word-level timestamps are requested alongside segments, although
        # only the segment data is copied into the returned dict.
        'timestamp_granularities': ['word', 'segment'],
    }
    # Only send a language hint when the caller actually supplied one.
    if language is not None:
        request_options['language'] = language

    with open(audio_path, 'rb') as f:
        transcript = client.audio.transcriptions.create(file=f, **request_options)

    # The segment list may be missing (None) on the response object; treat
    # that as "no segments" rather than failing while iterating.
    segments = transcript.segments or []
    return {
        'text':     transcript.text,
        'language': transcript.language,
        'duration': transcript.duration,
        'segments': [{'start': s.start, 'end': s.end, 'text': s.text} for s in segments],
    }

# Example run: transcribe a sample recording and print a short summary.
result = transcribe('meeting.mp3')
print(f'Language: {result["language"]}, Duration: {result["duration"]:.1f}s')
# Show only the first 200 characters of the transcript as a preview.
preview = result['text'][:200]
print(preview)

Use Cases

  • meeting transcription
  • audio indexing
  • accessibility features

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.