python · beginner

Whisper Audio Transcription Pipeline

Transcribe audio files to text using the OpenAI Whisper API, with language detection and timestamps.

python — Press ⌘/Ctrl + Shift + C to copy

from openai import OpenAI
from pathlib import Path

# Shared API client used by transcribe(). It is constructed with no arguments;
# NOTE(review): presumably the SDK then takes its API key from the environment
# (e.g. OPENAI_API_KEY) — confirm against the openai package documentation.
client = OpenAI()

def transcribe(audio_path: str, language: str | None = None) -> dict:
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    The file is uploaded through the module-level ``client`` and a
    ``verbose_json`` response with word- and segment-level timestamps is
    requested.

    Args:
        audio_path: Path of the audio file to upload; opened in binary mode.
        language: Optional language hint forwarded to the API (the API
            reference describes it as an ISO-639-1 code such as ``'en'``).
            When ``None`` the parameter is left out of the request.

    Returns:
        A dict with the keys ``text``, ``language`` and ``duration`` taken
        from the response, ``segments`` (a list of ``{'start', 'end',
        'text'}`` dicts) and ``words`` (a list of ``{'start', 'end',
        'word'}`` dicts). Both lists are empty when the response carries
        no such entries.

    Raises:
        OSError: If ``audio_path`` cannot be opened.
        Errors raised by the client call propagate unchanged.
    """
    request_options = {
        'model': 'whisper-1',
        'response_format': 'verbose_json',
        'timestamp_granularities': ['word', 'segment'],
    }
    # Only send a language hint when the caller supplied one; the SDK types
    # this parameter as a string, so None is never forwarded.
    if language is not None:
        request_options['language'] = language

    with open(audio_path, 'rb') as audio_file:
        transcript = client.audio.transcriptions.create(
            file=audio_file,
            **request_options,
        )

    # The SDK models both lists as optional, so a missing list is treated as
    # empty instead of failing on iteration.
    segments = transcript.segments or []
    words = getattr(transcript, 'words', None) or []

    return {
        'text': transcript.text,
        'language': transcript.language,
        'duration': transcript.duration,
        'segments': [
            {'start': s.start, 'end': s.end, 'text': s.text} for s in segments
        ],
        # Word timestamps are requested above, so expose them to callers
        # rather than discarding them.
        'words': [
            {'start': w.start, 'end': w.end, 'word': w.word} for w in words
        ],
    }

# Example usage: transcribe the file 'meeting.mp3', then print the language
# and duration from the result followed by the first 200 characters of text.
result = transcribe('meeting.mp3')
summary = f'Language: {result["language"]}, Duration: {result["duration"]:.1f}s'
print(summary)
print(result['text'][:200])