Python · Advanced

Async AI Inference with Celery

Offload slow LLM inference to Celery background workers with Redis as broker and result backend.

Python — press ⌘/Ctrl + Shift + C to copy

import json

from celery import Celery
from openai import APIConnectionError, InternalServerError, OpenAI, RateLimitError

# Celery application named 'ai_tasks'. Redis database 0 is the message broker
# and Redis database 1 is the result backend, both on localhost:6379.
app = Celery(
    'ai_tasks',
    broker='redis://localhost:6379/0',
    backend='redis://localhost:6379/1',
)

# Single module-level OpenAI client reused by every task invocation in this
# process. It is built with no arguments, so all settings come from the SDK's
# defaults.
client = OpenAI()

@app.task(bind=True, max_retries=3, default_retry_delay=5)
def generate_summary(self, text: str, max_tokens: int = 150) -> dict:
    """Summarise *text* with ``gpt-4o-mini`` from inside a Celery worker.

    Args:
        text: Content to summarise; sent as the user message.
        max_tokens: Upper bound on completion tokens requested from the model.

    Returns:
        A dict with two keys: ``'summary'`` holds the first choice's message
        content, and ``'tokens'`` holds ``usage.total_tokens`` (``None`` when
        the response carries no usage block).

    Raises:
        celery.exceptions.Retry: Signalled through ``self.retry`` when the API
            call fails with a transient error; the task is re-queued up to
            ``max_retries`` times, ``default_retry_delay`` seconds apart.
            Once retries are exhausted the original API error propagates.
    """
    # Keep the try body to the network call only: a failure while reading the
    # response must not re-queue the task and pay for another completion.
    try:
        resp = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[
                {'role': 'system', 'content': 'Summarise concisely.'},
                {'role': 'user', 'content': text},
            ],
            max_tokens=max_tokens,
        )
    except (APIConnectionError, RateLimitError, InternalServerError) as exc:
        # Only transient failures (network/timeouts, rate limiting, 5xx) are
        # worth retrying. Authentication errors, invalid requests and
        # programming errors fail immediately instead of burning retries.
        raise self.retry(exc=exc)

    # The SDK types `usage` as optional, so guard before dereferencing it.
    usage = resp.usage
    return {
        'summary': resp.choices[0].message.content,
        'tokens': usage.total_tokens if usage is not None else None,
    }

# Send task and check result.
# Guarded so that this demo only runs when the file is executed as a script.
# A Celery worker imports this module to discover `generate_summary`; without
# the guard, that import would enqueue a task and block on `.get()`.
# NOTE(review): when run as a script the task name is derived from the app
# name ('ai_tasks'); presumably this file is saved as ai_tasks.py so the
# worker registers the same name. Confirm.
if __name__ == '__main__':
    task = generate_summary.delay(
        'Python is a versatile programming language used in web development, data science, and AI. It is known for its readability.'
    )
    # Block for up to 60 seconds waiting for a worker to publish the result.
    result = task.get(timeout=60)
    print(json.dumps(result, indent=2))